对内容摘要、信件内容进行全文检索,基于 Spring Boot 2.5.6 + PostgreSQL + JPA + Hibernate 实现。

依赖

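<properties>
    <spring-boot.version>2.5.6</spring-boot.version>
    <hibernate-types-52.version>2.14.0</hibernate-types-52.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-jpa</artifactId>
    </dependency>
    <dependency>
        <groupId>org.hibernate</groupId>
        <artifactId>hibernate-core</artifactId>
    </dependency>
    <dependency>
        <groupId>org.hibernate</groupId>
        <artifactId>hibernate-entitymanager</artifactId>
    </dependency>
    <dependency>
        <groupId>org.hibernate</groupId>
        <artifactId>hibernate-ehcache</artifactId>
    </dependency>
    <dependency>
        <groupId>com.vladmihalcea</groupId>
        <artifactId>hibernate-types-52</artifactId>
        <version>${hibernate-types-52.version}</version>
    </dependency>
    <dependency>
        <groupId>org.postgresql</groupId>
        <artifactId>postgresql</artifactId>
    </dependency>
</dependencies>

<dependencyManagement>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>${spring-boot.version}</version>
            <type>pom</type>
            <scope>import</scope>
        </dependency>
    </dependencies>
</dependencyManagement>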

业务逻辑

登记保存之后,处理完成业务逻辑,发送全文检索事件

// Attachment ids grouped by attachment category.
// NOTE(review): the generic type arguments were lost in the original listing; String category
// keys and String attachment ids are assumed here -- confirm against getFileCategory()/getId().
Map<String, List<String>> attCategoryToAttIds = new HashMap<>();
// A null value is stored when the record has no files.
attCategoryToAttIds.put(cmpRecord.getFileCategory(),
        files == null ? null : files.stream().map(d -> d.getId()).collect(Collectors.toList()));

// Payload read by the listener: the record id, a new-record flag and the attachment ids.
Map<String, Object> eventData = Utils.buildMap("recordId", cmpRecord.getId(), "newRecord", true,
        "attCategoryToAttIds", attCategoryToAttIds);

// Build the full-text-search event; it is created with ExecutePoint.CURR_THREAD.
DomainEvent de = new DefaultDomainEvent(cmpRecord.getId() + "_Handle_CmpRecord_FullTextSearch", operateInfo,
        ExecutePoint.CURR_THREAD, eventData, new Date(), "Handle_CmpRecord_FullTextSearch");

// Publish the event through the registered publisher.
DomainEventPublisherFactory.getRegisteredPublisher().publishEvent(de);

处理业务发送全文检索事件

@Service

@Transactional

@SuppressWarnings("unchecked")

public class HandleCmpRecordFullTextSearchListener implements IDomainEventListener {

@Autowired

private CmpRecordRepository cmpRecordRepository;

@Autowired

private DataChangeLogEventRepository dataChangeLogEventRepository;

@Override

public void onEvent(DomainEvent event) {

AccessTokenUser operator=event.getOperator();

Date operateTime=event.obtainEventTime();

Map otherData=(Map)event.getEventData();

String recordId = (String) otherData.get("recordId");

boolean newRecord=(boolean)otherData.get("newRecord");

String comment = (String) otherData.get("comment");//办理记录的备注

if(StringUtils.isBlank(recordId)) {

throw new RuntimeException("未指定信访记录id");

}

//获取登记信息

CmpRecord cmdRecord = cmpRecordRepository.getCmpRecordById(recordId);

//指定关联关系

RelateProjValObj cmpRdProj=new RelateProjValObj(recordId,RelateProjConstants.PROJTYPE_CMP_RECORD);

//这是关联那个业务

List mainProjs=Arrays.asList(cmpRdProj);

DomainEvent de=null;

//登记信息是无效的 则删除已存在的和这个件相关的

if(cmdRecord==null||!cmdRecord.isValidEntity()) {

//删除全文检索信息

de=new FullTextSearchOperateEvent(recordId+"_FullTextSearch_Remove", null, operator, operateTime,

mainProjs, null);

DomainEventPublisherFactory.getRegisteredPublisher().publishEvent(de);

return;

}

//全文检索 类型前缀

String contentTypepPefix=RelateProjConstants.PROJTYPE_CMP_RECORD;

//在当前线程中执行,保证事务一致性

ExecutePoint executePoint=ExecutePoint.CURR_THREAD;

/***********************************************关键词检索-内容摘要***********************************************/

//全文检索的类型 区分内容摘要 附件内容

List contentTypes=Arrays.asList(contentTypepPefix+"_contentAbstract");

String contentAbstract =cmdRecord.getBaseInfo().getContentAbstract();//内容摘要

if(StringUtils.isBlank(contentAbstract)) contentAbstract="";

if(StringUtils.isNotBlank(comment)) {

if(StringUtils.isNotBlank(contentAbstract)) contentAbstract=contentAbstract + ",";

contentAbstract=contentAbstract+comment;

}

de=new FullTextSearchOperateEvent(recordId+"_FullTextSearch_Update", executePoint, operator, operateTime,

mainProjs, contentTypes, contentAbstract, null);

DomainEventPublisherFactory.getRegisteredPublisher().publishEvent(de);

/***********************************************关键词检索-信件内容***********************************************/

contentTypes=Arrays.asList(contentTypepPefix+"_content");

String content =cmdRecord.getBaseInfo().getContent();//信件内容

de=new FullTextSearchOperateEvent(recordId+"_FullTextSearch_Update", executePoint, operator, operateTime,

mainProjs, contentTypes, content, null);

DomainEventPublisherFactory.getRegisteredPublisher().publishEvent(de);

/***********************************************关键词检索-附件(原信等)***********************************************/

//如果附件也需要检索 设置attIds参数

Map> attCategoryToAttIds=(Map>)otherData.get("attCategoryToAttIds");

if(attCategoryToAttIds!=null && attCategoryToAttIds.size() > 0) {

//按附件类型分开

for (Map.Entry> d : attCategoryToAttIds.entrySet()) {

contentTypes=Arrays.asList(contentTypepPefix+"_att_"+d.getKey());

List attIds=d.getValue();//公文相关附件

de=new FullTextSearchOperateEvent(recordId+"_att_"+d.getKey()+"_FullTextSearch_Update", executePoint,

operator, operateTime, mainProjs, contentTypes, null, attIds);

DomainEventPublisherFactory.getRegisteredPublisher().publishEvent(de);

}

}

}

@Override

public boolean listenOn(String eventType) {

return "Handle_CmpRecord_FullTextSearch".equals(eventType);

}

}

统一处理全文检索事件

/**
 * Central handler for {@link FullTextSearchOperateEvent}s: creates, updates or removes
 * {@link FullTextSearch} rows according to the event type.
 *
 * <p>NOTE(review): generic type arguments were lost in the original listing and have been
 * reconstructed from usage; the String element type of the attachment id list and the String
 * keys of the attachment-content map are assumptions to confirm.
 */
@Service
@Transactional
public class FullTextSearchListener extends JpaHibernateRepository implements IDomainEventListener {

    @Autowired
    private FullTextSearchRepository fullTextSearchRepository;

    @Autowired
    private IFileSysService fileSysService;

    /**
     * Dispatches update and remove events. Does nothing when the {@code prefetchingRecordNo}
     * property is {@code "true"} or when the event is not a {@link FullTextSearchOperateEvent}.
     *
     * @param event the incoming domain event
     */
    @Override
    public void onEvent(DomainEvent event) {
        if ("true".equals(BaseConstants.getProperty("prefetchingRecordNo", "false"))) {
            return;
        }
        if (!(event instanceof FullTextSearchOperateEvent)) {
            return;
        }
        FullTextSearchOperateEvent de = (FullTextSearchOperateEvent) event;

        if (FullTextSearchOperateEvent.EVENTTYPE_UPDATE.equals(de.getEventType())) {
            /*
             * Update payload:
             *   "mainProjs":           List    required
             *   "contentType":         String  required
             *   "content":             String  optional
             *   "attIds":              List    optional; when neither content nor attIds is
             *                                  present the matching index entry is deleted
             *   "relProjs":            List    optional; relations to add
             *   "removeOtherRelProjs": false   optional; whether to clear relations other than
             *                                  the given relProjs
             */
            this.fullTextSearchUpdate(de);
        } else if (FullTextSearchOperateEvent.EVENTTYPE_REMOVE.equals(de.getEventType())) {
            /*
             * Remove payload:
             *   "mainProjs": List required
             */
            this.fullTextSearchRemoveByProjs(de);
        }
    }

    /**
     * Creates or updates the index entry of every valid main project in the event, or removes
     * the existing entries when the resulting content is blank.
     *
     * <p>The content comes from the event itself or, when that is blank and attachment ids are
     * given, from the text of those attachments.
     *
     * @throws RuntimeException if no main project, no usable main project or no single content
     *                          type is supplied
     */
    private void fullTextSearchUpdate(FullTextSearchOperateEvent de) {
        Date date = de.obtainEventTime();
        if (date == null) {
            date = new Date();
        }

        List<RelateProjValObj> mainProjs = de.getMainProjs();
        // Exactly one content type is expected for an update.
        String contentType = null;
        if (de.getContentTypes() != null && de.getContentTypes().size() == 1) {
            contentType = de.getContentTypes().get(0);
        }
        String content = de.getContent();
        List<String> attIds = de.getAttIds();

        if (mainProjs == null || mainProjs.size() == 0 || StringUtils.isBlank(contentType)) {
            throw new RuntimeException("数据指定错误");
        }

        // Keep only main projects with both an id and a type, and compute their entry ids.
        List<RelateProjValObj> validProjs = new ArrayList<>();
        Set<String> fullTextIds = new HashSet<>();
        for (RelateProjValObj mainProj : mainProjs) {
            if (StringUtils.isBlank(mainProj.getProjId()) || StringUtils.isBlank(mainProj.getProjType())) {
                continue;
            }
            validProjs.add(mainProj);
            fullTextIds.add(new FullTextSearch(mainProj, contentType, null, null).getId());
        }
        if (fullTextIds.size() == 0) {
            throw new RuntimeException("数据指定错误");
        }

        // No inline content: fall back to the text extracted from the attachments.
        if (StringUtils.isBlank(content) && attIds != null) {
            content = readAttachmentText(attIds);
        }

        // Entries already stored for the requested ids.
        List<FullTextSearch> oldFullTexts = this.fullTextSearchRepository.findFullTextSearchByIds(fullTextIds);

        // Blank content means "delete". Every existing entry is removed; the previous code
        // returned from inside the loop and therefore only handled the first main project.
        if (StringUtils.isBlank(content)) {
            for (FullTextSearch oldFullText : oldFullTexts) {
                this.fullTextSearchRepository.removeFullTextSearch(oldFullText);
            }
            return;
        }

        Map<String, FullTextSearch> oldFullTextMap =
                oldFullTexts.stream().collect(Collectors.toMap(FullTextSearch::getId, d -> d));

        for (RelateProjValObj mainProj : validProjs) {
            FullTextSearch fullText = new FullTextSearch(mainProj, contentType, content, date);
            if (oldFullTextMap.containsKey(fullText.getId())) {
                // Entry exists: overwrite its content and timestamp.
                this.fullTextSearchRepository.updateFullTextSearchContent(fullText.getId(), content, date);
                continue;
            }
            try {
                // No entry yet: create it.
                this.fullTextSearchRepository.createFullTextSearch(fullText);
            } catch (Exception e) {
                // Best effort, as in the original: report the failure and stop processing.
                // TODO(review): replace printStackTrace with the project's logger.
                e.printStackTrace();
                return;
            }
        }
    }

    /**
     * Joins the non-blank text of the given attachments with commas, stripping NUL characters.
     * Failures while reading are reported and whatever text was collected so far is returned.
     *
     * @param attIds attachment ids; must not be {@code null}
     * @return the collected text, or an empty string when nothing could be read
     */
    private String readAttachmentText(List<String> attIds) {
        String text = "";
        try {
            if (attIds.size() > 0) {
                Map<String, String> attIdToContent = ThreadLocalCache.fetchAPIData(null, () -> {
                    return fileSysService.findFileContentByIds(attIds, true);
                });
                for (String attContent : attIdToContent.values()) {
                    if (StringUtils.isBlank(attContent)) {
                        continue;
                    }
                    if (StringUtils.isNotBlank(text)) {
                        text += ",";
                    }
                    // Strip NUL characters, which the original marks as illegal
                    // (presumably because the text column cannot store them).
                    text += RegExUtils.replaceAll(attContent, "\\u0000", "");
                }
            }
        } catch (Exception e) {
            // Best effort, as in the original. TODO(review): use the project's logger.
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Bulk-deletes the index entries of the event's main projects, optionally restricted to the
     * event's content types.
     *
     * @throws RuntimeException if no main project is supplied
     */
    private void fullTextSearchRemoveByProjs(FullTextSearchOperateEvent de) {
        List<RelateProjValObj> mainProjs = de.getMainProjs();
        if (mainProjs == null || mainProjs.size() == 0) {
            throw new RuntimeException("数据指定错误");
        }

        List<String> projKeys = new ArrayList<>();
        for (RelateProjValObj mainProj : mainProjs) {
            projKeys.add(mainProj.getProjKey());
        }

        Map<String, Object> params = new HashMap<>();
        StringBuilder hql = new StringBuilder();
        hql.append("delete from ").append(FullTextSearch.class.getName()).append(" ");
        hql.append("where mainProj.projKey IN(:projKeys) ");
        params.put("projKeys", projKeys);

        if (de.getContentTypes() != null && de.getContentTypes().size() > 0) {
            // The parameter was previously bound without a matching clause in the statement,
            // so the content-type restriction never took effect.
            hql.append("and contentType IN(:contentTypes) ");
            params.put("contentTypes", de.getContentTypes());
        }

        this.createHQLQueryByMapParams(hql.toString(), params).executeUpdate();
    }

    /**
     * @param eventType the type of a published event; may be {@code null}
     * @return {@code true} when the type starts with the class name of
     *         {@link FullTextSearchOperateEvent}
     */
    @Override
    public boolean listenOn(String eventType) {
        return eventType != null && eventType.startsWith(FullTextSearchOperateEvent.class.getName());
    }
}

全文检索实体

/**
 * One full-text index entry, stored in table {@code TV_FULLTEXT_SEARCH}: the searchable text of
 * a single content type for a single related business object.
 */
@Entity
@Table(
    name="TV_FULLTEXT_SEARCH",
    indexes={
        @Index(name="idx_TV_FULLTEXT_SEARCH1",columnList="projKey"),
        @Index(name="idx_TV_FULLTEXT_SEARCH2",columnList="contentType")
    }
)
public class FullTextSearch extends IEntity {

    /** Primary key, built by the four-argument constructor as {@code projKey + "_" + contentType}. */
    @Id
    @Column(length=200)
    private String id;

    /** The related business object this entry was produced from. */
    private RelateProjValObj mainProj;

    /** The searchable text. */
    @Lob
    @Type(type="org.hibernate.type.TextType")
    private String content;

    /** The kind of text held in {@link #content}. */
    @Column(length=100)
    private String contentType;

    /** When the entry was last written. */
    @Column(length=100)
    private Date lastUpdateDate;

    /** No-argument constructor; leaves every field unset. */
    public FullTextSearch() {
    }

    /**
     * Builds an entry whose id is derived from the main project's key and the content type.
     *
     * @param mainProj       the related business object; its {@code getProjKey()} is read, so it
     *                       must not be {@code null}
     * @param contentType    the kind of text being indexed
     * @param content        the searchable text
     * @param lastUpdateDate the update time; the current time is used when {@code null}
     */
    public FullTextSearch(RelateProjValObj mainProj, String contentType,
            String content, Date lastUpdateDate) {
        this.mainProj = mainProj;
        this.contentType = contentType;
        this.content = content;
        this.id = mainProj.getProjKey()+"_"+contentType;
        this.lastUpdateDate = (lastUpdateDate != null) ? lastUpdateDate : new Date();
    }

    /** @return the entry id */
    public String getId() {
        return this.id;
    }

    /** @return the related business object */
    public RelateProjValObj getMainProj() {
        return this.mainProj;
    }

    /** @return the searchable text */
    public String getContent() {
        return this.content;
    }

    /** @return the kind of text held by this entry */
    public String getContentType() {
        return this.contentType;
    }

    /** @return the time of the last update */
    public Date getLastUpdateDate() {
        return this.lastUpdateDate;
    }
}

存储数据格式

查询

sql大致就是这样的逻辑

select tv.id from tv_cmp_dw_query tv join tv_fulltext_search tvs on tv.id = tvs.proj_id where tvs.content_type in (...) and tvs.content like '%测试%'

事件处理机制请看另一篇文章 自定义事件处理机制

文章来源

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。