Ver código fonte

合并优化变更内容

lsm 5 dias atrás
pai
commit
5ffcf4cc2e

+ 1 - 1
BiddingKG/dl/interface/extract.py

@@ -599,7 +599,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     data_res["pb_project_name"] = pb_json.get('pb').get('project_name_refind', '')
 
     # 更正内容
-    data_res['correction_content'] = correction_content[:1500]
+    data_res['change_content'] = correction_content[:500]
     # 资质要求
     data_res['aptitude'] = aptitude_text[:1500]
     # 采购内容

+ 2 - 2
BiddingKG/dl/interface/kvtree_search.py

@@ -18,7 +18,7 @@ pinmu_name_pattern = "采购品目(名称)?([::,]|$)"
 addr_bidopen_pattern = "([开评]标|开启|评选|比选|磋商|遴选|寻源|采购|招标|竞价|议价|委托|询比?价|比价|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|递交\w{,4}文件)[))]?(时间[与及和、])?(地址|地点)([与及和、]时间)?([::,]|$)"
 addr_bidsend_pattern = "((\w{,4}文件)?(提交|递交)(\w{,4}文件)?|投标)(截止时间[与及和、])?地[点址]([与及和、]截止时间)?([::,]|$)"
 
-change_content_pattern = "(变更|更正)(事项[与及和、])?(信息|内容|说明|事项)|现做出如下变更|变更内容如下"
+change_content_pattern = "(变更|更正)(事项[与及和、])?(信息|内容|事项)|现做出如下变更|变更内容如下"
 change_time_pattern = "(变更|更正)日期"
 pattern_dic_single = {
     'requirement': requirement_pattern,
@@ -60,7 +60,7 @@ def get_kvtree_value(html):
                     kv_single_dic[k] = value
             elif re.search('[\u4e00-\u9fa5]{2,}', value): # 包含两个中文以上的才要
                 if k == 'change_content':
-                    value = re.sub('\s', '', value)[:200] # 变更内容去掉空格并限制200字
+                    value = re.sub('\n+', '', value)[:500] # strip newlines (other whitespace is kept) from the change content and cap it at 500 characters
                 kv_single_dic[k] = value
     for k, v in pattern_dic_addr.items():
         kv_l = _pd.extract_kv(v)