hai 2 anos · 8724d4e57b
--- a/BiddingKG/dl/bidway/re_bidway.py
+++ b/BiddingKG/dl/bidway/re_bidway.py
@@ -200,6 +200,7 @@ import re
 
				 #
			
 
				 #     return output_list[0], text_index_list[0]
			
 
				 
			
 
				+normal_bidway = "公开招标|邀请招标|竞争性谈判|竞争性磋商|单一来源|框架协议|询价"
			
 
				 
			
 
				 bidway = '单一来源' \
			
 
				          '|国内竞争性磋商|竞争性磋商|竞争性谈判|网络竞价|网上竞价|公开竞谈|公开竞价|电子竞价|竞价|竞标|竞谈竞价|电子书面竞投' \
			
@@ -210,17 +211,17 @@ bidway = '单一来源' \
 
				          '|网上询价|公开询价|非定向询价|定向询价|询比价|询单|询价|询比' \
			
 
				          '|库内邀请|库内公开发包|内部邀标' \
			
 
				          '|定点采购议价|定点采购' \
			
 
				-         '|竞争性评审'
			
 
				+         '|竞争性评审|框架协议'
			
 
				 
			
 
				 not_bidway = '及单一来源|询价小组成员|除单一来源|竞争性谈判邀请函|询价记录|自由竞价' \
			
 
				              '|限时竞价|咨询单位|询价单'
			
 
				 
			
 
				-not_bidway_preffix = "本次|拟|参加|无效|标的|联合体|参与|否决|除"
			
 
				+not_bidway_preffix = "本次|拟|参加|无效|标的|联合体|参与|否决|除|可以选择|包括|涉及|非"
			
 
				 
			
 
				 not_bidway_suffix = "文件|报名|邀请|项目|失败|数量|编号|后|时间|类型|名称|和|成交" \
			
 
				                     "|标题|开始|结束|产品|报价|供应商|部门|监督|需求|范围|入围|内容|人" \
			
 
				                     "|条件|公司|保证金|完毕|事件|成功|活动|地点|标|会|须知|范围" \
			
 
				-                    "|响应|报价|采购公示|的原因|采购供应商|价|采购人员|失败"
			
 
				+                    "|响应|报价|采购公示|的原因|采购供应商|价|采购人员|失败|小组"
			
 
				 
			
 
				 bidway_preffix = '采购方式|竞价方式|招标方式|询价类型|交易方式|寻源策略|招标形式|询价方式' \
			
 
				                  '|发包方式|发包类型|开展方式|招标类型|选取方式|招租方式'
			
@@ -268,21 +269,64 @@ def re_standard_bidway(_str):
 
				     bidway_list = []
			
 
				     if match:
			
 
				         for m in match:
			
 
				-            m_dict = m.groupdict()
			
 
				-            m_span = m.span()
			
 
				-            keyword = ""
			
 
				-            keyword_index = [m_span[0], m_span[1]]
			
 
				-            for key in m_dict.keys():
			
 
				-                if key == "value":
			
 
				-                    keyword = m_dict.get(key)
			
 
				-                else:
			
 
				-                    keyword_index[0] += len(m_dict.get(key))
			
 
				+            keyword = m.group('value')
			
 
				+            keyword_index = list(m.span('value'))
			
 
				+            behind_str = _str[m.start(): m.end()+30]
			
 
				+            if len(re.findall(normal_bidway, behind_str))>1:
			
 
				+                keyword = ''
			
 
				+                for it in re.finditer('(?P<sign>.{1,2})(?P<bidway>'+normal_bidway+')+', behind_str): # 招标方式后面多个选择处理
			
 
				+                    if '□' != it.group('sign')[-1]:
			
 
				+                        keyword = it.group('bidway')
			
 
				+                        keyword_index = [m.start()+it.start('bidway'), m.start()+it.end('bidway')]
			
 
				+                        break
			
 
				+             # m_dict = m.groupdict()
			
 
				+            # m_span = m.span()
			
 
				+            # keyword = ""
			
 
				+            # keyword_index = [m_span[0], m_span[1]]
			
 
				+            # for key in m_dict.keys():
			
 
				+            #     if key == "value":
			
 
				+            #         keyword = m_dict.get(key)
			
 
				+            #     else:
			
 
				+            #         keyword_index[0] += len(m_dict.get(key))
			
 
				             bidway_list.append([keyword, keyword_index])
			
 
				 
			
 
				     return bidway_list
			
 
				 
			
 
				+def re_normal_bidway(_str):
			
 
				+    ser = re.search("("+normal_bidway+")(转为?|变更为|更改为)"+"(?P<bidway>(" + normal_bidway + "))", _str) # 如果方式变更取变更后的
			
 
				+    if ser:
			
 
				+        return [[ser.group('bidway'), list(ser.span('bidway'))]]
			
 
				+    reg_all = "(?P<value>" + normal_bidway + ")"
			
 
				+    match = re.finditer(reg_all, _str)
			
 
				+    bidway_list = []
			
 
				+    bidway_set = set()
			
 
				+    if match:
			
 
				+        for m in match:
			
 
				+            keyword = m.group()
			
 
				+            if keyword == '公开招标' and m.start()>0 and _str[m.start()-1]=='非':
			
 
				+                continue
			
 
				+            keyword_index = list(m.span())
			
 
				+            bidway_set.add(keyword)
			
 
				+            bidway_list.append([keyword, keyword_index])
			
 
				+    if len(bidway_list) == 0: # 如果找不到标准方式，匹配简称方式
			
 
				+        ser = re.search('(?P<bidway>(磋商|谈判))(公告|成交|结果)', _str)
			
 
				+        if ser:
			
 
				+            return [[ser.group('bidway'), list(ser.span('bidway'))]]
			
 
				+    if len(bidway_set) > 1: # 匹配到多种招标方式返回空
			
 
				+        return []
			
 
				+    return bidway_list
			
 
				 
			
 
				 def re_all_bidway(_str):
			
 
				+    reg_all = "(?P<value>" + normal_bidway + ")" # 优先匹配规范的招标方式
			
 
				+    match = re.finditer(reg_all, _str)
			
 
				+    bidway_list = []
			
 
				+    if match:
			
 
				+        for m in match:
			
 
				+            keyword = m.group()
			
 
				+            keyword_index = list(m.span())
			
 
				+            bidway_list.append([keyword, keyword_index])
			
 
				+    return bidway_list
			
 
				+
			
 
				     reg_all = "(?P<value>" + bidway + ")"
			
 
				     match = re.finditer(reg_all, _str)
			
 
				     bidway_list = []
			
@@ -339,6 +383,13 @@ def get_one_word(bidway_list):
 
				 
			
 
				 
			
 
				 def re_bidway(text, title):
			
 
				+    # 优先匹配标题标准招标方式
			
 
				+    if len(title)<100:
			
 
				+        bidway_list = re_normal_bidway(title)
			
 
				+        if bidway_list:
			
 
				+            word, text_index = get_one_word(bidway_list)
			
 
				+            return word, text_index
			
 
				+
			
 
				     # 替换易混淆词
			
 
				     text_clean = re_not_bidway(text)
			
 
				     title_clean = re_not_bidway(title)
			
@@ -406,12 +457,30 @@ bidway_dict = {'询价': '询价', '竞争性谈判': '竞争性谈判',
 
				                '网上电子投标': '公开招标', '公开竞谈': '竞争性谈判',
			
 
				                '竞争性磋商': '竞争性磋商', '采购方式：邀请': '邀请招标',
			
 
				                '公开竞价': '竞价', '其他': '其他', '公开招募': '其他',
			
 
				-               '网上询价': '询价'}
			
 
				+               '网上询价': '询价', '框架协议': '框架协议', '谈判':'竞争性谈判'}
			
 
				 # bidway名称统一规范
			
 
				 def bidway_integrate(bidway):
			
 
				     integrate_name = bidway_dict.get(bidway,"其他")
			
 
				     return integrate_name
			
 
				 
			
 
				+def bidway_normalize(key):
			
 
				+    if re.search('公开招标|公开发包', key):
			
 
				+        return '公开招标'
			
 
				+    elif re.search('单一来源', key):
			
 
				+        return '单一来源'
			
 
				+    elif re.search('磋商', key):
			
 
				+        return '竞争性磋商'
			
 
				+    elif re.search('谈判', key):
			
 
				+        return '竞争性谈判'
			
 
				+    elif re.search('竞谈|竞价|竞投|竞标', key):
			
 
				+        return '竞价'
			
 
				+    elif re.search('询价|询比|比价|询单', key):
			
 
				+        return '询价'
			
 
				+    elif re.search('邀请|邀标', key):
			
 
				+        return '邀请招标'
			
 
				+    else:
			
 
				+        return bidway_dict.get(key, '其他')
			
 
				+
			
 
				 def test_csv():
			
 
				     df = pd.read_csv("C:\\Users\\Administrator\\Desktop\\bidway_text.csv")
			
 
				 
			
@@ -456,13 +525,90 @@ def test_str():
 
				 
			
 
				 
			
 
				 def test_html():
			
 
				-    html_path = "C:/Users/Administrator/Desktop/3.html"
			
 
				+    # html_path = "C:/Users/Administrator/Desktop/3.html"
			
 
				+    html_path = 'd:/html/2.html'
			
 
				 
			
 
				-    with open(html_path, "r") as f:
			
 
				+    with open(html_path, "r", encoding='utf-8') as f:
			
 
				         s = f.read()
			
 
				 
			
 
				     print(extract_bidway(s, title=""))
			
 
				 
			
 
				+def get_valuate():
			
 
				+    import psycopg2
			
 
				+    conn = psycopg2.connect(host='192.168.2.103', port='5432', user='postgres', password='postgres', dbname='iepy')
			
 
				+    cursor = conn.cursor()
			
 
				+    sql = "select c1.docid, c1.doctitle, c1.extract_json, c2.text from corpus_otherinput c1 left join corpus_iedocument c2 on c1.docid=c2.human_identifier where c1.new_extract notnull;" # where docid='110635873'
			
 
				+    # sql = "select c1.docid, c1.doctitle from corpus_otherinput c1;"
			
 
				+    # sql = "select text from corpus_iedocument limit 50000;"
			
 
				+    cursor.execute(sql)
			
 
				+    datas = []
			
 
				+    olds = []
			
 
				+    news = []
			
 
				+    label_old = []
			
 
				+    label_new = []
			
 
				+    labels = []
			
 
				+    for row in cursor.fetchall():
			
 
				+        docid = row[0]
			
 
				+        doctitle = row[1]
			
 
				+        ex = row[2]
			
 
				+        text = row[3]
			
 
				+        ser = re.search('"bidway": "(\w{,6})"', ex)
			
 
				+        # print('ser:', ser)
			
 
				+        old = ser.group(1) if ser else ""
			
 
				+        pred = extract_bidway(text, title=doctitle)
			
 
				+
			
 
				+        # list_bidway = extract_bidway(text, title=doctitle)
			
 
				+        # print('list_bidway', list_bidway)
			
 
				+        # if list_bidway:
			
 
				+        #     bidway = list_bidway[0].get("body")
			
 
				+        #     # bidway名称统一规范
			
 
				+        #     bidway = bidway_integrate(bidway)
			
 
				+        # else:
			
 
				+        #     bidway = ""
			
 
				+        # print('bidway: ', bidway)
			
 
				+
			
 
				+        pred = pred[0]['body'] if len(pred) > 0 else ""
			
 
				+        new = bidway_dict.get(pred, "其他") if pred!="" else ""
			
 
				+        sql2 = "select value from brat_bratannotation where document_id='{0}' and value like '%bidway%' limit 4;".format(docid)
			
 
				+        cursor.execute(sql2)
			
 
				+        lb_new = docid + "_"
			
 
				+        lb_old = docid + "_"
			
 
				+        tmp_l = []
			
 
				+        for row in cursor.fetchall():
			
 
				+            lb = row[0].split()[-1]
			
 
				+            lb = bidway_dict.get(lb, "其他")  # 新准确率：0.9642, 召回率： 0.9642, F1: 0.8965
			
 
				+            # lb = bidway_normalize(lb)   # 旧准确率：0.9287, 召回率： 0.9287, F1: 0.8011  新准确率：0.9692, 召回率： 0.9692, F1: 0.9105
			
 
				+
			
 
				+            tmp_l.append(lb)
			
 
				+            if lb == new:
			
 
				+                lb_new = docid + "_" + lb
			
 
				+            if lb == old:
			
 
				+                lb_old = docid + "_" + lb
			
 
				+        olds.append(docid + "_" + old)
			
 
				+        news.append(docid + "_" + new)
			
 
				+        label_new.append(lb_new)
			
 
				+        label_old.append(lb_old)
			
 
				+        labels.append('；'.join(tmp_l))
			
 
				+        datas.append((docid, docid + "_" + old, lb_old, docid + "_" + new, lb_new, '；'.join(tmp_l)))
			
 
				+
			
 
				+    eq_old = len(set(olds)&set(label_old))
			
 
				+    eq_new = len(set(news)&set(label_new))
			
 
				+
			
 
				+    acc_old = eq_old/len(set(olds))
			
 
				+    recall_old = eq_old/len(set(label_old))
			
 
				+    f1_old = acc_old*recall_old/2*(acc_old+recall_old)
			
 
				+
			
 
				+    acc_new = eq_new/len(set(news))
			
 
				+    recall_new = eq_new/len(set(label_new))
			
 
				+    f1_new = acc_new*recall_new/2*(acc_new+recall_new)
			
 
				+    print('旧准确率：%.4f, 召回率： %.4f, F1: %.4f'%(acc_old, recall_old, f1_old))
			
 
				+    print('新准确率：%.4f, 召回率： %.4f, F1: %.4f'%(acc_new, recall_new, f1_new))
			
 
				+
			
 
				+
			
 
				+    df = pd.DataFrame(datas, columns=['docid', 'pred_old', 'label_old', 'pred_new', 'label_new', 'labels'])
			
 
				+    df['old_pos'] = df.apply(lambda x:1 if x['pred_old']==x['label_old'] else 0, axis=1)
			
 
				+    df['new_pos'] = df.apply(lambda x:1 if x['pred_new']==x['label_new'] else 0, axis=1)
			
 
				+    df.to_csv('E:/其他数据/招标方式预测结果.csv', index=False)
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     # extract_bidway(s)
			
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -277,8 +277,8 @@ class CodeNamePredict():
 
				     
			
 
				     def predict(self,list_sentences,list_entitys=None,MAX_AREA = 5000):
			
 
				         #@summary: 获取每篇文章的code和name
			
 
				-        pattern_score = re.compile("工程|服务|采购|施工|项目|系统|招标|中标|公告|学校|[大中小]学校?|医院|公司|分公司|研究院|政府采购中心|学院|中心校?|办公室|政府|财[政务]局|办事处|委员会|[部总支]队|警卫局|幼儿园|党委|党校|银行|分行|解放军|发电厂|供电局|管理所|供电公司|卷烟厂|机务段|研究[院所]|油厂|调查局|调查中心|出版社|电视台|监狱|水厂|服务站|信用合作联社|信用社|交易所|交易中心|交易中心党校|科学院|测绘所|运输厅|管理处|局|中心|机关|部门?|处|科|厂|集团|图书馆|馆|所|厅|楼|区|酒店|场|基地|矿|餐厅|酒店")
			
 
				-
			
 
				+        # pattern_score = re.compile("工程|服务|采购|施工|项目|系统|招标|中标|公告|学校|[大中小]学校?|医院|公司|分公司|研究院|政府采购中心|学院|中心校?|办公室|政府|财[政务]局|办事处|委员会|[部总支]队|警卫局|幼儿园|党委|党校|银行|分行|解放军|发电厂|供电局|管理所|供电公司|卷烟厂|机务段|研究[院所]|油厂|调查局|调查中心|出版社|电视台|监狱|水厂|服务站|信用合作联社|信用社|交易所|交易中心|交易中心党校|科学院|测绘所|运输厅|管理处|局|中心|机关|部门?|处|科|厂|集团|图书馆|馆|所|厅|楼|区|酒店|场|基地|矿|餐厅|酒店")
			
 
				+        pattern_score = re.compile('建设项目|服务项目|工程项目|工程施工|建设工程|服务中心|基础设施|物业管理|工程设计|妇幼保健|咨询服务|管理系统|管理中心|改建工程|配套工程|公安局|幼儿园|管理局|使用权|办公楼|教育局|管理处|图书馆|经营权|项目|采购|工程|改造|服务|设备|中心|医院|系统|建设|监理|施工|维修|学院|安装|设计|关于|标段|招标|技术|询价|管理|学校|小学|中学|平台|提升|设施|检测|整治|社区|装修|政府|绿化|物资|租赁|地块|医疗|编制|公开|规划|监控|教育|维护|校区|治理|升级|安置|竞价|购置|评估|勘察|承包|实验|大学|材料|生产|耗材|招租|硬化|维保|用地|消防|审计|拍卖|物业|入围|养护|机关|企业|用房|出让|资产|分局|验收|宣传|处置|校园|研究|咨询|修缮|更换|装饰|劳务|保养|物流|出租|局|院')
			
 
				         result = []
			
 
				         index_unk = self.word2index.get("<unk>")
			
 
				         # index_pad = self.word2index.get("<pad>")
			
@@ -393,20 +393,40 @@ class CodeNamePredict():
 
				 
			
 
				                                 #add code to entitys
			
 
				                                 list_entity.append(temp_entitys[h])
			
 
				-
			
 
				-                                if the_code not in code_set:
			
 
				+                                if re.search(',|/|;|、|，', the_code) and len(the_code)>25:
			
 
				+                                    for it in re.split(',|/|;|、|，', the_code):
			
 
				+                                        if len(it) > 8:
			
 
				+                                            if it not in code_set:
			
 
				+                                                code_set.add(it)
			
 
				+                                                item['code'].append(it)
			
 
				+                                        elif len(item['code']) > 0:
			
 
				+                                            new_it = item['code'][-1] + re.search(',|/|;|、|，', the_code).group(0) + it
			
 
				+                                            if new_it not in code_set:
			
 
				+                                                code_set.add(new_it)
			
 
				+                                                item['code'][-1] = new_it
			
 
				+                                        else:
			
 
				+                                            if the_code not in code_set:
			
 
				+                                                code_set.add(the_code)
			
 
				+                                                item['code'].append(the_code)
			
 
				+                                            break
			
 
				+                                elif the_code not in code_set:
			
 
				                                     code_set.add(the_code)
			
 
				-                                    item['code'] = list(code_set)
			
 
				+                                    item['code'].append(the_code)
			
 
				+
			
 
				+                                # if the_code not in code_set:
			
 
				+                                #     code_set.add(the_code)
			
 
				+                                #     item['code'] = list(code_set)
			
 
				                     for iter in re.finditer(self.PN_pattern,join_predict):
			
 
				                         _name = self.fitDataByRule(pad_sentence[iter.span()[0]:iter.span()[1]])
			
 
				 
			
 
				                         #add name to entitys
			
 
				                         _entity = Entity(doc_id=sentence.doc_id,entity_id="%s_%s_%s_%s"%(sentence.doc_id,sentence.sentence_index,iter.span()[0],iter.span()[1]),entity_text=_name,entity_type="name",sentence_index=sentence.sentence_index,begin_index=0,end_index=0,wordOffset_begin=iter.span()[0],wordOffset_end=iter.span()[1],in_attachment=sentence.in_attachment)
			
 
				                         list_entity.append(_entity)
			
 
				-                        w = 1 if re.search('(项目|工程|招标|合同|标项|标的|计划|询价|询价单|询价通知书|申购)(名称|标题|主题)[:：\s]', pad_sentence[iter.span()[0]-10:iter.span()[0]])!=None else 0.5
			
 
				+                        # w = 1 if re.search('(项目|工程|招标|合同|标项|标的|计划|询价|询价单|询价通知书|申购)(名称|标题|主题)[:：\s]', pad_sentence[iter.span()[0]-10:iter.span()[0]])!=None else 0.5
			
 
				+                        w = 1 if re.search('(项目|工程|招标|采购|合同|标项|标的|计划|询价|询价单|询价通知书|申购)(名称|标题|主题|项目)[:：\s]', pad_sentence[iter.span()[0]-10:iter.span()[0]])!=None else 0.5
			
 
				                         if _name not in dict_name_freq_score:
			
 
				                             # dict_name_freq_score[_name] = [1,len(re.findall(pattern_score,_name))+len(_name)*0.1]
			
 
				-                            dict_name_freq_score[_name] = [1, (len(re.findall(pattern_score, _name)) + len(_name) * 0.05)*w]
			
 
				+                            dict_name_freq_score[_name] = [1, (len(re.findall(pattern_score, _name)) + len(_name) * 0.05)*w+(5-sentence.sentence_index)*0.2]
			
 
				                         else:
			
 
				                             dict_name_freq_score[_name][0] += 1
			
 
				                     '''
			
@@ -423,18 +443,21 @@ class CodeNamePredict():
 
				                 _begin_index += _LEN
			
 
				             
			
 
				             list_name_freq_score = []
			
 
				+            # print('模型预测项目名称：', dict_name_freq_score)
			
 
				 
			
 
				             # 2020/11/23 大网站规则调整
			
 
				             if len(dict_name_freq_score) == 0:
			
 
				-                name_re1 = '(项目|工程|招标|合同|标项|标的|计划|询价|询价单|询价通知书|申购)(名称|标题|主题)[:：\s]+([^，。：；]{2,60})[，。]'
			
 
				+                # name_re1 = '(项目|工程|招标|合同|标项|标的|计划|询价|询价单|询价通知书|申购)(名称|标题|主题)[:：\s]+([^，。：；]{2,60})[，。]'
			
 
				+                name_re1 = '(项目|工程|招标|采购(条目)?|合同|标项|标的|计划|询价|询价单|询价通知书|申购单|申购)(名称|标名|标题|主题)[:：\s]+(?P<name>[^，。：；]{2,60})[，。]'
			
 
				                 for sentence in list_sentence:
			
 
				                     # pad_sentence = sentence.sentence_text
			
 
				                     othername = re.search(name_re1, sentence.sentence_text)
			
 
				                     if othername != None:
			
 
				-                        project_name = othername.group(3)
			
 
				+                        project_name = othername.group('name')
			
 
				                         beg = find_index([project_name], sentence.sentence_text)[0]
			
 
				                         end = beg + len(project_name)
			
 
				                         _name = self.fitDataByRule(sentence.sentence_text[beg:end])
			
 
				+                        # print('规则召回项目名称：', _name)
			
 
				                         # add name to entitys
			
 
				                         _entity = Entity(doc_id=sentence.doc_id, entity_id="%s_%s_%s_%s" % (
			
 
				                         sentence.doc_id, sentence.sentence_index, beg, end), entity_text=_name,
			
@@ -444,7 +467,7 @@ class CodeNamePredict():
 
				                         w = 1
			
 
				                         if _name not in dict_name_freq_score:
			
 
				                             # dict_name_freq_score[_name] = [1,len(re.findall(pattern_score,_name))+len(_name)*0.1]
			
 
				-                            dict_name_freq_score[_name] = [1, (len(re.findall(pattern_score, _name)) + len(_name) * 0.05) * w]
			
 
				+                            dict_name_freq_score[_name] = [1, (len(re.findall(pattern_score, _name)) + len(_name) * 0.05) * w+(5-sentence.sentence_index)*0.2]
			
 
				                         else:
			
 
				                             dict_name_freq_score[_name][0] += 1
			
 
				                 # othername = re.search(name_re1, sentence.sentence_text)
			
@@ -461,6 +484,8 @@ class CodeNamePredict():
 
				             if len(list_name_freq_score)>0:
			
 
				                 list_name_freq_score.sort(key=lambda x:x[1][0]*x[1][1],reverse=True)
			
 
				                 item['name'] = list_name_freq_score[0][0]
			
 
				+                # for it in list_name_freq_score:
			
 
				+                    # print('项目名称及分值：',it[0],it[1], it[1][0]*it[1][1])
			
 
				                 # if list_name_freq_score[0][1][0]>1:
			
 
				                 #     item[1]['name'] = list_name_freq_score[0][0]
			
 
				                 # else:
			
@@ -474,9 +499,10 @@ class CodeNamePredict():
 
				                     # if othercode != None:
			
 
				                     #     item[1]['code'].append(othercode.group(2))
			
 
				                     # 2020/11/23 大网站规则调整
			
 
				-                    othercode = re.search('(项目|采购|招标|品目|询价|竞价|询价单|磋商|订单|账单|交易|文件|计划|场次|标的|标段|标包|分包|标段\(包\)|招标文件|合同|通知书|公告)(单号|编号|标号|编码|代码|备案号|号)[:：\s]+([^，。；：、]{8,30}[a-zA-Z0-9\号])[\)，。]', sentence.sentence_text)
			
 
				+                    othercode = re.search('(项目|采购|招标|品目|询价|竞价|询价[单书]|磋商|订单|账单|交易|文件|计划|场次|标的|标段|标包|分包|标段\(包\)|招标文件|合同|通知书|公告|工程|寻源|标书|包件|谈判|申购)(单据?号|编号|标号|编码|代码|备案号|号)[:：\s]+(?P<code>[^，。；：、]{8,30}[a-zA-Z0-9\号])[\)，。\u4e00-\u9fa5]', sentence.sentence_text)
			
 
				                     if othercode != None:
			
 
				-                        item['code'].append(othercode.group(3))
			
 
				+                        item['code'].append(othercode.group('code'))
			
 
				+                        # print('规则召回项目编号：', othercode.group('code'))
			
 
				             item['code'] = [code for code in item['code'] if len(code)<500]
			
 
				             item['code'].sort(key=lambda x:len(x),reverse=True)
			
 
				             result.append(item)
			
@@ -692,6 +718,7 @@ class PREMPredict():
 
				             text_tup = text_list[i]
			
 
				             front, middle, behind = text_tup
			
 
				             whole = "".join(text_tup)
			
 
				+            # print('模型预测角色：', front, entity.entity_text, label, values)
			
 
				             if label in [0, 1, 2, 3, 4] and values[label] < 0.5: # 小于阈值的设为其他，让后面的规则召回重新判断
			
 
				                 label = 5
			
 
				             elif label in [2,3,4] and re.search('序号：\d+，\w{,2}候选', front):
			
@@ -770,6 +797,7 @@ class PREMPredict():
 
				             whole = "".join(text_tup)
			
 
				             # print('金额： ', entity.entity_text, label, values, front, middle, behind)
			
 
				             if label in [0, 1] and values[label] < 0.5: # 小于阈值的设为其他金额，让后面的规则召回重新判断
			
 
				+                # print('模型预测金额： ', entity.entity_text, label, values, front, middle, behind)
			
 
				                 label = 2
			
 
				             elif label == 1: # 错误中标金额处理
			
 
				                 if re.search('[:：，。](总金额|总价|单价)(（万?元）)?：?$', front) and re.search('(中标|投标|成交|中价)', front)==None:
			
@@ -1435,6 +1463,7 @@ class RoleRulePredictor():
 
				                                         self.pattern_money_other, _span[0]) is None:
			
 
				                                     p_entity.values[0] = 0.8 + p_entity.values[0] / 10
			
 
				                                     p_entity.label = 0
			
 
				+                                    # print('规则召回预算金额：', p_entity.entity_text, _span[0])
			
 
				                                 if re.search(self.pattern_money_tenderer, _span[0]) is not None:
			
 
				                                     if re.search(self.pattern_money_other, _span[0]) is not None:
			
 
				                                         if re.search(self.pattern_money_tenderer, _span[0]).span()[1] > \
			
@@ -1453,6 +1482,7 @@ class RoleRulePredictor():
 
				                                         , _sentence.sentence_text[:p_entity.wordOffset_begin]): # 处理几个标段金额相邻情况 例子：191705231
			
 
				                                     p_entity.values[0] = 0.8 + p_entity.values[0] / 10
			
 
				                                     p_entity.label = 0
			
 
				+                                    # print('规则召回预算金额2：', p_entity.entity_text, _sentence.sentence_text[:p_entity.wordOffset_begin])
			
 
				 
			
 
				             # 增加招标金额扩展，招标金额+连续的未识别金额，并且都可以匹配到标段信息，则将为识别的金额设置为招标金额
			
 
				             list_p = []