Kaynağa Gözat

Merge remote-tracking branch 'origin/master'

luojiehua 5 ay önce
ebeveyn
işleme
3d3f68ac4d

+ 2 - 2
BiddingKG/dl/interface/extract.py

@@ -406,8 +406,6 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
         channel_dic = {"docchannel":
              { "docchannel": "审批项目", "doctype": "审批项目", "life_docchannel": "审批项目" }
         }
-        prem[0]['prem'] = {}  # 审批项目不要这项
-
     else:
         channel_dic, msc = predictor.getPredictor("channel").final_change(channel_dic, prem[0], original_docchannel, msc)
     # print('msc', msc)
@@ -481,6 +479,8 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     if original_docchannel == 302:
         approval = predictor.getPredictor("approval").predict(list_sentences, list_entitys, text)
+        approval = predictor.getPredictor("approval").add_ree2approval(approval , prem[0]['prem'])
+        data_res['prem'] = {}  # 审批项目不要这项
         data_res['approval'] = approval
 
     if channel_dic['docchannel']['doctype'] == '处罚公告': # 20240627 处罚公告进行失信要素提取

+ 34 - 1
BiddingKG/dl/interface/predictor.py

@@ -6634,7 +6634,7 @@ class TableTag2List():
                                 td_text = cell.get_text()
                             else:
                                 td_text = str(cell.get_text()).strip().replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '').replace("(", "(").replace(')', ')').replace('?', '')
-                            text = [td_text,0]
+                            text = td_text
 
                             # text = str(cell.get_text()).strip().replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '').replace("(", "(").replace(')', ')').replace('?', '')
                             # # text = re.sub('\s', '', text)[:200] # 只需取前200字即可
@@ -7846,6 +7846,7 @@ class ApprovalPredictor():
         found_key = 0
         code_name_set = set() # 项目编号、名称集合
         org_set = set() # 保存可能为审批部门的角色
+        not_sure_role = '' # 不确定角色, 例:单位名称:长沙驰能新能源开发有限公司眉县分公司
         for entity in list_entitys[0]:
             entities[entity.sentence_index].append(entity)
 
@@ -7871,6 +7872,10 @@ class ApprovalPredictor():
                         multi_project[k] = entity.entity_text
                         found_key = 1
                         flag = 0
+                        if not_sure_role == entity.entity_text:
+                            not_sure_role = ''
+                    elif re.search('(,|^)单位名称:', sentences[entity.sentence_index][max(0, b - span):b]):
+                        not_sure_role = entity.entity_text
                     if flag and entity.entity_type == "org" and re.search('(局|委员会|委|厅)$', entity.entity_text):
                         org_set.add(entity.entity_text)
                 elif entity.entity_type in ['person']:
@@ -7980,7 +7985,14 @@ class ApprovalPredictor():
                     multi_project['district'] = district['district']['district']
                 multi_project = {k: v for k, v in multi_project.items() if v != ''}
                 rs_l.append(multi_project)
+        if not_sure_role != '' and rs_dic.get('construct_company', '') == '' and not_sure_role not in org_set: # 补充,单位名称:这种作为建设单位 例:400069851014
+            rs_dic['construct_company'] = not_sure_role
         if len(rs_l)>1 and len(set(rs_l[0].keys()))>2 and set(rs_l[0].keys())==set(rs_l[1].keys()):
+            for k in self.role_type.keys(): # 多项目无建设单位等通过整篇提取补充
+                if rs_dic.get(k, '') != '' and k not in rs_l[0].get(k, '') == '':
+                    for d in rs_l:
+                        if d.get(k, '') == '':
+                            d[k] = rs_dic[k]
             return rs_l
         elif found_key == 1:
             district = getPredictor('district').get_area(
@@ -8031,6 +8043,27 @@ class ApprovalPredictor():
             return [rs_dic]
         return []
 
+    def add_ree2approval(self, approval, prem):
+        '''
+        Fill in the construction unit (``construct_company``) of approval entries with the tenderee.
+
+        The tenderee is the ``role_text`` of the first item in ``prem["Project"]['roleList']``
+        whose ``role_name`` is "tenderee". If ``prem`` has no "Project" key, or no such role
+        is found, ``approval`` is returned unchanged.
+
+        :param approval: iterable of dict-like approval entries; entries are updated in place
+            (presumably the list produced by ``predict`` -- confirm against the caller)
+        :param prem: mapping that may contain a "Project" entry holding a ``roleList``
+        :return: the same ``approval`` object that was passed in
+        '''
+        ree = ''
+        if "Project" in prem:
+            for d in prem["Project"]['roleList']:
+                if d["role_name"] == "tenderee":
+                    ree = d["role_text"]
+                    break  # only the first tenderee entry is used
+        if ree != '':
+            for d in approval:
+                if d.get('construct_company', '') == '':
+                    d['construct_company'] = ree
+                else:
+                    # NOTE(review): stops at the first entry that already has a construct_company,
+                    # so later entries are left untouched even if theirs is empty -- confirm intended.
+                    break
+        return approval
+
 class BiddingScore():
     def __init__(self):
         self.head_rule_dic = {