浏览代码

补充审批项目建设单位等

lsm 5 月之前
父节点
当前提交
8f165f1acb
共有 2 个文件被更改,包括 29 次插入和 3 次删除
  1. 2 2
      BiddingKG/dl/interface/extract.py
  2. 27 1
      BiddingKG/dl/interface/predictor.py

+ 2 - 2
BiddingKG/dl/interface/extract.py

@@ -406,8 +406,6 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
         channel_dic = {"docchannel":
              { "docchannel": "审批项目", "doctype": "审批项目", "life_docchannel": "审批项目" }
         }
-        prem[0]['prem'] = {}  # 审批项目不要这项
-
     else:
         channel_dic, msc = predictor.getPredictor("channel").final_change(channel_dic, prem[0], original_docchannel, msc)
     # print('msc', msc)
@@ -481,6 +479,8 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     if original_docchannel == 302:
         approval = predictor.getPredictor("approval").predict(list_sentences, list_entitys, text)
+        approval = predictor.getPredictor("approval").add_ree2approval(approval , prem[0]['prem'])
+        data_res['prem'] = {}  # 审批项目不要这项
         data_res['approval'] = approval
 
     if channel_dic['docchannel']['doctype'] == '处罚公告': # 20240627 处罚公告进行失信要素提取

+ 27 - 1
BiddingKG/dl/interface/predictor.py

@@ -6634,7 +6634,7 @@ class TableTag2List():
                                 td_text = cell.get_text()
                             else:
                                 td_text = str(cell.get_text()).strip().replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '').replace("(", "(").replace(')', ')').replace('?', '')
-                            text = [td_text,0]
+                            text = td_text
 
                             # text = str(cell.get_text()).strip().replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '').replace("(", "(").replace(')', ')').replace('?', '')
                             # # text = re.sub('\s', '', text)[:200] # 只需取前200字即可
@@ -7981,6 +7981,11 @@ class ApprovalPredictor():
                 multi_project = {k: v for k, v in multi_project.items() if v != ''}
                 rs_l.append(multi_project)
         if len(rs_l)>1 and len(set(rs_l[0].keys()))>2 and set(rs_l[0].keys())==set(rs_l[1].keys()):
+            for k in self.role_type.keys(): # 多项目无建设单位等通过整篇提取补充
+                if rs_dic.get(k, '') != '' and k not in rs_l[0].get(k, '') == '':
+                    for d in rs_l:
+                        if d.get(k, '') == '':
+                            d[k] = rs_dic[k]
             return rs_l
         elif found_key == 1:
             district = getPredictor('district').get_area(
@@ -8031,6 +8036,27 @@ class ApprovalPredictor():
             return [rs_dic]
         return []
 
+    def add_ree2approval(self, approval, prem):
+        '''
+        Fill an empty 'construct_company' in approval entries with the tenderee's role_text taken from prem["Project"]['roleList'].
+        :param approval: iterable of dicts (one per approval entry); entries are updated in place
+        :param prem: dict; only its optional "Project" entry is read, which must then hold a 'roleList' of role dicts
+        :return: the same approval object that was passed in
+        '''
+        ree = ''  # tenderee text; stays '' when prem has no "Project" or no tenderee role
+        if "Project" in prem:
+            for d in prem["Project"]['roleList']:
+                if d["role_name"] == "tenderee":
+                    ree = d["role_text"]
+                    break  # only the first tenderee entry is used
+        if ree != '':
+            for d in approval:
+                if d.get('construct_company', '') == '':  # missing key and '' are both treated as empty
+                    d['construct_company'] = ree
+                else:
+                    break  # stop at the first entry that already has a value; later entries are left untouched
+        return approval
+
 class BiddingScore():
     def __init__(self):
         self.head_rule_dic = {