2 tahun lalu · 12e989451a
--- a/BiddingKG/dl/interface/Entitys.py
+++ b/BiddingKG/dl/interface/Entitys.py
@@ -296,6 +296,7 @@ class Role():
 
				         # 中投标人属性
			
 
				         self.ratio = "" #2022/01/06 新增 保存中投标金额相关费率
			
 
				         self.serviceTime = "" #2021/01/06 新增 保存服务期限(工期)
			
 
				+        self.address = ""  #2022/08/08 新增 角色地址
			
 
				 
			
 
				     def getString(self):
			
 
				         self.linklist = [item for item in set(self.linklist)]
			
@@ -319,7 +320,7 @@ class Role():
 
				                 discount_ratio = num_value
			
 
				         result = {'role_name':self.role_name,'role_text':fitDataByRule(self.entity_text),
			
 
				                   'role_money': {'money':self.money,'money_unit':self.money_unit,'floating_ratio':floating_ratio,'downward_floating_ratio':downward_floating_ratio,'discount_ratio':discount_ratio},
			
 
				-                  'linklist': self.linklist,'serviceTime':self.serviceTime}
			
 
				+                  'linklist': self.linklist,'serviceTime':self.serviceTime,'address':self.address}
			
 
				         return result
			
 
				 
			
 
				 # 用于KM算法的组合配对
			
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -8,7 +8,7 @@ import time
 
				 import codecs
			
 
				 
			
 
				 from BiddingKG.dl.ratio.re_ratio import extract_ratio
			
 
				-# from BiddingKG.dl.table_head.predict import predict
			
 
				+from BiddingKG.dl.table_head.predict import predict
			
 
				 
			
 
				 sys.setrecursionlimit(1000000)
			
 
				 sys.path.append(os.path.abspath("../.."))
			
@@ -422,7 +422,11 @@ def tableToText(soup):
 
				     def set_head_model(inner_table):
			
 
				         for i in range(len(inner_table)):
			
 
				             for j in range(len(inner_table[i])):
			
 
				-                inner_table[i][j] = inner_table[i][j][0]
			
 
				+                # 删掉单格前后符号，以免影响表头预测
			
 
				+                col = inner_table[i][j][0]
			
 
				+                col = re.sub("^[^\u4e00-\u9fa5a-zA-Z0-9]+", "", col)
			
 
				+                col = re.sub("[^\u4e00-\u9fa5a-zA-Z0-9]+$", "", col)
			
 
				+                inner_table[i][j] = col
			
 
				 
			
 
				         # 模型预测表头
			
 
				         predict_list = predict(inner_table)
			
@@ -1012,9 +1016,9 @@ def tableToText(soup):
 
				             #inner_table,head_list = setHead_withRule(inner_table,pat_head,pat_value,3)
			
 
				             #inner_table,head_list = setHead_inline(inner_table)
			
 
				             # inner_table, head_list = setHead_initem(inner_table,pat_head)
			
 
				-            # inner_table, head_list = set_head_model(inner_table)
			
 
				-            inner_table,head_list = setHead_incontext(inner_table,pat_head) # 发现setHead_initem挺多明显的表头识别不到，换回此方法
			
 
				-            # print(inner_table)
			
 
				+            inner_table, head_list = set_head_model(inner_table)
			
 
				+            # inner_table,head_list = setHead_incontext(inner_table,pat_head)
			
 
				+            # print("table_head", inner_table)
			
 
				             # for begin in range(len(head_list[:-1])):
			
 
				             #     for item in inner_table[head_list[begin]:head_list[begin+1]]:
			
 
				             #         print(item)
			
@@ -1029,6 +1033,8 @@ def tableToText(soup):
 
				 
			
 
				 
			
 
				             tbody.string = getTableText(inner_table,head_list)
			
 
				+            table_max_len = 30000
			
 
				+            tbody.string = tbody.string[:table_max_len]
			
 
				             #print(tbody.string)
			
 
				             tbody.name = "turntable"
			
 
				             return inner_table
			
@@ -1347,6 +1353,11 @@ def segment(soup,final=True):
 
				         text = _text
			
 
				     # 附件标识前修改为句号，避免正文和附件内容混合在一起
			
 
				     text = re.sub("[^。](?=##attachment##)","。",text)
			
 
				+    text = re.sub("[^。](?=##attachment_begin##)","。",text)
			
 
				+    text = re.sub("[^。](?=##attachment_end##)","。",text)
			
 
				+    text = re.sub("##attachment_begin##。","##attachment_begin##",text)
			
 
				+    text = re.sub("##attachment_end##。","##attachment_end##",text)
			
 
				+
			
 
				     return text
			
 
				 
			
 
				 '''
			
@@ -1862,9 +1873,43 @@ def article_limit(soup,limit_words=30000):
 
				                             attachment_skip = True
			
 
				                     else:
			
 
				                         part.decompose()
			
 
				-
			
 
				     return soup
			
 
				 
			
 
				+def attachment_filelink(soup):
			
 
				+    have_attachment = False
			
 
				+    attachment_part = None
			
 
				+    for child in soup.find_all(recursive=True):
			
 
				+        if child.name == 'div' and 'class' in child.attrs:
			
 
				+            if "richTextFetch" in child['class']:
			
 
				+                attachment_part = child
			
 
				+                have_attachment = True
			
 
				+                break
			
 
				+    if not have_attachment:
			
 
				+        return soup
			
 
				+    else:
			
 
				+        # 附件类型：图片、表格
			
 
				+        attachment_type = re.compile("\.(?:png|jpg|jpeg|tif|bmp|xlsx|xls)$")
			
 
				+        attachment_dict = dict()
			
 
				+        for _attachment in attachment_part.find_all(recursive=False):
			
 
				+            if _attachment.name == 'div' and 'filemd5' in _attachment.attrs:
			
 
				+                # print('filemd5',_attachment['filemd5'])
			
 
				+                attachment_dict[_attachment['filemd5']] = _attachment
			
 
				+        # print(attachment_dict)
			
 
				+        for child in soup.find_all(recursive=True):
			
 
				+            if child.name == 'div' and 'class' in child.attrs:
			
 
				+                if "richTextFetch" in child['class']:
			
 
				+                    break
			
 
				+            if "filelink" in child.attrs and child['filelink'] in attachment_dict:
			
 
				+                if re.search(attachment_type,str(child.string).strip()) or \
			
 
				+                        ('original' in child.attrs and re.search(attachment_type,str(child['original']).strip())):
			
 
				+                    # 附件插入正文标识
			
 
				+                    child.insert_before("。##attachment_begin##")
			
 
				+                    child.insert_after("。##attachment_end##")
			
 
				+                    child.replace_with(attachment_dict[child['filelink']])
			
 
				+
			
 
				+        # print('格式化输出',soup.prettify())
			
 
				+        return soup
			
 
				+
			
 
				 def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
			
 
				     '''
			
 
				     :param articles: 待处理的article source html
			
@@ -1909,7 +1954,10 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
 
				                 _soup.wrap(article_processed.new_tag("span"))
			
 
				         # print(article_processed)
			
 
				         # 正文和附件内容限制字数30000
			
 
				-        article_processed = article_limit(article_processed,limit_words=30000)
			
 
				+        article_processed = article_limit(article_processed, limit_words=30000)
			
 
				+        # 把每个附件识别对应的html放回原来出现的位置
			
 
				+        article_processed = attachment_filelink(article_processed)
			
 
				+
			
 
				         article_processed = get_preprocessed_outline(article_processed)
			
 
				         # print('article_processed')
			
 
				         article_processed = tableToText(article_processed)
			
@@ -1919,6 +1967,7 @@ def get_preprocessed_article(articles,cost_time = dict(),useselffool=True):
 
				         article_processed = article_processed.replace('．','.') # 2021/12/01 修正OCR识别PDF小数点错误问题
			
 
				         article_processed = article_processed.replace('报价限价', '招标限价') #2021/12/17 由于报价限价预测为中投标金额所以修改
			
 
				         article_processed = article_processed.replace('成交工程价款', '成交工程价')  # 2021/12/21 修正为中标价
			
 
				+        article_processed = re.sub('任务(?=编号[:：])', '项目',article_processed)  # 2022/08/10 修正为项目编号
			
 
				         article_processed = article_processed.replace('招标（建设）单位', '招标单位')  #2022/8/10 修正预测不到表达
			
 
				         article_processed = re.sub('(招标|采购)人(概况|信息)[，。]', '采购人信息：', article_processed)  # 2022/8/10统一表达
			
 
				         # 修复OCR金额中“，”、“。”识别错误
			
@@ -2096,16 +2145,23 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
 
				             #限流执行
			
 
				             key_nerToken = "nerToken"
			
 
				             start_time = time.time()
			
 
				-            tokens_all = getTokens(sentences,useselffool=useselffool)
			
 
				+            # tokens_all = getTokens(sentences,useselffool=useselffool)
			
 
				+            tokens_all = getTokens([re.sub("##attachment_begin##|##attachment_end##","",_sen) for _sen in sentences],useselffool=useselffool)
			
 
				             if key_nerToken not in cost_time:
			
 
				                 cost_time[key_nerToken] = 0
			
 
				             cost_time[key_nerToken] += round(time.time()-start_time,2)
			
 
				 
			
 
				             in_attachment = False
			
 
				             for sentence_index in range(len(sentences)):
			
 
				-                if sentence_index == attachment_begin_index:
			
 
				-                    in_attachment = True
			
 
				                 sentence_text = sentences[sentence_index]
			
 
				+                if re.search("##attachment_begin##",sentence_text):
			
 
				+                    in_attachment = True
			
 
				+                    sentence_text = re.sub("##attachment_begin##","",sentence_text)
			
 
				+                elif re.search("##attachment_end##",sentence_text):
			
 
				+                    in_attachment = False
			
 
				+                    sentence_text = re.sub("##attachment_end##", "", sentence_text)
			
 
				+                if sentence_index >= attachment_begin_index and attachment_begin_index!=-1:
			
 
				+                    in_attachment = True
			
 
				                 tokens = tokens_all[sentence_index]
			
 
				 
			
 
				                 #pos_tag = pos_all[sentence_index]
			
--- a/BiddingKG/dl/interface/getAttributes.py
+++ b/BiddingKG/dl/interface/getAttributes.py
@@ -1427,6 +1427,11 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         relation_list.extend(relationExtraction_model.predict(_text_data,_pre_data))
			
 
				                     temp_data = []
			
 
				             start = start + maxlen - 120
			
 
				+        if temp_data:
			
 
				+            deal_data += len(temp_data)
			
 
				+            if deal_data <= 4:
			
 
				+                for _text_data, _pre_data in temp_data:
			
 
				+                    relation_list.extend(relationExtraction_model.predict(_text_data, _pre_data))
			
 
				         # print("预测数据：",len(temp_data))
			
 
				         # 去重结果
			
 
				         relation_list = list(set(relation_list))
			
@@ -1517,6 +1522,53 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 PackDict["Project"]["roleList"][i].linklist.append((combo[0].entity_text,combo[1].entity_text))
			
 
				                                 break
			
 
				                 # print(3,combo[0].entity_text,combo[1].entity_text)
			
 
				+
			
 
				+        # "公司——地址" 链接规则补充
			
 
				+        company_lacation_EntityList = [ent for ent in pre_entity if ent.entity_type in ['company', 'org', 'location']]
			
 
				+        company_lacation_EntityList = sorted(company_lacation_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
			
 
				+        t_match_list = []
			
 
				+        for ent_idx in range(len(company_lacation_EntityList)):
			
 
				+            entity = company_lacation_EntityList[ent_idx]
			
 
				+            if entity.entity_type in ['company', 'org']:
			
 
				+                match_nums = 0
			
 
				+                company_nums = 0  # 经过其他公司的数量
			
 
				+                location_nums = 0  # 经过地址的数量
			
 
				+                for after_index in range(ent_idx + 1, min(len(company_lacation_EntityList), ent_idx + 5)):
			
 
				+                    after_entity = company_lacation_EntityList[after_index]
			
 
				+                    if after_entity.entity_type == "location":
			
 
				+                        distance = (tokens_num_dict[after_entity.sentence_index] + after_entity.begin_index) - (
			
 
				+                                tokens_num_dict[entity.sentence_index] + entity.end_index)
			
 
				+                        location_nums += 1
			
 
				+                        if distance > 100 or location_nums >= 3:
			
 
				+                            break
			
 
				+                        sentence_distance = after_entity.sentence_index - entity.sentence_index
			
 
				+                        value = (-1 / 2 * (distance ** 2)) / 10000
			
 
				+                        if sentence_distance == 0:
			
 
				+                            if distance < 80:
			
 
				+                                t_match_list.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                                if company_nums:
			
 
				+                                    break
			
 
				+                        else:
			
 
				+                            if distance < 50:
			
 
				+                                t_match_list.append(Match(entity, after_entity, value))
			
 
				+                                match_nums += 1
			
 
				+                                if company_nums:
			
 
				+                                    break
			
 
				+                    else:
			
 
				+                        # type:company/org
			
 
				+                        company_nums += 1
			
 
				+                        if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				+                            break
			
 
				+
			
 
				+        # km算法分配求解
			
 
				+        relate_location_result = dispatch(t_match_list)
			
 
				+        relate_location_result = sorted(relate_location_result, key=lambda x: (x[0].sentence_index, x[0].begin_index))
			
 
				+        for match in relate_location_result:
			
 
				+            _company = match[0]
			
 
				+            _relation = match[1]
			
 
				+            if not _company.pointer_address:
			
 
				+                _company.pointer_address = _relation
			
 
				     # "联系人——联系电话" 链接规则补充
			
 
				     person_phone_EntityList = [ent for ent in pre_entity+ phone_entitys if ent.entity_type not in ['company','org','location']]
			
 
				     person_phone_EntityList = sorted(person_phone_EntityList, key=lambda x: (x.sentence_index, x.begin_index))
			
@@ -1836,7 +1888,7 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                             match_list2.append(Match(entity, after_entity, value))
			
 
				                                             match_nums += 1
			
 
				                             if after_entity.entity_type in ['org', 'company']:
			
 
				-                                if entity.label not in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				+                                if entity.label in [2, 3, 4] and after_entity.label in [0, 1]:
			
 
				                                     break
			
 
				                                 # 解决在‘地址’中识别出org/company的问题
			
 
				                                 # if entity.label in [0,1] and after_index==index+1 and after_entity.label not in [0,1]:
			
@@ -2075,18 +2127,17 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                         continue
			
 
				 
			
 
				     # 统一同类角色的属性
			
 
				-    if PackDict.get("Project"):
			
 
				-        for i in range(len(PackDict["Project"]["roleList"])):
			
 
				-            # if PackDict["Project"]["roleList"][i].role_name in ["tenderee","agency"]:
			
 
				+    for k in PackDict.keys():
			
 
				+        for i in range(len(PackDict[k]["roleList"])):
			
 
				             for _entity in list_entity:
			
 
				                 if _entity.entity_type in ['org','company']:
			
 
				                     is_same = False
			
 
				                     is_similar = False
			
 
				                     # entity_text相同
			
 
				-                    if _entity.entity_text==PackDict["Project"]["roleList"][i].entity_text:
			
 
				+                    if _entity.entity_text==PackDict[k]["roleList"][i].entity_text:
			
 
				                         is_same = True
			
 
				                     # entity.label为【0，1】
			
 
				-                    if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict["Project"]["roleList"][i].role_name:
			
 
				+                    if _entity.label in [0,1] and dict_role_id[str(_entity.label)]==PackDict[k]["roleList"][i].role_name:
			
 
				                         is_similar = True
			
 
				                     if is_same:
			
 
				                         linked_entitys = _entity.linked_entitys
			
@@ -2096,35 +2147,48 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
 
				                                 for _pointer_person in pointer_person:
			
 
				                                     _phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
			
 
				                                     for _p in _phone:
			
 
				-                                        if (_pointer_person.entity_text,_p) not in PackDict["Project"]["roleList"][i].linklist:
			
 
				-                                            PackDict["Project"]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
			
 
				+                                        if (_pointer_person.entity_text,_p) not in PackDict[k]["roleList"][i].linklist:
			
 
				+                                            PackDict[k]["roleList"][i].linklist.append((_pointer_person.entity_text,_p))
			
 
				                     elif is_similar:
			
 
				                         pointer_person = _entity.pointer_person if _entity.pointer_person else []
			
 
				                         for _pointer_person in pointer_person:
			
 
				                             _phone = [p.entity_text for p in _pointer_person.person_phone] if _pointer_person.person_phone else []
			
 
				                             for _p in _phone:
			
 
				-                                if (_pointer_person.entity_text, _p) not in PackDict["Project"]["roleList"][i].linklist:
			
 
				-                                    PackDict["Project"]["roleList"][i].linklist.append(
			
 
				+                                if (_pointer_person.entity_text, _p) not in PackDict[k]["roleList"][i].linklist:
			
 
				+                                    PackDict[k]["roleList"][i].linklist.append(
			
 
				                                         (_pointer_person.entity_text, _p))
			
 
				 
			
 
				     # "roleList"中联系人电话去重
			
 
				-    for i in range(len(PackDict["Project"]["roleList"])):
			
 
				-        # print(123, PackDict["Project"]["roleList"][i].linklist)
			
 
				-        # 带有联系人的电话
			
 
				-        with_person = [person_phone[1] for person_phone in PackDict["Project"]["roleList"][i].linklist if person_phone[0]]
			
 
				-        # 带有电话的联系人
			
 
				-        with_phone = [person_phone[0] for person_phone in PackDict["Project"]["roleList"][i].linklist if person_phone[1]]
			
 
				-        remove_list = []
			
 
				-        for item in PackDict["Project"]["roleList"][i].linklist:
			
 
				-            if not item[0]:
			
 
				-                if item[1] in with_person:
			
 
				-                    # 删除重复的无联系人电话
			
 
				-                    remove_list.append(item)
			
 
				-            elif not item[1]:
			
 
				-                if item[0] in with_phone:
			
 
				-                    remove_list.append(item)
			
 
				-        for _item in remove_list:
			
 
				-            PackDict["Project"]["roleList"][i].linklist.remove(_item)
			
 
				+    for k in PackDict.keys():
			
 
				+        for i in range(len(PackDict[k]["roleList"])):
			
 
				+            # 带有联系人的电话
			
 
				+            with_person = [person_phone[1] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[0]]
			
 
				+            # 带有电话的联系人
			
 
				+            with_phone = [person_phone[0] for person_phone in PackDict[k]["roleList"][i].linklist if person_phone[1]]
			
 
				+            remove_list = []
			
 
				+            for item in PackDict[k]["roleList"][i].linklist:
			
 
				+                if not item[0]:
			
 
				+                    if item[1] in with_person:
			
 
				+                        # 删除重复的无联系人电话
			
 
				+                        remove_list.append(item)
			
 
				+                elif not item[1]:
			
 
				+                    if item[0] in with_phone:
			
 
				+                        remove_list.append(item)
			
 
				+            for _item in remove_list:
			
 
				+                PackDict[k]["roleList"][i].linklist.remove(_item)
			
 
				+
			
 
				+    # PackDict更新company/org地址
			
 
				+    for ent in pre_entity:
			
 
				+        if ent.entity_type in ['company','org']:
			
 
				+            if ent.pointer_address:
			
 
				+                for k in PackDict.keys():
			
 
				+                    for i in range(len(PackDict[k]["roleList"])):
			
 
				+                        if PackDict[k]["roleList"][i].entity_text == ent.entity_text:
			
 
				+                            if not PackDict[k]["roleList"][i].address:
			
 
				+                                PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				+                            else:
			
 
				+                                if len(ent.pointer_address.entity_text) > len(PackDict[k]["roleList"][i].address):
			
 
				+                                    PackDict[k]["roleList"][i].address = ent.pointer_address.entity_text
			
 
				 
			
 
				     # 联系人——电子邮箱链接
			
 
				     temporary_list3 = [entity for entity in list_entity if entity.entity_type=='email' or (entity.entity_type=='person' and entity.label in [1,2,3])]
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -1519,6 +1519,7 @@ class TendereeRuleRecall():
 
				                                         "询价(机构|企业)|联系(人|方式)，?(单位|公司)(名称)?|联系(人|方式)，名称)[:：][^。；，]{,5}$")
			
 
				 
			
 
				         self.tenderee_right = re.compile("^[^。；：:]{,5}[(（](以?下简?称)?，?[，\"“]*[我本][\u4e00-\u9fa5]{1,2}[，\"”]*[)）]|"
			
 
				+                                         "^[\(（][^。；：:\)）]{,5}称(?:招标|采购)(?:人|单位)|"
			
 
				                                         "^[^。；：:]{,10}[对就][^。；，]+，?[^。；，]{,20}进行[^。；，]*(采购|询比?价|遴选|招投?标|征集)|"
			
 
				                                          "^[^。；：:]{,10}关于[^。；，]+，?[^。；，]{,20}的[^。；，]{,20}公告|"
			
 
				                                          "^[^。；，：:]{,10}的[^。；，]+，?[^。；，]{,20}正在[^。；，]{,5}进行|"
			
@@ -1537,8 +1538,8 @@ class TendereeRuleRecall():
 
				                                 "[）)]?(信息[，：])?((公司|单位)?名称)?([(（](全称|盖章)[）)])?(是|为|：|:)+)(?P<unrecognized>[^，。：:；]+)[，。；：:]")
			
 
				         # 未识别实体尾部判断
			
 
				         self.unrecognized_end1 = re.compile(
			
 
				-            "^[\u4e00-\u9fa5]{2,}?(?:公司|医院|学校|学院|大学|中学|小学|幼儿园|政府|指挥部|办公室|项目部|业主大会|监狱|教育局|委员会|研究所|招标办|采购部|办事处|水利局|公墓|中心)")
			
 
				-        self.unrecognized_end2 = re.compile("^[\u4e00-\u9fa5]{4,}(?:署|局|厅|处|室|科|部|站|所|股|行)")
			
 
				+            "^[\u4e00-\u9fa5]{2,}?(?:公司|医院|学校|学院|大学|中学|小学|幼儿园|政府|指挥部|办公室|项目部|业主大会|监狱|教育局|委员会|研究所|招标办|采购部|办事处|水利局|公墓|中心|联合社|合作社)")
			
 
				+        self.unrecognized_end2 = re.compile("^[\u4e00-\u9fa5]{4,}(?:署|局|厅|处|室|科|部|站|所|股|行|园)")
			
 
				 
			
 
				     def predict(self, list_articles,list_sentences, list_entitys, list_codenames):
			
 
				         # tenderee_notfound = True
			
--- a/BiddingKG/dl/table_head/models/model.py
+++ b/BiddingKG/dl/table_head/models/model.py
@@ -1,10 +1,9 @@
 
				 import sys
			
 
				 import os
			
 
				 import numpy as np
			
 
				+sys.path.append(os.path.abspath(os.path.dirname(__file__)))
			
 
				 from keras.layers import Lambda, Dense, Reshape, Bidirectional, LSTM, Conv2D, BatchNormalization, LeakyReLU, Masking
			
 
				 from keras.preprocessing.sequence import pad_sequences
			
 
				-sys.path.append(os.path.dirname(__file__))
			
 
				-
			
 
				 from models.layer_utils import BatchReshape1, BatchReshape2, MyPadding, MySplit, BatchReshape3, \
			
 
				     BatchReshape4, BatchReshape5, BatchReshape6
			
 
				 from keras import layers, models, Sequential
			
@@ -70,7 +69,7 @@ def model_1(input_shape, output_shape):
 
				     model = models.Model(inputs=[input_1, input_2, input_3, input_4, input_5, input_6],
			
 
				                          outputs=output)
			
 
				 
			
 
				-    model.summary()
			
 
				+    # model.summary()
			
 
				     return model
			
 
				 
			
 
				 
			
--- a/BiddingKG/dl/table_head/post_process.py
+++ b/BiddingKG/dl/table_head/post_process.py
@@ -1,7 +1,8 @@
 
				 
			
 
				 
			
 
				-def table_post_process(table_text_list, predict_result, threshold=0.5):
			
 
				-    predict_result = predict_result.tolist()
			
 
				+def table_post_process(table_text_list, predict_result, threshold=0.5, is_list=False):
			
 
				+    if not is_list:
			
 
				+        predict_result = predict_result.tolist()
			
 
				     predict_list = []
			
 
				     for i in range(0, len(predict_result)):
			
 
				         predict = predict_result[i][0]
			
--- a/BiddingKG/dl/table_head/pre_process.py
+++ b/BiddingKG/dl/table_head/pre_process.py
@@ -440,24 +440,22 @@ def my_data_loader(data_list, data_label_list, batch_size, is_train=True):
 
				                   {'output': Y}
			
 
				 
			
 
				     else:
			
 
				-        while True:
			
 
				-            new_data_list = []
			
 
				-            for j in range(batch_size):
			
 
				-                if i >= data_num:
			
 
				-                    i = 0
			
 
				-
			
 
				-                # 中文字符映射为Embedding
			
 
				-                data = data_list[i]
			
 
				-                data = embedding_word(data, output_shape)
			
 
				-                if data.shape == output_shape:
			
 
				-                    new_data_list.append(data)
			
 
				-                i += 1
			
 
				+        new_data_list = []
			
 
				+        for j in range(batch_size):
			
 
				+            if i >= data_num:
			
 
				+                i = 0
			
 
				+            # 中文字符映射为Embedding
			
 
				+            data = data_list[i]
			
 
				+            data = embedding_word(data, output_shape)
			
 
				+            if data.shape == output_shape:
			
 
				+                new_data_list.append(data)
			
 
				+            i += 1
			
 
				 
			
 
				-            new_data_list = np.array(new_data_list)
			
 
				-            X = new_data_list
			
 
				-            X = np.transpose(X, (1, 0, 2, 3))
			
 
				-            yield {'input_1': X[0], 'input_2': X[1], 'input_3': X[2],
			
 
				-                   'input_4': X[3], 'input_5': X[4], 'input_6': X[5], }
			
 
				+        new_data_list = np.array(new_data_list)
			
 
				+        X = new_data_list
			
 
				+        X = np.transpose(X, (1, 0, 2, 3))
			
 
				+        yield {'input_1': X[0], 'input_2': X[1], 'input_3': X[2],
			
 
				+               'input_4': X[3], 'input_5': X[4], 'input_6': X[5], }
			
 
				 
			
 
				 
			
 
				 def my_data_loader_2(table_list, table_label_list, batch_size, is_train=True):
			
--- a/BiddingKG/dl/table_head/predict.py
+++ b/BiddingKG/dl/table_head/predict.py
--- a/BiddingKG/readme/start.md
+++ b/BiddingKG/readme/start.md
@@ -10,7 +10,7 @@ cd /data/python
 
				 #关闭接口
			
 
				 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
			
 
				 #启动接口
			
 
				-nohup gunicorn -w 11 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				+nohup /data/anaconda3/envs/py37/bin/gunicorn -w 15 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				 
			
 
				 #19022启动要素提取接口
			
 
				 #激活环境
			
@@ -20,4 +20,4 @@ cd /data/python
 
				 #关闭接口
			
 
				 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
			
 
				 #启动接口
			
 
				-nohup gunicorn -w 10 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				+nohup /data/anaconda3/envs/py37/bin/gunicorn -w 6 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &