浏览代码

微调包、channel、角色逻辑; 修复乃君非表格产品属性提取bug,美日欧元提取bug

lsm 2 年之前
父节点
当前提交
89c06b9917
共有 4 个文件被更改,包括 111 次插入和 112 次删除
  1. 1 0
      .gitignore
  2. 3 12
      BiddingKG/dl/interface/Preprocessing.py
  3. 12 9
      BiddingKG/dl/interface/getAttributes.py
  4. 95 91
      BiddingKG/dl/interface/predictor.py

+ 1 - 0
.gitignore

@@ -9,6 +9,7 @@
 /BiddingKG/dl/product/data/
 /BiddingKG/dl/channel/data/
 /BiddingKG/dl_dev/test
+/BiddingKG/dl/test
 node_modules
 /BiddingKG/dl/table_head/train_data/
 /BiddingKG/dl/table_head/check_user_result/

+ 3 - 12
BiddingKG/dl/interface/Preprocessing.py

@@ -2473,9 +2473,9 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
             #                       "front_m":"((?P<text_front_m>(?:[(\(]?\s*(?P<unit_front_m_before>[万元]+)\s*[)\)])\s*[,,::]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千万亿元]*)())",
             #                       "behind_m":"(()()(?P<money_behind_m>[0-9][\d,,]*(?:\.\d+)?(?:,?)[百千万亿]*)[\((]?(?P<unit_behind_m>[万元]+(?P<filter_unit3>[台个只]*))[\))]?)"}
             list_money_pattern = {"cn":"(()()(?P<money_cn>[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,})())",
-                                  "key_word": "((?P<text_key_word>(?:[¥¥]+,?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,,(\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?[美日欧]?元?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[)\)]?)\s*[,,::]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[::])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]{,1})(?:[(\(]?(?P<filter_>[%])*\s*(单位[::])?(?P<unit_key_word_behind>[万亿]?[美日欧]?元?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[)\)]?))",
-                                  "front_m":"((?P<text_front_m>(?:[(\(]?\s*(?P<unit_front_m_before>[万亿]?[美日欧]?元)\s*[)\)])\s*[,,::]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]*)())",
-                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]*)(人民币)?[\((]?(?P<unit_behind_m>[万亿]?[美日欧]?元(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\))]?)"}
+                                  "key_word": "((?P<text_key_word>(?:[¥¥]+,?|[单报标限总]价|金额|成交报?价|价格|预算(金额)?|(监理|设计|勘察)(服务)?费|标的基本情况|CNY|成交结果|成交额|中标额)(?:[,,(\(]*\s*(人民币)?(?P<unit_key_word_before>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit2>[台个只吨]*))\s*(/?费率)?(人民币)?[)\)]?)\s*[,,::]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元编号时间]{,8}?))(第[123一二三]名[::])?(\d+(\*\d+%)+=)?(?P<money_key_word>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]{,1})(?:[(\(]?(?P<filter_>[%])*\s*(单位[::])?(?P<unit_key_word_behind>[万亿]?(?:[美日欧]元|元)?(?P<filter_unit1>[台只吨斤棵株页亩方条天]*))\s*[)\)]?))",
+                                  "front_m":"((?P<text_front_m>(?:[(\(]?\s*(?P<unit_front_m_before>[万亿]?(?:[美日欧]元|元))\s*[)\)])\s*[,,::]*(\s*[^壹贰叁肆伍陆柒捌玖拾佰仟萬億分万元]{,7}?))(?P<money_front_m>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]*)())",
+                                  "behind_m":"(()()(?P<money_behind_m>[0-9][\d,]*(?:\.\d+)?(?:,?)[百千]*)(人民币)?[\((]?(?P<unit_behind_m>[万亿]?(?:[美日欧]元|元)(?P<filter_unit3>[台个只吨斤棵株页亩方条米]*))[\))]?)"}
             # 2021/7/19 调整金额,单位提取正则,修复部分金额因为单位提取失败被过滤问题。
 
             pattern_money = re.compile("%s|%s|%s|%s"%(list_money_pattern["cn"],list_money_pattern["key_word"],list_money_pattern["behind_m"],list_money_pattern["front_m"]))
@@ -2681,15 +2681,6 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
                         # print('过滤掉金额 notSure and unit=="" and float(entity_text)>100*10000:', entity_text, unit)
                         continue
 
-                    if re.search("美元",_match.group()):
-                        Dollar2RMB = 7
-                        entity_text = str(float(entity_text)*Dollar2RMB)
-                    elif re.search("日元",_match.group()):
-                        JPyen2RMB = 0.05
-                        entity_text = str(float(entity_text)*JPyen2RMB)
-                    elif re.search("欧元",_match.group()):
-                        Euro2RMB = 6.9
-                        entity_text = str(float(entity_text)*Euro2RMB)
 
                     _exists = False
                     for item in list_sentence_entitys:

+ 12 - 9
BiddingKG/dl/interface/getAttributes.py

@@ -910,6 +910,7 @@ def getPackagesFromArticle(list_sentence, list_entity):
 
     def get_package():
         PackageList_scope = []
+        True_package = set()
         for i in range(len(list_sentence)):
             PackageList_item = []
             PackageList_item_scope = []
@@ -923,6 +924,7 @@ def getPackagesFromArticle(list_sentence, list_entity):
                 if re.search('承包|XX|xx', iter.group(0)) or re.search('[a-zA-Z0-9一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ-]{6,}', iter.group(0)):
                     continue
                 temp_package_number = uniform_package_name(iter.group(0))
+                True_package.add(temp_package_number)
                 PackageList_item.append({"name": temp_package_number, "sentence_index": list_sentence[i].sentence_index,
                                          "offsetWords_begin": changeIndexFromWordToWords(tokens, iter.span()[0]),
                                          "offsetWord_begin": iter.span()[0], "offsetWord_end": iter.span()[1]})
@@ -942,11 +944,12 @@ def getPackagesFromArticle(list_sentence, list_entity):
             PackageList_item_scope.sort(key=lambda x: x["offsetWord_begin"])
             PackageList_scope = PackageList_scope + PackageList_item_scope
             PackageList_item.sort(key=lambda x: x["sentence_index"])
-        return PackageList_scope
+        return PackageList_scope, True_package
 
     def get_win_project():
         '''获取多个项目多个中标人的项目'''
         PackageList_scope = []
+        True_package = set()
         # 2020/11/23 大网站规则 调整
         if len(PackageSet) == 0 and len(
                 set([it.entity_text for it in list_entity if
@@ -968,6 +971,7 @@ def getPackagesFromArticle(list_sentence, list_entity):
                     # print('新正则采购包名补充',temp_package_number)
                     if re.search(re_digital, temp_package_number):
                         temp_package_number = str(int(temp_package_number))
+                    True_package.add(temp_package_number)
                     PackageList_item.append(
                         {"name": temp_package_number, "sentence_index": list_sentence[i].sentence_index,
                          "offsetWords_begin": changeIndexFromWordToWords(tokens, iter.span()[0]),
@@ -989,7 +993,7 @@ def getPackagesFromArticle(list_sentence, list_entity):
                 PackageList_item_scope.sort(key=lambda x: x["offsetWord_begin"])
                 PackageList_scope = PackageList_scope + PackageList_item_scope
                 PackageList_item.sort(key=lambda x: x["sentence_index"])
-        return PackageList_scope
+        return PackageList_scope, True_package
 
     def get_package_scope(PackageList_scope):
         PackageList = []
@@ -1058,13 +1062,12 @@ def getPackagesFromArticle(list_sentence, list_entity):
                     PackageList.append(copy_pack)
         return PackageList
 
-    PackageList_scope = get_package()
-    if len(PackageList_scope) > 0: # 找到标段
-        PackageList = get_package_scope(PackageList_scope)
-    else:
-        PackageList_scope = get_win_project()
-        if len(PackageList_scope) > 1: # 同时包含多标段及多中标人的
-            PackageList = get_package_scope(PackageList_scope)
+    PackageList_scope, True_package = get_package()
+
+    PackageList_scope2, True_package2 = get_win_project()
+    if len(True_package2) > 2: # 同时包含多标段及多中标人的
+        PackageList_scope = PackageList_scope + PackageList_scope2
+    PackageList = get_package_scope(PackageList_scope)
 
     return PackageList, PackageSet, dict_packageCode
 

+ 95 - 91
BiddingKG/dl/interface/predictor.py

@@ -685,7 +685,7 @@ class PREMPredict():
                 elif re.search('尊敬的供应商:', text):
                     label = 0
                     values[label] = 0.501
-                elif re.search('[^\w]中标候选人', text) and re.search('[1一]', text) == None:  #修复第4以上的预测错为中标人
+                elif re.search('[^\w]中标候选人', text[:15]) and re.search('[1一]', text[:15]) == None:  #修复第4以上的预测错为中标人
                     label = 5
                     values[label] = 0.5
             elif re.search('是否中标:是,供应商', text) and label == 5:
@@ -1156,8 +1156,8 @@ class RoleRulePredictor():
         self.pattern_winTenderer_left = "(?P<winTenderer_left>(乙|承做|施工|供货|承包|承建|承租|竞得|受让|签约)(候选)?(人|单位|机构|供应商|方|公司|厂商|商)[::是为]+$|" \
                                         "(选定单位|指定的中介服务机构|实施主体|承制单位|供方)[::是为]+$|((评审结果|名次|排名|中标结果)[::]*第?[一1]名?)[::是为]+$|" \
                                         "单一来源(采购)?(供应商|供货商|服务商|方式向)$|((中标|成交)(结果|信息))[::是为]+$|(供应|供货|承销|承保|承包|承接|服务|实施|合作)(机构|单位|商|方)(名称)?[::是为]+$)"
-        self.pattern_winTenderer_left_w0 = "(?P<winTenderer_left_w1>(,|。|^)((中标(投标)?|中选|中价|成交)(候选)?(人|单位|机构|供应商|客户|方|公司|厂商|商)|第?[一1]名)(名称)?[,,]?([((]按综合排名排序[))])?[::,,]$)" #解决表头识别不到加逗号情况,需前面为,。空
-        self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交|入选)(候选)?(人|单位|机构|供应商|客户|方|公司|厂商|商)(名称)?([((]按综合排名排序[))])?[::是为]+$)" #取消逗号 并拒绝执行改进计划的供应商,华新水泥将可能终止与其合作关系
+        self.pattern_winTenderer_left_w0 = "(?P<winTenderer_left_w1>(,|。|^)((中标(投标)?|中选|中价|成交)(人|单位|机构|供应商|客户|方|公司|厂商|商)|第?[一1]名)(名称)?[,,]?([((]按综合排名排序[))])?[::,,]$)" #解决表头识别不到加逗号情况,需前面为,。空
+        self.pattern_winTenderer_left_w1 = "(?P<winTenderer_left_w1>(中标|中选|中价|成交|入选)(人|单位|机构|供应商|客户|方|公司|厂商|商)(名称)?([((]按综合排名排序[))])?[::是为]+$)" #取消逗号 并拒绝执行改进计划的供应商,华新水泥将可能终止与其合作关系  # 中标候选人不能作为中标
         # self.pattern_winTenderer_center = "(?P<winTenderer_center>第[一1].{,20}[是为]((中标|中选|中价|成交|施工)(人|单位|机构|供应商|公司)|供应商)[::是为])"
         # self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为\(]((采购(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))))|^(报价|价格)最低,确定为本项目成交供应商)"
         self.pattern_winTenderer_right = "(?P<winTenderer_right>(^[是为]((采购|中标)(供应商|供货商|服务商)|(第[一1]|预)?(拟?(中标|中选|中价|成交)(候选)?(人|单位|机构|供应商|公司|厂商)))|" \
@@ -2580,7 +2580,7 @@ class ProductAttributesPredictor():
                         if order_begin_year>=2050 or order_end_year>=2050:
                             order_begin = order_end = ""
                     # print(product,demand,budget,order_begin)
-                    if product!= "" and demand != "" and budget!="" and order_begin != "":
+                    if product!= "" and demand != "" and budget!="" and order_begin != "" and len(budget)<15: # 限制金额小于15位数的才要
                         link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
                                 'order_begin': order_begin, 'order_end': order_end}
                         if link not in demand_link:
@@ -2607,7 +2607,7 @@ class ProductAttributesPredictor():
                 if len(set([re.sub('[::]','',td) for td in tds]) & self.header_set) > len(tds) * 0.2:
                 # if len(set(tds) & self.header_set) > len(tds) * 0.2:
                     header_dic, found_header, header_list, header_list2 = self.find_header(tds, self.p1, self.p2)
-                    if found_header:
+                    if found_header and len(headers)<1:  # 只保留出现的第一个表头
                         headers.append('_'.join(header_list))
                         headers_demand.append('_'.join(header_list2))
                         header_colnum = len(tds)
@@ -2653,7 +2653,7 @@ class ProductAttributesPredictor():
                                 brand = ""
                         if id5 != "":
                             if re.search('\w', tds[id5]):
-                                specs = tds[id5]
+                                specs = tds[id5][:500] # 限制最多500字
                             else:
                                 specs = ""
                         if id6 != "":
@@ -2676,6 +2676,9 @@ class ProductAttributesPredictor():
                                 order_time = tds[id8].strip()
                                 order_begin, order_end = self.fix_time(order_time, html, page_time)
                         if quantity != "" or unitPrice != "" or brand != "" or specs != "":
+                            if len(unitPrice) > 15 or len(product)>100:  # 单价大于15位数或 产品名称长于100字
+                                i += 1
+                                continue
                             link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
                                                       'brand': brand[:50], 'specs':specs}
                             if link not in product_link:
@@ -2683,7 +2686,7 @@ class ProductAttributesPredictor():
                                 mat = re.match('([0-9.,]+)[((]?\w{,3}[))]?$', link['quantity'])
                                 if link['unitPrice'] != "" and mat:
                                     try:
-                                        total_product_money += float(link['unitPrice'])*float(mat.group(1).replace(',', ''))
+                                        total_product_money += float(link['unitPrice'])*float(mat.group(1).replace(',', '')) if float(mat.group(1).replace(',', ''))<50000 else 0
                                     except:
                                         log('产品属性单价数量相乘出错, 单价: %s, 数量: %s'%(link['unitPrice'], link['quantity']))
                         if order_begin != "" and order_end != "":
@@ -2804,88 +2807,88 @@ class ProductAttributesPredictor():
                             headers.append('_'.join(header_list))
                             headers_demand.append('_'.join(header_list2))
                             header_col.append('_'.join(tmp_head_list))
-                        # print('header_dic: ',header_dic)
-                        id1 = header_dic.get('名称', "")
-                        id2 = header_dic.get('数量', "")
-                        id3 = header_dic.get('单价', "")
-                        id4 = header_dic.get('品牌', "")
-                        id5 = header_dic.get('规格', "")
-
-                        id6 = header_dic.get('需求', "")
-                        id7 = header_dic.get('预算', "")
-                        id8 = header_dic.get('时间', "")
-                        if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
-                                re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
-                            product = deal_list[id1]
-                            if id2 != "":
-                                if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
-                                    quantity = deal_list[id2]
-                                else:
-                                    quantity = ""
-                            if id3 != "":
-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
-                                    _unitPrice = deal_list[id3]
-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
-                                    if re_price:
-                                        _unitPrice = re_price[0]
-                                        if '万元' in header_list[2] and '万' not in _unitPrice:
-                                            _unitPrice += '万元'
-                                        unitPrice = str(getUnifyMoney(_unitPrice))
-                            if id4 != "":
-                                if re.search('\w', deal_list[id4]):
-                                    brand = deal_list[id4]
-                                else:
-                                    brand = ""
-                            if id5 != "":
-                                if re.search('\w', deal_list[id5]):
-                                    specs = deal_list[id5]
-                                else:
-                                    specs = ""
-                            if id6 != "":
-                                if re.search('\w', deal_list[id6]):
-                                    demand = deal_list[id6]
-                                else:
-                                    demand = ""
-                            if id7 != "":
-                                if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
-                                    _budget = deal_list[id7]
-                                    re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
-                                    if re_price:
-                                        _budget = re_price[0]
-                                        if '万元' in header_list2[2] and '万' not in _budget:
-                                            _budget += '万元'
-                                        budget = str(getUnifyMoney(_budget))
-
-                            if id8 != "":
-                                if re.search('\w', deal_list[id8]):
-                                    order_time = deal_list[id8].strip()
-                                    order_begin, order_end = self.fix_time(order_time, html, page_time)
-                            # print(quantity,unitPrice,brand,specs)
-                            if quantity != "" or unitPrice != "" or brand != "" or specs != "":
-                                link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
-                                        'brand': brand[:50], 'specs': specs}
-                                if link not in product_link:
-                                    product_link.append(link)
-                                    # mat = re.match('([0-9.,]+)[((]?\w{,3}[))]?$', link['quantity'])
-                                    # if link['unitPrice'] != "" and mat:
-                                    #     try:
-                                    #         total_product_money += float(link['unitPrice']) * float(
-                                    #             mat.group(1).replace(',', ''))
-                                    #     except:
-                                    #         log('产品属性单价数量相乘出错, 单价: %s, 数量: %s' % (
-                                    #         link['unitPrice'], link['quantity']))
-                            if order_begin != "" and order_end != "":
-                                order_begin_year = int(order_begin.split("-")[0])
-                                order_end_year = int(order_end.split("-")[0])
-                                # 限制附件错误识别时间
-                                if order_begin_year >= 2050 or order_end_year >= 2050:
-                                    order_begin = order_end = ""
-                            # print(budget, order_time)
-                            if budget != "" and order_time != "":
-                                link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
-                                        'order_begin': order_begin, 'order_end': order_end}
-                                if link not in demand_link:
-                                    demand_link.append(link)
+                            # print('header_dic: ',header_dic)
+                            id1 = header_dic.get('名称', "")
+                            id2 = header_dic.get('数量', "")
+                            id3 = header_dic.get('单价', "")
+                            id4 = header_dic.get('品牌', "")
+                            id5 = header_dic.get('规格', "")
+
+                            id6 = header_dic.get('需求', "")
+                            id7 = header_dic.get('预算', "")
+                            id8 = header_dic.get('时间', "")
+                            if re.search('[a-zA-Z\u4e00-\u9fa5]', deal_list[id1]) and deal_list[id1] not in self.header_set and \
+                                    re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', deal_list[id1]) == None:
+                                product = deal_list[id1]
+                                if id2 != "":
+                                    if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', deal_list[id2]):
+                                        quantity = deal_list[id2]
+                                    else:
+                                        quantity = ""
+                                if id3 != "":
+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id3]):
+                                        _unitPrice = deal_list[id3]
+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_unitPrice)
+                                        if re_price:
+                                            _unitPrice = re_price[0]
+                                            if '万元' in header_list[2] and '万' not in _unitPrice:
+                                                _unitPrice += '万元'
+                                            unitPrice = str(getUnifyMoney(_unitPrice))
+                                if id4 != "":
+                                    if re.search('\w', deal_list[id4]):
+                                        brand = deal_list[id4]
+                                    else:
+                                        brand = ""
+                                if id5 != "":
+                                    if re.search('\w', deal_list[id5]):
+                                        specs = deal_list[id5]
+                                    else:
+                                        specs = ""
+                                if id6 != "":
+                                    if re.search('\w', deal_list[id6]):
+                                        demand = deal_list[id6]
+                                    else:
+                                        demand = ""
+                                if id7 != "":
+                                    if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', deal_list[id7]):
+                                        _budget = deal_list[id7]
+                                        re_price = re.findall("[零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]{3,}|\d[\d,]*(?:\.\d+)?万?",_budget)
+                                        if re_price:
+                                            _budget = re_price[0]
+                                            if '万元' in header_list2[2] and '万' not in _budget:
+                                                _budget += '万元'
+                                            budget = str(getUnifyMoney(_budget))
+
+                                if id8 != "":
+                                    if re.search('\w', deal_list[id8]):
+                                        order_time = deal_list[id8].strip()
+                                        order_begin, order_end = self.fix_time(order_time, html, page_time)
+                                # print(quantity,unitPrice,brand,specs)
+                                if quantity != "" or unitPrice != "" or brand != "" or specs != "":
+                                    link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
+                                            'brand': brand[:50], 'specs': specs}
+                                    if link not in product_link:
+                                        product_link.append(link)
+                                        # mat = re.match('([0-9.,]+)[((]?\w{,3}[))]?$', link['quantity'])
+                                        # if link['unitPrice'] != "" and mat:
+                                        #     try:
+                                        #         total_product_money += float(link['unitPrice']) * float(
+                                        #             mat.group(1).replace(',', ''))
+                                        #     except:
+                                        #         log('产品属性单价数量相乘出错, 单价: %s, 数量: %s' % (
+                                        #         link['unitPrice'], link['quantity']))
+                                if order_begin != "" and order_end != "":
+                                    order_begin_year = int(order_begin.split("-")[0])
+                                    order_end_year = int(order_end.split("-")[0])
+                                    # 限制附件错误识别时间
+                                    if order_begin_year >= 2050 or order_end_year >= 2050:
+                                        order_begin = order_end = ""
+                                # print(budget, order_time)
+                                if budget != "" and order_time != "":
+                                    link = {'project_name': product, 'product': [], 'demand': demand, 'budget': budget,
+                                            'order_begin': order_begin, 'order_end': order_end}
+                                    if link not in demand_link:
+                                        demand_link.append(link)
 
                     if len(product_link) > 0:
                         attr_dic = {'product_attrs': {'data': product_link, 'header': list(set(headers)), 'header_col': list(set(header_col))}}
@@ -2958,7 +2961,7 @@ class DocChannel():
       self.life_dic = {
           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示',
           '招标预告': '(预计|计划)(采购|招标)(时间|日期)|采购(计划编号|需求方案|预告|预案)|(预|需求)公示|需求(方案|信息|论证|公告|公示)',
-          '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|\w{,5}材料)|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格要求|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)',
+          '招标公告': '(采购|招标|竞选|报名)条件|报名(时间|流程|方法|要求|\w{,5}材料)[:\s]|参加竞价采购交易资格|(申请人|投标人|供应商|报价人|参选人)的?资格要求|获取(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答)文件的?(获取|领取)',
           '资审结果': '资审及业绩公示|资审结果及业绩|资格后审情况报告|资格(后审|预审|审查)结果(公告|公示)|(预审|审查)工作已经?结束|未通过原因', #|资格
           '招标答疑': '现澄清(为|如下)|答疑补遗|澄清内容如下|第[0-9一二三四五]次澄清|答疑澄清|(最高(投标)?限价|控制价|拦标价)公示',  # |异议的回复
           '公告变更': '第[\d一二]次变更|(更正|变更)(公告|公示|信息|内容|事项|原因|理由|日期|时间|如下)|原公告((主要)?(信息|内容)|发布时间)|(变更|更正)[前后]内容|现?在?(变更|更正|修改|更改)(内容)?为|(公告|如下|信息|内容|事项|结果|文件|发布|时间|日期)(更正|变更)',
@@ -2970,7 +2973,8 @@ class DocChannel():
       # |确定成交供应商[:,\s]
           '合同公告': '合同(公告|公示|信息|内容)|合同(编号|名称|主体|基本情况|签订日期)|(供应商乙方|乙方供应商):|合同总?金额',
           '废标公告': '(终止|中止|废标|流标|失败|作废|异常|撤销)(结果)?(公告|公示|招标|采购|竞价)|(谈判结果为|结果类型):?废标|((本|该)(项目|标段|合同|合同包|采购包|次)\w{,5})((失败|终止|流标|废标)|予以废标|(按|做|作)?(流标|废标)处理)|(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|竞谈|应答|项目)(终止|中止|废标|流标|失败|作废|异常|撤销)',
-          '废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因:|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无(废标)|成交情况:\s*[流废]标'
+          '废标公告2': '(无效|中止|终止|废标|流标|失败|作废|异常|撤销)的?(原因|理由)|本项目因故取消|本(项目|次)(公开)?\w{2}失败|已终止\s*原因:|(人|人数|供应商|单位)(不足|未达\w{,3}数量)|已终止|不足[3三]家|无(废标)|成交情况:\s*[流废]标',
+          '废标公告neg': '超过此报价将作为[废流]标处理|否则按[废流]标处理|终止规则:|视为流标'
       }
       self.title_life_dic = {
           '采购意向': '采购意向|招标意向|选取意向|意向公告|意向公示|意向公开',