Преглед изворног кода

补充规格提取中标人实体金额;优化地区匹配,补充国家编码;乃君时间修改

lsm пре 1 недеља
родитељ
комит
82e3dd96f1

+ 2 - 0
BiddingKG/dl/common/Utils.py

@@ -1378,6 +1378,8 @@ def get_money_entity(sentence_text, found_yeji=0, in_attachment=False):
             if re.search('[%%‰折]|费率|下浮率', text_beforeMoney) and float(entity_text)<1000: # 过滤掉可能是费率的金额
                 # print('过滤掉可能是费率的金额')
                 continue
+            if notes == '单价' and float(entity_text) > 10000 and re.search('单价((万元))?:', sentence_text[max(0, _match.start()-3):_match.end()])==None: #644190779 中标价(费率或单价等:1940.899509万元,
+                notes = ""
             money_list.append((entity_text, start_index, end_index, unit, notes))
     return money_list, found_yeji
 

BIN
BiddingKG/dl/interface/district_tuple.pkl


+ 2 - 2
BiddingKG/dl/interface/extract.py

@@ -536,7 +536,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
-    version_date = {'version_date': '2025-07-03'}
+    version_date = {'version_date': '2025-07-11'}
     data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
 
     if original_docchannel == 302:
@@ -616,7 +616,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     text_main, text_attn = 0, 0
     for sentence in list_sentences[0]:
         if sentence.in_attachment:
-            text_attn += len(sentence.sentence_text)
+            text_attn += len(re.sub("##attachment##[,。]?", "", sentence.sentence_text))
         else:
             text_main += len(sentence.sentence_text)
     data_res['word_count'] = {'正文': text_main, '附件': text_attn}

+ 33 - 10
BiddingKG/dl/interface/getAttributes.py

@@ -3505,17 +3505,29 @@ def getTimeAttributes(list_entity,list_sentence,page_time):
     # time_entitys = [[_entity,my_timeFormat(_entity.entity_text,page_time)] for _entity in time_entitys]
     new_time_entitys = []
     year_list = []
-    if page_time:
-        year_list.append(page_time[:4])
     for _entity in time_entitys:
         _time_list,_year = my_timeFormat(_entity.entity_text,page_time)
+        _in_attachment = _entity.in_attachment
         if _time_list:
             new_time_entitys.append([_entity,_time_list,_year])
-            year_list.append(_year)
+            year_list.append([_year,_in_attachment])
+    get_all_time = False if False in [i[1] for i in year_list] else True
+    if page_time:
+        current_year = time.strftime("%Y",time.localtime(int(datetime.strptime(page_time, '%Y-%m-%d').timestamp())))
+        year_list.append([current_year,False])
+    else:
+        current_year = time.strftime("%Y",time.localtime())
+    if get_all_time:
+        year_list = [i[0] for i in year_list]
+    else:
+        year_list = [i[0] for i in year_list if not i[1]]
     year_list = [(y,year_list.count(y)) for y in year_list if y[:2]=='20']
     year_list.sort(key=lambda x:x[1],reverse=True)
-    most_year = year_list[0][0]
-    time_entitys = [item for item in new_time_entitys if int(item[2])-int(most_year)<=10 and int(item[2])-int(most_year)>=-1]
+    most_year = year_list[0][0] if year_list else ""
+    if most_year:
+        time_entitys = [item for item in new_time_entitys if int(item[2])-int(most_year)<=10 and int(item[2])-int(most_year)>=-1]
+    else:
+        time_entitys = new_time_entitys
 
     # print(time_entitys)
     for entity_idx in range(len(time_entitys)):
@@ -5045,7 +5057,7 @@ def update_prem(old_prem, new_prem, in_attachment=False):
     # return old_prem
 
 def rule_add_role(docid, prem, channel, content, web_source_no, nlp_enterprise):
-    def add_role(ent_name, role_type, prem):
+    def add_role(ent_name, role_type, prem, money=0):
         if 'Project' in prem:
             prem['Project']['roleList'].append(
                 {
@@ -5055,7 +5067,7 @@ def rule_add_role(docid, prem, channel, content, web_source_no, nlp_enterprise):
                         "discount_ratio": "",
                         "downward_floating_ratio": "",
                         "floating_ratio": "",
-                        "money": 0,
+                        "money": money,
                         "money_unit": ""
                     },
                     "role_name": role_type,
@@ -5079,7 +5091,7 @@ def rule_add_role(docid, prem, channel, content, web_source_no, nlp_enterprise):
                             "discount_ratio": "",
                             "downward_floating_ratio": "",
                             "floating_ratio": "",
-                            "money": 0,
+                            "money": money,
                             "money_unit": ""
                         },
                         "role_name": role_type,
@@ -5110,8 +5122,19 @@ def rule_add_role(docid, prem, channel, content, web_source_no, nlp_enterprise):
             ent_name = match.group('name')
             if re.search('测试|演示|某|\d号|\*|XX', ent_name)==None and re.search('^\w{1,5}[省市县区][\w()]{2,25}[厂店铺市场行部城室馆中心站处社会狱所园关局署段厅院队小学]((个体工商户)?|(普通合伙)?)?$',
                          ent_name):  #  or is_enterprise_exist(ent_name)
-                log('规则补充中标人角色:%s,docid:%s'%(ent_name, docid))
-                add_role(ent_name, "win_tenderer", prem)
+                b_idx = match.end()
+                idx_ = content[b_idx:].find('。')
+                e_idx = idx_ if idx_ > 0 else 200
+                text = content[b_idx:b_idx+e_idx]
+                money_list, _ = get_money_entity(text)
+                money = 0
+                for money_tup in money_list:
+                    entity_text, b, e, unit, notes = money_tup
+                    if re.search('(中标|成交|中选|投标|合同)总?(价格?|金额)', text[max(0, b-10):b]):
+                        money = float(entity_text)
+                        break
+                log('规则补充中标人角色:%s, 金额:%s,docid:%s'%(ent_name, money, docid))
+                add_role(ent_name, "win_tenderer", prem, money)
 
 
 def confirm_prem(docid, prem, channel_dic, content, is_deposit_project=False, total_tendereeMoney=0):

+ 66 - 35
BiddingKG/dl/interface/predictor.py

@@ -2930,7 +2930,7 @@ class ProductPredictor():
 class ProductAttributesPredictor():
     def __init__(self,):
         self.p0 = '(类别|类型|物类|目录|类目|分类)(名称|$)|^品名|^品类|^品目|(标项|分项|项目|计划|包组|标段|[分子]?包|子目|服务|招标|中标|成交|工程|招标内容)(名称|内容|描述)'
-        self.p1 = '(标的|维修|系统|报价构成|商品|产品|物料|物资|货物|设备|采购品|采购条目|物品|材料|印刷品?|采购|物装|配件|资产|耗材|清单|器材|仪器|器械|备件|拍卖物|标的物|物件|药品|药材|药械|货品|食品|食材|品目|^品名|气体)[\))的]?([、\w]{,4}名称|内容|描述)'
+        self.p1 = '(标的|维修|系统|报价构成|商品|产品|物料|物资|货物|设备|采购品|采购条目|物品|材料|印刷品?|采购|物装|配件|资产|耗材|清单|器材|仪器|器械|备件|拍卖物|标的物|物件|药品|药材|药械|货品|食品|食材|品目|^品名|气体)[\))的]?(名称|内容|描述)'  # [、\w]{,4} 避免提取采购人名称 等
         self.p2 = '标的|标项|项目$|商品|产品|物料|物资|货物|设备|采购品|采购条目|物品|材料|印刷品|物装|配件|资产|招标内容|耗材|清单|器材|仪器|器械|备件|拍卖物|标的物|物件|药品|药材|药械|货品|食品|食材|菜名|^品目$|^品名$|^名称|^内容$|(标项|分项|项目|计划|包组|标段|[分子]?包|子目|服务|招标|中标|成交|工程|招标内容)(名称|内容|描述)'
         # self.p1 = '(设备|货物|商品|产品|物品|货品|材料|物资|物料|物件|耗材|备件|食材|食品|品目|标的|标的物|标项|资产|拍卖物|仪器|器材|器械|药械|药品|药材|采购品?|项目|招标|工程|服务)[\))]?(名称|内容|描述)'
         # self.p2 = '设备|货物|商品|产品|物品|货品|材料|物资|物料|物件|耗材|备件|食材|食品|品目|标的|标的物|资产|拍卖物|仪器|器材|器械|药械|药品|药材|采购品|项目|品名|菜名|内容|名称'
@@ -6052,7 +6052,7 @@ class DistrictPredictor():
         text = str(text).replace('(', '(').replace(')', ')')
         text = re.sub('\d{2,4}年度?|[\d/-]{1,5}[月日]|\d+|[a-zA-Z0-9]+', ' ', text)
         text = re.sub(
-            '复合肥|海南岛|兴业银行|双河口|阳光|杭州湾|新城区|中粮屯河|老城(区|改造|更新|升级|翻新)|沙县小吃|北京时间|福田汽车|中山(大学|公园|纪念堂)|孙中山|海天水泥|阳光采购|示范县|珠江城|西九龙站|广州路北|安阳山村|电信|联通|北京现代', # 570445994 广州路北侧 预测为 广州 路北
+            '复合肥|海南岛|兴业银行|双河口|阳光|杭州湾|新城区|中粮屯河|老城(区|改造|更新|升级|翻新)|沙县小吃|北京时间|福田汽车|中山(大学|公园|纪念堂)|孙中山|海天水泥|阳光采购|示范县|珠江城|西九龙站|广州路北|安阳山村|电信|联通|北京现代|祁连山', # 570445994 广州路北侧 预测为 广州 路北
             ' ', text)  # 544151395 赤壁市老城区燃气管道老化更新改造
         text = re.sub('珠海城市', '珠海', text)  # 修复 426624023 珠海城市 预测为海城市
         text = re.sub('怒江州', '怒江傈僳族自治州', text)  # 修复 423589589  所属地域:怒江州 识别为广西 - 崇左 - 江州
@@ -6107,7 +6107,11 @@ class DistrictPredictor():
                         elif k in ['dist', 'dist1', 'dist2']:
                             if v in ['东区', '西区', '城区', '郊区', '矿区', '东至']:
                                 continue
-                            if v in full_dic['district'] and len(v) > 2:
+                            if v in ['向阳区', '宝山区', '南沙区', '和平区', '新城区', '鼓楼区', '南山区', '白云区', '朝阳区',
+                                     '江北区', '城关区', '永定区', '普陀区', '长安区', '市中区', '西安区', '通州区', '西湖区',
+                                     '龙华区', '城中区', '河东区', '桥西区', '青山区', '新华区', '铁西区', '铁东区', '海州区']: # 多个城市有的区概率降低
+                                score = 0.5
+                            elif v in full_dic['district'] and (len(v) > 2 or v.endswith('县')): # 20250709 修复 萧县 等概率过低
                                 score = 2
                             else:
                                 score = 0.5
@@ -6240,6 +6244,11 @@ class DistrictPredictor():
         final_city = ""
         prob = 0
         max_score = 0
+        code_dic = {
+            'province_code': '',
+            'city_code': '',
+            'district_code': ''
+        }
         if len(pro_ids) >= 1:
             pro_l = sorted([(k, v) for k, v in pro_ids.items()], key=lambda x: x[1], reverse=True)
             scores = [it[1] for it in pro_l]
@@ -6249,25 +6258,30 @@ class DistrictPredictor():
             if score >= 0.01:
                 pred_pro = idx_dic[final_pro]['返回名称']
                 big_area = idx_dic[final_pro]['大区']
+                code_dic['province_code'] = idx_dic[final_pro]['编码']
         if pred_pro != "" and len(city_ids) >= 1:
             city_l = sorted([(k, v) for k, v in city_ids.items()], key=lambda x: x[1], reverse=True)
             for it in city_l:
                 if idx_dic[it[0]]['省'] == final_pro:
                     final_city = it[0]
                     pred_city = idx_dic[final_city]['返回名称']
+                    code_dic['city_code'] = idx_dic[final_city]['编码']
                     break
         if final_city != "" and len(set(dis_ids)) >= 1:
             dis_l = sorted([(k, v) for k, v in dis_ids.items()], key=lambda x: x[1], reverse=True)
             for it in dis_l:
                 if idx_dic[it[0]]['市'] == final_city:
                     pred_dis = idx_dic[it[0]]['返回名称']
+                    code_dic['district_code'] = idx_dic[it[0]]['编码']
         elif pred_pro != "" and pred_city == "" and len(set(dis_ids)) >= 1:  # 20241111 省份不为空,市为空,如果区县在省份下,补充对应的市县
             dis_l = sorted([(k, v) for k, v in dis_ids.items()], key=lambda x: x[1], reverse=True)
             for it in dis_l:
                 if idx_dic[it[0]]['省'] == final_pro:
                     pred_city = idx_dic[idx_dic[it[0]]['市']]['返回名称']
                     pred_dis = idx_dic[it[0]]['返回名称']
-        return big_area, pred_pro, pred_city, pred_dis, prob, max_score
+                    code_dic['city_code'] = idx_dic[idx_dic[it[0]]['市']]['编码']
+                    code_dic['district_code'] = idx_dic[it[0]]['编码']
+        return big_area, pred_pro, pred_city, pred_dis, prob, max_score, code_dic
     @staticmethod
     def get_ree_addr(prem):
         tenderee = ""
@@ -6288,7 +6302,7 @@ class DistrictPredictor():
            4:招标、代理一起,两个地址一起 招标人:xxx, 代理人:xxx, 地址:xxx, 地址:xxx.
         '''
         p3 = '(招标|采购|甲)(人|方|单位)(信息:|(甲方))?(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,15}[,。])'
-        p4 = '(招标|采购|甲)(人|方|单位)(信息:|(甲方))?(名称)?:[\w()]{4,15},(招标|采购)?代理(人|机构)(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,15}[,。])'
+        p4 = '(招标|采购|甲)(人|方|单位)(信息:|(甲方))?(名称)?:[\w()]{4,15},(招标|采购)?代理(人|机构)(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,35}),(联系)?地址:'
         p5 = '(采购|招标)(人|单位)(联系)?地址:(?P<addr>(\w{1,13}(自治[区州县旗]|地区|[省市区县旗盟])[^\w]*)+|\w{2,15}[,。])'
         if re.search(p3, text):
             return re.search(p3, text).group('addr')
@@ -6319,10 +6333,14 @@ class DistrictPredictor():
         addr_contact = addr_dic.get('addr_contact', '')
         in_content = False
         not_sure = True # 是否不确定地区
+
         province_l, city_l, district_l = self.find_whole_areas('%s %s'%(title, addr_project), self.pettern, self.area_variance_dic, self.full_dic)
         pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
-        big_area_1, pred_pro_1, pred_city_1, pred_dis_1, prob, max_score = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
-        big_area, pred_pro, pred_city, pred_dis = big_area_1, pred_pro_1, pred_city_1, pred_dis_1
+        big_area_1, pred_pro_1, pred_city_1, pred_dis_1, prob, max_score, code_dic_1 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
+        big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_1, pred_pro_1, pred_city_1, pred_dis_1, code_dic_1
+        # print('关键词1:', province_l, city_l, district_l)
+        # print('输入:', '标题:%s; 项目地址:%s'%(title, addr_project))
+        # print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
         if pred_city_1 == "" or prob < 0.7 or max_score<2:
             ree, addr = self.get_ree_addr(prem)
             if ree in title:
@@ -6333,28 +6351,33 @@ class DistrictPredictor():
 
             # addr = content
             # ree = ''
-            province_l2, city_l2, district_l2 = self.find_whole_areas('%s %s %s' % (ree, addr, addr_delivery), self.pettern, self.area_variance_dic, self.full_dic, weight=0.8)
+            province_l2, city_l2, district_l2 = self.find_whole_areas('%s %s %s' % (ree, addr, addr_delivery), self.pettern, self.area_variance_dic, self.full_dic, weight=1)
             province_l.extend(province_l2)
             city_l.extend(city_l2)
             district_l.extend(district_l2)
             pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
-            big_area_2, pred_pro_2, pred_city_2, pred_dis_2, prob, max_score = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
-            big_area, pred_pro, pred_city, pred_dis = big_area_2, pred_pro_2, pred_city_2, pred_dis_2
+            big_area_2, pred_pro_2, pred_city_2, pred_dis_2, prob, max_score, code_dic_2 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
+            big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_2, pred_pro_2, pred_city_2, pred_dis_2, code_dic_2
             # print('关键词2:', province_l, city_l, district_l)
+            # print('输入:', '招标人:%s; 招标人地址:%s; 收货地址:%s' % (ree, addr, addr_delivery))
             # print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
-            if re.search('省|市|自治', addr_project) and pred_pro_1 != '' and pred_pro_1 != pred_pro_2: # 如果有项目地址使用项目地址
+            if re.search('省|市|县|自治', addr_project) and pred_pro_1 != '' and pred_pro_1 != pred_pro_2: # 如果有项目地址使用项目地址  要有省市县等 275127622 工程地点为狮山镇颜峰综合区岐山至人和段道路, 提错 岐山
                 not_sure = False
-                big_area, pred_pro, pred_city, pred_dis = big_area_1, pred_pro_1, pred_city_1, pred_dis_1
+                big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_1, pred_pro_1, pred_city_1, pred_dis_1, code_dic_1
             if not_sure and (pred_city_2 == "" or prob < 0.7 or max_score<2):
-                province_l3, city_l3, district_l3 = self.find_whole_areas('%s %s %s'%(addr_contact, addr_bidopen, addr_bidsend), self.pettern, self.area_variance_dic, self.full_dic, weight=0.6)
+                province_l3, city_l3, district_l3 = self.find_whole_areas('%s %s %s'%(addr_contact, addr_bidopen, addr_bidsend), self.pettern, self.area_variance_dic, self.full_dic, weight=0.6)
                 province_l.extend(province_l3)
                 city_l.extend(city_l3)
                 district_l.extend(district_l3)
                 pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
-                big_area_3, pred_pro_3, pred_city_3, pred_dis_3, prob, max_score = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
-                big_area, pred_pro, pred_city, pred_dis = big_area_3, pred_pro_3, pred_city_3, pred_dis_3
+                big_area_3, pred_pro_3, pred_city_3, pred_dis_3, prob, max_score, code_dic_3 = self.get_final_addr(pro_ids, city_ids, dis_ids, self.idx_dic)
+                big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_3, pred_pro_3, pred_city_3, pred_dis_3, code_dic_3
                 # print('关键词3:', province_l, city_l, district_l)
+                # print('输入:', '联系:%s, 开标:%s, 邮寄:%s'%(addr_contact, addr_bidopen, addr_bidsend))
                 # print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
+                if pred_city_2 != "" and pred_city_2 != pred_city_3:
+                    not_sure = False
+                    big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_2, pred_pro_2, pred_city_2, pred_dis_2, code_dic_2 # 如果招标人、招标人地址、收货地址与开标地址、联系地址等不一致,取招标人地址
                 if not_sure and (pred_city_3 == "" or prob < 0.6 or max_score < 2):
                     all_addr, tenderees = self.get_all_addr(list_entity)
                     province_l4, city_l4, district_l4 = self.find_whole_areas('%s %s %s' % (web_source_name, tenderees, all_addr), self.pettern, self.area_variance_dic, self.full_dic, weight=0.3)
@@ -6362,11 +6385,15 @@ class DistrictPredictor():
                     city_l.extend(city_l4)
                     district_l.extend(district_l4)
                     pro_ids, city_ids, dis_ids = self.merge_score(province_l, city_l, district_l, self.full_dic, self.short_dic, self.idx_dic)
-                    big_area_4, pred_pro_4, pred_city_4, pred_dis_4, prob, max_score = self.get_final_addr(pro_ids, city_ids,dis_ids, self.idx_dic)
-                    big_area, pred_pro, pred_city, pred_dis = big_area_4, pred_pro_4, pred_city_4, pred_dis_4
+                    big_area_4, pred_pro_4, pred_city_4, pred_dis_4, prob, max_score, code_dic_4 = self.get_final_addr(pro_ids, city_ids,dis_ids, self.idx_dic)
+                    big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_4, pred_pro_4, pred_city_4, pred_dis_4, code_dic_4
+                    if pred_city_3 != "" and pred_city_3 != pred_city_4:
+                        not_sure = False
+                        big_area, pred_pro, pred_city, pred_dis, code_dic = big_area_3, pred_pro_3, pred_city_3, pred_dis_3, code_dic_3  # 如果开标地址等提取的城市与所有地址提取的城市不一致,取开标地址等
                     if pred_pro_3 != pred_pro_4 and (prob < 0.6 or max_score < 2):
                         in_content = True
                     # print('关键词4:', province_l, city_l, district_l)
+                    # print('输入:', '站源:%s, 角色:%s, 地址:%s' % (web_source_name, tenderees, all_addr))
                     # print('分数:', pro_ids, city_ids, dis_ids, prob, max_score)
 
         if pred_city in ['北京', '天津', '上海', '重庆']:
@@ -6381,9 +6408,13 @@ class DistrictPredictor():
             area_dic['city'] = pred_city
         if pred_dis != "":
             area_dic['district'] = pred_dis
+        for k, v in code_dic.items():
+            if v != '':
+                area_dic[k] = v
         area_dic['is_in_text'] = in_content
         # area_dic['prob'] = prob
         # area_dic['max_score'] = max_score
+        # print('最终地址:', pred_pro, pred_city, pred_dis)
         return {'district': area_dic}
 
     def get_area(self, text, web_name, in_content=False):
@@ -9187,29 +9218,29 @@ if __name__=="__main__":
     #     # print("cost_time:", json.loads(requests_result.text)['cost_time'])
     #     # print(MAX_LEN, len(sentence), len(list_sentence))
 
-    # docid = ""
-    # title = ''
-    # with open('d:/html/2.html', 'r', encoding='utf-8') as f:
-    #     html = f.read()
-    # product_attr = ProductAttributesPredictor()
-    # rs = product_attr.predict(docid='', html=html, page_time="")
-    # print(rs)
-
     docid = ""
-    title = '甘肃省妇幼保健院(甘肃省中心医院)(第二期)采购结果公告'
+    title = ''
     with open('d:/html/2.html', 'r', encoding='utf-8') as f:
         html = f.read()
-    tb_extract = TablePremExtractor()
-    rs = tb_extract.predict(html, [
-        "江苏中联铸本混凝土有限公司",
-        "鼓楼区协荣机械设备经销部"
-    ], web_source_name = '', all_winner=False)
-    print('标段数:',len(rs[0]))
+    product_attr = ProductAttributesPredictor()
+    rs = product_attr.predict(docid='', html=html, page_time="")
     print(rs)
-    # bdscore = BiddingScore()
-    # rs = bdscore.predict(html)
-    # print(type(rs), len(rs))
+
+    # docid = ""
+    # title = '甘肃省妇幼保健院(甘肃省中心医院)(第二期)采购结果公告'
+    # with open('d:/html/2.html', 'r', encoding='utf-8') as f:
+    #     html = f.read()
+    # tb_extract = TablePremExtractor()
+    # rs = tb_extract.predict(html, [
+    #     "江苏中联铸本混凝土有限公司",
+    #     "鼓楼区协荣机械设备经销部"
+    # ], web_source_name = '', all_winner=False)
+    # print('标段数:',len(rs[0]))
     # print(rs)
+    # # bdscore = BiddingScore()
+    # # rs = bdscore.predict(html)
+    # # print(type(rs), len(rs))
+    # # print(rs)
 
     # # # ids = [199601430, 195636197, 123777031, 195191849, 163533442, 121845385, 217782764, 163370956, 238134423, 191700799, 148218772, 189295942, 145940984, 166830213, 119271266, 90157660, 180314485, 136564968, 119094883, 89822506, 209263355, 132839357, 85452163, 110204324, 204773640, 83910716, 126657693, 107244197, 79107109, 47810780, 233548561, 237887867, 79134266, 77124584, 75804469, 43206978, 237560666, 67472815, 42078089, 66307082, 38382419, 224367857, 224751772, 54913238, 237390205, 60511017, 33170000, 228578442, 69042200, 228535928, 79997322, 233492018, 51828144, 219494938, 240514770]
     # # # ids = [42078089, 51828144, 54913238, 60511017, 67472815, 69042200, 75804469, 77124584, 79107109, 79997322, 83910716, 85452163, 89822506, 90157660, 107244197, 110204324, 119094883, 121845385, 123777031, 132839357, 136564968, 145940984, 148218772, 163370956, 163533442, 166830213, 180314485, 191700799, 195191849, 199601430, 204773640, 209263355, 217782764, 219494938, 224367857, 224751772, 228535928, 228578442, 233492018, 237390205, 237560666, 237887867, 238134423, 240514770]