Browse Source

"项目标签"额外新增‘空净通’标签

znj 1 năm trước
mục cha
commit
cf44e1f9ae

+ 2 - 0
BiddingKG/dl/interface/extract.py

@@ -345,6 +345,8 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
 
     '''根据关键词表生成项目标签'''
     project_label = predictor.getPredictor('project_label').predict(title,product=','.join(product_list),project_name=codeName[0]['name'],prem=prem)
+    # 额外需求的标签
+    project_label = predictor.getPredictor('project_label').predict_other(project_label,industry,title,codeName[0]['name'],','.join(product_list),list_articles)
     # print(project_label)
 
     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]

+ 218 - 0
BiddingKG/dl/interface/kongjing_label_keywords.csv

@@ -0,0 +1,218 @@
+关键词,组合词(并关系),匹配字段,限定行业(info_type)
+污染治理,室内环境,正文,
+通风系统,集中空调,正文,
+通风系统,中央空调,正文,
+清洁消毒服务,公共场所,正文,
+循环水处理服务,,正文,
+清洗,油烟,正文,
+病媒生物防制,,正文,
+清洁消毒服务,医院,正文,
+白蚁防治服务,,正文,
+空气净化器,,产品,
+消毒机,,产品,
+新风机,,产品,
+新风系统,,产品,
+污水消毒耗材,,正文,
+废气治理,,正文,
+医用耗材,,产品,
+医疗耗材,,产品,
+污水处理服务,,正文,
+水处理服务,,正文,
+空调清洗,院部,正文,
+保洁服务,,正文,
+室内空气净化,,正文,
+除甲醛,,产品,
+清洗服务,中央空调,正文,
+食堂,外包,正文,
+除异味设施,,正文,
+清洗消毒服务,设备,正文,
+保洁服务,垃圾,正文,
+杀虫灭害服务,,正文,
+杀虫服务,,正文,
+保洁服务,除四害,正文,
+保洁服务,医院,正文,
+管护服务,公厕,正文,
+病媒生物防治,,正文,
+消毒服务,学校,正文,
+消毒服务,小学,正文,
+物业管理服务,,正文,
+空气治理,,正文,
+消杀服务,码头,正文,
+仓库,,产品,水资源管理|水文服务|污水处理及其再生利用|环境污染防治设备|环境治理业|环境与生态监测检测服务|清洁服务|卫生|社会工作|生态保护|环保咨询
+空气净化,,产品,
+水质检测,,产品,
+空调维修,,正文,
+车内保养异味,,正文,
+大健康,,产品,
+宠物毛发,,正文,
+甲醛,医院,正文,
+消毒服务,养殖,正文,
+消毒服务,,正文,
+空调清洗,,正文,
+冷却塔清洗,,正文,
+消杀服务,,正文,
+异味处理,,正文,
+净水设备,,正文,
+白蚁防治,,正文,
+水处理药品,,正文,
+水处理,,产品,
+保洁项目,,正文,
+污水处理,物料采购,正文,
+空调维保,,产品,
+水处理,监测,正文,
+水箱清洗,空调,正文,
+水处理,设备运维,正文,
+健康,空调,正文,
+健康,陶瓷,正文,
+健康,油烟机,正文,
+健康,燃气灶,正文,
+健康,集成灶,正文,
+健康,橱柜,正文,
+健康,乘用车,正文,
+健康,酒店,正文,
+健康,空气净化器,正文,
+健康,新风系统,正文,
+健康,新风机,正文,
+健康,照明,正文,
+健康,涂料,正文,
+空气净化,产品,产品,
+空气净化,OEM,产品,
+ODM企业,,正文,
+室内环境污染治理企业,,正文,
+室内空气质量及相关产品检验检测机构,,正文,
+空调运维,,正文,
+灭白蚁,,正文,
+防治白蚁,,正文,
+杀虫灭鼠,,正文,
+空气污染,,正文,
+甲醛治理,,正文,
+除味,,产品,
+味道治理,,正文,
+装修治理,,正文,
+空气检测,,正文,
+甲醛检测,,正文,
+CMA检测,,正文,
+TVOC治理,,正文,
+治理药剂,,产品,
+除醛剂,,产品,
+除味剂,,产品,
+光触媒,,产品,
+生物酶,,产品,
+甲醛封闭剂,,产品,
+空气消毒,,正文,
+室内消毒,,正文,
+净化器,,正文,
+消毒器,,正文,
+治理药水,,产品,
+物业保洁,,正文,
+社区保洁,,正文,
+工厂保洁,,正文,
+校园保洁,,正文,
+道路保洁,,正文,
+室内保洁,,正文,
+保洁劳务,,正文,
+景区保洁,,正文,
+医院保洁,,正文,
+清扫服务,,正文,
+清洁卫生,,正文,
+保洁外派,,正文,
+保洁外包,,正文,
+开荒保洁,,正文,
+聘请专业保洁,,正文,
+开荒清洁,,正文,
+保洁劳务外包,,正文,
+清洁服务,,正文,
+清扫保洁,,正文,
+办公楼保洁,,正文,
+消杀,,产品,
+公司保洁,,正文,
+保洁承包,,正文,
+氟化物废物,,正文,
+废油气,,正文,
+含氰废物,,正文,
+精馏残渣,,正文,
+蒸馏残渣,,正文,
+废吸附剂,,正文,
+废密封剂,,正文,
+化学废物,,正文,
+爆炸性废物,,正文,
+焚烧处置残渣,,正文,
+氰化物废物,,正文,
+废齿轮油,,正文,
+涂料废物,,正文,
+废发动机油,,正文,
+废酸,,产品,
+废防锈油,,正文,
+废碱,,产品,
+废冷冻机油,,正文,
+磷化合物废物,,正文,
+废润滑油,,正文,
+废石蜡,,正文,
+废液压油,,正文,
+医疗废物,,正文,
+废催化剂,,正文,
+废银催化剂,,正文,
+废溶剂,,正文,
+医药废物,,正文,
+农药废物,,正文,
+废液,,产品,
+有机溶剂废物,,产品,
+废油,,产品,
+废焦油,,产品,
+废矿物油,,产品,
+染料废物,,产品,
+活性炭过滤器,,产品,
+复合式净化器,,产品,
+滤芯,,产品,
+分离过滤器,,产品,
+聚结器,,产品,
+清洗服务,,产品,
+物业维修,,产品,
+物业项目,,产品,
+物业服务,,产品,
+物业采购,,产品,
+物业管理,,产品,
+物业业务,,产品,
+物业外包,,产品,
+物业合同,,产品,
+小区物业,,产品,
+后勤服务,,产品,
+办公物业,,产品,
+后勤保障,,产品,
+物管服务,,产品,
+后勤社会化,,产品,
+物业托管,,产品,
+物业委托,,产品,
+宿舍管理,,产品,
+公寓管理,,产品,
+后勤管理,,产品,
+后勤综合,,产品,
+物业综合,,产品,
+消毒剂,,产品,
+恒温培养箱,,产品,
+医用真空干燥,,产品,
+灭菌烘箱,,产品,
+微波智能设备,,产品,
+高温干蒸设备,,产品,
+医用干燥柜,,产品,
+等离子消毒器,,产品,
+清洗消毒器,,产品,
+高温灭菌器,,产品,
+低温灭菌器,,产品,
+除异味,,产品,
+有害气体,,产品,
+污染防治,,产品,
+空气污染物,,产品,
+物业管理,,行业,
+清洁服务,,行业,
+污水处理及其再生利用,,行业,
+卫生,,行业,
+水资源管理,,行业,
+水文服务,,行业,
+生态保护,,行业,
+社会工作,,行业,
+环境治理业,,行业,
+环境与生态监测检测服务,,行业,
+环境污染防治设备,,行业,
+环保咨询,,行业,
+仓储业,,行业,

+ 71 - 1
BiddingKG/dl/interface/predictor.py

@@ -4530,6 +4530,7 @@ class ProjectLabel():
     def __init__(self, ):
+        # Both keyword tables are loaded once, when the instance is created.
 
+        # Rules returned by get_label_keywords() (its data source is not visible in this hunk).
         self.keyword_list = self.get_label_keywords()
+        # Rules for the extra '空净通' label, read from kongjing_label_keywords.csv.
+        self.kongjing_keyword_list = self.get_kongjing_keywords()
 
     def get_label_keywords(self):
         import csv
@@ -4549,6 +4550,25 @@ class ProjectLabel():
                 key_word_list.append((type, key_wrod, key_paichuci, type_paichuci))
         return key_word_list
 
+    def get_kongjing_keywords(self):
+        """Load the keyword rules for the '空净通' label.
+
+        Reads ``kongjing_label_keywords.csv`` located next to this module.
+        Each data row is expected to hold up to four cells: keyword,
+        combined keyword (must co-occur with the keyword), match field and
+        a '|'-separated industry whitelist.
+
+        Returns:
+            list of ``(keyword, combined_keyword, match_field, industries)``
+            tuples of strings; missing or 'nan' optional cells become "".
+            The header row and rows without a keyword are dropped.
+        """
+        import csv
+        path = os.path.join(os.path.dirname(__file__), 'kongjing_label_keywords.csv')
+        key_word_list = []
+        # utf-8-sig reads plain UTF-8 as well, but also strips a BOM so the
+        # header check below still works for files saved from Excel.
+        with open(path, 'r', encoding='utf-8-sig', newline='') as f:
+            for r in csv.reader(f):
+                # Pad short/blank rows so unpacking never raises IndexError.
+                cells = [str(c).strip() for c in r[:4]]
+                cells += [""] * (4 - len(cells))
+                key_wrod, key_wrod2, search_type, info_type_list = cells
+                # Skip the header, and skip empty keywords: an empty string is
+                # a substring of every text and would label every document.
+                if not key_wrod or key_wrod == '关键词':
+                    continue
+                if key_wrod2 == 'nan':
+                    key_wrod2 = ""
+                if info_type_list == 'nan':
+                    info_type_list = ""
+
+                key_word_list.append((key_wrod, key_wrod2, search_type, info_type_list))
+        return key_word_list
+
     def predict(self, doctitle,product,project_name,prem):
 
         doctitle = doctitle if doctitle else ""
@@ -4567,7 +4587,8 @@ class ProjectLabel():
                     if link['role_name'] == 'agency' and agency == "":
                         agency = link['role_text']
         except Exception as e:
-            print('解析prem 获取招标人、代理人出错')
+            # print('解析prem 获取招标人、代理人出错')
+            pass
         sub_project_names = ";".join(sub_project_names)
         # 核心字段:标题+产品词+项目名称+标段名称
         main_text = ",".join([doctitle, product, project_name, sub_project_names])
@@ -4640,6 +4661,55 @@ class ProjectLabel():
 
         return {"标题":doctitle_dict,"核心字段":main_text_dict}
 
+    def predict_other(self,project_label,industry,doctitle,project_name,product,list_articles):
+        """Add the extra '空净通' label to ``project_label`` when a keyword rule matches.
+
+        Rules are taken from ``self.kongjing_keyword_list`` as tuples of
+        (keyword, combined keyword, match field, '|'-separated industries):
+          * match field '正文': keyword is searched in the content of the first
+            article, cut at the first '##attachment##' marker;
+          * match field '产品': keyword is searched in
+            "doctitle,project_name,product";
+          * match field '行业': keyword must equal ``industry['industry']['class_name']``.
+        For '正文'/'产品' rules the combined keyword (if any) must also occur in
+        the same text, and a non-empty industry list must contain the class name.
+
+        Args:
+            project_label: dict returned by ``predict``; updated in place.
+            industry: dict read as ``industry['industry']['class_name']``; may be None.
+            doctitle, project_name: strings, may be None/empty.
+            product: comma-separated product words, may be None/empty.
+            list_articles: sequence whose first item exposes ``.content``; may be empty.
+
+        Returns:
+            The same ``project_label`` object. When at least one rule matched,
+            ``project_label["核心字段"]["空净通"]`` holds up to 10 distinct
+            ``[matched_keyword, 1]`` pairs in rule order.
+        """
+        max_keywords = 10
+
+        # Body text only: everything after the attachment marker is ignored.
+        content = (list_articles[0].content or "") if list_articles else ""
+        doctextcon = content.split('##attachment##')[0]
+        info_type = ((industry or {}).get('industry') or {}).get("class_name") or ""
+        doctitle = doctitle if doctitle else ""
+        project_name = project_name if project_name else ""
+        # De-duplicate product words while keeping their order.
+        product = ",".join(dict.fromkeys((product or "").split(',')))
+
+        # Text searched for each match field; unknown fields get no text.
+        search_texts = {
+            '正文': doctextcon,
+            '产品': ",".join([doctitle, project_name, product]),
+        }
+
+        keywords_list = []
+        for key_wrod, key_wrod2, search_type, info_type_list in self.kongjing_keyword_list:
+            # An empty keyword is a substring of everything; never let it match.
+            if not key_wrod:
+                continue
+            if search_type == '行业':
+                # Industry rules compare the keyword with the class name directly.
+                if info_type != key_wrod:
+                    continue
+                matched = key_wrod
+            else:
+                search_text = search_texts.get(search_type, "")
+                if key_wrod not in search_text:
+                    continue
+                if key_wrod2 and key_wrod2 not in search_text:
+                    continue
+                allowed = [t for t in info_type_list.split("|") if t] if info_type_list else []
+                if allowed and info_type not in allowed:
+                    continue
+                matched = key_wrod + '+' + key_wrod2 if key_wrod2 else key_wrod
+            # Record each hit once so duplicates do not use up the capped slots.
+            if matched not in keywords_list:
+                keywords_list.append(matched)
+                # Only the first 10 hits are kept, so stop scanning once full.
+                if len(keywords_list) >= max_keywords:
+                    break
+        if keywords_list:
+            project_label.setdefault("核心字段", {})["空净通"] = [[word, 1] for word in keywords_list]
+
+        return project_label
+
+
 # 总价单价提取
 class TotalUnitMoney:
     def __init__(self):