|
@@ -4530,6 +4530,7 @@ class ProjectLabel():
|
|
def __init__(self, ):
|
|
def __init__(self, ):
|
|
|
|
|
|
self.keyword_list = self.get_label_keywords()
|
|
self.keyword_list = self.get_label_keywords()
|
|
|
|
+ self.kongjing_keyword_list = self.get_kongjing_keywords()
|
|
|
|
|
|
def get_label_keywords(self):
|
|
def get_label_keywords(self):
|
|
import csv
|
|
import csv
|
|
@@ -4549,6 +4550,25 @@ class ProjectLabel():
|
|
key_word_list.append((type, key_wrod, key_paichuci, type_paichuci))
|
|
key_word_list.append((type, key_wrod, key_paichuci, type_paichuci))
|
|
return key_word_list
|
|
return key_word_list
|
|
|
|
|
|
|
|
def get_kongjing_keywords(self):
    """Load the keyword rules from ``kongjing_label_keywords.csv``.

    The CSV file is expected next to this module. Each data row is read as
    four columns: keyword, second keyword, search type, info-type list.
    The header row (first cell equal to '关键词') and rows without any
    content are skipped. Rows with fewer than four cells are padded with
    empty strings. The literal text 'nan' in the second-keyword or
    info-type column is normalised to an empty string.

    Returns:
        list[tuple[str, str, str, str]]: one
        ``(keyword, keyword2, search_type, info_type_list)`` tuple per row.

    Raises:
        OSError: if the CSV file cannot be opened.
    """
    import csv

    # os.path.join keeps the path relative when dirname(__file__) is '';
    # plain concatenation with '/' would point at the filesystem root.
    path = os.path.join(os.path.dirname(__file__), 'kongjing_label_keywords.csv')

    key_word_list = []
    # 'utf-8-sig' strips an optional BOM so the header cell still equals
    # '关键词'; newline='' is what the csv module expects for file objects.
    with open(path, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            # Blank lines come back as [] and spreadsheet exports often
            # leave ',,,' rows; neither carries a rule.
            if not any(row):
                continue
            if row[0] == '关键词':
                continue
            # Tolerate short rows instead of raising IndexError.
            cells = list(row) + [""] * (4 - len(row))
            key_wrod = cells[0]
            key_wrod2 = "" if cells[1] == 'nan' else cells[1]
            search_type = cells[2]
            info_type_list = "" if cells[3] == 'nan' else cells[3]
            key_word_list.append((key_wrod, key_wrod2, search_type, info_type_list))
    return key_word_list
|
|
|
|
+
|
|
def predict(self, doctitle,product,project_name,prem):
|
|
def predict(self, doctitle,product,project_name,prem):
|
|
|
|
|
|
doctitle = doctitle if doctitle else ""
|
|
doctitle = doctitle if doctitle else ""
|
|
@@ -4567,7 +4587,8 @@ class ProjectLabel():
|
|
if link['role_name'] == 'agency' and agency == "":
|
|
if link['role_name'] == 'agency' and agency == "":
|
|
agency = link['role_text']
|
|
agency = link['role_text']
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- print('解析prem 获取招标人、代理人出错')
|
|
|
|
|
|
+ # print('解析prem 获取招标人、代理人出错')
|
|
|
|
+ pass
|
|
sub_project_names = ";".join(sub_project_names)
|
|
sub_project_names = ";".join(sub_project_names)
|
|
# 核心字段:标题+产品词+项目名称+标段名称
|
|
# 核心字段:标题+产品词+项目名称+标段名称
|
|
main_text = ",".join([doctitle, product, project_name, sub_project_names])
|
|
main_text = ",".join([doctitle, product, project_name, sub_project_names])
|
|
@@ -4640,6 +4661,55 @@ class ProjectLabel():
|
|
|
|
|
|
return {"标题":doctitle_dict,"核心字段":main_text_dict}
|
|
return {"标题":doctitle_dict,"核心字段":main_text_dict}
|
|
|
|
|
|
|
|
def predict_other(self, project_label, industry, doctitle, project_name, product, list_articles):
    """Add the "空净通" label to ``project_label`` when a keyword rule matches.

    Every rule in ``self.kongjing_keyword_list`` is a
    ``(keyword, keyword2, search_type, info_type_list)`` tuple:

    * search type '行业': the rule matches when the industry class name
      equals the keyword exactly.
    * search type '正文': the keyword is searched in the first article's
      content, cut off at the '##attachment##' marker.
    * search type '产品': the keyword is searched in the comma-joined
      title, project name and de-duplicated product words.

    For the two text search types, a non-empty ``keyword2`` must also occur
    in the same text, and a non-empty '|'-separated info-type list must
    contain the industry class name. Rules with any other search type, or
    with an empty keyword, never match.

    Args:
        project_label: dict that receives the label under the "核心字段" key.
        industry: dict whose ``['industry']['class_name']`` is the class name.
        doctitle: document title, may be None.
        project_name: project name, may be None.
        product: comma-separated product words, may be None.
        list_articles: sequence whose first element exposes ``.content``;
            an empty sequence is treated as having no body text.

    Returns:
        The same ``project_label`` object. When at least one rule matched,
        ``project_label["核心字段"]["空净通"]`` holds at most ten
        ``[matched_keyword, 1]`` entries.
    """
    # Body text only: everything after the attachment marker is ignored.
    first_content = list_articles[0].content if list_articles else ""
    doctextcon = (first_content or "").split('##attachment##')[0]

    info_type = ((industry or {}).get('industry') or {}).get("class_name", "")

    doctitle = doctitle or ""
    project_name = project_name or ""
    # De-duplicate product words; dict.fromkeys keeps first-seen order so
    # the searched text is deterministic (set() order is not).
    product = ",".join(dict.fromkeys((product or "").split(',')))

    text_by_search_type = {
        '正文': doctextcon,
        '产品': ",".join([doctitle, project_name, product]),
    }

    keywords_list = []
    for item in self.kongjing_keyword_list:
        key_wrod, key_wrod2, search_type, info_type_list = item[:4]

        # An empty keyword is a substring of every text; treat it as no rule.
        if not key_wrod:
            continue

        if search_type == '行业':
            # Industry rules compare the class name to the keyword directly.
            if info_type == key_wrod:
                keywords_list.append(key_wrod)
            continue

        search_text = text_by_search_type.get(search_type, "")
        if key_wrod not in search_text:
            continue
        if key_wrod2 and key_wrod2 not in search_text:
            continue
        allowed_info_types = info_type_list.split("|") if info_type_list else []
        if allowed_info_types and info_type not in allowed_info_types:
            continue

        # Record which keyword (or keyword pair) triggered the match.
        keywords_list.append((key_wrod + '+' + key_wrod2) if key_wrod2 else key_wrod)

    if keywords_list:
        project_label.setdefault("核心字段", {})["空净通"] = [
            [word, 1] for word in keywords_list[:10]
        ]

    return project_label
|
|
|
|
+
|
|
|
|
+
|
|
# 总价单价提取
|
|
# 总价单价提取
|
|
class TotalUnitMoney:
|
|
class TotalUnitMoney:
|
|
def __init__(self):
|
|
def __init__(self):
|