|
@@ -4102,6 +4102,40 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
|
|
|
def getMoneyRange(RoleList):
    """Placeholder: not implemented yet.

    Accepts ``RoleList`` but does not read it; always returns ``None``.
    """
    return None
|
|
|
|
|
|
+
|
|
|
def getProjectContacts(list_entity, list_sentence):
    """Extract the project contact person ("项目联系人") and their phone number.

    A person entity (``entity_type == 'person'`` and ``label`` in 1, 2, 3)
    becomes a candidate when one of the cue patterns ("项目联系人/方",
    "项目负责人") occurs in the up-to-ten characters directly to the left of
    the entity inside its own sentence.  Each candidate gets the score of
    the first cue that matches, halved when the entity is flagged
    ``in_attachment``.  The highest-scoring candidate wins; ties go to the
    candidate that appears earliest (lowest ``sentence_index``, then lowest
    ``wordOffset_begin``).

    Args:
        list_entity: Iterable of entity objects.  The attributes read are
            ``entity_type``, ``label``, ``sentence_index``,
            ``wordOffset_begin``, ``in_attachment``, ``entity_text`` and
            ``person_phone`` (a sequence of objects exposing
            ``entity_text``; may be empty).
        list_sentence: Sequence indexed by an entity's ``sentence_index``;
            each item exposes ``sentence_text``.
            NOTE(review): this relies on the list position matching
            ``sentence_index`` - confirm against the caller.

    Returns:
        ``{'project_contacts': {'contacts_person': str, 'contacts_phone': str}}``.
        Both strings are empty when no candidate is found; the phone is
        empty when the chosen person has no linked phone.
    """
    # Cues ordered from most to least reliable, so the first cue that hits
    # for an entity is also that entity's best score.
    cue_patterns = (
        (re.compile(r'项目.?联系[人方]'), 0.9),
        (re.compile(r'项目.?负责人'), 0.8),
    )
    min_offset = 5    # a cue such as '项目联系人' needs at least 5 chars of room
    window_size = 10  # how many characters to the left of the name to inspect

    persons = sorted(
        (ent for ent in list_entity
         if ent.entity_type == 'person' and ent.label in (1, 2, 3)),
        key=lambda ent: (ent.sentence_index, ent.wordOffset_begin),
    )

    best_ent = None
    best_score = 0.0
    for ent in persons:
        begin = ent.wordOffset_begin
        if begin < min_offset:
            continue
        sentence_text = list_sentence[ent.sentence_index].sentence_text
        left_text = sentence_text[max(0, begin - window_size):begin]
        for pattern, prob in cue_patterns:
            if pattern.search(left_text):
                # Mentions inside attachments are trusted half as much.
                score = prob / 2 if ent.in_attachment else prob
                # `persons` is in document order, so a strict '>' keeps the
                # earliest mention whenever scores are equal.
                if best_ent is None or score > best_score:
                    best_ent, best_score = ent, score
                break  # lower-ranked cues cannot improve this entity's score

    contacts_person = ""
    contacts_phone = ""
    if best_ent is not None:
        contacts_person = best_ent.entity_text
        if best_ent.person_phone:
            contacts_phone = best_ent.person_phone[0].entity_text

    return {'project_contacts': {'contacts_person': contacts_person,
                                 'contacts_phone': contacts_phone}}
|
|
|
+
|
|
|
def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
|
|
|
'''
|
|
|
@param:
|
|
@@ -4114,6 +4148,7 @@ def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
|
|
|
RoleList = getPackageRoleMoney(list_sentence,list_entity,list_outline)
|
|
|
result.append(dict({"prem": RoleList, "docid": list_article.doc_id},
|
|
|
**getTimeAttributes(list_entity, list_sentence,page_time),
|
|
|
+ **getProjectContacts(list_entity, list_sentence),
|
|
|
**{"fingerprint": list_article.fingerprint,
|
|
|
"match_enterprise": list_article.match_enterprise,
|
|
|
"match_enterprise_type": list_article.match_enterprise_type,
|