Преглед на файлове

新增"项目联系人"提取

znj преди 6 месеца
родител
ревизия
1d9d1eca3d
променен е 1 файл, в който са добавени 35 реда и са изтрити 0 реда
  1. 35 0
      BiddingKG/dl/interface/getAttributes.py

+ 35 - 0
BiddingKG/dl/interface/getAttributes.py

@@ -4102,6 +4102,40 @@ def getOtherAttributes(list_entity,page_time,prem,channel_dic):
 def getMoneyRange(RoleList):
     pass  # placeholder: no implementation; RoleList is unused and the call returns None
 
+
+def getProjectContacts(list_entity, list_sentence):
+    # Extract the "project contact" (the 'project_contacts' result): choose the best-scoring person entity whose left context matches a pattern.
+    temp_person_entitys = [ent for ent in list_entity if ent.entity_type=='person' and ent.label in [1,2,3]]  # only person entities labelled 1, 2 or 3
+    temp_person_entitys = sorted(temp_person_entitys,key=lambda x:(x.sentence_index,x.wordOffset_begin))  # order by (sentence_index, wordOffset_begin)
+    project_contacts_patterns = ['项目.?联系[人方]', '项目.?负责人']  # regexes: "project contact person/party" and "project lead"; .? allows one optional character after 项目
+    project_contacts_patterns_prob = [0.9, 0.8]  # score of each pattern above, paired by position through zip()
+    project_contacts_patterns_res = []  # collected records: [entity, sentence_index, wordOffset_begin, score]
+    for ent in temp_person_entitys:
+        sent_idx = ent.sentence_index
+        word_begin = ent.wordOffset_begin
+        # A truthy in_attachment halves the score of any match recorded for this entity (see the append below).
+        in_att = ent.in_attachment
+        if word_begin >= 5:  # require at least len('项目联系人') == 5 positions of text before the entity
+            left_text = list_sentence[sent_idx].sentence_text[max(0, word_begin - 10):word_begin]
+            # left_text is the slice of sentence_text covering up to 10 positions immediately before the entity.
+            for pattern, prob in zip(project_contacts_patterns, project_contacts_patterns_prob):
+                if re.search(pattern, left_text):
+                    project_contacts_patterns_res.append([ent, sent_idx, word_begin, prob if not in_att else prob / 2])
+    project_contacts_patterns_res = sorted(project_contacts_patterns_res, key=lambda x: (x[3], -x[1], -x[2]),
+                                           reverse=True)
+    # Reverse sort: highest score first; ties go to the smaller sentence_index, then to the smaller wordOffset_begin.
+    contacts_person = ""
+    contacts_phone = ""
+    if project_contacts_patterns_res:
+        project_contacts_patterns_res = project_contacts_patterns_res[0]
+        # NOTE: the name is rebound above from the list of records to the single best record,
+        # so index [0] below is the entity object itself (first field of the record).
+        contacts_person = project_contacts_patterns_res[0].entity_text
+        contacts_phone = project_contacts_patterns_res[0].person_phone[0].entity_text if project_contacts_patterns_res[0].person_phone else ""
+    # Returns one key, 'project_contacts', mapping to a dict with 'contacts_person' and 'contacts_phone'.
+    # Both are "" when nothing matched; the phone is also "" when the chosen entity's person_phone is falsy.
+    return {'project_contacts':{'contacts_person':contacts_person, 'contacts_phone':contacts_phone}}
+
 def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
     '''
     @param:
@@ -4114,6 +4148,7 @@ def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
         RoleList = getPackageRoleMoney(list_sentence,list_entity,list_outline)
         result.append(dict({"prem": RoleList, "docid": list_article.doc_id},
                            **getTimeAttributes(list_entity, list_sentence,page_time),
+                           **getProjectContacts(list_entity, list_sentence),
                            **{"fingerprint": list_article.fingerprint,
                               "match_enterprise": list_article.match_enterprise,
                               "match_enterprise_type": list_article.match_enterprise_type,