Эх сурвалжийг харах

新增"项目联系人"提取

znj 6 сар өмнө
parent
commit
399d88ac34

+ 16 - 9
BiddingKG/dl/interface/getAttributes.py

@@ -4124,17 +4124,24 @@ def getProjectContacts(list_entity, list_sentence):
     project_contacts_patterns_res = sorted(project_contacts_patterns_res, key=lambda x: (x[3], -x[1], -x[2]),
                                            reverse=True)
     # print('project_contacts_patterns_res', project_contacts_patterns_res)
-    contacts_person = ""
-    contacts_phone = ""
+    project_contacts_list = []
+    phone_set = set()
+    have_in_text = False
     if project_contacts_patterns_res:
-        project_contacts_patterns_res = project_contacts_patterns_res[0]
-        # print('project_contacts_patterns_res2',
-        #       project_contacts_patterns_res[0].entity_text, project_contacts_patterns_res[0].person_phone[0].entity_text if project_contacts_patterns_res[0].person_phone else "")
-        contacts_person = project_contacts_patterns_res[0].entity_text
-        contacts_phone = project_contacts_patterns_res[0].person_phone[0].entity_text if project_contacts_patterns_res[0].person_phone else ""
-
+        for item in project_contacts_patterns_res:
+            in_att = item[0].in_attachment
+            contacts_person = item[0].entity_text
+            contacts_phone = item[0].person_phone[0].entity_text if item[0].person_phone else ""
+            if contacts_phone:
+                if not in_att:
+                    have_in_text = True
+                if in_att and have_in_text: # 正文已提取,则排除附件的
+                    break
+                if contacts_phone not in phone_set:
+                    phone_set.add(contacts_phone)
+                    project_contacts_list.append([contacts_person,contacts_phone])
 
-    return {'project_contacts':{'contacts_person':contacts_person, 'contacts_phone':contacts_phone}}
+    return {'project_contacts':project_contacts_list}
 
 def getPREMs(list_sentences,list_entitys,list_articles,list_outlines,page_time):
     '''