Bladeren bron

"项目联系人"提取优化

znj 6 maanden geleden
bovenliggende
commit
b77fc5274c
1 gewijzigd bestand met 3 toevoegingen en 3 verwijderingen
  1. 3 3
      BiddingKG/dl/interface/getAttributes.py

+ 3 - 3
BiddingKG/dl/interface/getAttributes.py

@@ -4107,8 +4107,8 @@ def getProjectContacts(list_entity, list_sentence):
     # project_contacts "项目联系人"提取
     temp_person_entitys = [ent for ent in list_entity if ent.entity_type=='person' and ent.label in [1,2,3]]
     temp_person_entitys = sorted(temp_person_entitys,key=lambda x:(x.sentence_index,x.wordOffset_begin))
-    project_contacts_patterns = ['项目.?联系[人方]', '项目.?负责人']
-    project_contacts_patterns_prob = [0.9, 0.8]
+    project_contacts_patterns = ['项目.?联系[人方]','项目.?联系.?方式', '项目.?负责人']
+    project_contacts_patterns_prob = [0.9, 0.85, 0.8]
     project_contacts_patterns_res = []
     for ent in temp_person_entitys:
         sent_idx = ent.sentence_index
@@ -4116,7 +4116,7 @@ def getProjectContacts(list_entity, list_sentence):
         # word_end = ent.wordOffset_end
         in_att = ent.in_attachment
         if word_begin >= 5:  # > len('项目联系人')
-            left_text = list_sentence[sent_idx].sentence_text[max(0, word_begin - 10):word_begin]
+            left_text = list_sentence[sent_idx].sentence_text[max(0, word_begin - 15):word_begin]
             # print('left_text', left_text)
             for pattern, prob in zip(project_contacts_patterns, project_contacts_patterns_prob):
                 if re.search(pattern, left_text):