admin 4 года назад
Родитель
Commit
bb61aa4d8a
1 изменённый файл: 14 добавлений и 10 удалений
  1. 14 10
      BiddingKG/dl/money/moneySource/ruleExtra.py

+ 14 - 10
BiddingKG/dl/money/moneySource/ruleExtra.py

@@ -147,15 +147,19 @@ def extract_moneySource(text):
     list_moneySource = []
     for result in first:
         entity_text = sub.sub("",result['moneySource'])
-        wordOffset_begin = result['index'] + re.search(entity_text,result['start']+result['moneySource']).start()
-        wordOffset_end = wordOffset_begin + len(entity_text)
-        # print(entity_text,wordOffset_begin,wordOffset_end)
-        _moneySource = dict()
-        _moneySource['body'] = entity_text
-        _moneySource['begin_index'] = wordOffset_begin
-        _moneySource['end_index'] = wordOffset_end
-        # print(_moneySource)
-        list_moneySource.append(_moneySource)
+        # wordOffset_begin = result['index'] + re.search(entity_text,result['start']+result['moneySource']).start()
+        if entity_text is None:
+            continue
+        else:
+            wordOffset_begin = result['index'] + (result['start']+result['moneySource']).find(entity_text)
+            wordOffset_end = wordOffset_begin + len(entity_text)
+            print(entity_text,wordOffset_begin,wordOffset_end)
+            _moneySource = dict()
+            _moneySource['body'] = entity_text
+            _moneySource['begin_index'] = wordOffset_begin
+            _moneySource['end_index'] = wordOffset_end
+            # print(_moneySource)
+            list_moneySource.append(_moneySource)
     return list_moneySource
 
 
@@ -163,6 +167,6 @@ def extract_moneySource(text):
 if __name__ == '__main__':
     # re_rule()
     test ="a建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,as,建设资金来自呜呜呜。"
-    # 11,15 11
+    # 11,23 35,37
     extract_moneySource(test)
     pass