|
@@ -147,15 +147,19 @@ def extract_moneySource(text):
|
|
list_moneySource = []
|
|
list_moneySource = []
|
|
for result in first:
|
|
for result in first:
|
|
entity_text = sub.sub("",result['moneySource'])
|
|
entity_text = sub.sub("",result['moneySource'])
|
|
- wordOffset_begin = result['index'] + re.search(entity_text,result['start']+result['moneySource']).start()
|
|
|
|
- wordOffset_end = wordOffset_begin + len(entity_text)
|
|
|
|
- # print(entity_text,wordOffset_begin,wordOffset_end)
|
|
|
|
- _moneySource = dict()
|
|
|
|
- _moneySource['body'] = entity_text
|
|
|
|
- _moneySource['begin_index'] = wordOffset_begin
|
|
|
|
- _moneySource['end_index'] = wordOffset_end
|
|
|
|
- # print(_moneySource)
|
|
|
|
- list_moneySource.append(_moneySource)
|
|
|
|
|
|
+ # wordOffset_begin = result['index'] + re.search(entity_text,result['start']+result['moneySource']).start()
|
|
|
|
+ if entity_text is None:
|
|
|
|
+ continue
|
|
|
|
+ else:
|
|
|
|
+ wordOffset_begin = result['index'] + (result['start']+result['moneySource']).find(entity_text)
|
|
|
|
+ wordOffset_end = wordOffset_begin + len(entity_text)
|
|
|
|
+ print(entity_text,wordOffset_begin,wordOffset_end)
|
|
|
|
+ _moneySource = dict()
|
|
|
|
+ _moneySource['body'] = entity_text
|
|
|
|
+ _moneySource['begin_index'] = wordOffset_begin
|
|
|
|
+ _moneySource['end_index'] = wordOffset_end
|
|
|
|
+ # print(_moneySource)
|
|
|
|
+ list_moneySource.append(_moneySource)
|
|
return list_moneySource
|
|
return list_moneySource
|
|
|
|
|
|
|
|
|
|
@@ -163,6 +167,6 @@ def extract_moneySource(text):
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
# re_rule()
|
|
# re_rule()
|
|
test ="a建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,as,建设资金来自呜呜呜。"
|
|
test ="a建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,as,建设资金来自呜呜呜。"
|
|
- # 11,15 11
|
|
|
|
|
|
+ # 11,23 35,37
|
|
extract_moneySource(test)
|
|
extract_moneySource(test)
|
|
pass
|
|
pass
|