123456789101112131415161718192021222324252627282930 |
- import sys
- import os
- sys.path.append(os.path.abspath("../.."))
- import re
- # from BiddingKG.dl.interface import Entitys
# Compiled once at import time instead of on every call.
# Local part: starts with an alphanumeric character, followed by any number of
# [a-zA-Z0-9_-] characters and optional dot-separated labels.  The first label
# may be a single character (e.g. "a@b.com", "j.smith@x.com").
# Domain: one or more dot-separated labels ending in an alphabetic TLD of at
# least two letters.
_EMAIL_PATTERN = re.compile(
    r"[a-zA-Z0-9][a-zA-Z0-9_-]*(?:\.[a-zA-Z0-9_-]+)*@"
    r"[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*(?:\.[a-zA-Z]{2,})"
)


def extract_email(text):
    """Find every e-mail address in *text*.

    Args:
        text: The string to scan.  ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A list of dicts, one per match in order of appearance, each with:
            ``body``: the matched address,
            ``begin_index``: offset of its first character in *text*,
            ``end_index``: offset one past its last character, so that
            ``text[begin_index:end_index] == body``.
    """
    if not text:
        return []
    return [
        {
            "body": match.group(),
            "begin_index": match.start(),
            "end_index": match.end(),
        }
        for match in _EMAIL_PATTERN.finditer(text)
    ]
if __name__ == '__main__':
    # Manual smoke check: run the extractor over a sample contact line that
    # contains two addresses and print what was found.
    text = "联系人: 李春宜 联系电话:电话:0755-89663666-2492 邮箱:chun_yi.li@ci16-3mc.com.cn 邮箱:邮箱:chun_yi.li@qq.com"
    print(extract_email(text))
|