import os
import re
import sys

# Make the directory two levels up importable (kept from the original file;
# resolved relative to the current working directory at import time).
sys.path.append(os.path.abspath("../.."))

# from BiddingKG.dl.interface import Entitys

# Compiled once at import time so repeated calls do not rebuild the pattern.
# Raw strings are required: "\." in a normal string literal is an invalid
# escape sequence and triggers a warning on modern Python versions.
# Local part: one alphanumeric character, then any number of [A-Za-z0-9_-],
# then optional dot-separated segments. Domain: one or more labels followed by
# a final alphabetic label of at least two characters.
_EMAIL_PATTERN = re.compile(
    r"[a-zA-Z0-9][a-zA-Z0-9_-]*(?:\.[a-zA-Z0-9_-]+)*@"
    r"[a-zA-Z0-9_-]+(?:\.[a-zA-Z0-9_-]+)*(?:\.[a-zA-Z]{2,})"
)


def extract_email(text):
    """Find every e-mail address in *text*.

    Args:
        text: The string to scan. ``None`` or an empty string yields an
            empty result instead of raising.

    Returns:
        A list of dicts, one per match in order of appearance, each with:
            ``body``        - the matched e-mail address,
            ``begin_index`` - start offset of the match in *text*,
            ``end_index``   - end offset (exclusive) of the match in *text*.
    """
    if not text:
        return []
    return [
        {
            "body": match.group(),
            "begin_index": match.start(),
            "end_index": match.end(),
        }
        for match in _EMAIL_PATTERN.finditer(text)
    ]


if __name__ == '__main__':
    # Sample input for manual experiments; nothing is executed by default.
    text = "联系人: 李春宜 联系电话:电话:0755-89663666-2492 邮箱:chun_yi.li@ci16-3mc.com.cn 邮箱:邮箱:chun_yi.li@qq.com"
    # print(extract_email(text))
    pass