modelFactory.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. '''
  2. Created on 2019年5月16日
  3. @author: User
  4. '''
  5. import os
  6. import sys
  7. sys.path.append(os.path.abspath("../.."))
  8. from keras import models
  9. from keras import layers
  10. # from keras_contrib.layers import CRF
  11. from keras.preprocessing.sequence import pad_sequences
  12. from keras import optimizers,losses,metrics
  13. from BiddingKG.dl.common.Utils import *
  14. import tensorflow as tf
  15. import json
  16. class Model_role_classify():
  17. def __init__(self,lazyLoad=getLazyLoad()):
  18. #self.model_role_file = os.path.abspath("../role/models/model_role.model.hdf5")
  19. self.model_role_file = os.path.dirname(__file__)+"/../role/log/new_biLSTM-ep012-loss0.028-val_loss0.040-f10.954.h5"
  20. self.model_role = None
  21. self.graph = tf.get_default_graph()
  22. if not lazyLoad:
  23. self.getModel()
  24. def getModel(self):
  25. if self.model_role is None:
  26. self.model_role = models.load_model(self.model_role_file,custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
  27. return self.model_role
  28. def encode(self,tokens,begin_index,end_index,**kwargs):
  29. return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
  30. def predict(self,x):
  31. x = np.transpose(np.array(x),(1,0,2,3))
  32. with self.graph.as_default():
  33. return self.getModel().predict([x[0],x[1]])
class Model_role_classify_word():
    # Role classifier backed by a TensorFlow SavedModel ("role_savedmodel").
    # Encodes a character-level context window around an entity and classifies its role.
    def __init__(self,lazyLoad=getLazyLoad(),config=None):
        # Under PAI-EAS deployment the model is always loaded lazily.
        if USE_PAI_EAS:
            lazyLoad = True
        #self.model_role_file = os.path.abspath("../role/log/ep071-loss0.107-val_loss0.122-f10.956.h5")
        # self.model_role_file = os.path.dirname(__file__)+"/../role/models/ep038-loss0.140-val_loss0.149-f10.947.h5"
        #self.model_role_file = os.path.abspath("../role/log/textcnn_ep017-loss0.088-val_loss0.125-f10.955.h5")
        self.model_role = None
        # Dedicated session + graph so this model does not clash with other models in the process.
        self.sess_role = tf.Session(graph=tf.Graph(),config=config)
        if not lazyLoad:
            self.getModel()
    def getModel(self):
        """Load the role SavedModel once and cache it as ``[[input0, input1], output]``."""
        if self.model_role is None:
            with self.sess_role.as_default() as sess:
                with self.sess_role.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess=self.sess_role, tags=["serve"], export_dir=os.path.dirname(__file__)+"/role_savedmodel")
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    # input2 = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
                    output = self.sess_role.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_role = [[input0,input1],output] #,input2
        return self.model_role
    '''
    def load_weights(self):
        model = self.getModel()
        model.load_weights(self.model_role_file)
    '''
    def encode(self,tokens,begin_index,end_index,entity_text,**kwargs):
        """Encode a 20-token context window (entity itself excluded) as padded character ids."""
        _span = spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=20,center_include=False,word_flag=True,text=entity_text) #size=12 center_include=True
        # print(_span)
        _encode_span = encodeInput(_span, word_len=20, word_flag=True,userFool=False) # word_len=20
        # print(_encode_span)
        return _encode_span
    def fix_digit_eng(self, text):
        '''
        Normalize digits, alphanumeric identifiers and boilerplate phrases in context
        text so surface noise does not mislead the role classifier. Each substitution
        below was added for a concrete mispredicted document; numeric ids in the
        comments are document ids from the production corpus.
        :param text: raw context text
        :return: normalized text (also maps a few traditional/full-width characters)
        '''
        text = re.sub('第[一二三1-3]([条项章轮次场]|中学|医院|附属)|第三方(服务机构)?|(中标|成交|中选)(候选|结果)?(单位|人)?公[示告]', 'xxx', text)  # fix doc 642200681: "第二轮推荐中选供应商:第一中选候选人:" wrongly predicted as second place
        text = re.sub('第01(中标|成交)?候选人', '第一中标候选人', text)
        text = re.sub('(标[段的包项]?|品目)[一二三1-3]', '标段', text)
        text = re.sub('第?[一二三1-3](标段?|[分子标]?包)', 'd标段', text)
        text = re.sub('[a-zA-Z][a-zA-Z0-9=&_—-]{3,}', 'abc', text)  # collapse long alphanumeric codes
        text = re.sub('[【(\[][0-9]{2,}[\])】]|\d+([::.-]\d+)+', 'd', text)
        text = re.sub('[一二三四五六七八九十]{2,}|[四五六七八九十]+', 'd', text)
        text = re.sub('\d{2,}(\.\d+)?|\d\.\d+|[04-9]', 'd', text)
        text = re.sub('序号:\d+', '序号:d', text)
        text = re.sub('第?[一二三四五六七八九十\d]+次|[一二三四五六七八九十\d]+、|([^\w]|^)序:?\d+', ' d', text)  # e.g. ",序:1,单位名称:"
        text = re.sub('(中标|成交|中选|入围)(工程|项目)', '工程', text)  # phrases easily misclassified as the winning bidder
        text = re.sub('约定|(盖章|签名):?', ' ', text)  # fix doc 233233636 ("竞价有关约定" misread as winner) and doc 273505905 ("乡镇签名:盖章:" misread as winner)
        text = re.sub('中介机构', '投标机构', text)  # doc 251058999: "序号:2,中介机构名称:" misread as winner
        text = re.sub('(采购|招标)人名称、地址和联系方式:|采购方,指', '采购人:', text)  # docs 275065998 / 224703143: "采购方,指" predicted as winner
        # Drop a bare "排名:" marker only when it is NOT followed by an actual rank number.
        if re.search('(最终)?排名:', text) and re.search('(最终)?排名:第?[123一二三]', text)==None:
            text = re.sub('(最终)?排名:', ' ', text)
        text = re.sub('交易单位', '发布单位', text)
        text = re.sub('[,:]各种数据:', ':', text)  # 2024-06-20 tweak, doc 478331984: org not extracted from "各种数据:中标单位,各种数据:..."
        text = re.sub('电子签章', '', text)  # 2024-09-24 fix, doc 529923459: e-signature boilerplate predicted as winner
        text = re.sub('采购方式', 'xxxx', text)  # fix doc 499096797: tenderer mispredicted
        text = re.sub('中标人\d名称', '中标人名称', text)  # fix doc 499096797: winner mispredicted
        text = re.sub('\|候选人', '、候选人', text)  # fix doc 626660259: pipe separator in "排名:1|候选人:..."
        text = re.sub('(中标|成交|中选)(出版社|回收商)', '成交供应商', text)  # fix doc 642076501: "成交出版社:..." normalized to supplier
        text = re.sub('((品牌|签名|盖章))?:', ' ', text)  # fix doc 642283019: "成交人(品牌:" predicted as class 5
        # Full-width -> half-width parens plus traditional -> simplified character fixes.
        # NOTE(review): '稱' maps to '承' rather than '称' — looks odd; confirm it is intentional.
        return text.replace('(', '(').replace(')', ')').replace('單', '单').replace('稱','承').replace('標', '标').replace('採購', '采购').replace('機構', '机构')
    def encode_word(self, sentence_text, begin_index, end_index, size=20, **kwargs):
        '''
        Encode the context numerically, using character offsets.
        :param sentence_text: sentence text
        :param begin_index: entity start offset (in characters)
        :param end_index: entity end offset (in characters)
        :param size: context window size in characters on each side
        :param kwargs: unused
        :return: padded character-id encoding of the normalized left/right context
        '''
        _span = get_context(sentence_text, begin_index, end_index,size=size, center_include=False) # size=12 center_include=True
        # print(_span)
        _span = [self.fix_digit_eng(text) for text in _span]
        _encode_span = encodeInput(_span, word_len=30, word_flag=True, userFool=False) # word_len=20
        # print(_encode_span)
        return _encode_span
    def predict(self,x):
        # x axes: (sample, input_slot, word) -> transpose to (input_slot, sample, word)
        # so each slot can be fed to its matching placeholder.
        x = np.transpose(np.array(x),(1,0,2))
        model_role = self.getModel()
        assert len(x)==len(model_role[0])
        feed_dict = {}
        for _x,_t in zip(x,model_role[0]):
            feed_dict[_t] = _x
        # limitRun (from Utils) presumably runs the session in bounded batches — confirm.
        list_result = limitRun(self.sess_role,[model_role[1]],feed_dict)[0]
        return list_result
        #return self.sess_role.run(model_role[1],feed_dict=feed_dict)
  126. class Model_money_classify():
  127. def __init__(self,lazyLoad=getLazyLoad(),config=None):
  128. if USE_PAI_EAS:
  129. lazyLoad = True
  130. self.model_money_file = os.path.dirname(__file__)+"/../money/models/model_money_word.h5"
  131. self.model_money = None
  132. self.sess_money = tf.Session(graph=tf.Graph(),config=config)
  133. if not lazyLoad:
  134. self.getModel()
  135. def getModel(self):
  136. if self.model_money is None:
  137. with self.sess_money.as_default() as sess:
  138. with sess.graph.as_default():
  139. meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/money_savedmodel")
  140. # meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/money_savedmodel_bilstmonly")
  141. signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
  142. signature_def = meta_graph_def.signature_def
  143. input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
  144. input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
  145. input2 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
  146. output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
  147. self.model_money = [[input0,input1,input2],output]
  148. return self.model_money
  149. '''
  150. if self.model_money is None:
  151. self.model_money = models.load_model(self.model_money_file,custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
  152. return self.model_money
  153. '''
  154. '''
  155. def load_weights(self):
  156. model = self.getModel()
  157. model.load_weights(self.model_money_file)
  158. '''
  159. def encode(self,tokens,begin_index,end_index,**kwargs):
  160. _span = spanWindow(tokens=tokens, begin_index=begin_index, end_index=end_index, size=10, center_include=True, word_flag=True)
  161. # print(_span)
  162. return encodeInput(_span, word_len=30, word_flag=True,userFool=False)
  163. return embedding_word(_span,shape=(3,100,60))
  164. def predict(self,x):
  165. # print("shape",np.shape(x))
  166. x = np.transpose(np.array(x),(1,0,2))
  167. model_money = self.getModel()
  168. assert len(x)==len(model_money[0])
  169. feed_dict = {}
  170. for _x,_t in zip(x,model_money[0]):
  171. feed_dict[_t] = _x
  172. list_result = limitRun(self.sess_money,[model_money[1]],feed_dict)[0]
  173. #return self.sess_money.run(model_money[1],feed_dict=feed_dict)
  174. return list_result
  175. '''
  176. with self.graph.as_default():
  177. return self.getModel().predict([x[0],x[1],x[2]])
  178. '''
  179. from itertools import groupby
  180. from BiddingKG.dl.relation_extraction.model import get_words_matrix
class Model_relation_extraction():
    # Two-stage relation extractor: a subject SavedModel proposes subject token
    # positions, then an object SavedModel predicts (object position, relation id)
    # pairs for each subject.
    def __init__(self,lazyLoad=getLazyLoad()):
        # Under PAI-EAS deployment the models are always loaded lazily.
        if USE_PAI_EAS:
            lazyLoad = True
        self.subject_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/subject_model"
        self.object_model_file = os.path.dirname(__file__)+"/../relation_extraction/models2/object_model"
        self.model_subject = None
        self.model_object = None
        # One isolated session/graph per SavedModel.
        self.sess_subject = tf.Session(graph=tf.Graph())
        self.sess_object = tf.Session(graph=tf.Graph())
        if not lazyLoad:
            self.getModel1()
            self.getModel2()
        # Maps entity types to the placeholder tokens the models were trained on.
        self.entity_type_dict = {
            'org': '<company/org>',
            'company': '<company/org>',
            'location': '<location>',
            'phone': '<phone>',
            'person': '<contact_person>'
        }
        # Relation-class id -> relation label.
        self.id2predicate = {
            0: "rel_person",  # company — contact person
            1: "rel_phone",   # contact person — phone
            2: "rel_address"  # company — address
        }
        # Word-vector dimensionality expected by get_words_matrix.
        self.words_size = 128
    # subject_model
    def getModel1(self):
        """Load the subject SavedModel once; cache as ``[[input0, input1], output]``."""
        if self.model_subject is None:
            with self.sess_subject.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=self.subject_model_file)
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_subject = [[input0,input1],output]
        return self.model_subject
    # object_model
    def getModel2(self):
        """Load the object SavedModel once; cache as ``[[input0, input1, input2], output]``."""
        if self.model_object is None:
            with self.sess_object.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=self.object_model_file)
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
                    input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
                    input2 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input2"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_object = [[input0,input1,input2],output]
        return self.model_object
    def encode(self,entity_list,list_sentence):
        """Build parallel token streams for the whole document.

        Returns ``(text_data, pre_data)`` where ``pre_data`` is the token
        sequence with each entity replaced by its type placeholder, and
        ``text_data`` carries the entity object at the placeholder position
        and 0 for every plain token.
        """
        list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)
        entity_list = sorted(entity_list, key=lambda x: (x.sentence_index, x.begin_index))
        pre_data = []
        text_data = []
        last_sentence_index = -1
        # groupby requires the pre-sorted order above (entities grouped per sentence).
        for key, group in groupby(entity_list, key=lambda x: x.sentence_index):
            # Emit entity-free sentences that were skipped between entity-bearing ones.
            if key - last_sentence_index > 1:
                for i in range(last_sentence_index + 1, key):
                    pre_data.extend(list_sentence[i].tokens)
                    text_data.extend([0] * len(list_sentence[i].tokens))
            group = list(group)
            for i in range(len(group)):
                ent = group[i]
                _tokens = list_sentence[key].tokens
                if i == len(group) - 1:
                    # Last entity of the sentence: flush tokens through the sentence end.
                    if i == 0:
                        # Also the first entity: include the tokens before it.
                        pre_data.extend(_tokens[:ent.begin_index])
                        text_data.extend([0] * len(_tokens[:ent.begin_index]))
                        pre_data.append(self.entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                    else:
                        pre_data.append(self.entity_type_dict[ent.entity_type])
                        text_data.append(ent)
                        pre_data.extend(_tokens[ent.end_index + 1:])
                        text_data.extend([0] * len(_tokens[ent.end_index + 1:]))
                        break
                if i == 0:
                    # First (but not last) entity: tokens before it, then up to the next entity.
                    pre_data.extend(_tokens[:ent.begin_index])
                    text_data.extend([0] * len(_tokens[:ent.begin_index]))
                    pre_data.append(self.entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
                else:
                    # Middle entity: placeholder plus the gap to the next entity.
                    pre_data.append(self.entity_type_dict[ent.entity_type])
                    text_data.append(ent)
                    pre_data.extend(_tokens[ent.end_index + 1:group[i + 1].begin_index])
                    text_data.extend([0] * len(_tokens[ent.end_index + 1:group[i + 1].begin_index]))
            last_sentence_index = key
        return text_data, pre_data
    def check_data(self, words):
        """Return True when the token stream can yield at least one relation.

        A company-side relation needs a company placeholder plus a second
        linkable placeholder; a person-side relation needs a person plus a
        phone. Location entities are ignored for now.
        """
        company_relation = 0
        person_relation = 0
        if '<company/org>' in words:
            company_relation += 1
        if '<contact_person>' in words:
            person_relation += 1
        if company_relation:
            company_relation += 1
        # location entities are ignored for now
        # if '<location>' in words and company_relation:
        #     company_relation += 1
        if '<phone>' in words and company_relation:
            person_relation += 1
        if company_relation < 2 and person_relation < 2:
            return False
        return True
    def predict_by_api(self,text_in,words,sentence_vetor):
        """Call the remote relation service; return ``(triple_list, status_code)``.

        On any failure returns ``([], status_code)`` so the caller can fall
        back to local prediction.
        NOTE(review): relies on ``requests`` and ``API_URL`` coming from the
        star import of Utils — confirm they are always defined.
        """
        status_code = 0
        # save([words,sentence_vetor.tolist()],"C:/Users/Administrator/Desktop/test_data.pk")
        try:
            requests_result = requests.post(API_URL + "/predict_relation", json={"sentence_vetor": sentence_vetor.tolist(), "words": words},
                                            verify=True)
            status_code = requests_result.status_code
            triple_index_list = json.loads(requests_result.text)['triple_list']
            # print("triple_list:",json.loads(requests_result.text)['triple_list'])
            print("cost_time:",json.loads(requests_result.text)['cost_time'])
            # The API returns index triples; map them back onto the local entities.
            triple_list = [(text_in[triple[0]], triple[1], text_in[triple[2]]) for triple in triple_index_list]
            return triple_list,status_code
        except Exception as e:
            print(e)
            return [],status_code
    def predict(self,text_in, words, rate=0.5):
        """Extract ``(subject_entity, predicate, object_entity)`` triples.

        :param text_in: per-token payload from :meth:`encode` (entity or 0)
        :param words: placeholder token sequence from :meth:`encode`
        :param rate: subject-probability threshold
        NOTE(review): uses ``self.model_subject``/``self.model_object`` directly,
        so getModel1()/getModel2() must already have run (lazyLoad=False) —
        confirm callers guarantee this.
        """
        # Word-vector matrix, one row per token.
        _t2 = np.zeros((len(words), self.words_size))
        for i in range(len(words)):
            _t2[i] = np.array(get_words_matrix(words[i]))
        # a = time.time()
        # triple_list, status_code = self.predict_by_api(text_in, words,_t2)
        # print('time',time.time()-a)
        # print("status_code",status_code)
        # if status_code==200:
        #     return triple_list
        # else:
        # predict with the local model
        triple_list = []
        # print("tokens:",words)
        # _t2 = [self.words2id.get(c, 1) for c in words]
        _t2 = np.array([_t2])
        _t3 = [1 for _ in words]  # attention/sequence mask of all ones
        _t3 = np.array([_t3])
        # _k1 = self.model_subject.predict([_t2, _t3])
        _k1 = limitRun(self.sess_subject,[self.model_subject[1]],feed_dict={self.model_subject[0][0]:_t2,
                                                                            self.model_subject[0][1]:_t3})[0]
        _k1 = _k1[0, :, 0]
        # Token positions whose subject probability exceeds the threshold.
        _k1 = np.where(_k1 > rate)[0]
        # print('k1',_k1)
        _subjects = []
        for i in _k1:
            _subject = text_in[i]
            _subjects.append((_subject, i, i))
        if _subjects:
            # Repeat the inputs once per candidate subject for the object model.
            _t2 = np.repeat(_t2, len(_subjects), 0)
            _t3 = np.repeat(_t3, len(_subjects), 0)
            _k1, _ = np.array([_s[1:] for _s in _subjects]).T.reshape((2, -1, 1))
            # _o1 = self.model_object.predict([_t2, _t3, _k1])
            _o1 = limitRun(self.sess_object, [self.model_object[1]], feed_dict={self.model_object[0][0]: _t2,
                                                                                self.model_object[0][1]: _t3,
                                                                                self.model_object[0][2]: _k1})[0]
            for i, _subject in enumerate(_subjects):
                # Positions/classes where object probability exceeds 0.5.
                _oo1 = np.where(_o1[i] > 0.5)
                # print('_oo1', _oo1)
                for _ooo1, _c1 in zip(*_oo1):
                    _object = text_in[_ooo1]
                    _predicate = self.id2predicate[_c1]
                    triple_list.append((_subject[0], _predicate, _object))
            # print([(t[0].entity_text,t[1],t[2].entity_text) for t in triple_list])
            return triple_list
        else:
            return []
  359. class Model_person_classify():
  360. def __init__(self,lazyLoad=getLazyLoad(),config=None):
  361. if USE_PAI_EAS:
  362. lazyLoad = True
  363. self.model_person_file = os.path.dirname(__file__)+"/../person/models/model_person.model.hdf5"
  364. self.model_person = None
  365. self.sess_person = tf.Session(graph=tf.Graph(),config=config)
  366. if not lazyLoad:
  367. self.getModel()
  368. def getModel(self):
  369. if self.model_person is None:
  370. with self.sess_person.as_default() as sess:
  371. with sess.graph.as_default():
  372. # meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/person_savedmodel_new")
  373. meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir=os.path.dirname(__file__)+"/person_savedmodel_new_znj")
  374. signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
  375. signature_def = meta_graph_def.signature_def
  376. input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
  377. input1 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input1"].name)
  378. output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
  379. self.model_person = [[input0,input1],output]
  380. return self.model_person
  381. '''
  382. if self.model_person is None:
  383. self.model_person = models.load_model(self.model_person_file,custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
  384. return self.model_person
  385. '''
  386. '''
  387. def load_weights(self):
  388. model = self.getModel()
  389. model.load_weights(self.model_person_file)
  390. '''
  391. def encode(self,tokens,begin_index,end_index,**kwargs):
  392. # return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=10),shape=(2,10,128))
  393. return embedding(spanWindow(tokens=tokens,begin_index=begin_index,end_index=end_index,size=20),shape=(2,20,128))
  394. def predict(self,x):
  395. x = np.transpose(np.array(x),(1,0,2,3))
  396. model_person = self.getModel()
  397. assert len(x)==len(model_person[0])
  398. feed_dict = {}
  399. for _x,_t in zip(x,model_person[0]):
  400. feed_dict[_t] = _x
  401. list_result = limitRun(self.sess_person,[model_person[1]],feed_dict)[0]
  402. return list_result
  403. #return self.sess_person.run(model_person[1],feed_dict=feed_dict)
  404. '''
  405. with self.graph.as_default():
  406. return self.getModel().predict([x[0],x[1]])
  407. '''
  408. class Model_form_line():
  409. def __init__(self,lazyLoad=getLazyLoad()):
  410. self.model_file = os.path.dirname(__file__)+"/../form/model/model_form.model - 副本.hdf5"
  411. self.model_form = None
  412. self.graph = tf.get_default_graph()
  413. if not lazyLoad:
  414. self.getModel()
  415. def getModel(self):
  416. if self.model_form is None:
  417. self.model_form = models.load_model(self.model_file,custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
  418. return self.model_form
  419. def encode(self,data,shape=(100,60),expand=False,**kwargs):
  420. embedding = np.zeros(shape)
  421. word_model = getModel_word()
  422. for i in range(len(data)):
  423. if i>=shape[0]:
  424. break
  425. if data[i] in word_model.vocab:
  426. embedding[i] = word_model[data[i]]
  427. if expand:
  428. embedding = np.expand_dims(embedding,0)
  429. return embedding
  430. def predict(self,x):
  431. with self.graph.as_default():
  432. return self.getModel().predict(x)
  433. class Model_form_item():
  434. def __init__(self,lazyLoad=getLazyLoad(),config=None):
  435. self.model_file = os.path.dirname(__file__)+"/../form/log/ep039-loss0.038-val_loss0.064-f10.9783.h5"
  436. self.model_form = None
  437. self.sess_form = tf.Session(graph=tf.Graph(),config=config)
  438. if not lazyLoad:
  439. self.getModel()
  440. def getModel(self):
  441. if self.model_form is None:
  442. with self.sess_form.as_default() as sess:
  443. with sess.graph.as_default():
  444. meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir="%s/form_savedmodel"%(os.path.dirname(__file__)))
  445. signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
  446. signature_def = meta_graph_def.signature_def
  447. inputs = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["inputs"].name)
  448. output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
  449. self.model_form = [[inputs],output]
  450. return self.model_form
  451. '''
  452. if self.model_form is None:
  453. with self.graph.as_defalt():
  454. self.model_form = models.load_model(self.model_file,custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
  455. return self.model_form
  456. '''
  457. def encode(self,data,**kwargs):
  458. return encodeInput([data], word_len=50, word_flag=True,userFool=False)[0]
  459. return encodeInput_form(data)
  460. def predict(self,x):
  461. if USE_API:
  462. requests_result = requests.post(API_URL+"/predict_form_item",json={"inputs":x.tolist()}, verify=True)
  463. list_result = json.loads(requests_result.text)['result']
  464. else:
  465. model_form = self.getModel()
  466. list_result = limitRun(self.sess_form,[model_form[1]],feed_dict={model_form[0][0]:x})[0]
  467. return list_result
  468. # return self.sess_form.run(model_form[1],feed_dict={model_form[0][0]:x})
  469. '''
  470. with self.graph.as_default():
  471. return self.getModel().predict(x)
  472. '''
class Model_form_context():
    # Table-cell classifier that looks at each cell's 3x3 neighborhood,
    # backed by a TensorFlow SavedModel ("form_context_savedmodel").
    def __init__(self,lazyLoad=getLazyLoad(),config=None):
        self.model_form = None
        # Dedicated session + graph so this model does not clash with other models.
        self.sess_form = tf.Session(graph=tf.Graph(),config=config)
        if not lazyLoad:
            self.getModel()
    def getModel(self):
        """Load the form-context SavedModel once; cache as ``[[inputs], output]``."""
        if self.model_form is None:
            with self.sess_form.as_default() as sess:
                with sess.graph.as_default():
                    meta_graph_def = tf.saved_model.loader.load(sess,tags=["serve"],export_dir="%s/form_context_savedmodel"%(os.path.dirname(__file__)))
                    signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
                    signature_def = meta_graph_def.signature_def
                    inputs = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["inputs"].name)
                    output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
                    self.model_form = [[inputs],output]
        return self.model_form
    '''
    if self.model_form is None:
        with self.graph.as_defalt():
            self.model_form = models.load_model(self.model_file,custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
    return self.model_form
    '''
    def encode_table(self,inner_table,size=30):
        """Encode every cell of a table together with its 3x3 neighborhood.

        ``inner_table`` is a grid of ``[text, label]`` cells — presumably
        row-major with uniform row lengths (confirm against callers).
        Returns ``(data_x, data_y, data_text, data_position)`` where positions
        are ``[col, row]`` in the original (unpadded) table.
        """
        def encode_item(_table,i,j):
            # Collect the 3x3 neighborhood centered on cell (row j, column i).
            _x = [_table[j-1][i-1],_table[j-1][i],_table[j-1][i+1],
                  _table[j][i-1],_table[j][i],_table[j][i+1],
                  _table[j+1][i-1],_table[j+1][i],_table[j+1][i+1]]
            e_x = [encodeInput_form(_temp[0],MAX_LEN=30) for _temp in _x]
            _label = _table[j][i][1]
            # print(_x)
            # print(_x[4],_label)
            return e_x,_label,_x
        def copytable(inner_table):
            # Deep-ish copy truncating each cell text to `size` characters,
            # so padding mutation below cannot touch the caller's table.
            table = []
            for line in inner_table:
                list_line = []
                for item in line:
                    list_line.append([item[0][:size],item[1]])
                table.append(list_line)
            return table
        table = copytable(inner_table)
        # Sentinel cell used to pad the border: 30 '#' characters, label 0.
        padding = ["#"*30,0]
        width = len(table[0])
        height = len(table)
        # Pad one row above/below and one column left/right so encode_item
        # can index j-1/j+1 and i-1/i+1 without bounds checks.
        # NOTE(review): the padding ROWS share one `padding` list while the
        # padding COLUMNS get .copy() — harmless as long as padding cells are
        # read-only, but confirm.
        table.insert(0,[padding for i in range(width)])
        table.append([padding for i in range(width)])
        for item in table:
            item.insert(0,padding.copy())
            item.append(padding.copy())
        data_x = []
        data_y = []
        data_text = []
        data_position = []
        # Column-major traversal over the original (unpadded) cells.
        for _i in range(1,width+1):
            for _j in range(1,height+1):
                _x,_y,_text = encode_item(table,_i,_j)
                data_x.append(_x)
                # One-hot label over the two classes.
                _label = [0,0]
                _label[_y] = 1
                data_y.append(_label)
                data_text.append(_text)
                data_position.append([_i-1,_j-1])
                # input = table[_j][_i][0]
                # item_y = [0,0]
                # item_y[table[_j][_i][1]] = 1
                # data_x.append(encodeInput([input], word_len=50, word_flag=True,userFool=False)[0])
                # data_y.append(item_y)
        return data_x,data_y,data_text,data_position
    def encode(self,inner_table,**kwargs):
        """Encode a table for prediction; returns ``(data_x, data_position)``."""
        data_x,_,_,data_position = self.encode_table(inner_table)
        return data_x,data_position
    def predict(self,x):
        """Run the SavedModel over the encoded cells and return raw outputs."""
        model_form = self.getModel()
        # limitRun (from Utils) presumably runs the session in bounded batches — confirm.
        list_result = limitRun(self.sess_form,[model_form[1]],feed_dict={model_form[0][0]:x})[0]
        return list_result
  549. # class Model_form_item():
  550. # def __init__(self,lazyLoad=False):
  551. # self.model_file = os.path.dirname(__file__)+"/ep039-loss0.038-val_loss0.064-f10.9783.h5"
  552. # self.model_form = None
  553. #
  554. # if not lazyLoad:
  555. # self.getModel()
  556. # self.graph = tf.get_default_graph()
  557. #
  558. # def getModel(self):
  559. # if self.model_form is None:
  560. # self.model_form = models.load_model(self.model_file,custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
  561. # return self.model_form
  562. #
  563. # def encode(self,data,**kwargs):
  564. #
  565. # return encodeInput_form(data)
  566. #
  567. # def predict(self,x):
  568. # with self.graph.as_default():
  569. # return self.getModel().predict(x)