predict.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. import os,sys
  2. import numpy as np
  3. # from BiddingKG.dl.common.models import *
  4. import re
  5. import tensorflow as tf
  6. import jieba
  7. # from keras.layers import *
  8. # from keras.models import Model
  9. # from keras.utils import Sequence,to_categorical
  10. # import keras.backend as K
  11. # from keras.callbacks import Callback
  12. import pandas as pd
  13. maxlen = 512
  14. words_size = 128
  15. # w2v_filepath = os.path.dirname(__file__)+"/../wiki_128_word_embedding_new.vector"
  16. w2v_filepath = "wiki_128_word_embedding_new.vector"
  17. import gensim
  18. model_w2v = gensim.models.KeyedVectors.load_word2vec_format(w2v_filepath,binary=True)
  19. def get_words_matrix(words):
  20. if words in model_w2v.vocab:
  21. return model_w2v[words]
  22. else:
  23. return model_w2v['unk']
  24. class Model_relation_extraction():
  25. def __init__(self):
  26. self.model_file = os.path.dirname(__file__)+"/models/model_attachment_classify"
  27. # print(self.model_file)
  28. self.sess = tf.Session(graph=tf.Graph())
  29. self.classes_dict = {
  30. 0: '其他',
  31. 1: '招标文件',
  32. 2: '限价(控制价)',
  33. 3: '工程量清单',
  34. 4: '采购清单',
  35. 5: '评标办法'
  36. }
  37. self.getModel()
  38. def getModel(self):
  39. with self.sess.as_default() as sess:
  40. with sess.graph.as_default():
  41. meta_graph_def = tf.saved_model.loader.load(sess, tags=["serve"], export_dir=self.model_file)
  42. signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
  43. signature_def = meta_graph_def.signature_def
  44. input0 = sess.graph.get_tensor_by_name(signature_def[signature_key].inputs["input0"].name)
  45. print(input0.shape)
  46. output = sess.graph.get_tensor_by_name(signature_def[signature_key].outputs["outputs"].name)
  47. self.model = [input0, output]
  48. return self.model
  49. def text_process(self,attachmentcon):
  50. text = attachmentcon
  51. text = re.sub("\n+", ',', text)
  52. text = re.sub("\s+|?+", '', text)
  53. text = re.sub("[\.·_]{2,}", ',', text)
  54. text = re.sub("_", '', text)
  55. text = text[:2500]
  56. tokens = list(jieba.cut(text))
  57. return tokens
  58. def evaluate(self,attachmentcon):
  59. text = str(attachmentcon)
  60. tokens = self.text_process(text)
  61. maxlen = 512
  62. tokens = tokens[:maxlen]
  63. words_matrix = np.zeros((maxlen, words_size))
  64. for i in range(len(tokens)):
  65. words_matrix[i] = np.array(get_words_matrix(tokens[i]))
  66. words_matrix = np.array([words_matrix])
  67. pred = limitRun(self.sess, [self.model[1]], feed_dict={self.model[0]: words_matrix})[0]
  68. pred_label = np.argmax(pred[0])
  69. cn_label = self.classes_dict[pred_label]
  70. return pred_label,cn_label
  71. def limitRun(sess,list_output,feed_dict,MAX_BATCH=1024):
  72. len_sample = 0
  73. if len(feed_dict.keys())>0:
  74. len_sample = len(feed_dict[list(feed_dict.keys())[0]])
  75. if len_sample>MAX_BATCH:
  76. list_result = [[] for _ in range(len(list_output))]
  77. _begin = 0
  78. while(_begin<len_sample):
  79. new_dict = dict()
  80. for _key in feed_dict.keys():
  81. if isinstance(feed_dict[_key],(float,int,np.int32,np.float_,np.float16,np.float32,np.float64)):
  82. new_dict[_key] = feed_dict[_key]
  83. else:
  84. new_dict[_key] = feed_dict[_key][_begin:_begin+MAX_BATCH]
  85. _output = sess.run(list_output,feed_dict=new_dict)
  86. for _index in range(len(list_output)):
  87. list_result[_index].extend(_output[_index])
  88. _begin += MAX_BATCH
  89. else:
  90. list_result = sess.run(list_output,feed_dict=feed_dict)
  91. return list_result
  92. if __name__ == '__main__':
  93. text = '''招标文件项目编号:SDGP370302202102000110项目名称:淄川经济开发区中心小学校园智能化采购项目采购人:山东淄川经
  94. 济开发区管理委员会采购代理机构:淄博正益招标有限公司发出日期:2021年8月目录第一章投标邀请7一、项目基本情况7二、申请人的资格要
  95. 求8三、获取招标文件8四、提交投标文件截止时间、开标时间和地点8五、公告期限9六、其他补充事宜9第二章投标人须知11一、总则161.采
  96. 购人、采购代理机构及投标人162.资金来源183.投标费用184.适用法律18二、招标文件185.招标文件构成186.招标文件的澄清与修改207.投
  97. 标截止时间的顺延20三、投标文件的编制208.编制要求209.投标范围及投标文件中标准和计量单位的使用2110.投标文件构成2211.投标报价241
  98. 2.电子版投标文件2513.投标保证金2614.投标有效期2615.投标文件的签署及规定26四、投标文件的递交2616.投标文件的递交2617.递交
  99. 投标文件的截止时间2718.投标文件的接收、修改与撤回27五、开标及评标2719.开标2720.资格审查2821.组建评标委员会2922.投标文件符
  100. 合性审查与澄清3023.投标偏离3224.投标无效3225.比较和评价3326.废标3527.保密要求36六、确定中标3628.中标候选人的确定原则及标
  101. 准3629.确定中标候选人和中标人3630.采购任务取消3631.中标通知书3632.签订合同3633.履约保证金3734.政府采购融资担保3735.预付
  102. 款3736.廉洁自律规定3737.人员回避3738.质疑与接收3739.项目其他相关费用3940.合同公示3941.验收4042.履约验收公示4043.招标文
  103. 件解释权40第三章货物需求41一、项目概述41
  104. '''
  105. test_text = re.sub('\n','',text)
  106. model = Model_relation_extraction()
  107. print(model.evaluate(test_text))
  108. pass