2 жил өмнө · bd48e7f132
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -189,9 +189,10 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',original_docchann
 
				     log("get attributes done of doc_id%s"%(doc_id))
			
 
				     cost_time["attrs"] = round(time.time()-start_time,2)
			
 
				 
			
 
				-    start_time = time.time() #失信数据要素提取
			
 
				-    list_punish_dic = predictor.getPredictor("punish").get_punish_extracts(list_articles,list_sentences, list_entitys)
			
 
				-    cost_time["punish"] = round(time.time()-start_time,2)
			
 
				+    # Temporarily disabled: the "punish" (dishonesty-record) element extraction below is skipped for now
			
 
				+    # start_time = time.time() #失信数据要素提取
			
 
				+    # list_punish_dic = predictor.getPredictor("punish").get_punish_extracts(list_articles,list_sentences, list_entitys)
			
 
				+    # cost_time["punish"] = round(time.time()-start_time,2)
			
 
				 
			
 
				 
			
 
				     '''修正采购公告表格形式多种采购产品中标价格；中标金额小于所有产品总金额则改为总金额'''
			
--- a/BiddingKG/dl/interface/modelFactory.py
+++ b/BiddingKG/dl/interface/modelFactory.py
@@ -41,7 +41,7 @@ class Model_role_classify():
 
				             return self.getModel().predict([x[0],x[1]])
			
 
				     
			
 
				 class Model_role_classify_word():
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         if USE_PAI_EAS:
			
 
				             lazyLoad = True
			
 
				         #self.model_role_file = os.path.abspath("../role/log/ep071-loss0.107-val_loss0.122-f10.956.h5")
			
@@ -49,7 +49,7 @@ class Model_role_classify_word():
 
				         #self.model_role_file = os.path.abspath("../role/log/textcnn_ep017-loss0.088-val_loss0.125-f10.955.h5")
			
 
				         self.model_role = None
			
 
				         
			
 
				-        self.sess_role = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_role = tf.Session(graph=tf.Graph(),config=config)
			
 
				         if not lazyLoad:
			
 
				             self.getModel()
			
 
				         
			
@@ -94,12 +94,12 @@ class Model_role_classify_word():
 
				         
			
 
				     
			
 
				 class Model_money_classify():
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         if USE_PAI_EAS:
			
 
				             lazyLoad = True
			
 
				         self.model_money_file = os.path.dirname(__file__)+"/../money/models/model_money_word.h5"
			
 
				         self.model_money = None
			
 
				-        self.sess_money = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_money = tf.Session(graph=tf.Graph(),config=config)
			
 
				         if not lazyLoad:
			
 
				             self.getModel()
			
 
				         
			
@@ -345,12 +345,12 @@ class Model_relation_extraction():
 
				 
			
 
				     
			
 
				 class Model_person_classify():
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         if USE_PAI_EAS:
			
 
				             lazyLoad = True
			
 
				         self.model_person_file = os.path.dirname(__file__)+"/../person/models/model_person.model.hdf5"
			
 
				         self.model_person = None
			
 
				-        self.sess_person = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_person = tf.Session(graph=tf.Graph(),config=config)
			
 
				         if not lazyLoad:
			
 
				             self.getModel()
			
 
				         
			
@@ -436,10 +436,10 @@ class Model_form_line():
 
				             return self.getModel().predict(x)
			
 
				     
			
 
				 class Model_form_item():
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         self.model_file = os.path.dirname(__file__)+"/../form/log/ep039-loss0.038-val_loss0.064-f10.9783.h5"
			
 
				         self.model_form = None
			
 
				-        self.sess_form = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_form = tf.Session(graph=tf.Graph(),config=config)
			
 
				         if not lazyLoad:
			
 
				             self.getModel()
			
 
				 
			
@@ -485,9 +485,9 @@ class Model_form_item():
 
				         '''
			
 
				 
			
 
				 class Model_form_context():
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         self.model_form = None
			
 
				-        self.sess_form = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_form = tf.Session(graph=tf.Graph(),config=config)
			
 
				         if not lazyLoad:
			
 
				             self.getModel()
			
 
				 
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -27,6 +27,13 @@ import calendar
 
				 import datetime
			
 
				 # import fool   # 统一用 selffool ，阿里云上只有selffool 包
			
 
				 
			
 
				+cpu_num = int(os.environ.get("CPU_NUM",0))  # thread budget read from the CPU_NUM environment variable; 0 when it is unset
			
 
				+sess_config = tf.ConfigProto(  # session options intended for the tf.Session objects created by the predictors
			
 
				+                        inter_op_parallelism_threads = cpu_num,  # NOTE(review): TF documents 0 as "runtime picks a value" - confirm for the installed version
			
 
				+                        intra_op_parallelism_threads = cpu_num,  # same budget reused for the intra-op setting
			
 
				+                        log_device_placement=True)  # NOTE(review): presumably verbose device-placement logging - confirm it is wanted in production
			
 
				+sess_config = None  # NOTE(review): rebinding discards the ConfigProto built above, so getPredictor() passes config=None; looks like a deliberate temporary switch-off - confirm
			
 
				+
			
 
				 from threading import RLock
			
 
				 dict_predictor = {"codeName":{"predictor":None,"Lock":RLock()},
			
 
				               "prem":{"predictor":None,"Lock":RLock()},
			
@@ -51,11 +58,11 @@ def getPredictor(_type):
 
				         with dict_predictor[_type]["Lock"]:
			
 
				             if dict_predictor[_type]["predictor"] is None:
			
 
				                 if _type == "codeName":
			
 
				-                    dict_predictor[_type]["predictor"] = CodeNamePredict()
			
 
				+                    dict_predictor[_type]["predictor"] = CodeNamePredict(config=sess_config)
			
 
				                 if _type == "prem":
			
 
				-                    dict_predictor[_type]["predictor"] = PREMPredict()
			
 
				+                    dict_predictor[_type]["predictor"] = PREMPredict(config=sess_config)
			
 
				                 if _type == "epc":
			
 
				-                    dict_predictor[_type]["predictor"] = EPCPredict()
			
 
				+                    dict_predictor[_type]["predictor"] = EPCPredict(config=sess_config)
			
 
				                 if _type == "roleRule":
			
 
				                     dict_predictor[_type]["predictor"] = RoleRulePredictor()
			
 
				                 if _type == "roleRuleFinal":
			
@@ -63,17 +70,17 @@ def getPredictor(_type):
 
				                 if _type == "tendereeRuleRecall":
			
 
				                     dict_predictor[_type]["predictor"] = TendereeRuleRecall()
			
 
				                 if _type == "form":
			
 
				-                    dict_predictor[_type]["predictor"] = FormPredictor()
			
 
				+                    dict_predictor[_type]["predictor"] = FormPredictor(config=sess_config)
			
 
				                 if _type == "time":
			
 
				-                    dict_predictor[_type]["predictor"] = TimePredictor()
			
 
				+                    dict_predictor[_type]["predictor"] = TimePredictor(config=sess_config)
			
 
				                 if _type == "punish":
			
 
				                     dict_predictor[_type]["predictor"] = Punish_Extract()
			
 
				                 if _type == "product":
			
 
				-                    dict_predictor[_type]["predictor"] = ProductPredictor()
			
 
				+                    dict_predictor[_type]["predictor"] = ProductPredictor(config=sess_config)
			
 
				                 if _type == "product_attrs":
			
 
				                     dict_predictor[_type]["predictor"] = ProductAttributesPredictor()
			
 
				                 if _type == "channel":
			
 
				-                    dict_predictor[_type]["predictor"] = DocChannel()
			
 
				+                    dict_predictor[_type]["predictor"] = DocChannel(config=sess_config)
			
 
				                 if _type == 'deposit_payment_way':
			
 
				                     dict_predictor[_type]["predictor"] = DepositPaymentWay()
			
 
				                 if _type == 'total_unit_money':
			
@@ -87,7 +94,7 @@ def getPredictor(_type):
 
				 # 编号名称模型
			
 
				 class CodeNamePredict():
			
 
				     
			
 
				-    def __init__(self,EMBED_DIM=None,BiRNN_UNITS=None,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,EMBED_DIM=None,BiRNN_UNITS=None,lazyLoad=getLazyLoad(),config=None):
			
 
				         
			
 
				         self.model = None
			
 
				         self.MAX_LEN = None
			
@@ -123,8 +130,8 @@ class CodeNamePredict():
 
				         
			
 
				         self.inputs = None
			
 
				         self.outputs = None
			
 
				-        self.sess_codename = tf.Session(graph=tf.Graph())
			
 
				-        self.sess_codesplit = tf.Session(graph=tf.Graph())
			
 
				+        self.sess_codename = tf.Session(graph=tf.Graph(),config=config)
			
 
				+        self.sess_codesplit = tf.Session(graph=tf.Graph(),config=config)
			
 
				         self.inputs_code = None
			
 
				         self.outputs_code = None
			
 
				         if not lazyLoad:
			
@@ -535,11 +542,11 @@ class CodeNamePredict():
 
				 class PREMPredict():
			
 
				 
			
 
				     
			
 
				-    def __init__(self):
			
 
				+    def __init__(self,config=None):
			
 
				         #self.model_role_file = os.path.abspath("../role/models/model_role.model.hdf5")
			
 
				         self.model_role_file = os.path.dirname(__file__)+"/../role/log/new_biLSTM-ep012-loss0.028-val_loss0.040-f10.954.h5"
			
 
				-        self.model_role = Model_role_classify_word()
			
 
				-        self.model_money = Model_money_classify()
			
 
				+        self.model_role = Model_role_classify_word(config=config)
			
 
				+        self.model_money = Model_money_classify(config=config)
			
 
				         
			
 
				         return
			
 
				     
			
@@ -734,8 +741,8 @@ class PREMPredict():
 
				 #联系人模型    
			
 
				 class EPCPredict():
			
 
				     
			
 
				-    def __init__(self):
			
 
				-        self.model_person = Model_person_classify()
			
 
				+    def __init__(self,config=None):
			
 
				+        self.model_person = Model_person_classify(config=config)
			
 
				 
			
 
				 
			
 
				     
			
@@ -1074,13 +1081,13 @@ class EPCPredict():
 
				 #表格预测
			
 
				 class FormPredictor():
			
 
				     
			
 
				-    def __init__(self,lazyLoad=getLazyLoad()):
			
 
				+    def __init__(self,lazyLoad=getLazyLoad(),config=None):
			
 
				         self.model_file_line = os.path.dirname(__file__)+"/../form/model/model_form.model_line.hdf5"
			
 
				         self.model_file_item = os.path.dirname(__file__)+"/../form/model/model_form.model_item.hdf5"
			
 
				-        self.model_form_item = Model_form_item()
			
 
				-        self.model_form_context = Model_form_context()
			
 
				+        self.model_form_item = Model_form_item(config=config)
			
 
				         self.model_dict = {"line":[None,self.model_file_line]}
			
 
				-        
			
 
				+        self.model_form_context = Model_form_context(config=config)
			
 
				+
			
 
				         
			
 
				     def getModel(self,type):
			
 
				         if type=="item":
			
@@ -1690,8 +1697,8 @@ class TendereeRuleRecall():
 
				 
			
 
				 # 时间类别
			
 
				 class TimePredictor():
			
 
				-    def __init__(self):
			
 
				-        self.sess = tf.Session(graph=tf.Graph())
			
 
				+    def __init__(self,config=None):
			
 
				+        self.sess = tf.Session(graph=tf.Graph(),config=config)
			
 
				         self.inputs_code = None
			
 
				         self.outputs_code = None
			
 
				         self.input_shape = (2,40,128)
			
@@ -1795,11 +1802,11 @@ class TimePredictor():
 
				 
			
 
				 # 产品字段提取
			
 
				 class ProductPredictor():
			
 
				-    def __init__(self):
			
 
				+    def __init__(self,config=None):
			
 
				         vocabpath = os.path.dirname(__file__) + "/codename_vocab.pk"
			
 
				         self.vocab = load(vocabpath)
			
 
				         self.word2index = dict((w, i) for i, w in enumerate(np.array(self.vocab)))
			
 
				-        self.sess = tf.Session(graph=tf.Graph())
			
 
				+        self.sess = tf.Session(graph=tf.Graph(),config=config)
			
 
				         self.load_model()
			
 
				 
			
 
				     def load_model(self):
			
@@ -2515,9 +2522,9 @@ class ProductAttributesPredictor():
 
				 
			
 
				 # docchannel类型提取
			
 
				 class DocChannel():
			
 
				-  def __init__(self, life_model='/channel_savedmodel/channel.pb', type_model='/channel_savedmodel/doctype.pb'):
			
 
				+  def __init__(self, life_model='/channel_savedmodel/channel.pb', type_model='/channel_savedmodel/doctype.pb',config=None):
			
 
				     self.lift_sess, self.lift_title, self.lift_content, self.lift_prob, self.lift_softmax,\
			
 
				-    self.mask, self.mask_title = self.load_life(life_model)
			
 
				+    self.mask, self.mask_title = self.load_life(life_model,config)
			
 
				     self.type_sess, self.type_title, self.type_content, self.type_prob, self.type_softmax,\
			
 
				     self.type_mask, self.type_mask_title = self.load_type(type_model)
			
 
				     self.sequen_len = 200  # 150 200
			
@@ -2578,7 +2585,7 @@ class DocChannel():
 
				           '招标公告': '(采购|招标|询价|议价|竞价|比价|比选|遴选|邀请|邀标|磋商|洽谈|约谈|谈判|拍卖|招租|交易|出让)的?(公告|公示|$)|公开(采购|招标|招租|拍卖|挂牌|出让)|(资审|预审|后审)公告',
			
 
				       }
			
 
				 
			
 
				-  def load_life(self,life_model):
			
 
				+  def load_life(self,life_model,config):
			
 
				     with tf.Graph().as_default() as graph:
			
 
				       output_graph_def = graph.as_graph_def()
			
 
				       with open(os.path.dirname(__file__)+life_model, 'rb') as f:
			
@@ -2586,7 +2593,7 @@ class DocChannel():
 
				         tf.import_graph_def(output_graph_def, name='')
			
 
				         # print("%d ops in the final graph" % len(output_graph_def.node))
			
 
				         del output_graph_def
			
 
				-        sess = tf.Session(graph=graph)
			
 
				+        sess = tf.Session(graph=graph,config=config)
			
 
				         sess.run(tf.global_variables_initializer())
			
 
				         inputs = sess.graph.get_tensor_by_name('inputs/inputs:0')
			
 
				         prob = sess.graph.get_tensor_by_name('inputs/dropout:0')
			
--- a/BiddingKG/dl_dev/test/test4.py
+++ b/BiddingKG/dl_dev/test/test4.py
@@ -109,6 +109,12 @@ def run_one():
 
				     # test(12,content)
			
 
				     # test(12,text)
			
 
				     print("takes",time.time()-a)
			
 
				+    print("start")
			
 
				+    _time1 = time.time()
			
 
				+    print(predict("12", content,"打印机",original_docchannel=52))
			
 
				+    # test(12,content)
			
 
				+    # test(12,text)
			
 
				+    print("takes",time.time()-a)
			
 
				     pass
			
 
				 
			
 
				 if __name__=="__main__":
			
--- a/BiddingKG/readme/start.md
+++ b/BiddingKG/readme/start.md
@@ -3,21 +3,17 @@
 
				 #项目路径在/data/python/BiddingKG
			
 
				 
			
 
				 #11022启动要素提取接口
			
 
				-#激活环境
			
 
				-source activate py37
			
 
				 #切换目录
			
 
				 cd /data/python
			
 
				 #关闭接口
			
 
				 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
			
 
				 #启动接口
			
 
				-nohup /data/anaconda3/envs/py37/bin/gunicorn -w 15 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				+nohup /data/anaconda3/envs/py37/bin/gunicorn -w 17 --limit-request-fields 0 --limit-request-line 0 -t 1000 --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				 
			
 
				 #19022启动要素提取接口
			
 
				-#激活环境
			
 
				-source activate py37
			
 
				 #切换目录
			
 
				 cd /data/python
			
 
				 #关闭接口
			
 
				 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
			
 
				 #启动接口
			
 
				-nohup /data/anaconda3/envs/py37/bin/gunicorn -w 6 --limit-request-fields 0 --limit-request-line 0 -t 1000 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
 
				+nohup /data/anaconda3/envs/py37/bin/gunicorn -w 5 --limit-request-fields 0 --limit-request-line 0 -t 1000 --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
			
--- a/BiddingKG/run_extract_server.py
+++ b/BiddingKG/run_extract_server.py
@@ -17,6 +17,13 @@ os.environ["KERAS_BACKEND"] = "tensorflow"
 
				 app = Flask(__name__)
			
 
				 app.config['JSON_AS_ASCII'] = False
			
 
				 
			
 
				+limit_num = "4"  # thread cap written to every numeric-library variable below (os.environ values must be strings)
			
 
				+os.environ["OMP_NUM_THREADS"] = limit_num # 1 means one core; with a value of 5 the system showed 10 cores in use, the exact relationship is unclear
			
 
				+os.environ["OMP_NUM_THREADS"] = limit_num # NOTE(review): repeats the assignment on the previous statement (redundant); same as: export OMP_NUM_THREADS=4
			
 
				+os.environ["OPENBLAS_NUM_THREADS"] = limit_num # same as: export OPENBLAS_NUM_THREADS=4 (value of limit_num)
			
 
				+os.environ["MKL_NUM_THREADS"] = limit_num # same as: export MKL_NUM_THREADS=4 (value of limit_num)
			
 
				+os.environ["VECLIB_MAXIMUM_THREADS"] = limit_num # same as: export VECLIB_MAXIMUM_THREADS=4 (value of limit_num)
			
 
				+os.environ["NUMEXPR_NUM_THREADS"] = limit_num # same as: export NUMEXPR_NUM_THREADS=4 (value of limit_num)
			
 
				 
			
 
				 import time
			
 
				 import uuid
			
@@ -30,7 +37,7 @@ import traceback
 
				 import json
			
 
				 
			
 
				 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
			
 
				-os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
			
 
				+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
			
 
				 sys.path.append(os.path.abspath("."))