|
@@ -20,55 +20,61 @@ import tensorflow as tf
|
|
|
from BiddingKG.dl.product.data_util import decode, process_data
|
|
|
from BiddingKG.dl.interface.Entitys import Entity
|
|
|
from BiddingKG.dl.complaint.punish_predictor import Punish_Extract
|
|
|
+from BiddingKG.dl.money.re_money_total_unit import extract_total_money, extract_unit_money
|
|
|
from bs4 import BeautifulSoup
|
|
|
import copy
|
|
|
import calendar
|
|
|
import datetime
|
|
|
|
|
|
from threading import RLock
|
|
|
# Names of every predictor type that getPredictor() can hand out, in
# registration order.
_PREDICTOR_TYPES = (
    "codeName",
    "prem",
    "epc",
    "roleRule",
    "form",
    "time",
    "punish",
    "product",
    "product_attrs",
    "channel",
    "deposit_payment_way",
    "total_unit_money",
)

# Registry of lazily created predictors, keyed by predictor type name.
# Each entry starts without an instance ("predictor" is None) and carries its
# own re-entrant lock ("Lock"), which getPredictor() holds while it creates
# and stores the instance for that entry.
dict_predictor = {
    _predictor_type: {"predictor": None, "Lock": RLock()}
    for _predictor_type in _PREDICTOR_TYPES
}
|
|
|
|
|
|
|
|
|
def getPredictor(_type):
    """Return the shared predictor instance registered under ``_type``.

    The instance is created on the first request, while holding the lock
    stored in that type's ``dict_predictor`` entry, and is then kept in the
    entry so that later calls return the same object.

    :param _type: a key of ``dict_predictor`` (for example ``"prem"`` or
        ``"total_unit_money"``).
    :return: the cached predictor for ``_type``; ``None`` if the type is
        registered in ``dict_predictor`` but has no class mapped below.
    :raises NameError: if ``_type`` is not a key of ``dict_predictor``.
    """
    if _type not in dict_predictor:
        raise NameError("no this type of predictor")

    entry = dict_predictor[_type]
    with entry["Lock"]:
        if entry["predictor"] is None:
            # The table is built inside the function because the predictor
            # classes are defined further down in this module and only exist
            # once the module has finished loading.
            predictor_classes = {
                "codeName": CodeNamePredict,
                "prem": PREMPredict,
                "epc": EPCPredict,
                "roleRule": RoleRulePredictor,
                "form": FormPredictor,
                "time": TimePredictor,
                "punish": Punish_Extract,
                "product": ProductPredictor,
                "product_attrs": ProductAttributesPredictor,
                "channel": DocChannel,
                "deposit_payment_way": DepositPaymentWay,
                "total_unit_money": TotalUnitMoney,
            }
            predictor_class = predictor_classes.get(_type)
            if predictor_class is not None:
                entry["predictor"] = predictor_class()
        return entry["predictor"]
|
|
|
|
|
|
-#编号名称模型
|
|
|
+
|
|
|
+# 编号名称模型
|
|
|
class CodeNamePredict():
|
|
|
|
|
|
def __init__(self,EMBED_DIM=None,BiRNN_UNITS=None,lazyLoad=getLazyLoad()):
|
|
@@ -525,7 +531,7 @@ class CodeNamePredict():
|
|
|
return result
|
|
|
'''
|
|
|
|
|
|
-#角色金额模型
|
|
|
+# 角色金额模型
|
|
|
class PREMPredict():
|
|
|
|
|
|
|
|
@@ -704,7 +710,7 @@ class PREMPredict():
|
|
|
self.predict_money(list_sentences,list_entitys)
|
|
|
|
|
|
|
|
|
-#联系人模型
|
|
|
+# 联系人模型
|
|
|
class EPCPredict():
|
|
|
|
|
|
def __init__(self):
|
|
@@ -1044,7 +1050,7 @@ class EPCPredict():
|
|
|
def predict(self,list_sentences,list_entitys):
|
|
|
self.predict_person(list_sentences,list_entitys)
|
|
|
|
|
|
-#表格预测
|
|
|
+# 表格预测
|
|
|
class FormPredictor():
|
|
|
|
|
|
def __init__(self,lazyLoad=getLazyLoad()):
|
|
@@ -1076,10 +1082,9 @@ class FormPredictor():
|
|
|
else:
|
|
|
return self.getModel(type).predict(form_datas)
|
|
|
|
|
|
-
|
|
|
|
|
|
-#角色规则
|
|
|
-#依据正则给所有无角色的实体赋予角色,给予等于阈值的最低概率
|
|
|
+# 角色规则
|
|
|
+# 依据正则给所有无角色的实体赋予角色,给予等于阈值的最低概率
|
|
|
class RoleRulePredictor():
|
|
|
|
|
|
def __init__(self):
|
|
@@ -1371,6 +1376,7 @@ class RoleRulePredictor():
|
|
|
if p_entity.entity_text in self.SET_NOT_TENDERER:
|
|
|
p_entity.label=5
|
|
|
|
|
|
+
|
|
|
# 时间类别
|
|
|
class TimePredictor():
|
|
|
def __init__(self):
|
|
@@ -1476,6 +1482,7 @@ class TimePredictor():
|
|
|
values[0] = 0.5
|
|
|
entity.set_Role(label, values)
|
|
|
|
|
|
+
|
|
|
# 产品字段提取
|
|
|
class ProductPredictor():
|
|
|
def __init__(self):
|
|
@@ -1552,6 +1559,7 @@ class ProductPredictor():
|
|
|
result.append(item) # 修正bug
|
|
|
return result
|
|
|
|
|
|
+
|
|
|
# 产品数量单价品牌规格提取 #2021/11/10 添加表格中的项目、需求、预算、时间要素提取
|
|
|
class ProductAttributesPredictor():
|
|
|
def __init__(self,):
|
|
@@ -2023,6 +2031,7 @@ class ProductAttributesPredictor():
|
|
|
demand_dic = {'demand_info':{'data':[], 'header':[], 'header_col':[]}}
|
|
|
return [attr_dic, demand_dic]
|
|
|
|
|
|
+
|
|
|
# docchannel类型提取
|
|
|
class DocChannel():
|
|
|
def __init__(self, life_model='/channel_savedmodel/channel.pb', type_model='/channel_savedmodel/doctype.pb'):
|
|
@@ -2204,6 +2213,7 @@ class DocChannel():
|
|
|
# return self.id2type[id], prob
|
|
|
return [{'docchannel':self.id2type[id]}]
|
|
|
|
|
|
+
|
|
|
# 保证金支付方式提取
|
|
|
class DepositPaymentWay():
|
|
|
def __init__(self,):
|
|
@@ -2237,6 +2247,39 @@ class DepositPaymentWay():
|
|
|
else:
|
|
|
return pay_way
|
|
|
|
|
|
+
|
|
|
# Total-price / unit-price extraction
class TotalUnitMoney:
    """Flag money entities as total prices or unit prices.

    The decision for each entity is delegated to ``extract_total_money`` and
    ``extract_unit_money``; this class only selects which helper to call and
    records a truthy result on the entity.
    """

    def __init__(self):
        pass

    def predict(self, list_sentences, list_entitys):
        """Mark money entities in place; returns nothing.

        :param list_sentences: per-document sequences of sentence objects;
            ``list_sentences[i][k].sentence_text`` is read for an entity of
            document ``i`` whose ``sentence_index`` is ``k``.
        :param list_entitys: per-document sequences of entity objects. Only
            entities whose ``entity_type`` is ``'money'`` are examined.

        For a money entity with ``label == 1`` the attribute
        ``is_total_money`` is set to 1 when ``extract_total_money`` returns a
        truthy value. For any other money entity ``is_unit_money`` is set to 1
        when ``extract_unit_money`` returns a truthy value. Neither attribute
        is touched otherwise.
        """
        for doc_index, doc_entities in enumerate(list_entitys):
            for _entity in doc_entities:
                if _entity.entity_type != 'money':
                    continue

                sentence_text = list_sentences[doc_index][_entity.sentence_index].sentence_text

                if _entity.label == 1:
                    # Original note: total prices are found among the
                    # bid / winning-bid amounts, which is the label == 1 case.
                    is_total = extract_total_money(
                        sentence_text,
                        _entity.entity_text,
                        [_entity.wordOffset_begin, _entity.wordOffset_end])
                    if is_total:
                        _entity.is_total_money = 1
                else:
                    # Original note: unit prices are found among the
                    # ordinary (remaining) money amounts.
                    is_unit = extract_unit_money(
                        sentence_text,
                        _entity.entity_text,
                        [_entity.wordOffset_begin, _entity.wordOffset_end])
                    if is_unit:
                        _entity.is_unit_money = 1
|
|
|
+
|
|
|
+
|
|
|
def getSavedModel():
|
|
|
#predictor = FormPredictor()
|
|
|
graph = tf.Graph()
|
|
@@ -2250,7 +2293,8 @@ def getSavedModel():
|
|
|
inputs={"image": model.input},
|
|
|
outputs={"scores": model.output}
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
def getBiLSTMCRFModel(MAX_LEN,vocab,EMBED_DIM,BiRNN_UNITS,chunk_tags,weights):
|
|
|
'''
|
|
|
model = models.Sequential()
|
|
@@ -2354,6 +2398,7 @@ def h5_to_graph(sess,graph,h5file):
|
|
|
print(name,graph.get_tensor_by_name(name),np.shape(value))
|
|
|
sess.run(tf.assign(graph.get_tensor_by_name(name),value))
|
|
|
|
|
|
+
|
|
|
def initialize_uninitialized(sess):
|
|
|
global_vars = tf.global_variables()
|
|
|
is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in global_vars])
|
|
@@ -2435,7 +2480,8 @@ def save_role_model():
|
|
|
"input2":model.input[2]},
|
|
|
outputs={"outputs":model.output}
|
|
|
)
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
def save_money_model():
|
|
|
model_file = os.path.dirname(__file__)+"/../money/models/model_money_word.h5"
|
|
|
graph = tf.Graph()
|
|
@@ -2487,7 +2533,8 @@ def save_person_model():
|
|
|
inputs={"input0":model.input[0],
|
|
|
"input1":model.input[1]},
|
|
|
outputs = {"outputs":model.output})
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
def save_form_model():
|
|
|
model_form = FormPredictor()
|
|
|
with model_form.graph.as_default():
|
|
@@ -2499,7 +2546,8 @@ def save_form_model():
|
|
|
"./form_savedmodel/",
|
|
|
inputs={"inputs":model.input},
|
|
|
outputs = {"outputs":model.output})
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
def save_codesplit_model():
|
|
|
filepath_code = "../projectCode/models/model_code.hdf5"
|
|
|
|
|
@@ -2517,6 +2565,7 @@ def save_codesplit_model():
|
|
|
"input2":model_code.input[2]},
|
|
|
outputs={"outputs":model_code.output})
|
|
|
|
|
|
+
|
|
|
def save_timesplit_model():
|
|
|
filepath = '../time/model_label_time_classify.model.hdf5'
|
|
|
with tf.Graph().as_default() as graph:
|