
Update and optimize the encoding scheme of the time classification model

admin 4 years ago
parent
commit
07d547b5e5

+ 7 - 4
BiddingKG/dl/interface/predictor.py

@@ -1134,7 +1134,7 @@ class TimePredictor():
         self.sess = tf.Session(graph=tf.Graph())
         self.inputs_code = None
         self.outputs_code = None
-        self.input_shape = (2,30,60)
+        self.input_shape = (2,10,128)
         self.load_model()
 
     def load_model(self):
@@ -1168,10 +1168,13 @@ class TimePredictor():
                     while(p_sentences<len(list_sentence)):
                         sentence = list_sentence[p_sentences]
                         if entity.doc_id == sentence.doc_id and entity.sentence_index == sentence.sentence_index:
-                            left = sentence.sentence_text[max(0,entity.wordOffset_begin-self.input_shape[1]):entity.wordOffset_begin]
-                            right = sentence.sentence_text[entity.wordOffset_end:entity.wordOffset_end+self.input_shape[1]]
+                            # left = sentence.sentence_text[max(0,entity.wordOffset_begin-self.input_shape[1]):entity.wordOffset_begin]
+                            # right = sentence.sentence_text[entity.wordOffset_end:entity.wordOffset_end+self.input_shape[1]]
+                            s = spanWindow(tokens=sentence.tokens,begin_index=entity.begin_index,end_index=entity.end_index,size=self.input_shape[1])
+                            left = s[0]
+                            right = s[1]
                             context = [left, right]
-                            x = embedding_word(context, shape=self.input_shape)
+                            x = embedding(context, shape=self.input_shape)
                             data_x.append(x)
                             points_entitys.append(entity)
                             break
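Note on the hunk above: it replaces raw character slices fed through embedding_word with token windows taken by spanWindow and fed through embedding, matching the new input_shape of (2, 10, 128): two contexts of 10 tokens embedded as 128-dim vectors, instead of 30 characters at 60 dims. A minimal sketch of what the spanWindow call is assumed to return (it is a project helper; its internals are not part of this diff):

# Sketch only: spanWindow is a BiddingKG helper whose implementation is not
# shown here; this illustrates the assumed (left, right) token-window output.
def span_window_sketch(tokens, begin_index, end_index, size=10):
    # Left context: up to `size` tokens before the entity.
    left = tokens[max(0, begin_index - size):begin_index]
    # Right context: up to `size` tokens from end_index on (end_index treated
    # as exclusive here, matching how data_process3 below computes entity_tend;
    # the real helper may define it differently).
    right = tokens[end_index:end_index + size]
    return left, right

tokens = ["评标", "日期", ":", "2020", "年", "11", "月", "30", "日"]
left, right = span_window_sketch(tokens, begin_index=3, end_index=9, size=10)
print(left)   # ['评标', '日期', ':']
print(right)  # []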

BIN
BiddingKG/dl/interface/timesplit_model/saved_model.pb


BIN
BiddingKG/dl/interface/timesplit_model/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/interface/timesplit_model/variables/variables.index


+ 12 - 5
BiddingKG/dl/test/test4.py

@@ -114,7 +114,7 @@ def test(name,content):
 if __name__=="__main__":
     # filename = "比地_52_79929693.html"
     #text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
-    text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
+    text = codecs.open("C:\\Users\\admin\\Desktop\\新建文本文档 (2).txt","r",encoding="utf8").read()
     content = str(BeautifulSoup(text).find("div",id="pcontent"))
     # df_a = {"html":[]}
     # df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
@@ -134,11 +134,18 @@ if __name__=="__main__":
     # 建设资金来源及性质:资本金40%,自筹60%,,xx.=建设资金来源自筹,项目出资比例为100%,\
     # 二次供水泵房浊度仪进行国内组织公开招标采购,时间:2020-05-26,15:15:00,竞价结束时间:2020-05-26,15:45:00允许延时:是,'
     a = time.time()
-    # text = '''
-    # ,光大证券统一认证系统服务器硬件设备更新项目中标候选人公示,项目名称:光大证券统一认证系统服务器硬件设备更新项目,招标编号:CG-202011-030-001,公告日期:2020年12月3日,评标日期:2020年11月30日13时32分,评标地点:光大证券集中采购管理平台,推荐中标候选人:上海致为信息技术有限公司,联系人:殷志超,联系电话:021-22169419
-    # '''
+    text = '''
+    ,清远市清新区治理道路货物运输车辆非法超限超载工作领导小组清远市清新区治理道路货物运输车辆非法超限超载工作领导小组喷墨打印机网上商城合同
+    验收报告,一、合同编号:GDMALL2019123563,。二、合同名称:清远市清新区治理道路货物运输车辆非法超限超载工作领导小组喷墨打印机网上商城合同。
+    三、中标、成交供应商:广州爱联科技有限公司,地址:广州市黄埔大道西468号勤建商务大厦14层。联系人:周勇联系电话:020-85180120,。
+    四、合同金额(元):¥3,270.00,。五、合同详细信息:。采购项目编号::441827-201910-531001-0013,中标/成交标的名称::喷墨打印机,
+    数量::1台。采购项目名称::喷墨打印机,规格型号::WF-7218,中标/成交金额(元)::3,270.00。服务要求::,。,。六、验收结论:已通过。
+    七、验收小组成员名单::。八、联系事项:。(一)采购人:清远市清新区治理道路货物运输车辆非法超限超载工作领导小组,地址:太和镇玄真路49号。
+    联系人:苏美彩,联系电话:0763-5835988,。(二)采购代理机构:地址::。联系人:联系电话::。附件::。
+    发布人:清远市清新区治理道路货物运输车辆非法超限超载工作领导小组。发布时间:2019年11月26日
+    '''
     print("start")
-    print(predict("12",content,"重庆市綦江区人民法院关于重庆市綦江区文龙街道沙溪路22号银海新城六期45号楼、46号楼、47号楼负一层213号车位(第一次拍卖)的公告"))
+    print(predict("12",text,"重庆市綦江区人民法院关于重庆市綦江区文龙街道沙溪路22号银海新城六期45号楼、46号楼、47号楼负一层213号车位(第一次拍卖)的公告"))
     # print(predict("投诉处理公告", text))
     #test("12",text)
     print("takes",time.time()-a)

BIN
BiddingKG/dl/time/model_label_time_classify.model.hdf5


+ 176 - 86
BiddingKG/dl/time/train_2.py

@@ -13,14 +13,52 @@ from sklearn.utils import shuffle,class_weight
 import matplotlib.pyplot as plt
 
 input_shape = (2,30,60)
+input_shape2 = (2,10,128)
 output_shape = [4]
 
+def get_data():
+    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
+    id_set = set()
+    for id in data_load['document_id']:
+        id_set.add(id)
+    conn = psycopg2.connect(dbname="iepy", user="postgres", password="postgres", host="192.168.2.101")
+    sql = "SELECT A.human_identifier,A.sentences,A.tokens,A.offsets_to_text,B.value " \
+          "FROM corpus_iedocument A,brat_bratannotation B " \
+          "WHERE A.human_identifier = '%s' " \
+          "AND A.human_identifier = B.document_id "
+    db_data = []
+    count = 0
+    for id in list(id_set):
+        count+=1
+        print(count)
+        cur1 = conn.cursor()
+        cur1.execute(sql % (id))
+        db_data.extend(cur1.fetchall())
+        cur1.close()
+    conn.close()
+    columns = ['document_id','sentences','tokens','offsets_to_text','value']
+    df = pd.DataFrame(db_data, columns=columns)
+    df = df[df['value'].str.contains('time')]
+    df = df.reset_index(drop=True)
+    print(len(df))
+    time_label = df['value'].str.split(expand=True)
+    time_label.columns = ['_', 'label_type', 'begin_index', 'end_index', 'entity_text']
+    time_label = time_label.drop('_', axis=1)
+    df = pd.concat([df, time_label], axis=1)
+    print(df.info())
+    df['tokens'] = [token[2:-2].split("', '") for token in df['tokens']]
+    df['sentences'] = [sentence[1:-1].split(", ") for sentence in df['sentences']]
+    df['sentences'] = [[int(s) for s in sentence] for sentence in df['sentences']]
+    df['offsets_to_text'] = [offset[1:-1].split(", ") for offset in df['offsets_to_text']]
+    df['offsets_to_text'] = [[int(o) for o in offset] for offset in df['offsets_to_text']]
+    save(df,'db_time_data.pk')
+
 def getModel():
     '''
     @summary: time classification model
     '''
-    L_input = layers.Input(shape=input_shape[1:], dtype='float32')
-    R_input = layers.Input(shape=input_shape[1:], dtype='float32')
+    L_input = layers.Input(shape=input_shape2[1:], dtype='float32')
+    R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
     L_lstm = layers.Bidirectional(layers.LSTM(40,return_sequences=True,dropout=0.1))(L_input)
     # L_lstm = layers.LSTM(32,return_sequences=True,dropout=0.2)(L_input)
     avg_l = layers.GlobalAveragePooling1D()(L_lstm)
@@ -40,36 +78,11 @@ def getModel():
     model.summary()
     return model
 
-def getModel_center():
-    '''
-    @summary: time classification model
-    '''
-    L_input = layers.Input(shape=input_shape[1:], dtype='float32')
-    R_input = layers.Input(shape=input_shape[1:], dtype='float32')
-    center_shape = (25, 60)
-    C_input = layers.Input(shape=center_shape, dtype='float32')
-    L_lstm = layers.Bidirectional(layers.LSTM(32,return_sequences=True,dropout=0.2))(L_input)
-    avg_l = layers.GlobalAveragePooling1D()(L_lstm)
-    C_lstm = layers.LSTM(32,return_sequences=True,dropout=0.2)(C_input)
-    avg_c = layers.GlobalAveragePooling1D()(C_lstm)
-    R_lstm = layers.Bidirectional(layers.LSTM(32,return_sequences=True,dropout=0.2))(R_input)
-    avg_r = layers.GlobalAveragePooling1D()(R_lstm)
-    concat = layers.merge([avg_l, avg_c, avg_r], mode='concat')
-
-    output = layers.Dense(output_shape[0],activation="softmax")(concat)
-
-    model = models.Model(inputs=[L_input,C_input,R_input], outputs=output)
-    learn_rate = 0.0005
-    model.compile(optimizer=optimizers.Adam(lr=learn_rate),
-                  loss=losses.binary_crossentropy,
-                  metrics=[precision,recall,f1_score])
-    model.summary()
-    return model
-
 
 def training():
     data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
-    test_data = data_load.sample(frac=0.2, random_state=7)
+    data_load = data_load.reset_index(drop=True)
+    test_data = data_load.sample(frac=0.2, random_state=8)
     train_data = data_load.drop(test_data.index, axis=0)
     train_data =train_data.reset_index(drop=True)
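The added reset_index(drop=True) before the sample/drop split is not cosmetic: newdata_30_prc.csv is loaded with index_col=0, and if those index labels repeat, drop(test_data.index) removes every row sharing a sampled label. A small demonstration of the failure mode and the fix:

# Why reset_index matters before a sample/drop split: with duplicate index
# labels, label-based drop can remove more rows than were sampled.
import pandas as pd

df = pd.DataFrame({"x": range(4)}, index=[0, 0, 1, 1])
test = df.sample(frac=0.5, random_state=8)
print(len(df.drop(test.index)))       # can be 0: every row sharing a label goes

df = df.reset_index(drop=True)
test = df.sample(frac=0.5, random_state=8)
train = df.drop(test.index)
print(len(train), len(test))          # 2 2: a clean, disjoint split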
 
@@ -139,35 +152,32 @@ def training():
     res2 = classification_report(np.argmax(train_y, axis=1), np.argmax(y_pre2, axis=1))
     print(res2)
 
-def training_center():
-    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata.csv", index_col=0)
-    test_data = data_load.sample(frac=0.25, random_state=7)
+def train2():
+    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\tokens_data.csv", index_col=0)
+    data_load = data_load.reset_index(drop=True)
+    data_load['context_left'] = [left[2:-2].split("', '") for left in data_load['context_left']]
+    data_load['context_right'] = [right[2:-2].split("', '") for right in data_load['context_right']]
+    test_data = data_load.sample(frac=0.2, random_state=8)
     train_data = data_load.drop(test_data.index, axis=0)
     train_data =train_data.reset_index(drop=True)
 
     train_x = []
     train_y = []
-    for left, center, right, label in zip(train_data['context_left'], train_data['entity_time'], train_data['context_right'], train_data['re_label']):
+    for left, right, label in zip(train_data['context_left'], train_data['context_right'], train_data['label']):
         y = np.zeros(output_shape)
         y[label] = 1
-        left = ''.join(str(left))
-        right = ''.join(str(right))
-        center = ''.join(str(center))
-        context = [left,center, right]
-        x = embedding_word(context, shape=(3,25,60))
+        context = [left, right]
+        x = embedding(context, shape=input_shape2)
         train_x.append(x)
         train_y.append(y)
 
     test_x = []
     test_y = []
-    for left, center, right, label in zip(test_data['context_left'], train_data['entity_time'], test_data['context_right'], test_data['re_label']):
+    for left, right, label in zip(test_data['context_left'], test_data['context_right'], test_data['label']):
         y = np.zeros(output_shape)
         y[label] = 1
-        left = ''.join(str(left))
-        right = ''.join(str(right))
-        center = ''.join(str(center))
-        context = [left, center, right]
-        x = embedding_word(context, shape=(3,25,60))
+        context = [left, right]
+        x = embedding(context, shape=input_shape2)
         test_x.append(x)
         test_y.append(y)
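train2 and predict2 rebuild token lists from their CSV string form with slicing plus split("', '"), which assumes every list was serialized exactly as Python's repr with single quotes. Where that assumption is shaky (embedded quotes, empty lists), ast.literal_eval is a sturdier equivalent; a sketch:

# Alternative parsing sketch for the stringified token lists above.
import ast

raw = "['投标', '截止', '时间', ':']"
tokens = ast.literal_eval(raw)        # safe eval of a Python literal
print(tokens)                         # ['投标', '截止', '时间', ':']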
 
@@ -175,79 +185,83 @@ def training_center():
     train_x, test_x = (np.array(train_x), np.array(test_x))
     train_x, test_x = (np.transpose(train_x, (1, 0, 2, 3)), np.transpose(test_x, (1, 0, 2, 3)))
 
-    model = getModel_center()
-    epochs = 70
+    model = getModel()
+    epochs = 150
     batch_size = 256
     checkpoint = ModelCheckpoint("model_label_time_classify.model.hdf5", monitor="val_loss", verbose=1,
                                  save_best_only=True, mode='min')
     # cw = class_weight.compute_class_weight('auto',np.unique(np.argmax(train_y,axis=1)),np.argmax(train_y,axis=1))
     # cw = dict(enumerate(cw))
     history = model.fit(
-        x=[train_x[0], train_x[1], train_x[2]],
+        x=[train_x[0], train_x[1]],
         y=train_y,
-        validation_data=([test_x[0], test_x[1], test_x[2]], test_y),
-        # validation_data=(test_x[0],test_y),
+        validation_data=([test_x[0], test_x[1]], test_y),
         epochs=epochs,
         batch_size=batch_size,
         shuffle=True,
         callbacks=[checkpoint],
         class_weight='auto'
     )
-    plot_loss(history = history)
+    # plot_loss(history=history)
     load_model = models.load_model("model_label_time_classify.model.hdf5",
                                    custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
-    y_pre = load_model.predict([test_x[0], test_x[1], test_x[2]])
+    y_pre = load_model.predict([test_x[0], test_x[1]])
     # y_pre = load_model.predict(test_x[0])
     # per-class prediction evaluation
     res1 = classification_report(np.argmax(test_y, axis=1), np.argmax(y_pre, axis=1))
     print(res1)
-    y_pre2 = load_model.predict([train_x[0], train_x[1], train_x[2]])
+    y_pre2 = load_model.predict([train_x[0], train_x[1]])
     # y_pre2 = load_model.predict(train_x[0])
     res2 = classification_report(np.argmax(train_y, axis=1), np.argmax(y_pre2, axis=1))
     print(res2)
 
-def predict():
+
+def predict2():
     model1 = models.load_model("model_label_time_classify.model.hdf5",custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
-    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30.csv", index_col=0)
+    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\tokens_data.csv", index_col=0)
+    data_load['context_left'] = [left[2:-2].split("', '") for left in data_load['context_left']]
+    data_load['context_right'] = [right[2:-2].split("', '") for right in data_load['context_right']]
     test_x = []
     test_y = []
-    for left, right, label in zip(data_load['context_left'], data_load['context_right'], data_load['re_label']):
+    for left, right, label in zip(data_load['context_left'], data_load['context_right'], data_load['label']):
         y = np.zeros(output_shape)
         y[label] = 1
-        left = ''.join(str(left))
-        right = ''.join(str(right))
         context = [left, right]
-        x = embedding_word(context, shape=input_shape)
+        x = embedding(context, shape=input_shape2)
         test_x.append(x)
         test_y.append(y)
     test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
     pre_y = model1.predict([test_x[0],test_x[1]])
     data_load['pre'] = [np.argmax(item) for item in pre_y]
-    error_data = data_load[data_load['re_label']!=data_load['pre']]
+    error_data = data_load[data_load['label']!=data_load['pre']]
     # print(error_data.info())
-    error_data.to_csv("C:\\Users\\admin\\Desktop\\test\\error4-0.2-0.6_30.csv")
+    error_data.to_csv("C:\\Users\\admin\\Desktop\\error4-30.csv")
 
-def predict_center():
+def predict():
     model1 = models.load_model("model_label_time_classify.model.hdf5",custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
-    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata.csv", index_col=0)
+    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
     test_x = []
     test_y = []
-    for left, center, right, label in zip(data_load['context_left'],data_load['entity_time'], data_load['context_right'], data_load['re_label']):
+    for left, right, label in zip(data_load['context_left'], data_load['context_right'], data_load['re_label']):
         y = np.zeros(output_shape)
         y[label] = 1
-        left = ''.join(str(left))
-        right = ''.join(str(right))
-        center = ''.join(str(center))
-        context = [left, center, right]
-        x = embedding_word(context, shape=(3, 25, 60))
+        left = str(left)
+        right = str(right)
+        if left == 'nan': left = ''
+        if right == 'nan': right = ''
+        left = list(left)
+        right = list(right)
+        context = [left, right]
+        x = embedding_word(context, shape=input_shape)
         test_x.append(x)
         test_y.append(y)
     test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
-    pre_y = model1.predict([test_x[0],test_x[1],test_x[2]])
+    pre_y = model1.predict([test_x[0],test_x[1]])
     data_load['pre'] = [np.argmax(item) for item in pre_y]
     error_data = data_load[data_load['re_label']!=data_load['pre']]
     # print(error_data.info())
-    error_data.to_csv("C:\\Users\\admin\\Desktop\\test\\error_center.csv")
+    error_data.to_csv("C:\\Users\\admin\\Desktop\\error4-30.csv")
+
 
 def data_process():
     data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30.csv", index_col=0)
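A note on the shape handling shared by train2, predict2 and predict above: samples are stacked as a (N, 2, 10, 128) array of [left, right] pairs, and np.transpose(..., (1, 0, 2, 3)) moves the pair axis to the front so that indices 0 and 1 line up with the model's two Input layers. A minimal sketch:

# How the (1, 0, 2, 3) transpose splits stacked [left, right] pairs into the
# two inputs the model expects (shapes follow input_shape2 = (2, 10, 128)).
import numpy as np

n_samples = 5
batch = np.zeros((n_samples, 2, 10, 128))   # each row is one [left, right] pair
inputs = np.transpose(batch, (1, 0, 2, 3))  # -> (2, n_samples, 10, 128)
left_input, right_input = inputs[0], inputs[1]
print(left_input.shape, right_input.shape)  # (5, 10, 128) (5, 10, 128)
# model.predict([left_input, right_input]) then matches the two Input layers.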
@@ -273,6 +287,93 @@ def data_process():
     data_load['context_right'] = right_list
     data_load.to_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv")
 
+def data_process2():
+    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
+    left_list = []
+    right_list = []
+    for left, right in zip(data_load['context_left'], data_load['context_right']):
+        left = str(left)
+        right = str(right)
+        if right=='nan':
+            right = ''
+        if left=='nan':
+            left = ''
+        left = left[max(len(left)-20,0):]
+        right = right[:20]
+        left_list.append(left)
+        right_list.append(right)
+    data_load['context_left'] = left_list
+    data_load['context_right'] = right_list
+    data_load.to_csv("C:\\Users\\admin\\Desktop\\newdata_20_prc.csv")
+
+def data_process3():
+    data = load('db_time_data.pk')
+    data = data.drop('value', axis=1)
+    token_begin = []
+    token_end = []
+    context_left = []
+    context_right = []
+    data2 = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc2.csv")
+    label = []
+    # data=data[:20]
+    for id,sentences,tokens,offset,begin,end,entity_text in zip(data['document_id'],data['sentences'],data['tokens'],data['offsets_to_text'],
+                                                             data['begin_index'],data['end_index'],data['entity_text']):
+        _label = data2[(data2['document_id']==int(id)) & (data2['begin_index']==int(begin))][:1]
+        if not _label.empty:
+            _label = int(_label['re_label'])
+        else:
+            _label=0
+        label.append(_label)
+        begin = int(begin)
+        end = int(end)
+        entity_tbegin = 0
+        entity_tend = 0
+        find_begin = False
+
+        for t in range(len(offset)):
+            if not find_begin:
+                if offset[t]==begin:
+                    entity_tbegin = t
+                    find_begin = True
+                if offset[t]>begin:
+                    entity_tbegin = t-1
+                    find_begin = True
+            if offset[t] >= end:
+                entity_tend = t
+                break
+        token_begin.append(entity_tbegin)
+        token_end.append(entity_tend)
+        s = spanWindow(tokens=tokens,begin_index=entity_tbegin,end_index=entity_tend,size=10)
+        s1 = s[0]
+        _temp1 = []
+        for i in range(len(s1)):
+            if s1[i]=="。":
+                _temp1.append(i)
+        if _temp1:
+            s1 = s1[_temp1[-1]+1:]
+        s2 = s[1]
+        _temp2 = []
+        for i in range(len(s2)):
+            if s2[i] == "。":
+                _temp2.append(i)
+                break
+        if _temp2:
+            s2 = s2[:_temp2[0]+1]
+            # print(s2)
+        context_left.append(s1)
+        context_right.append(s2)
+        print(id)
+        # print(_label)
+        # print(entity_text)
+        # print(tokens[entity_tbegin:entity_tend])
+    data['token_begin'] = token_begin
+    data['token_end'] = token_end
+    data['context_left'] = context_left
+    data['context_right'] = context_right
+    data['label'] = label
+    data = data.drop(['tokens','offsets_to_text','sentences'],axis=1)
+    data.to_csv("C:\\Users\\admin\\Desktop\\tokens_data.csv")
+
 def plot_loss(history):
     plt.plot(history.history['loss'])
     plt.plot(history.history['val_loss'])
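The data_process3 hunk above aligns brat character offsets to token indices with a linear scan: token_begin is the last token starting at or before the entity's begin offset, token_end the first token starting at or past its end, and each spanWindow context is then trimmed at the nearest "。" sentence boundary. An equivalent sketch of the alignment using bisect (assuming offsets_to_text is sorted, as token start offsets are):

# Compact equivalent of the offset-to-token alignment in data_process3.
from bisect import bisect_left, bisect_right

def char_span_to_token_span(offsets, begin, end):
    i = bisect_right(offsets, begin) - 1  # last token starting at or before `begin`
    j = bisect_left(offsets, end)         # first token starting at or after `end`
    return max(i, 0), j

offsets = [0, 2, 5, 9, 12]                # token start offsets
print(char_span_to_token_span(offsets, 5, 9))   # (2, 3)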
@@ -283,25 +384,14 @@ def plot_loss(history):
     plt.show()
 
 if __name__ == '__main__':
+    # get_data()
     # getModel()
-    # getModel_center()
     # training()
+    # train2()
     # data_process()
-    # training_center()
+    # data_process2()
+    # data_process3()
     # predict()
-    # predict_center()
-    model1 = models.load_model("model_label_time_classify.model.hdf5",
-                               custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
-    test_x = []
-    test_y = []
-    left = '8675.20元人民币,(3)服务期限:'
-    right = '(4)质量:符合竞争性磋商文件规定的质'
-    context = [left, right]
-    x = embedding_word(context, shape=input_shape)
-    test_x.append(x)
-    test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
-    pre_y = model1.predict([test_x[0],test_x[1]])
-    rs = [np.argmax(item) for item in pre_y]
-    print(pre_y, rs)
+    # predict2()
 
     pass