
"时间分类优化"

znj 3 years ago
parent
commit
efc874fcac

+ 20 - 3
BiddingKG/dl/interface/getAttributes.py

@@ -2071,6 +2071,11 @@ def getOtherAttributes(list_entity):
                   "product":[],
                   "total_tendereeMoney":0,
                   "total_tendereeMoneyUnit":''}
+    dict_time = {
+        "time_release": [],
+        "time_bidopen": [],
+        "time_bidclose": []
+    }
     for entity in list_entity:
         if entity.entity_type == 'bidway':
             dict_other["bidway"] = turnBidWay(entity.entity_text)
@@ -2079,11 +2084,17 @@ def getOtherAttributes(list_entity):
         elif entity.entity_type=='serviceTime':
             dict_other["serviceTime"] = entity.entity_text
         elif entity.entity_type == 'time' and entity.label==1:
-            dict_other["time_release"] = timeFormat(entity.entity_text)
+            if entity.values[entity.label]>0.6:
+                dict_time['time_release'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
+            # dict_other["time_release"] = timeFormat(entity.entity_text)
         elif entity.entity_type == 'time' and entity.label==2:
-            dict_other["time_bidopen"] = timeFormat(entity.entity_text)
+            if entity.values[entity.label]>0.6:
+                dict_time['time_bidopen'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
+            # dict_other["time_bidopen"] = timeFormat(entity.entity_text)
         elif entity.entity_type == 'time' and entity.label == 3:
-            dict_other["time_bidclose"] = timeFormat(entity.entity_text)
+            if entity.values[entity.label]>0.6:
+                dict_time['time_bidclose'].append((timeFormat(entity.entity_text),entity.values[entity.label]))
+            # dict_other["time_bidclose"] = timeFormat(entity.entity_text)
         elif entity.entity_type=="person" and entity.label ==4:
             dict_other["person_review"].append(entity.entity_text)
         elif entity.entity_type=='product':
@@ -2091,6 +2102,12 @@ def getOtherAttributes(list_entity):
         elif entity.entity_type=='money' and entity.notes=='总投资' and dict_other["total_tendereeMoney"]<float(entity.entity_text):
             dict_other["total_tendereeMoney"] = float(entity.entity_text)
             dict_other["total_tendereeMoneyUnit"] = entity.money_unit
+    # Time categories: for each type, keep the highest-probability candidate
+    for time_type, list_time in dict_time.items():
+        if list_time:
+            list_time.sort(key=lambda x:x[1],reverse=True)
+            dict_other[time_type] = list_time[0][0]
     dict_other["product"] = list(set(dict_other["product"]))
     return dict_other
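
Note: the hunk above switches the three time fields from "last entity wins" to a
confidence-based pick. A minimal standalone sketch of the selection rule (the data
values here are made up for illustration):

    # Keep candidates above the 0.6 threshold, then take the most confident
    # one per time type -- this mirrors the loop added above.
    candidates = {
        "time_release": [("2021-03-01", 0.92), ("2021-03-05", 0.71)],
        "time_bidopen": [],
        "time_bidclose": [("2021-03-20", 0.65)],
    }
    result = {}
    for time_type, scored in candidates.items():
        scored = [c for c in scored if c[1] > 0.6]
        if scored:
            result[time_type] = max(scored, key=lambda c: c[1])[0]
    print(result)  # {'time_release': '2021-03-01', 'time_bidclose': '2021-03-20'}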
 

+ 36 - 4
BiddingKG/dl/interface/predictor.py

@@ -1357,7 +1357,7 @@ class TimePredictor():
         self.sess = tf.Session(graph=tf.Graph())
         self.inputs_code = None
         self.outputs_code = None
-        self.input_shape = (2,10,128)
+        self.input_shape = (2,40,128)
         self.load_model()
 
     def load_model(self):
@@ -1385,6 +1385,7 @@ class TimePredictor():
         for list_sentence, list_entity in zip(list_sentences, list_entitys):
             p_entitys = 0
             p_sentences = 0
+            list_sentence.sort(key=lambda x: x.sentence_index)
             while(p_entitys<len(list_entity)):
                 entity = list_entity[p_entitys]
                 if entity.entity_type in ['time']:
@@ -1397,7 +1398,7 @@ class TimePredictor():
                             left = s[0]
                             right = s[1]
                             context = [left, right]
-                            x = embedding(context, shape=self.input_shape)
+                            x = self.embedding_words(context, shape=self.input_shape)
                             data_x.append(x)
                             points_entitys.append(entity)
                             break
@@ -1408,6 +1409,33 @@ class TimePredictor():
         data_x = np.transpose(np.array(data_x), (1, 0, 2, 3))
         return [data_x, points_entitys]
 
+    def embedding_words(self, datas, shape):
+        '''
+        @summary: look up the word vector for each token
+        @param:
+            datas: list of token lists (left and right context)
+            shape: shape of the returned array
+        @return: array of the given shape holding the word embeddings
+        '''
+        model_w2v = getModel_w2v()
+        embed = np.zeros(shape)
+        length = shape[1]
+        out_index = 0
+        for data in datas:
+            index = 0
+            for item in data:
+                item_not_space = re.sub(r"\s*", "", item)
+                if index >= length:
+                    break
+                if item_not_space in model_w2v.vocab:
+                    embed[out_index][index] = model_w2v[item_not_space]
+                    index += 1
+                else:
+                    embed[out_index][index] = model_w2v['unk']
+                    index += 1
+            out_index += 1
+        return embed
+
     def predict(self, list_sentences,list_entitys):
         datas = self.search_time_data(list_sentences, list_entitys)
         if datas is None:
@@ -1422,7 +1450,11 @@ class TimePredictor():
                 values = []
                 for item in predict_y[i]:
                     values.append(item)
-                    entity.set_Role(label, values)
+                if label != 0:
+                    if not timeFormat(entity.entity_text):
+                        label = 0
+                        values[0] = 0.5
+                entity.set_Role(label, values)
 
 # Product field extraction
 class ProductPredictor():
@@ -2232,7 +2264,7 @@ def save_timesplit_model():
 if __name__=="__main__":
     #save_role_model()
     # save_codename_model()
-    save_money_model()
+    # save_money_model()
     #save_person_model()
     #save_form_model()
     #save_codesplit_model()
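
Note: embedding_words truncates each context to shape[1] tokens, strips
whitespace from every token, and looks it up in the word2vec vocabulary, falling
back to the 'unk' vector; unfilled positions stay zero-padded. A self-contained
sketch of that padding behavior, with a toy dict standing in for getModel_w2v()
(the 4-dim vectors are an assumption for illustration):

    import numpy as np

    toy_w2v = {"招标": np.ones(4), "时间": np.full(4, 2.0), "unk": np.full(4, -1.0)}

    def embed_contexts(contexts, shape=(2, 5, 4)):
        # Zero-pad to shape[1] tokens; unknown tokens map to the 'unk' vector.
        out = np.zeros(shape)
        for row, tokens in enumerate(contexts):
            for col, tok in enumerate(tokens[:shape[1]]):
                out[row][col] = toy_w2v.get(tok, toy_w2v["unk"])
        return out

    x = embed_contexts([["招标", "时间"], ["???", "时间"]])
    print(x.shape)   # (2, 5, 4)
    print(x[1][0])   # [-1. -1. -1. -1.] -> unknown token fell back to 'unk'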

BIN
BiddingKG/dl/interface/timesplit_model/saved_model.pb


BIN
BiddingKG/dl/interface/timesplit_model/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/interface/timesplit_model/variables/variables.index


+ 1 - 1
BiddingKG/dl/relation_extraction/model.py

@@ -198,8 +198,8 @@ def position_id(x):
 
 add_dict = load(os.path.dirname(__file__)+'/../relation_extraction/add_words_dict.pkl')
 add_words = ['<unk>','<company/org>','<location>','<phone>','<contact_person>']
-model_w2v = getModel_w2v()
 def get_words_matrix(words):
+    model_w2v = getModel_w2v()
     if words in add_words:
         return add_dict[words]
     else:
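
Note: moving model_w2v = getModel_w2v() from module scope into get_words_matrix
defers loading the word2vec model until the first call, so importing model.py no
longer pays that cost up front. If getModel_w2v() did not already cache the
model internally, a memoized wrapper would keep the per-call overhead
negligible; a sketch (nothing here is from the repo):

    from functools import lru_cache

    @lru_cache(maxsize=1)
    def get_w2v_cached():
        # The expensive load runs only on the first call; later calls reuse it.
        print("loading word2vec ...")
        return {"unk": [0.0] * 4}  # placeholder for the real gensim model

    get_w2v_cached()  # prints "loading word2vec ..."
    get_w2v_cached()  # served from the cache, no second load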

+ 7 - 7
BiddingKG/dl/test/测试整个要素提取流程.py

@@ -51,6 +51,7 @@ codeNamePredict = predictor.CodeNamePredict()
 premPredict = predictor.PREMPredict()
 epcPredict = predictor.EPCPredict()
 roleRulePredict = predictor.RoleRulePredictor()
+timePredictor = predictor.TimePredictor()
 
 #自定义jsonEncoder
 class MyEncoder(json.JSONEncoder):
@@ -91,6 +92,8 @@ def predict(doc_id,text):
     roleRulePredict.predict(list_articles,list_sentences, list_entitys,codeName)
     # print("epcPredict")
     epcPredict.predict(list_sentences,list_entitys)
+
+    timePredictor.predict(list_sentences,list_entitys)
     # print("entityLink")
     entityLink.link_entitys(list_entitys)
     # print("getPREMs")
@@ -98,7 +101,7 @@ def predict(doc_id,text):
     # print("getPREMs")
     print("公司——联系人:", end=' ')
     print(prem[0])
-    print(prem[0]['prem']['Project']['roleList'])
+    # print(prem[0]['prem']['Project']['roleList'])
 
     
     ''''''
@@ -123,7 +126,7 @@ def predict(doc_id,text):
                 print(entity.sentence_index)
             elif entity.entity_type=="time":
                 print("time:",end=" ")
-                print(entity.entity_text)
+                print(entity.entity_text, entity.label, entity.values)
             elif entity.entity_type in ['org','company']:
                 _sentence = list_sentences[0][entity.sentence_index]
                 if entity.pointer_person:
@@ -141,9 +144,6 @@ def predict(doc_id,text):
             #         print('pointer_pack_name:',entity.pointer_pack.entity_text)
             # elif entity.entity_type in ['package']:
             #     print('pack_entity:',entity.entity_text)
-            # elif entity.entity_type=='time':
-            #     print("时间:", end=' ')
-            #     print(entity.entity_text, entity.label, entity.values)
             # print(entity.entity_text,entity.entity_type,entity.label,entity.values,entity.sentence_index,entity.wordOffset_begin,entity.wordOffset_end)
 
     #print(prem)
@@ -429,9 +429,9 @@ if __name__=="__main__":
 # '''
     a = time.time()
     print("start")
-    print(predict("12",content))
+    # print(predict("12",content))
     # result = predict("12",text)
-    # result = predict("12",content)
+    result = predict("12",content)
     # print(json.loads(result))
     #test("12",text)
     print("takes",time.time()-a)

BIN
BiddingKG/dl/time/model_label_time_classify.model.hdf5


BIN
BiddingKG/dl/time/model_time_classify.weights


BIN
BiddingKG/dl/time/models/timesplit_model/saved_model.pb


BIN
BiddingKG/dl/time/models/timesplit_model/variables/variables.data-00000-of-00001


BIN
BiddingKG/dl/time/models/timesplit_model/variables/variables.index


+ 504 - 10
BiddingKG/dl/time/train_2.py

@@ -1,11 +1,13 @@
 import sys
 import os
 sys.path.append(os.path.abspath("../.."))
+# sys.path.append('/data/python_znj/znj/BIDI_ML_INFO_EXTRACTION/')
 import pandas as pd
 import re
 import psycopg2
 from keras.callbacks import ModelCheckpoint
 from keras import layers,models,optimizers,losses
+from keras.layers import *
 from BiddingKG.dl.common.Utils import *
 from BiddingKG.dl.common.models import *
 from sklearn.metrics import classification_report
@@ -13,15 +15,15 @@ from sklearn.utils import shuffle,class_weight
 import matplotlib.pyplot as plt
 
 input_shape = (2,30,60)
-input_shape2 = (2,10,128)
+input_shape2 = (2,40,128)
 output_shape = [4]
 
 def get_data():
-    data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
+    data_load = pd.read_csv("newdata_30_prc.csv", index_col=0)
     id_set = set()
     for id in data_load['document_id']:
         id_set.add(id)
-    conn = psycopg2.connect(dbname="iepy", user="postgres", password="postgres", host="192.168.2.101")
+    conn = psycopg2.connect(dbname="iepy", user="postgres", password="postgres", host="192.168.2.103")
     sql = "SELECT A.human_identifier,A.sentences,A.tokens,A.offsets_to_text,B.value " \
           "FROM corpus_iedocument A,brat_bratannotation B " \
           "WHERE A.human_identifier = '%s' " \
@@ -47,10 +49,12 @@ def get_data():
     df = pd.concat([df, time_label], axis=1)
     print(df.info())
     df['tokens'] = [token[2:-2].split("', '") for token in df['tokens']]
-    df['sentences'] = [sentence[1:-1].split(", ") for sentence in df['sentences']]
-    df['sentences'] = [[int(s) for s in sentence] for sentence in df['sentences']]
-    df['offsets_to_text'] = [offset[1:-1].split(", ") for offset in df['offsets_to_text']]
-    df['offsets_to_text'] = [[int(o) for o in offset] for offset in df['offsets_to_text']]
+    df['sentences'] = [eval(sentence) for sentence in df['sentences']]
+    # df['sentences'] = [sentence[1:-1].split(", ") for sentence in df['sentences']]
+    # df['sentences'] = [[int(s) for s in sentence] for sentence in df['sentences']]
+    df['offsets_to_text'] = [eval(offset) for offset in df['offsets_to_text']]
+    # df['offsets_to_text'] = [offset[1:-1].split(", ") for offset in df['offsets_to_text']]
+    # df['offsets_to_text'] = [[int(o) for o in offset] for offset in df['offsets_to_text']]
     save(df,'db_time_data.pk')
 
 def getModel():
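
Note: the hunk above replaces the manual "[1:-1].split(...)" parsing of the DB
columns with eval, which handles nested lists correctly but will execute any
code embedded in the data. ast.literal_eval parses the same Python literals
safely; a sketch:

    import ast

    raw = "[0, 35, 72]"           # a 'sentences' offset list as stored in the DB
    print(ast.literal_eval(raw))  # [0, 35, 72] -- same as eval(), no code execution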
@@ -78,6 +82,163 @@ def getModel():
     model.summary()
     return model
 
+def getModel2():
+    '''
+    @summary: time classification model
+    '''
+    L_input = layers.Input(shape=input_shape2[1:], dtype='float32')
+    L_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(L_input)
+    R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
+    R_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(R_input)
+
+    L_input_drop = Dropout(0.2)(L_input)
+    R_input_drop = Dropout(0.2)(R_input)
+    # L_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(L_input)
+    L_lstm = OurBidirectional(GRU(64, return_sequences=True))([L_input_drop,L_mask])
+    L_att = Attention02()(L_lstm,mask=K.squeeze(L_mask,axis=-1))
+    # R_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(R_input)
+    R_lstm = OurBidirectional(GRU(64, return_sequences=True))([R_input_drop,R_mask])
+    R_att = Attention02()(R_lstm,mask=K.squeeze(R_mask,axis=-1))
+    concat = layers.merge([L_att, R_att], mode='concat')
+    concat = Dropout(0.3)(concat)
+    output = layers.Dense(output_shape[0],activation="softmax")(concat)
+
+    model = models.Model(inputs=[L_input,R_input], outputs=output)
+
+    learn_rate = 0.00005
+    model.compile(optimizer=optimizers.Adam(lr=learn_rate),
+                  loss=losses.binary_crossentropy,
+                  metrics=[precision,recall,f1_score])
+    model.summary()
+    return model
+
+def getModel3():
+    '''
+    @summary: time classification model (variant: single attention over the concatenated contexts)
+    '''
+    L_input = layers.Input(shape=input_shape2[1:], dtype='float32')
+    L_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(L_input)
+    R_input = layers.Input(shape=input_shape2[1:], dtype='float32')
+    R_mask = Lambda(lambda x: K.cast(K.not_equal(K.sum(x,axis=-1,keepdims=True), 0), 'float32'))(R_input)
+
+    L_input_drop = Dropout(0.2)(L_input)
+    R_input_drop = Dropout(0.2)(R_input)
+    # L_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(L_input)
+    L_lstm = OurBidirectional(GRU(64, return_sequences=True))([L_input_drop,L_mask])
+    # L_att = Attention02()(L_lstm,mask=K.squeeze(L_mask,axis=-1))
+    # R_lstm = layers.Bidirectional(layers.GRU(40,return_sequences=True,dropout=0.1))(R_input)
+    R_lstm = OurBidirectional(GRU(64, return_sequences=True))([R_input_drop,R_mask])
+    concat = layers.merge([L_lstm,R_lstm], mode='concat',concat_axis=1)
+    concat_mask = layers.merge([L_mask,R_mask], mode='concat',concat_axis=1)
+    att = Attention02()(concat,mask=K.squeeze(concat_mask,axis=-1))
+    # R_att = Attention02()(R_lstm,mask=K.squeeze(R_mask,axis=-1))
+    # concat = layers.merge([L_att, R_att], mode='concat')
+    att = Dropout(0.3)(att)
+    output = layers.Dense(output_shape[0],activation="softmax")(att)
+
+    model = models.Model(inputs=[L_input,R_input], outputs=output)
+
+    learn_rate = 0.0001
+    model.compile(optimizer=optimizers.Adam(lr=learn_rate),
+                  loss=losses.binary_crossentropy,
+                  metrics=[precision,recall,f1_score])
+    model.summary()
+    return model
+
+class Attention02(Layer):
+    def __init__(self, **kwargs):
+        self.init = initializers.get('normal')
+        self.supports_masking = True
+        self.attention_dim = 50
+        super(Attention02, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        assert len(input_shape) == 3
+        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))  # project to attention_dim so bias_add matches self.b
+        self.b = K.variable(self.init((self.attention_dim,)))
+        self.u = K.variable(self.init((self.attention_dim, 1)))
+        self.trainable_weights = [self.W, self.b, self.u]
+        super(Attention02, self).build(input_shape)
+
+    def compute_mask(self, inputs, mask=None):
+        return mask
+
+    def call(self, x, mask=None):
+        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
+        ait = K.dot(uit, self.u)
+        ait = K.squeeze(ait, -1)
+        ait = K.exp(ait)
+
+        if mask is not None:
+            ait = ait * K.cast(mask, K.floatx())
+            # ait = ait * mask
+
+        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
+        ait = K.expand_dims(ait)
+        weighted_input = x * ait
+        output = K.sum(weighted_input, axis=1)
+        return output
+
+    def compute_output_shape(self, input_shape):
+        return (input_shape[0], input_shape[-1])
+
+class OurLayer(Layer):
+    """定义新的Layer,增加reuse方法,允许在定义Layer时调用现成的层
+    """
+    def reuse(self, layer, *args, **kwargs):
+        if not layer.built:
+            if len(args) > 0:
+                inputs = args[0]
+            else:
+                inputs = kwargs['inputs']
+            if isinstance(inputs, list):
+                input_shape = [K.int_shape(x) for x in inputs]
+            else:
+                input_shape = K.int_shape(inputs)
+            layer.build(input_shape)
+        outputs = layer.call(*args, **kwargs)
+        for w in layer.trainable_weights:
+            if w not in self._trainable_weights:
+                self._trainable_weights.append(w)
+        for w in layer.non_trainable_weights:
+            if w not in self._non_trainable_weights:
+                self._non_trainable_weights.append(w)
+        for u in layer.updates:
+            if not hasattr(self, '_updates'):
+                self._updates = []
+            if u not in self._updates:
+                self._updates.append(u)
+        return outputs
+class OurBidirectional(OurLayer):
+    """自己封装双向RNN,允许传入mask,保证对齐
+    """
+    def __init__(self, layer, **args):
+        super(OurBidirectional, self).__init__(**args)
+        self.forward_layer = layer.__class__.from_config(layer.get_config())
+        self.backward_layer = layer.__class__.from_config(layer.get_config())
+        self.forward_layer.name = 'forward_' + self.forward_layer.name
+        self.backward_layer.name = 'backward_' + self.backward_layer.name
+    def reverse_sequence(self, x, mask):
+        """这里的mask.shape是[batch_size, seq_len, 1]
+        """
+        seq_len = K.round(K.sum(mask, 1)[:, 0])
+        seq_len = K.cast(seq_len, 'int32')
+        return tf.reverse_sequence(x, seq_len, seq_dim=1)
+    def call(self, inputs):
+        x, mask = inputs
+        x_forward = self.reuse(self.forward_layer, x)
+        x_backward = self.reverse_sequence(x, mask)
+        x_backward = self.reuse(self.backward_layer, x_backward)
+        x_backward = self.reverse_sequence(x_backward, mask)
+        x = K.concatenate([x_forward, x_backward], -1)
+        if K.ndim(x) == 3:
+            return x * mask
+        else:
+            return x
+    def compute_output_shape(self, input_shape):
+        return input_shape[0][:-1] + (self.forward_layer.units * 2,)
+
+
 
 def training():
     data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
@@ -215,6 +376,222 @@ def train2():
     res2 = classification_report(np.argmax(train_y, axis=1), np.argmax(y_pre2, axis=1))
     print(res2)
 
+def train3():
+    # data_load = pd.read_excel("tokens_tolabel_data1.xlsx", index_col=0)
+    data_load = pd.read_excel("tokens_tolabel_data1_res12.xlsx", index_col=0)
+    # data_load = pd.concat([data_load[data_load['re_label']==0],data_load])
+    # data_load = data_load[data_load['pre_label_prob']>0.97]
+    # data_load = data_load[data_load['is_same']==1]
+    data_zero = pd.read_excel("tokens_label0_data1.xlsx")
+    # data_old = pd.read_excel("tokens_data_02.xlsx")
+    data_old = pd.read_excel("tokens_data_02_res6.xlsx")
+    data_zero = data_zero[(data_zero['label']!=0)|(data_zero['is_same']==2)]
+    # data_zero = pd.concat([data_zero,data_zero])
+    # data_zero = pd.concat([data_zero[(data_zero['label']!=0)|(data_zero['is_same']==2)],data_zero.sample(n=3000)])
+    # data_zero = data_zero.sample(n=80000)
+    print("输入shape:",input_shape2)
+    data_x = []
+    data_y = []
+    for left, right, label,_label in zip(data_load['context_left'], data_load['context_right'], data_load['re_label'], data_load['label']):
+        if label==_label:
+            y = np.zeros(output_shape)
+            y[label] = 1
+            left = eval(left)
+            left = left[-40:]
+            right = eval(right)
+            right = right[:40]
+            context = [left, right]
+            # x = embedding(context, shape=input_shape2)
+            data_x.append(context)
+            data_y.append(y)
+    data_load2 = data_load[data_load['re_label']==0]
+    for left, right, label,_label in zip(data_load2['context_left'], data_load2['context_right'], data_load2['re_label'], data_load2['label']):
+            if label==_label:
+                y = np.zeros(output_shape)
+                y[label] = 1
+                left = eval(left)
+                left = left[-40:]
+                if len(left)>30:
+                    left = left[2:]
+                elif len(left)>15:
+                    left = left[1:]
+                right = eval(right)
+                right = right[:40]
+                if len(right)>15:
+                    right = right[:-1]
+                context = [left, right]
+                # x = embedding(context, shape=input_shape2)
+                data_x.append(context)
+                data_y.append(y)
+
+    for left, right, label in zip(data_zero['context_left'], data_zero['context_right'], data_zero['label']):
+        y = np.zeros(output_shape)
+        y[label] = 1
+        left = eval(left)
+        left = left[-40:]
+        right = eval(right)
+        right = right[:40]
+        context = [left, right]
+        # x = embedding(context, shape=input_shape2)
+        data_x.append(context)
+        data_y.append(y)
+
+    for left, right, label in zip(data_zero['context_left'], data_zero['context_right'], data_zero['label']):
+            y = np.zeros(output_shape)
+            y[label] = 1
+            left = eval(left)
+            left = left[-40:]
+            if len(left) > 30:
+                left = left[2:]
+            elif len(left) > 15:
+                left = left[1:]
+            right = eval(right)
+            right = right[:40]
+            if len(right) > 15:
+                right = right[:-1]
+            context = [left, right]
+            # x = embedding(context, shape=input_shape2)
+            data_x.append(context)
+            data_y.append(y)
+
+    # for left, right, label in zip(data_old['context_left'], data_old['context_right'], data_old['label']):
+    #         y = np.zeros(output_shape)
+    #         y[label] = 1
+    #         left = eval(left)
+    #         left = left[-40:]
+    #         right = eval(right)
+    #         right = right[:40]
+    #         context = [left, right]
+    #         # x = embedding(context, shape=input_shape2)
+    #         data_x.append(context)
+    #         data_y.append(y)
+
+    _data = list(zip(data_x, data_y))
+    import random
+    random.shuffle(_data)
+    data_x = [i[0] for i in _data]
+    data_y = [i[1] for i in _data]
+    test_len = int(len(data_x) * 0.13)
+    test_x = data_x[:test_len]
+    test_y = data_y[:test_len]
+    print("测试数据量:", len(test_x))
+    train_x = data_x[test_len:]
+    train_y = data_y[test_len:]
+
+    for left, right, label in zip(data_old['context_left'], data_old['context_right'], data_old['label']):
+            y = np.zeros(output_shape)
+            y[label] = 1
+            left = eval(left)
+            left = left[-40:]
+            right = eval(right)
+            right = right[:40]
+            context = [left, right]
+            # x = embedding(context, shape=input_shape2)
+            train_x.append(context)
+            train_y.append(y)
+    print("训练数据量:", len(train_x))
+
+    # train_y, test_y = np.array(train_y), np.array(test_y)
+    # train_x = np.array(train_x)
+    # test_x = np.array(test_x)
+    # test_x = np.transpose(test_x, (1, 0, 2, 3))
+    # train_x, test_x = (np.transpose(train_x, (1, 0, 2, 3)), np.transpose(test_x, (1, 0, 2, 3)))
+    training_generator = DataGenerator(train_x, train_y)
+    # training_generator = DataGenerator(data_x, data_y)
+    validation_generator = DataGenerator(test_x, test_y)
+
+    # model = getModel3()
+    model = getModel2()
+    epochs = 100
+    # batch_size = 256
+    checkpoint = ModelCheckpoint("model_time_classify.weights",save_weights_only=True, monitor="val_loss", verbose=1,
+                                 save_best_only=True, mode='min')
+    # checkpoint = ModelCheckpoint("model_time_classify2.weights",save_weights_only=True, monitor="loss", verbose=1,
+    #                                  save_best_only=True, mode='min')
+
+    history = model.fit_generator(
+        generator=training_generator,
+        validation_data=validation_generator,
+        use_multiprocessing=True, workers=2,
+        epochs=epochs,
+        shuffle=True,
+        callbacks=[checkpoint],
+        class_weight='auto'
+    )
+    # plot_loss(history=history)
+    # load_model = models.load_model("model_label_time_classify.model.hdf5",
+    #                                custom_objects={'precision': precision, 'recall': recall, 'f1_score': f1_score})
+    # y_pre = load_model.predict([test_x[0], test_x[1]])
+    # # y_pre = load_model.predict(test_x[0])
+    # # 各类别预测评估
+    # res1 = classification_report(np.argmax(test_y, axis=1), np.argmax(y_pre, axis=1))
+    # print(res1)
+    # y_pre2 = load_model.predict([train_x[0], train_x[1]])
+    # # y_pre2 = load_model.predict(train_x[0])
+    # res2 = classification_report(np.argmax(train_y, axis=1), np.argmax(y_pre2, axis=1))
+    # print(res2)
+from keras.utils import Sequence,to_categorical
+class DataGenerator(Sequence):
+    'Generates data for Keras'
+    def __init__(self, texts, labels, batch_size=256,
+                 n_classes=4, shuffle=True):
+        'Initialization'
+        # self.dim = dim
+        self.batch_size = batch_size
+        self.labels = labels
+        self.texts = texts
+        self.n_classes = n_classes
+        self.shuffle = shuffle
+        self.on_epoch_end()
+
+    def __len__(self):
+        'Denotes the number of batches per epoch'
+        _len = len(self.texts) // self.batch_size
+        if len(self.texts) % self.batch_size != 0:
+            _len += 1
+        return _len
+
+    def __getitem__(self, index):
+        'Generate one batch of data'
+        # Generate indexes of the batch
+        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
+
+        # Find list of IDs
+        list_texts = [self.texts[k] for k in indexes]
+        _label = [self.labels[k] for k in indexes]
+        # Generate data
+        X, y = self.__data_generation(list_texts,_label)
+
+        return X, y
+
+    def on_epoch_end(self):
+        'Updates indexes after each epoch'
+        self.indexes = np.arange(len(self.texts))
+        if self.shuffle == True:
+            np.random.shuffle(self.indexes)
+
+    def __data_generation(self, list_texts,_label):
+        'Generates data containing batch_size samples'
+        # Initialization
+        # X = np.empty((self.batch_size, *self.dim))
+        # y = np.empty((self.batch_size), dtype=int)
+        # batch_len = len(list_texts)
+        # x = np.empty((batch_len, *self.dim))
+        x = []
+        # y = np.empty((batch_len), dtype=int)
+
+        # Generate data
+        for i, context in enumerate(list_texts):
+            # Store sample
+            # tokens = preprocess2(text)
+            # tokens = tokens[:maxlen]
+            words_matrix = embedding_mywords(context, shape=input_shape2)
+            # Store class
+            # y[i] = _label[i]
+            x.append(words_matrix)
+        x = np.array(x)
+        x = np.transpose(x, (1, 0, 2, 3))
+        return [x[0],x[1]], np.array(_label)
 
 def predict2():
     model1 = models.load_model("model_label_time_classify.model.hdf5",custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
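
Note: DataGenerator defers embedding to batch time (embedding_mywords runs
inside __data_generation), so only raw token lists are held in memory, and each
batch is transposed from (batch, side, seq, emb) into the [left, right] pair the
two-input model expects. A minimal sketch of that reshaping step:

    import numpy as np

    batch = np.zeros((3, 2, 40, 128))          # 3 samples, each a (left, right) pair
    batch = np.transpose(batch, (1, 0, 2, 3))  # -> (side, batch, seq, emb)
    left, right = batch[0], batch[1]
    print(left.shape, right.shape)             # (3, 40, 128) (3, 40, 128)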
@@ -237,6 +614,73 @@ def predict2():
     # print(error_data.info())
     error_data.to_csv("C:\\Users\\admin\\Desktop\\error4-30.csv")
 
+def predict3():
+    data = pd.read_csv("new_tokens_data1.csv", chunksize=5000)
+    model1 = models.load_model("model_label_time_classify.model.hdf5",custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
+    new_data = pd.DataFrame()
+    idx = 0
+    for _data in data:
+
+        test_x = []
+        test_y = []
+        for left, right, label in zip(_data['context_left'], _data['context_right'], _data['label']):
+            left = eval(left)
+            left = left[-10:]
+            right = eval(right)
+            right = right[:10]
+            label = int(label)
+            y = np.zeros(output_shape)
+            y[label] = 1
+            context = [left, right]
+            x = embedding(context, shape=input_shape2)
+            test_x.append(x)
+            test_y.append(y)
+        test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
+        pre_y = model1.predict([test_x[0], test_x[1]])
+        _data['pre'] = [np.argmax(item) for item in pre_y]
+        _data['is_same'] = [1 if int(_label)==_pre else 0 for _label,_pre in zip(_data['label'],_data['pre'])]
+        # data['label'] = label
+        new_data = pd.concat([new_data, _data])
+        idx += 5000
+        print(idx)
+    # data.to_csv("new_tokens_data1.csv")
+    new_data.to_excel("new_tokens_data1_res.xlsx")
+
+def predict4():
+    data = pd.read_csv("tokens_tolabel_data1_res11.csv", chunksize=3000)
+    model1 = getModel2()
+    model1.load_weights("model_time_classify.weights")
+    new_data = pd.DataFrame()
+    idx = 0
+    for _data in data:
+        test_x = []
+        test_y = []
+        for left, right, label in zip(_data['context_left'], _data['context_right'], _data['re_label']):
+            left = eval(left)
+            left = left[-40:]
+            right = eval(right)
+            right = right[:40]
+            label = int(label)
+            y = np.zeros(output_shape)
+            y[label] = 1
+            context = [left, right]
+            x = embedding_mywords(context, shape=input_shape2)
+            test_x.append(x)
+            test_y.append(y)
+        test_x = np.transpose(np.array(test_x), (1, 0, 2, 3))
+        pre_y = model1.predict([test_x[0], test_x[1]])
+        _data['pre_label'] = [np.argmax(item) for item in pre_y]
+        _data['pre_label_prob'] = [max(item) for item in pre_y]
+        _data['is_same'] = [1 if int(_label)==_pre else 0 for _label,_pre in zip(_data['re_label'],_data['pre_label'])]
+        # _data['is_same'] = [1 if int(_re)==int(_pre) and int(_re)==int(_label) else 0 for _label,_re,_pre in zip(_data['label'],_data['re_label'],_data['pre_label'])]
+        # data['label'] = label
+        new_data = pd.concat([new_data, _data])
+        idx += 3000
+        print(idx)
+    # data.to_csv("new_tokens_data1.csv")
+    new_data.to_excel("tokens_tolabel_data1_res12.xlsx")
+
+
 def predict():
     model1 = models.load_model("model_label_time_classify.model.hdf5",custom_objects={'precision':precision,'recall':recall,'f1_score':f1_score})
     data_load = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc.csv", index_col=0)
@@ -313,7 +757,7 @@ def data_process3():
     token_end = []
     context_left = []
     context_right = []
-    data2 = pd.read_csv("C:\\Users\\admin\\Desktop\\newdata_30_prc2.csv")
+    data2 = pd.read_csv("newdata_30_prc2.csv")
     label = []
     # data=data[:20]
     for id,sentences,tokens,offset,begin,end,entity_text in zip(data['document_id'],data['sentences'],data['tokens'],data['offsets_to_text'],
@@ -343,7 +787,7 @@ def data_process3():
                 break
         token_begin.append(entity_tbegin)
         token_end.append(entity_tend)
-        s = spanWindow(tokens=tokens,begin_index=entity_tbegin,end_index=entity_tend,size=10)
+        s = spanWindow(tokens=tokens,begin_index=entity_tbegin,end_index=entity_tend-1,size=40)
         s1 = s[0]
         _temp1 = []
         for i in range(len(s1)):
@@ -372,7 +816,8 @@ def data_process3():
     data['context_right'] = context_right
     data['label'] = label
     data = data.drop(['tokens','offsets_to_text','sentences'],axis=1)
-    data.to_csv("C:\\Users\\admin\\Desktop\\tokens_data.csv")
+    # data.to_csv("tokens_data_02.csv")
+    data.to_excel("tokens_data_02.xlsx")
 
 def plot_loss(history):
     plt.plot(history.history['loss'])
@@ -383,15 +828,64 @@ def plot_loss(history):
     plt.legend(['Train', 'Test'], loc='upper left')
     plt.show()
 
+def embedding_mywords(datas,shape):
+    '''
+    @summary: look up the word vector for each token
+    @param:
+        datas: list of token lists (left and right context)
+        shape: shape of the returned array
+    @return: array of the given shape holding the word embeddings
+    '''
+    model_w2v = getModel_w2v()
+    embed = np.zeros(shape)
+    length = shape[1]
+    out_index = 0
+    #print(datas)
+    for data in datas:
+        index = 0
+        for item in data:
+            item_not_space = re.sub(r"\s*", "", item)
+            if index>=length:
+                break
+            if item_not_space in model_w2v.vocab:
+                embed[out_index][index] = model_w2v[item_not_space]
+                index += 1
+            else:
+                embed[out_index][index] = model_w2v['unk']
+                index += 1
+        out_index += 1
+    return embed
+
+def save_model():
+    graph = tf.Graph()
+    with graph.as_default() as graph:
+        with tf.Session(graph=graph).as_default() as sess:
+            test_model = getModel2()
+            test_model.load_weights("model_time_classify.weights")
+            tf.saved_model.simple_save(sess,
+                                       "models/timesplit_model/",
+                                       inputs={"input0": test_model.input[0],
+                                               "input1":test_model.input[1]
+                                               },
+                                       outputs={"outputs": test_model.output})
+
+
+
 if __name__ == '__main__':
     # get_data()
     # getModel()
+    # getModel2()
+    # getModel3()
     # training()
     # train2()
+    # train3()
     # data_process()
     # data_process2()
     # data_process3()
     # predict()
     # predict2()
+    # predict3()
+    # predict4()
+    save_model()
 
     pass
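
Note: save_model() exports the Keras weights as a TF1 SavedModel via
tf.saved_model.simple_save, which is what the timesplit_model directories
updated in this commit contain. A loading sketch (the "serving_default"
signature key and the "input0"/"input1"/"outputs" names follow from the
simple_save call above; the zero inputs are placeholders):

    import numpy as np
    import tensorflow as tf

    with tf.Session(graph=tf.Graph()) as sess:
        meta = tf.saved_model.loader.load(
            sess, [tf.saved_model.tag_constants.SERVING], "models/timesplit_model/")
        sig = meta.signature_def["serving_default"]
        feed = {
            sess.graph.get_tensor_by_name(sig.inputs["input0"].name): np.zeros((1, 40, 128)),
            sess.graph.get_tensor_by_name(sig.inputs["input1"].name): np.zeros((1, 40, 128)),
        }
        out = sess.run(sess.graph.get_tensor_by_name(sig.outputs["outputs"].name), feed)
        print(out.shape)  # (1, 4) -> softmax over the four time classes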