|
@@ -10,8 +10,8 @@ import numpy as np
|
|
|
from random import random
|
|
|
import json
|
|
|
|
|
|
-def getData():
|
|
|
- list_data = load("./data/2021-06-25-mergeTrain.pk")
|
|
|
+def getData(list_data):
|
|
|
+ # list_data = load("./data/2021-06-25-mergeTrain.pk")
|
|
|
train_x = []
|
|
|
train_y = []
|
|
|
test_x = []
|
|
@@ -21,20 +21,20 @@ def getData():
|
|
|
for _data in list_data:
|
|
|
_index += 1
|
|
|
matrix = json.loads(_data["json_matrix"])
|
|
|
- new_matrix = []
|
|
|
- for i in range(len(matrix)):
|
|
|
- if i <56:
|
|
|
- if matrix[i] == -1:
|
|
|
- matrix[i] = 0
|
|
|
- if i%2==1:
|
|
|
- matrix[i] /= 10
|
|
|
- new_matrix.append(matrix[i])
|
|
|
- elif i<63:
|
|
|
- matrix[i] /= 10
|
|
|
- new_matrix.append(matrix[i])
|
|
|
- else:
|
|
|
- new_matrix.append(matrix[i])
|
|
|
- matrix = np.array(new_matrix)
|
|
|
+ # new_matrix = []
|
|
|
+ # for i in range(len(matrix)):
|
|
|
+ # if i <56:
|
|
|
+ # if matrix[i] == -1:
|
|
|
+ # matrix[i] = 0
|
|
|
+ # if i%2==1:
|
|
|
+ # matrix[i] /= 10
|
|
|
+ # new_matrix.append(matrix[i])
|
|
|
+ # elif i<63:
|
|
|
+ # matrix[i] /= 10
|
|
|
+ # new_matrix.append(matrix[i])
|
|
|
+ # else:
|
|
|
+ # new_matrix.append(matrix[i])
|
|
|
+ matrix = np.array(matrix)
|
|
|
_data["json_matrix"] = matrix
|
|
|
label = [1,0] if _data["prob"] is None else [0,1]
|
|
|
if random()>0.2:
|
|
@@ -46,10 +46,15 @@ def getData():
|
|
|
test_y.append(label)
|
|
|
return np.array(train_x),np.array(train_y),np.array(test_x),np.array(test_y),list_data,test_index
|
|
|
|
|
|
+
|
|
|
def getModel():
|
|
|
|
|
|
- input = Input(shape=(36,))
|
|
|
+ input = Input(shape=(46,))
|
|
|
|
|
|
+ # def _f():
|
|
|
+ # v1 = tf.get_variable("dense_kernel",shape=(46,2),dtype=tf.float32)
|
|
|
+ # b1 = tf.get_variable("bias_kernel",shape=(2,),dtype=tf.float32)
|
|
|
+ # Lambda()
|
|
|
b = Dense(2,activation="tanh")(input)
|
|
|
|
|
|
out = Softmax()(b)
|
|
@@ -65,20 +70,83 @@ def getModel():
|
|
|
|
|
|
def train(data_dir="F:\\Workspace2016\\DataMining\\data", num_parts=20):
    """Train the merge model over pickled data parts and save it.

    Each part comes as two pickles: rows whose label field is not null and
    rows whose label field is null; both are concatenated and split by
    getData() into train/test sets. After fitting on each part, every
    misclassified held-out sample is printed for inspection.

    :param data_dir: directory containing the *_part%d.pk pickle files
                     (defaults to the original hard-coded path)
    :param num_parts: number of part files to iterate over (default 20)

    NOTE(review): the original indentation was lost in transit, so whether
    the model was saved per-part or once at the end could not be confirmed;
    a single final save is used here — verify against training logs.
    """
    model = getModel()
    for part in range(num_parts):
        file_not_null = "2021-07-15-mergeTrain_isnotnull_part%d.pk" % part
        file_is_null = "2021-07-15-mergeTrain_isnull_part%d.pk" % part
        data = load(os.path.join(data_dir, file_not_null))
        data.extend(load(os.path.join(data_dir, file_is_null)))

        train_x, train_y, test_x, test_y, list_data, test_index = getData(data)

        model.fit(x=train_x, y=train_y, batch_size=300, epochs=30,
                  validation_data=(test_x, test_y))

        # Report every held-out sample the model gets wrong for this part.
        predictions = model.predict(test_x)
        mismatch_count = 0
        for pred, label, idx in zip(predictions, test_y, test_index):
            if np.argmax(pred) != np.argmax(label):
                mismatch_count += 1
                print("===================")
                print(list_data[idx])
                print(pred)
                print(label)
        print('diff count:%d' % mismatch_count)
    model.save("model/merge.h5")
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
class MergePredictor():
    """Hand-rolled numpy re-implementation of the trained 46->2 merge model.

    The kernel and bias below were exported from ``model/merge.h5`` (see
    getVariable); ``predict`` runs the same sample through both the numpy
    forward pass and the Keras model so the two outputs can be compared.
    """

    def __init__(self):
        # Fixed network dimensions: 46 input features, 2 output classes.
        self.input_size = 46
        self.output_size = 2
        # Dense-layer kernel (46 x 2) exported from the trained Keras model.
        self.matrix = np.array([[-5.817399024963379, 3.367797374725342], [-18.3098201751709, 17.649206161499023], [-7.115952014923096, 9.236002922058105], [-5.054129123687744, 1.8316771984100342], [6.391637325286865, -7.57396125793457], [-2.8721542358398438, 6.826520919799805], [-5.426159858703613, 10.235260009765625], [-4.240962982177734, -0.32092899084091187], [-0.6378090381622314, 0.4834124445915222], [-1.7574478387832642, -0.17846578359603882], [4.325063228607178, -2.345501661300659], [0.6086963415145874, 0.8325914740562439], [2.5674285888671875, 1.8432368040084839], [-11.195490837097168, 17.4630184173584], [-11.334247589111328, 10.294097900390625], [2.639320135116577, -8.072785377502441], [-2.2689898014068604, -3.6194612979888916], [-11.129570960998535, 18.907018661499023], [4.526485919952393, 4.57423210144043], [-3.170452356338501, -1.3847776651382446], [-0.03280467540025711, -3.0471489429473877], [-6.601675510406494, -10.05613899230957], [-2.9116673469543457, 4.819308280944824], [1.4398306608200073, -0.6549674272537231], [7.091512203216553, -0.142232745885849], [-0.14478975534439087, 0.06628061085939407], [-6.775437831878662, 9.279582023620605], [-0.006781991105526686, 1.6472798585891724], [3.83730149269104, 1.4072834253311157], [1.2229349613189697, -2.1653425693511963], [1.445560336112976, -0.8397432565689087], [-11.325132369995117, 11.231744766235352], [2.3229124546051025, -4.623719215393066], [0.38562265038490295, -1.2645516395568848], [-1.3670002222061157, 2.4323790073394775], [-3.6994268894195557, 0.7515658736228943], [-0.11617227643728256, -0.820703387260437], [4.089913368225098, -4.693605422973633], [-0.4959050714969635, 1.5272167921066284], [-2.7135870456695557, -0.5120691657066345], [0.573157548904419, -1.9375460147857666], [-4.262857437133789, 0.6375582814216614], [-1.8825865983963013, 2.427532911300659], [-4.565115451812744, 4.0269083976745605], [-4.339804649353027, 6.754288196563721], [-4.31907320022583, 0.28193211555480957]])
        # Dense-layer bias (2,) exported from the trained Keras model.
        self.bias = np.array([16.79706382751465, -13.713337898254395])
        # precision/recall/f1_score are custom metrics defined elsewhere in this module.
        self.model = load_model("model/merge.h5",custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})

    def activation(self,vec,_type):
        """Apply the named activation ("relu", "tanh" or "softmax") to vec.

        :param vec: array-like of floats
        :param _type: activation name
        :raises ValueError: for an unknown activation name (the original
            silently fell through and returned None)
        """
        if _type=="relu":
            _vec = np.array(vec)
            return _vec*(_vec>0)
        if _type=="tanh":
            return np.tanh(vec)
        if _type=="softmax":
            _vec = np.array(vec)
            # Subtract the global max before exponentiating so large logits
            # cannot overflow; the normalized result is mathematically
            # unchanged (normalization is over the whole array, as before).
            _exp = np.exp(_vec - np.max(_vec))
            return _exp/np.sum(_exp)
        raise ValueError("unknown activation type: %r" % (_type,))

    def predict(self,input):
        """Run input (46 features per row) through both implementations.

        Prints the numpy forward pass and the Keras prediction, and returns
        the Keras prediction (the original returned None; callers ignoring
        the return value are unaffected).
        """
        manual = self.activation(
            self.activation(np.matmul(np.array(input).reshape(-1,46),self.matrix)+self.bias,"tanh"),
            "softmax")
        print(manual)
        keras_out = self.model.predict(np.array(input).reshape(-1,46))
        print(keras_out)
        return keras_out
|
|
|
|
|
|
- predict = model.predict(test_x)
|
|
|
- _count = 0
|
|
|
- for _p,_l,_index in zip(predict,test_y,test_index):
|
|
|
- if np.argmax(_p)!=np.argmax(_l):
|
|
|
- _count += 1
|
|
|
- print("===================")
|
|
|
- print(list_data[_index])
|
|
|
- print(_p)
|
|
|
- print(_l)
|
|
|
- print('diff count:%d'%_count)
|
|
|
+import tensorflow as tf
|
|
|
def getVariable():
    """Load the trained weights into a fresh model and dump them to stdout.

    Builds a dedicated TF1-style Graph/Session so the dump is isolated from
    any default-graph state, then prints each weight tensor as a plain list.
    """
    graph = tf.Graph()
    session = tf.Session(graph=graph)
    with graph.as_default(), session.as_default():
        model = getModel()
        model.load_weights("model/merge.h5")
        model.summary()
        weights = model.get_weights()
        print(weights)
        for layer_weights in weights:
            print(np.array(layer_weights).tolist())
|
|
|
|
|
|
if __name__=="__main__":
    # Earlier entry points, kept for reference:
    # train()
    # getVariable()
    predictor = MergePredictor()
    # One hand-built 46-feature sample: mostly zeros, with a handful of
    # scaled feature values at the tail.
    sample = [
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 1.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.6, 1.0,
        0.27272727, 1.0, 0.6, 0.6, 0.2,
        1.0,
    ]
    predictor.predict(sample)
|