# train.py
  1. from tensorflow.keras.layers import *
  2. from tensorflow.keras.models import *
  3. from tensorflow.keras.optimizers import *
  4. from tensorflow.keras.losses import *
  5. from BiddingKG.dl.common.Utils import *
  6. import numpy as np
  7. from random import random
  8. import json
  9. def getData(list_data):
  10. # list_data = load("./data/2021-06-25-mergeTrain.pk")
  11. train_x = []
  12. train_y = []
  13. test_x = []
  14. test_y = []
  15. test_index = []
  16. _index = -1
  17. for _data in list_data:
  18. _index += 1
  19. matrix = json.loads(_data["json_matrix"])
  20. # new_matrix = []
  21. # for i in range(len(matrix)):
  22. # if i <56:
  23. # if matrix[i] == -1:
  24. # matrix[i] = 0
  25. # if i%2==1:
  26. # matrix[i] /= 10
  27. # new_matrix.append(matrix[i])
  28. # elif i<63:
  29. # matrix[i] /= 10
  30. # new_matrix.append(matrix[i])
  31. # else:
  32. # new_matrix.append(matrix[i])
  33. matrix = np.array(matrix)
  34. _data["json_matrix"] = matrix
  35. label = [1,0] if _data["prob"] is None else [0,1]
  36. if random()>0.2:
  37. train_x.append(matrix)
  38. train_y.append(label)
  39. else:
  40. test_index.append(_index)
  41. test_x.append(matrix)
  42. test_y.append(label)
  43. return np.array(train_x),np.array(train_y),np.array(test_x),np.array(test_y),list_data,test_index
  44. def getModel():
  45. input = Input(shape=(46,))
  46. # def _f():
  47. # v1 = tf.get_variable("dense_kernel",shape=(46,2),dtype=tf.float32)
  48. # b1 = tf.get_variable("bias_kernel",shape=(2,),dtype=tf.float32)
  49. # Lambda()
  50. b = Dense(2,activation="tanh")(input)
  51. out = Softmax()(b)
  52. model = Model(inputs=input,outputs=out)
  53. optimizer = Adadelta()
  54. _loss = categorical_crossentropy
  55. model.compile(optimizer,_loss,metrics=[precision,recall])
  56. model.summary()
  57. return model
def train():
    # Train the merge model incrementally over 20 pre-split data shards,
    # report misclassified held-out samples per shard, then save the weights.
    model = getModel()
    for i in range(20):
        # Each shard comes as a pair: positive ("isnotnull") and
        # negative ("isnull") samples, merged before splitting.
        file1 = "2021-07-15-mergeTrain_isnotnull_part%d.pk"%i
        file2 = "2021-07-15-mergeTrain_isnull_part%d.pk"%i
        # NOTE(review): `load` and `os` are presumably provided by the
        # `BiddingKG.dl.common.Utils` star import — confirm; `os` is not
        # imported explicitly in this file.
        data1 = load(os.path.join("F:\\Workspace2016\\DataMining\\data",file1))
        data2 = load(os.path.join("F:\\Workspace2016\\DataMining\\data",file2))
        data1.extend(data2)
        # Fresh random ~80/20 split per shard (see getData).
        train_x,train_y,test_x,test_y,list_data,test_index = getData(data1)
        model.fit(x=train_x,y=train_y,batch_size=300,epochs=30,validation_data=(test_x,test_y))
        predict = model.predict(test_x)
        _count = 0
        # Dump every held-out sample whose predicted class disagrees with its label.
        for _p,_l,_index in zip(predict,test_y,test_index):
            if np.argmax(_p)!=np.argmax(_l):
                _count += 1
                print("===================")
                print(list_data[_index])
                print(_p)
                print(_l)
        print('diff count:%d'%_count)
    model.save("model/merge.h5")
  79. class MergePredictor():
  80. def __init__(self):
  81. self.input_size = 46
  82. self.output_size = 2
  83. self.matrix = np.array([[-5.817399024963379, 3.367797374725342], [-18.3098201751709, 17.649206161499023], [-7.115952014923096, 9.236002922058105], [-5.054129123687744, 1.8316771984100342], [6.391637325286865, -7.57396125793457], [-2.8721542358398438, 6.826520919799805], [-5.426159858703613, 10.235260009765625], [-4.240962982177734, -0.32092899084091187], [-0.6378090381622314, 0.4834124445915222], [-1.7574478387832642, -0.17846578359603882], [4.325063228607178, -2.345501661300659], [0.6086963415145874, 0.8325914740562439], [2.5674285888671875, 1.8432368040084839], [-11.195490837097168, 17.4630184173584], [-11.334247589111328, 10.294097900390625], [2.639320135116577, -8.072785377502441], [-2.2689898014068604, -3.6194612979888916], [-11.129570960998535, 18.907018661499023], [4.526485919952393, 4.57423210144043], [-3.170452356338501, -1.3847776651382446], [-0.03280467540025711, -3.0471489429473877], [-6.601675510406494, -10.05613899230957], [-2.9116673469543457, 4.819308280944824], [1.4398306608200073, -0.6549674272537231], [7.091512203216553, -0.142232745885849], [-0.14478975534439087, 0.06628061085939407], [-6.775437831878662, 9.279582023620605], [-0.006781991105526686, 1.6472798585891724], [3.83730149269104, 1.4072834253311157], [1.2229349613189697, -2.1653425693511963], [1.445560336112976, -0.8397432565689087], [-11.325132369995117, 11.231744766235352], [2.3229124546051025, -4.623719215393066], [0.38562265038490295, -1.2645516395568848], [-1.3670002222061157, 2.4323790073394775], [-3.6994268894195557, 0.7515658736228943], [-0.11617227643728256, -0.820703387260437], [4.089913368225098, -4.693605422973633], [-0.4959050714969635, 1.5272167921066284], [-2.7135870456695557, -0.5120691657066345], [0.573157548904419, -1.9375460147857666], [-4.262857437133789, 0.6375582814216614], [-1.8825865983963013, 2.427532911300659], [-4.565115451812744, 4.0269083976745605], [-4.339804649353027, 6.754288196563721], [-4.31907320022583, 0.28193211555480957]])
  84. self.bias = np.array([16.79706382751465, -13.713337898254395])
  85. # self.model = load_model("model/merge.h5",custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
  86. def activation(self,vec,_type):
  87. if _type=="relu":
  88. _vec = np.array(vec)
  89. return _vec*(_vec>0)
  90. if _type=="tanh":
  91. return np.tanh(vec)
  92. if _type=="softmax":
  93. _vec = np.array(vec)
  94. _exp = np.exp(_vec)
  95. return _exp/np.sum(_exp)
  96. def predict(self,input):
  97. _out = self.activation(self.activation(np.matmul(np.array(input).reshape(-1,self.input_size),self.matrix)+self.bias,"tanh"),"softmax")
  98. # print(self.model.predict(np.array(input).reshape(-1,46)))
  99. return _out
import tensorflow as tf
def getVariable():
    # Load the trained weights from disk and dump them as plain Python lists,
    # so they can be pasted into MergePredictor.matrix / MergePredictor.bias.
    # NOTE(review): uses the TF1-style tf.Session / tf.Graph API; under TF2
    # this needs tf.compat.v1 — confirm the target TensorFlow version.
    graph=tf.Graph()
    sess = tf.Session(graph=graph)
    with graph.as_default():
        with sess.as_default():
            model = getModel()
            model.load_weights("model/merge.h5")
            # model = load_model("model/merge.h5",custom_objects={"precision":precision,"recall":recall,"f1_score":f1_score})
            model.summary()
            # a = Model()
            print(model.get_weights())
            # One list per weight tensor: first the (46, 2) kernel, then the (2,) bias.
            for _w in model.get_weights():
                print(np.array(_w).tolist())
if __name__=="__main__":
    # train()
    # getVariable()
    # Smoke-test the numpy-only predictor on one hand-built 46-dim feature vector.
    mp = MergePredictor()
    mp.predict([0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 1. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0. , 0. ,
        0. , 0. , 0. , 0.6 , 1. ,
        0.27272727, 1. , 0.6 , 0.6 , 0.2 ,
        1. ])