'''
Created on 2019-08-12

@author: User

Command-line helpers around the model returned by ``model.getBiRNNModel``:
``train`` fits it on a pickled data set, ``val`` scores a checkpoint on the
tail of that data set, and ``test`` runs the checkpoint on one live URL.
'''
import os
import sys

# Extend the import path three directories up (relative to the working
# directory) before the project imports below - presumably that is where the
# ``module`` package lives.
sys.path.append(os.path.abspath("../../.."))
from module import model
from module.Utils import *
from keras.callbacks import ModelCheckpoint,Callback
import os

# An empty CUDA_VISIBLE_DEVICES exposes no GPU to CUDA.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import featureEngine
from module import htmlDrawing as hd

# Pickled data set shared by train() and val().
_DATA_FILE = "source_11input_padding.pk"
# Checkpoint loaded by predict().
_WEIGHTS_PATH = "log/ep043-acc0.976-loss0.025-val_acc0.951-val_loss0.054.h5"


def _build_model():
    '''Build the network used for both training and inference.'''
    return model.getBiRNNModel(input_shape=[None, 11], out_len=2, TRANSFORMER=True)


def train():
    '''Fit the model on the pickled data set, checkpointing on ``val_loss``.

    ``data[0]`` is used as the inputs and ``data[1]`` as the labels. Every
    label item is reduced to 0 when its argmax is 0 and to 1 otherwise. The
    first 80% of the samples are used for fitting and the 80%-90% slice for
    validation; the last 10% is left untouched (``val`` scores on it).
    Weights are written under ``log/`` whenever ``val_loss`` improves.
    '''
    train_file = _DATA_FILE
    model1 = _build_model()
    data = load(train_file)

    # Collapse each label item to a binary flag: argmax 0 -> 0, anything else -> 1.
    data[1] = np.array([
        [0 if np.argmax(item) == 0 else 1 for item in sample]
        for sample in data[1]
    ])

    train_percent = 0.8
    test_percent = 0.9
    sample_count = len(data[0])
    train_len = round(sample_count * train_percent)
    test_len = round(sample_count * test_percent)

    # NOTE(review): the file name pattern expects "precision"/"val_precision"
    # in the training logs - confirm the model is compiled with that metric.
    callback = ModelCheckpoint(
        "log/ep{epoch:03d}-acc{precision:.3f}-loss{loss:.3f}-val_acc{val_precision:.3f}-val_loss{val_loss:.3f}.h5",
        save_best_only=True,
        save_weights_only=True,
        monitor="val_loss",
        verbose=1,
        mode="min",
    )
    # validation_data is passed as a tuple, the form documented by Keras;
    # a list is not unpacked as (x, y) by every Keras release.
    model1.fit(
        x=data[0][:train_len],
        y=data[1][:train_len],
        validation_data=(data[0][train_len:test_len], data[1][train_len:test_len]),
        epochs=100,
        batch_size=48,
        shuffle=True,
        callbacks=[callback],
    )


def predict(x):
    '''Return the raw model output for ``x`` using the saved checkpoint.

    The model is rebuilt and its weights are reloaded on every call, then
    ``x`` is predicted with ``batch_size=1``.

    :param x: model input, passed unchanged to ``model1.predict``.
    :return: whatever ``model1.predict`` returns for ``x``.
    '''
    model1 = _build_model()
    model1.load_weights(_WEIGHTS_PATH)
    # Alternative kept for reference: load a complete saved model instead of
    # rebuilding the network and loading weights only.
    #path = "log/ep005-acc0.970-loss0.047-val_acc0.944-val_loss0.077.h5"
    #model1 = models.load_model(path, custom_objects={"acc":acc,"precision":precision,"recall":recall,"f1_score":f1_score,"my_loss":my_loss})
    return model1.predict(x, batch_size=1)


def val():
    '''Score the checkpoint on the last 10% of the pickled data set.

    Predictions and labels are both reduced with ``np.argmax(..., 1)`` and a
    sample counts as correct when element 1 of the two results is equal.
    Results are tallied per ``url.split("/")[2]`` (the host part of a
    ``scheme://host/...`` URL). Prints every mismatching URL, then the
    per-root ``[root, [correct, wrong]]`` tallies sorted by ascending wrong
    fraction, then the overall correct and total counts.
    '''
    pk_file = _DATA_FILE
    data = load(pk_file)
    train_percent = 0.9
    train_len = round(len(data[0]) * train_percent)

    predict_y = np.argmax(predict(data[0][train_len:]), 1)
    label_y = np.argmax(data[1][train_len:], 1)
    list_url = data[2][train_len:]

    size_predict = 0
    size_correct = 0
    tally_by_root = dict()
    # NOTE(review): nesting was reconstructed from whitespace-collapsed source;
    # confirm that only mismatches are printed and that every sample is counted.
    for _predict, _label, _url in zip(predict_y, label_y, list_url):
        root = _url.split("/")[2]
        counts = tally_by_root.setdefault(root, [0, 0])
        if _predict[1] == _label[1]:
            size_correct += 1
            counts[0] += 1
        else:
            counts[1] += 1
            print(_url)
        size_predict += 1

    # Every root has at least one sample, so the denominator is never zero.
    list_root_true_wrong = [[root, counts] for root, counts in tally_by_root.items()]
    list_root_true_wrong.sort(key=lambda x: x[1][1] / (x[1][0] + x[1][1]))
    print(list_root_true_wrong)
    print(size_correct, size_predict)


def test(url):
    '''Fetch ``url`` in a browser, run the model on it and print the result.

    Prints the extracted input, the raw prediction, its ``np.argmax(..., 1)``
    and, for the index found at ``[0][1]`` of that argmax, the prediction row
    plus the matching entries of the inner-content and xpath lists. Nothing
    is printed when feature extraction returns a falsy value.

    :param url: page address handed to ``featureEngine.getInput_byJS``.
    '''
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    # NOTE(review): the browser is never closed here - confirm whether
    # hd.getdriver() manages the driver's lifetime.
    browser = hd.getdriver()
    data = featureEngine.getInput_byJS(browser, url, "")
    if not data:
        return

    x, list_inner, list_xpath = data
    print("x:", x)
    p = predict(x)
    print(p)
    # Computed once; the original re-evaluated the same argmax for every print.
    best = np.argmax(p, 1)
    print(best)
    index = best[0][1]
    print(p[0][index])
    print(list_inner[index])
    print(list_xpath[index])


if __name__=="__main__":
    # Switch the entry point by (un)commenting the call to run.
    #train()
    #val()
    test("https://www.600757.com.cn/list-106-21.html")