train.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
'''
Created on 2019-08-12
@author: User
'''
import os
import sys
# Extend the import path with the directory three levels above the current
# working directory (abspath resolves "../../.." against the CWD, not this
# file) -- presumably so the project-level `module` package can be imported.
sys.path.append(os.path.abspath("../../.."))
from module import model
from module.Utils import *
from keras.callbacks import ModelCheckpoint,Callback
import os
# Enumerate CUDA devices in PCI bus order and expose none of them: an empty
# CUDA_VISIBLE_DEVICES hides every GPU, so this script runs on the CPU.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import featureEngine
from module import htmlDrawing as hd
  16. def train():
  17. train_file = "source_11input_padding.pk"
  18. model1 = model.getBiRNNModel(input_shape=[None,11], out_len=2,TRANSFORMER=True)
  19. data = load(train_file)
  20. new_data1 = []
  21. for sample in data[1]:
  22. new_label = []
  23. for item in sample:
  24. if np.argmax(item)==0:
  25. new_label.append(0)
  26. else:
  27. new_label.append(1)
  28. new_data1.append(new_label)
  29. data[1] = np.array(new_data1)
  30. train_percent = 0.8
  31. test_percent=0.9
  32. train_len = round(len(data[0])*train_percent)
  33. test_len = round(len(data[0])*test_percent)
  34. callback = ModelCheckpoint("log/ep{epoch:03d}-acc{precision:.3f}-loss{loss:.3f}-val_acc{val_precision:.3f}-val_loss{val_loss:.3f}.h5",save_best_only=True,save_weights_only=True, monitor="val_loss", verbose=1, mode="min")
  35. history_model = model1.fit(x=data[0][:train_len],y=data[1][:train_len],validation_data=[data[0][train_len:test_len],data[1][train_len:test_len]],epochs=100,batch_size=48,shuffle=True,callbacks=[callback])
  36. def predict(x):
  37. ''''''
  38. model1 = model.getBiRNNModel(input_shape=[None,11], out_len=2,TRANSFORMER=True)
  39. model1.load_weights("log/ep043-acc0.976-loss0.025-val_acc0.951-val_loss0.054.h5")
  40. #path = "log/ep005-acc0.970-loss0.047-val_acc0.944-val_loss0.077.h5"
  41. #model1 = models.load_model(path, custom_objects={"acc":acc,"precision":precision,"recall":recall,"f1_score":f1_score,"my_loss":my_loss})
  42. return model1.predict(x,batch_size=1)
  43. def val():
  44. pk_file = "source_11input_padding.pk"
  45. data = load(pk_file)
  46. train_percent = 0.9
  47. train_len = round(len(data[0])*train_percent)
  48. #print(np.shape(data))
  49. predict_y = np.argmax(predict(data[0][train_len:]),1)
  50. label_y = np.argmax(data[1][train_len:],1)
  51. list_url = data[2][train_len:]
  52. size_predict = 0
  53. size_considence = 0
  54. dict_root_true_wrong = dict()
  55. for _predict,_label,_url in zip(predict_y,label_y,list_url):
  56. root = _url.split("/")[2]
  57. if root not in dict_root_true_wrong:
  58. dict_root_true_wrong[root] = [0,0]
  59. if _predict[1]==_label[1]:
  60. size_considence += 1
  61. dict_root_true_wrong[root][0] += 1
  62. else:
  63. dict_root_true_wrong[root][1] += 1
  64. print(_url)
  65. size_predict += 1
  66. list_root_true_wrong = []
  67. for _key in dict_root_true_wrong.keys():
  68. list_root_true_wrong.append([_key,dict_root_true_wrong[_key]])
  69. list_root_true_wrong.sort(key=lambda x:x[1][1]/(x[1][0]+x[1][1]))
  70. print(list_root_true_wrong)
  71. print(size_considence,size_predict)
  72. def test(url):
  73. os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  74. os.environ["CUDA_VISIBLE_DEVICES"] = ""
  75. browser = hd.getdriver()
  76. data = featureEngine.getInput_byJS(browser,url,"")
  77. if data:
  78. x,list_inner,list_xpath = data
  79. print("x:",x)
  80. p = predict(x)
  81. print(p)
  82. print(np.argmax(p,1))
  83. print(p[0][np.argmax(p,1)[0][1]])
  84. print(list_inner[np.argmax(p,1)[0][1]])
  85. print(list_xpath[np.argmax(p,1)[0][1]])
  86. if __name__=="__main__":
  87. #train()
  88. #val()
  89. test("https://www.600757.com.cn/list-106-21.html")