# label_web_server2.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Jun 1 18:03:03 2018
  4. @author: DONG
  5. """
  6. import sys
  7. import os
  8. import codecs
  9. sys.path.append(os.path.abspath("../.."))
  10. print(sys.path)
  11. from flask import Flask, jsonify
  12. from flask import abort
  13. from flask import request
  14. import logging
  15. import time
  16. import BiddingKG.dl.interface.predictor as predictor
  17. import BiddingKG.dl.interface.Preprocessing as Preprocessing
  18. from Entity2DB import *
  19. import psycopg2
  20. import glob
  21. os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
  22. os.environ["CUDA_VISIBLE_DEVICES"] = ""
  23. premPredict = predictor.PREMPredict()
  24. epcPredict = predictor.EPCPredict()
  25. codeNamePredict = predictor.CodeNamePredict()
  26. for file in glob.glob("C:\\Users\\User\\Desktop\\20190416要素\\*.html"):
  27. try:
  28. conn = psycopg2.connect(dbname="article_label",user="postgres",password="postgres",host="192.168.2.101")
  29. #id = "比地_101_61298166.html"
  30. #content = codecs.open("C:\\Users\\User\\Desktop\\20190416要素\\"+id,"r",encoding="utf8").read()
  31. id = file.split("\\")[-1]
  32. content = codecs.open(file,"r",encoding="utf8").read()
  33. sql = " select count(1) from articles_processed where id='"+id+"' "
  34. cursor = conn.cursor()
  35. cursor.execute(sql)
  36. rows = cursor.fetchall()
  37. if rows[0][0]>0:
  38. continue
  39. list_articles,list_sentences,list_entitys = Preprocessing.get_preprocessed([[id,content]])
  40. role_datas = Preprocessing.search_role_data(list_sentences,list_entitys)
  41. money_datas = Preprocessing.search_money_data(list_sentences,list_entitys)
  42. person_datas = Preprocessing.search_person_data(list_sentences,list_entitys)
  43. premPredict.predict(role_datas,money_datas)
  44. epcPredict.predict(person_datas)
  45. codeName = codeNamePredict.predict(list_articles)
  46. persistArticle(conn, list_articles,codeName)
  47. for sentences in list_sentences:
  48. persistSentence(conn, sentences)
  49. for entitys in list_entitys:
  50. persistEntity(conn, entitys)
  51. except Exception as e:
  52. pass
  53. finally:
  54. conn.commit()
  55. conn.close()