Ver código fonte

暂时取消实体替换,会有替换错的情况

luojiehua 2 anos atrás
pai
commit
6079a2ad7c

+ 15 - 13
BiddingKG/dl/entityLink/entityLink.py

@@ -76,17 +76,19 @@ def link_entitys(list_entitys,on_value=1):#on_value=0.81
             if _entity.entity_type in ["org","company"]:
                 range_entity.append(_entity)
         range_entity = range_entity[:1000]
-        for first_i in range(len(range_entity)):
-            _entity = range_entity[first_i]
-            for second_i in range(first_i+1,len(range_entity)):
-                _ent = range_entity[second_i]
-                # 2021/5/21 update: 两个实体标签互斥(一个是招标人、一个是代理人)且entity_text不相等时,跳过
-                if _entity.entity_text != _ent.entity_text and _entity.label != _ent.label and _entity.label in [0,1] and _ent.label in [0, 1]:
-                    continue
-                _score = jaccard_score(re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_entity.entity_text), re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_ent.entity_text))
-                if _entity.entity_text!=_ent.entity_text and _score>=on_value:
-                    _entity.linked_entitys.append(_ent)
-                    _ent.linked_entitys.append(_entity)
+        #替换公司的逻辑有问题,先取消
+        # for first_i in range(len(range_entity)):
+        #     _entity = range_entity[first_i]
+        #     for second_i in range(first_i+1,len(range_entity)):
+        #         _ent = range_entity[second_i]
+        #         # 2021/5/21 update: 两个实体标签互斥(一个是招标人、一个是代理人)且entity_text不相等时,跳过
+        #         if _entity.entity_text != _ent.entity_text and _entity.label != _ent.label and _entity.label in [0,1] and _ent.label in [0, 1]:
+        #             continue
+        #         _score = jaccard_score(re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_entity.entity_text), re.sub("%s|%s"%("股份|责任|有限|公司",place_pattern),"",_ent.entity_text))
+        #         if _entity.entity_text!=_ent.entity_text and _score>=on_value:
+        #             _entity.linked_entitys.append(_ent)
+        #             _ent.linked_entitys.append(_entity)
+        #             print("=-===",_entity.entity_text,_ent.entity_text,_score)
         #替换公司名称
         for _entity in range_entity:
             if re.search("公司",_entity.entity_text) is None:
@@ -433,6 +435,6 @@ if __name__=="__main__":
     # print(match_enterprise_max_first(sentences))
     #
     # print("takes %d s"%(time.time()-_time))
-    fix_LEGAL_ENTERPRISE()
-    # print(jaccard_score("中国南方航空股份有限公司上海分公司","南方航空上海分公司"))
+    # fix_LEGAL_ENTERPRISE()
+    print(jaccard_score("吉林省九台","吉林省建苑设计集团有限公司"))
     # print(match_enterprise_max_first("中国南方航空股份有限公司黑龙江分公司"))

+ 9 - 9
BiddingKG/dl_dev/test/test4.py

@@ -53,9 +53,9 @@ def test(name,content,_url=None):
     # _resp = requests.post(list_url[_i], json=user, headers=myheaders, verify=True)
 
     # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
-    _url = "http://192.168.2.102:15030/test"
-    _url = "http://192.168.2.102:15030/industry_extract"
-    _url = "http://192.168.2.102:15030/content_extract"
+    _url = "http://127.0.0.1:15030/content_extract"
+    # _url = "http://192.168.2.102:15030/industry_extract"
+    # _url = "http://192.168.2.102:15030/content_extract"
 
     _resp = session.post(_url, json=user,verify=True,timeout=1000)
     # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
@@ -103,12 +103,12 @@ def run_one():
     # text = '''
     # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
     # '''
-    print("start")
-    _time1 = time.time()
-    print(predict("12", content,"打印机",original_docchannel=52))
-    # test(12,content)
-    # test(12,text)
-    print("takes",time.time()-a)
+    # print("start")
+    # _time1 = time.time()
+    # print(predict("12", content,"打印机",original_docchannel=52))
+    # # test(12,content)
+    # # test(12,text)
+    # print("takes",time.time()-a)
     print("start")
     _time1 = time.time()
     print(predict("12", content,"打印机",original_docchannel=52))

+ 6 - 0
BiddingKG/hello.html

@@ -0,0 +1,6 @@
+<title>Hello from Flask</title>
+{% if name %}
+<h1>Hello {{ name }}!</h1>
+{% else %}
+<h1>Hello World!</h1>
+{% endif %}

+ 5 - 3
BiddingKG/readme/start.md

@@ -8,12 +8,14 @@ cd /data/python
 #关闭接口
 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
 #启动接口
-nohup /data/anaconda3/envs/py37/bin/gunicorn -w 17 --limit-request-fields 0 --limit-request-line 0 -t 1000 --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
+#nohup /data/anaconda3/envs/py37/bin/gunicorn -w 15 --limit-request-fields 0 --limit-request-line 0 -t 1000 --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
+nohup /data/anaconda3/envs/py37/bin/python run_extract_server.py >> extract.log port=15030 worker=14 &
 
 #19022启动要素提取接口
 #切换目录
-cd /data/python
+cd /data/python 
 #关闭接口
 ps -ef | grep run_extract_server | grep -v grep | cut -c 9-16| xargs kill -9
 #启动接口
-nohup /data/anaconda3/envs/py37/bin/gunicorn -w 5 --limit-request-fields 0 --limit-request-line 0 -t 1000  --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
+#nohup /data/anaconda3/envs/py37/bin/gunicorn -w 5 --limit-request-fields 0 --limit-request-line 0 -t 1000  --keep-alive 600 -b 0.0.0.0:15030 run_extract_server:app >> extract.log &
+nohup /data/anaconda3/envs/py37/bin/python run_extract_server.py >> extract.log port=15030 worker=7 &

+ 36 - 6
BiddingKG/run_extract_server.py

@@ -27,8 +27,6 @@ os.environ["NUMEXPR_NUM_THREADS"] = limit_num # export NUMEXPR_NUM_THREADS=1
 
 import time
 import uuid
-from BiddingKG.dl.common.Utils import log
-from BiddingKG.dl.interface.extract import predict
 import numpy as np
 import ctypes
 import inspect
@@ -37,7 +35,7 @@ import traceback
 import json
 
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 sys.path.append(os.path.abspath("."))
 
 
@@ -105,6 +103,9 @@ def run_thread(data,list_result):
 
 @app.route("/test",methods=['POST'])
 def test():
+    from BiddingKG.dl.common.Utils import log
+    from BiddingKG.dl.interface.extract import predict
+    global predict,log
     _time = time.time()
     a = request.form.get("content")
     log("get form takes %.2fs"%(time.time()-_time))
@@ -114,7 +115,9 @@ def test():
 
 @app.route('/content_extract', methods=['POST'])
 def text_predict():
-
+    from BiddingKG.dl.common.Utils import log
+    from BiddingKG.dl.interface.extract import predict
+    global predict,log
     _time = time.time()
     data = request.json
 
@@ -143,6 +146,7 @@ def text_predict():
 
 def getPort(argv):
     port = 15030
+    print(argv)
     for item in argv:
         _l = str(item).split("port=")
         if len(_l)>1:
@@ -150,8 +154,34 @@ def getPort(argv):
             break
     return port
 
-if __name__ == '__main__':
+def getWorkers(argv):
+    worker = 15
+    for item in argv:
+        _l = str(item).split("worker=")
+        if len(_l)>1:
+            worker = int(_l[-1])
+            break
+    return worker
+
+def start_with_tornado(port,process_num):
+    from tornado.wsgi import WSGIContainer
+    from tornado.httpserver import HTTPServer
+    from tornado.ioloop import IOLoop
+
+    http_server = HTTPServer(WSGIContainer(app))
+    # http_server.listen(port) #shortcut for bind and start
+    http_server.bind(port)
+    http_server.start(process_num)
+    IOLoop.instance().start()
+
+def start_with_flask():
     port = getPort(argv=sys.argv)
     app.run(host='0.0.0.0', port=port, threaded=True, debug=False)
     log("ContentExtractor running")
-    # app.run()
+    # app.run()
+
+if __name__ == '__main__':
+    port = getPort(argv=sys.argv)
+    workers = getWorkers(argv=sys.argv)
+    start_with_tornado(port,workers)
+    pass

+ 35 - 0
BiddingKG/test_deployment.py

@@ -0,0 +1,35 @@
+
+
+from flask import Flask,render_template
+from flask import request
+
+app = Flask(__name__)
+app.config['JSON_AS_ASCII'] = False
+
+@app.route("/test")
+def test():
+    data = request.json
+    j = 0
+    for i in range(10000):
+       j += i**2
+
+    return render_template("hello.html")
+
+@app.route("/render")
+def render():
+    return render_template("hello.html")
+
+
+def test_with_tornado():
+    from tornado.httpserver import HTTPServer
+    from tornado.wsgi import WSGIContainer
+    from tornado.ioloop import IOLoop
+
+    httpserver = HTTPServer(WSGIContainer(app))
+    httpserver.bind(15000)
+    httpserver.start(1)
+    IOLoop.instance().start()
+
+if __name__ == '__main__':
+    test_with_tornado()
+