Explorar o código

extract_count增加联系人

luojiehua hai 1 ano
pai
achega
2ca8159c18

+ 8 - 0
BiddingKG/dl/interface/extract.py

@@ -61,6 +61,7 @@ def extractCount(extract_dict):
     bidding_budget = ""
     win_tenderer = ""
     win_bid_price = ""
+    linklist_count = 0
     for _key in dict_pack.keys():
         if "tendereeMoney" in dict_pack[_key] and dict_pack[_key]["tendereeMoney"]!='' and float(dict_pack[_key]["tendereeMoney"])>0:
             extract_count += 1
@@ -99,6 +100,13 @@ def extractCount(extract_dict):
                                 win_bid_price = str(float(_role["role_money"]["money"]))
                 if _role["role_name"]=="agency":
                     agency = _role["role_text"]
+                linklist = _role.get("linklist",[])
+                for link in linklist:
+                    for l in link:
+                        if l!="":
+                            linklist_count += 1
+
+    extract_count += linklist_count//2
 
     if project_code!="":
         extract_count += 1

+ 10 - 9
BiddingKG/dl/metrics/extractMetric.py

@@ -257,7 +257,8 @@ class ExtractMetric():
         print(metrics)
 
     def extractFromInterface(self,content):
-        return json.loads(test("",content))
+        _json = test("",content)
+        return json.loads(_json)
 
     def getDiff(self,_inter,_inter2):
         _dict = {}
@@ -310,18 +311,18 @@ class ExtractMetric():
             if float(v.get("tendereeMoney",0))>0:
                 dict_project["%s_inter2"%("tendereeMoney")] = [float(v.get("tendereeMoney"))]
             for _role in v.get("roleList",[]):
-                dict_project["%s_inter2"%_role[0]] = [_role[1]]
-                if _role[0] in ["win_tenderer","second_tenderer","third_tenderer"]:
-                    if float(_role[2])>0:
-                        dict_project["%s_money_inter2"%_role[0]] = [float(_role[2])]
-                for item in _role[3]:
+                dict_project["%s_inter2"%_role.get("role_type")] = [_role.get("role_text")]
+                if _role.get("role_type") in ["win_tenderer","second_tenderer","third_tenderer"]:
+                    if float(_role.get("role_money").get("money",0))>0:
+                        dict_project["%s_money_inter2"%_role.get("role_type")] = [float(_role.get("role_money").get("money",0))]
+                for item in _role.get("linklist"):
                     _person = item[0]
                     _phone = item[1]
                     if _person=="" or _phone=="":
                         continue
-                    if "%s_person_inter2"%_role[0] not in dict_project:
-                        dict_project["%s_person_inter2"%_role[0]] = []
-                    dict_project["%s_person_inter2"%_role[0]].append("%s-%s"%(_role[1],_person))
+                    if "%s_person_inter2"%_role.get("role_type") not in dict_project:
+                        dict_project["%s_person_inter2"%_role.get("role_type")] = []
+                    dict_project["%s_person_inter2"%_role.get("role_type")].append("%s-%s"%(_role.get("role_text"),_person))
                     if "person_phone_inter2" not in dict_project:
                         dict_project["person_phone_inter2"] = []
                     dict_project["person_phone_inter2"].append("%s-%s"%(_person,_phone))

+ 3 - 0
BiddingKG/dl/test/compare1.txt

@@ -0,0 +1,3 @@
+
+
+import json

+ 16 - 3
BiddingKG/dl_dev/test/test4.py

@@ -22,6 +22,9 @@ logging.basicConfig(level = logging.INFO,format = '%(asctime)s - %(name)s - %(le
 import json
 import random
 
+from ipywidgets import Layout
+
+
 session = requests.Session()
 
 def test(name,content,_url=None):
@@ -99,13 +102,13 @@ def run_one():
     text = codecs.open("C:\\Users\\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
     # text = codecs.open("2.html","r",encoding="utf8").read()
     content = str(BeautifulSoup(text).find("div",id="pcontent"))
+    # content = "招标人:广州比地数据科技有限公司"
     a = time.time()
     # text = '''
     # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
     # '''
-    print("start")
     _time1 = time.time()
-    print(predict("12", text,""))
+    print(predict("12", content+"抚顺经济开发区拉古经济区卫生院(抚顺经济开发区拉古经济区预防保健所)","打印机"))
     # test(12,content)
     # test(12,text)
     print("takes",time.time()-a)
@@ -114,6 +117,16 @@ def run_one():
     # print("takes", time.time() - a)
     pass
 
+def test_ner():
+    import fool
+    _text = '''
+    一、 *采购人名称:中共黄山市黄山区委统一战线工作部
+
+二、 *履约供应商名称:黄山区睿智办公设备销售中心
+    '''
+    print(fool.ner(_text))
+
 if __name__=="__main__":
     # presure_test()
-    run_one()
+    # run_one()
+    test_ner()

+ 1 - 0
BiddingKG/run_extract_server.py

@@ -170,6 +170,7 @@ def start_with_tornado(port,process_num):
     from tornado.httpserver import HTTPServer
     from tornado.ioloop import IOLoop
 
+    print("import ")
     http_server = HTTPServer(WSGIContainer(app))
     # http_server.listen(port) #shortcut for bind and start
     http_server.bind(port)