
Fix issues with merge deduplication and cross deduplication

luojiehua committed 3 years ago
commit 8d09b5c09b

+ 11 - 9
.idea/sonarlint/issuestore/index.pb

@@ -15,14 +15,10 @@ S
 #BiddingKG/dl/time/re_servicetime.py,4\4\4454e65be42efdd433a1de3147c6f3cb69cf116b
 O
 BiddingKG/dl/common/nerUtils.py,8\2\82c3c87116c1da9281790ac9c71f57821e9207cf
-I
-BiddingKG/maxcompute/1.py,5\8\58fa6fe30194ad773c47ea70f4e48401242a1a88
 H
 BiddingKG/dl/__init__.py,a\c\ac12bb80834a26e34df8eaf4c762410dfcfc0a27
 U
 %BiddingKG/dl/metrics/extractMetric.py,f\e\fed725bbe7e61499dcc542a2cd6279850a62cb79
-L
-BiddingKG/dl/common/Utils.py,f\4\f4c35e30342829a2fc89108259e28edc0a425cce
 W
 'BiddingKG/maxcompute/article_extract.py,1\d\1d533d48614eebe6b6e03d0bf64b381cdf4beca0
 ]
@@ -33,9 +29,15 @@ P
  BiddingKG/dl/test/t2/__init__.py,6\e\6e2a437853f56392367a0fb812234f339cb553b4
 T
 $BiddingKG/dl/complaint/test/test1.py,2\0\20a445a789f907f8d2f1946e5ee6afd692a84716
-P
- BiddingKG/maxcompute/cycleRec.py,b\d\bdbd92638e7f5983e655c67b07bb464d62021b36
-G
-BiddingKG/dl/test/12.py,5\c\5c99d16b0fcfaac86fa00d720a060d38778939c6
 B
-BiddingKG/setup.py,5\9\5940e92844c5eec502a3109dcee2bbc5880b37a4
+BiddingKG/setup.py,5\9\5940e92844c5eec502a3109dcee2bbc5880b37a4
+Q
+!BiddingKG/dl/interface/Entitys.py,6\3\6394a73f962d314de6209d5bb823941b56eda9d7
+L
+BiddingKG/maxcompute/test.py,d\e\de565067c7b40720cc108eec25c6762d518e57df
+Q
+!BiddingKG/dl/form/generateData.py,7\e\7e590aa47c1871cc7d75ac844d5769bff50a6e70
+U
+%BiddingKG/maxcompute/attachmentRec.py,b\e\be8f50b8961bc8ae61e105517763f21c707ea3ec
+W
+'BiddingKG/dl/interface/getAttributes.py,9\7\97778d596d5a6e43488b0f3c116ada7183116989

+ 8 - 8
BiddingKG/dl/interface/extract.py

@@ -172,14 +172,14 @@ def predict(doc_id,text,title="",page_time="",**kwargs):
     data_res["unit_money"] = unit_money_list
     data_res["ratio"] = ratio_list
 
-    # for _article in list_articles:
-    #     log(_article.content)
-    #
-    # for list_entity in list_entitys:
-    #     for _entity in list_entity:
-    #         log("type:%s,text:%s,label:%s,values:%s,sentence:%s,begin_index:%s,end_index:%s"%
-    #               (str(_entity.entity_type),str(_entity.entity_text),str(_entity.label),str(_entity.values),str(_entity.sentence_index),
-    #                str(_entity.begin_index),str(_entity.end_index)))
+    for _article in list_articles:
+        log(_article.content)
+
+    for list_entity in list_entitys:
+        for _entity in list_entity:
+            log("type:%s,text:%s,label:%s,values:%s,sentence:%s,begin_index:%s,end_index:%s"%
+                  (str(_entity.entity_type),str(_entity.entity_text),str(_entity.label),str(_entity.values),str(_entity.sentence_index),
+                   str(_entity.begin_index),str(_entity.end_index)))
 
     return json.dumps(data_res,cls=MyEncoder,sort_keys=True,indent=4,ensure_ascii=False)
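
The change above re-enables the per-article and per-entity debug logging. For reference, a minimal sketch of what each re-enabled log line carries, using a stand-in Entity class rather than BiddingKG's dl.interface.Entitys (only the field names are taken from the logging call in the diff):

# Sketch only: a stand-in for the entity objects the logging loop iterates.
from dataclasses import dataclass

@dataclass
class Entity:
    entity_type: str
    entity_text: str
    label: int
    values: list
    sentence_index: int
    begin_index: int
    end_index: int

e = Entity("org", "example org", 1, [0.98], 0, 3, 12)  # illustrative values
print("type:%s,text:%s,label:%s,values:%s,sentence:%s,begin_index:%s,end_index:%s" % (
    e.entity_type, e.entity_text, e.label, e.values,
    e.sentence_index, e.begin_index, e.end_index))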
 

+ 7 - 3
BiddingKG/dl/test/test4.py

@@ -28,8 +28,9 @@ def test(name,content):
             "timeout":60
             }
     myheaders = {'Content-Type': 'application/json',"Authorization":"NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
-    _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
-    # _url = "http://192.168.2.102:15030/article_extract"
+
+    # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
+    _url = "http://192.168.2.102:15030/article_extract"
     _resp = requests.post(_url, json=user, headers=myheaders, verify=True)
     # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
     resp_json = _resp.content.decode("utf-8")
@@ -70,7 +71,10 @@ if __name__=="__main__":
     # 广州比地数据科技有限公司翻译服务工程招标
     # '''
     # print(predict("12",content,title="关于人防工程技术咨询服务项目【重新招标】单一来源谈判的通知"))
+
+    # print(predict("12", content,"打印机"))
+    a = time.time()
     print(predict("12", content,"打印机"))
     # test(12,content)
-    print("takes",time.time()-_time1)
+    print("takes",time.time()-a)
     pass
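
The test now targets the local service instead of the PAI-EAS endpoint and times a single call. A minimal sketch of the same timed-request pattern against the local endpoint; the doc_id/content field names in the payload are assumptions, since only "timeout" and the headers appear in the diff:

# Sketch of the timed request pattern in this test; payload field names assumed.
import time
import requests

url = "http://192.168.2.102:15030/article_extract"
headers = {"Content-Type": "application/json",
           "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
payload = {"doc_id": "12", "content": "...", "timeout": 60}  # field names assumed

start = time.time()
resp = requests.post(url, json=payload, headers=headers, verify=True)
print("takes", time.time() - start)
print(resp.content.decode("utf-8"))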

+ 1 - 0
BiddingKG/maxcompute/documentDumplicate.py

@@ -650,6 +650,7 @@ class decare_document(BaseUDTF):
                             self.forward(_doc1["id"],_doc2["id"],json.dumps(new_json_set_docid))
 
 def getBestDocid(list_pair):
+    # [docid1,extract_count1,docid2,extract_count2]
     # list_pair.sort(key=lambda x:x[3],reverse=True)
     # _max_count = max(list_pair[0][3],list_pair[0][1])
     # set_candidate = set()
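
The new comment documents the pair layout [docid1, extract_count1, docid2, extract_count2]. A minimal sketch of a selection rule consistent with that layout, assuming the docid whose side carries the larger extract_count should win and that ties go to the smaller docid; this is an illustration, not the repository's implementation:

# Sketch only: pick the docid with the highest extract_count across all pairs,
# breaking ties toward the smaller (older) docid. Pair layout follows the
# comment added above: [docid1, extract_count1, docid2, extract_count2].
def get_best_docid_sketch(list_pair):
    counts = {}
    for docid1, count1, docid2, count2 in list_pair:
        counts[docid1] = max(counts.get(docid1, 0), count1)
        counts[docid2] = max(counts.get(docid2, 0), count2)
    # highest extract_count wins; ties go to the smallest docid (assumed rule)
    return max(counts, key=lambda d: (counts[d], -d))

print(get_best_docid_sketch([[101, 5, 202, 7], [202, 7, 303, 7]]))  # 202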

+ 1 - 1
BiddingKG/maxcompute/documentMerge.py

@@ -545,7 +545,7 @@ class f_remege_limit_num_contain_bychannel(BaseUDAF):
             print(new_dict_channel_id)
             channel_dict = {}
             for k,v in new_dict_channel_id.items():
-                v.sort(key=lambda x:x["page_time_stamp"])
+                v.sort(key=lambda x:x["docid"])
                 v.sort(key=lambda x:x["extract_count"],reverse=True)
                 channel_dict[v[0]["docid"]] = []
                 for _docs in v[1:]:
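
The change swaps the tie-breaker in a two-pass stable sort: Python's sort is stable, so sorting by the secondary key first and then by extract_count descending leaves v[0] as the document with the highest extract_count, with the smallest docid (rather than the earliest page_time_stamp) breaking ties. A minimal sketch of the effect:

# Sketch of the two-pass stable sort used above: after the change, ties on
# extract_count are broken by the smallest docid, and docs[0] is canonical.
docs = [
    {"docid": 300, "extract_count": 4},
    {"docid": 100, "extract_count": 4},
    {"docid": 200, "extract_count": 2},
]
docs.sort(key=lambda x: x["docid"])                        # secondary key first
docs.sort(key=lambda x: x["extract_count"], reverse=True)  # stable primary sort
print([d["docid"] for d in docs])  # [100, 300, 200]; docs[0] is canonical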