|
@@ -2430,12 +2430,12 @@ class Dataflow_dumplicate(Dataflow):
|
|
set_docid.add(_docid)
|
|
set_docid.add(_docid)
|
|
set_docid.add(_docid)
|
|
set_docid.add(_docid)
|
|
|
|
|
|
- def appendRule(self,list_rules,_dict,base_dict,must_not_dict,confidence,item,to_log=True):
|
|
|
|
|
|
+ def appendRule(self,list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=False):
|
|
for k,v in _dict.items():
|
|
for k,v in _dict.items():
|
|
if getLength(v)==0:
|
|
if getLength(v)==0:
|
|
return
|
|
return
|
|
_dict.update(base_dict)
|
|
_dict.update(base_dict)
|
|
- if to_log:
|
|
|
|
|
|
+ if b_log:
|
|
log(str(_dict))
|
|
log(str(_dict))
|
|
_query = self.generate_dumplicate_query(_dict,must_not_dict)
|
|
_query = self.generate_dumplicate_query(_dict,must_not_dict)
|
|
_rule = {"confidence":confidence,
|
|
_rule = {"confidence":confidence,
|
|
@@ -3108,7 +3108,6 @@ class Dataflow_dumplicate(Dataflow):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
- print("choose_dict",choose_dict)
|
|
|
|
for _key,_value in choose_dict.items():
|
|
for _key,_value in choose_dict.items():
|
|
_l = []
|
|
_l = []
|
|
for k,v in _value.items():
|
|
for k,v in _value.items():
|
|
@@ -3159,7 +3158,6 @@ class Dataflow_dumplicate(Dataflow):
|
|
if p_page_time=="":
|
|
if p_page_time=="":
|
|
p_page_time = page_time
|
|
p_page_time = page_time
|
|
|
|
|
|
- print("docid %s page_time:%s docchannel %s"%(str(_docid),str(page_time),str(_docchannel)))
|
|
|
|
if zhao_biao_page_time=="" and _docchannel in (51,52,102,103,114):
|
|
if zhao_biao_page_time=="" and _docchannel in (51,52,102,103,114):
|
|
zhao_biao_page_time = page_time
|
|
zhao_biao_page_time = page_time
|
|
if zhong_biao_page_time=="" and _docchannel in (101,118,119,120):
|
|
if zhong_biao_page_time=="" and _docchannel in (101,118,119,120):
|
|
@@ -3706,7 +3704,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
self.dumplicate_document_in_merge(list_projects)
|
|
self.dumplicate_document_in_merge(list_projects)
|
|
|
|
|
|
project_json = self.to_project_json(list_projects)
|
|
project_json = self.to_project_json(list_projects)
|
|
- print("project_json",project_json)
|
|
|
|
|
|
+ # print("project_json",project_json)
|
|
return project_json
|
|
return project_json
|
|
|
|
|
|
def dumplicate_comsumer_handle(self,item,result_queue,ots_client,get_all=False,upgrade=True):
|
|
def dumplicate_comsumer_handle(self,item,result_queue,ots_client,get_all=False,upgrade=True):
|