|
@@ -448,6 +448,7 @@ class Dataflow():
|
|
project_name = _dict.get(document_tmp_project_name,"")
|
|
project_name = _dict.get(document_tmp_project_name,"")
|
|
tenderee = _dict.get(document_tmp_tenderee,"")
|
|
tenderee = _dict.get(document_tmp_tenderee,"")
|
|
agency = _dict.get(document_tmp_agency,"")
|
|
agency = _dict.get(document_tmp_agency,"")
|
|
|
|
+ doctitle = _dict.get(document_tmp_doctitle,"")
|
|
doctitle_refine = _dict.get(document_tmp_doctitle_refine,"")
|
|
doctitle_refine = _dict.get(document_tmp_doctitle_refine,"")
|
|
win_tenderer = _dict.get("win_tenderer","")
|
|
win_tenderer = _dict.get("win_tenderer","")
|
|
bidding_budget = _dict.get("bidding_budget","")
|
|
bidding_budget = _dict.get("bidding_budget","")
|
|
@@ -459,7 +460,7 @@ class Dataflow():
|
|
page_time = _dict.get(document_tmp_page_time,"")
|
|
page_time = _dict.get(document_tmp_page_time,"")
|
|
fingerprint = _dict.get(document_tmp_fingerprint,"")
|
|
fingerprint = _dict.get(document_tmp_fingerprint,"")
|
|
product = _dict.get(document_tmp_product,"")
|
|
product = _dict.get(document_tmp_product,"")
|
|
- return docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product
|
|
|
|
|
|
+ return docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product
|
|
|
|
|
|
def f_set_docid_limitNum_contain(self,item, _split,singleNum_keys=["tenderee","win_tenderer"],contain_keys=[],multiNum_keys=[],notlike_keys=["project_code"]):
|
|
def f_set_docid_limitNum_contain(self,item, _split,singleNum_keys=["tenderee","win_tenderer"],contain_keys=[],multiNum_keys=[],notlike_keys=["project_code"]):
|
|
flag = True
|
|
flag = True
|
|
@@ -550,7 +551,7 @@ class Dataflow():
|
|
set_docid.add(_docid)
|
|
set_docid.add(_docid)
|
|
|
|
|
|
def translate_dumplicate_rules(self,status_from,item):
|
|
def translate_dumplicate_rules(self,status_from,item):
|
|
- docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
|
|
|
|
+ docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
if page_time=='':
|
|
if page_time=='':
|
|
page_time = getCurrent_date("%Y-%m-%d")
|
|
page_time = getCurrent_date("%Y-%m-%d")
|
|
base_dict = {
|
|
base_dict = {
|
|
@@ -1463,7 +1464,7 @@ class Dataflow():
|
|
|
|
|
|
def merge_document(self,item,status_to=None):
|
|
def merge_document(self,item,status_to=None):
|
|
self.post_extract(item)
|
|
self.post_extract(item)
|
|
- docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
|
|
|
|
+ docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
|
|
|
|
|
|
|
|
_d = {"partitionkey":item["partitionkey"],
|
|
_d = {"partitionkey":item["partitionkey"],
|
|
@@ -2274,6 +2275,9 @@ class Dataflow_dumplicate(Dataflow):
|
|
_dict["dict_time"] = self.get_dict_time(_extract)
|
|
_dict["dict_time"] = self.get_dict_time(_extract)
|
|
_dict["punish"] = _extract.get("punish",{})
|
|
_dict["punish"] = _extract.get("punish",{})
|
|
_dict["approval"] = _extract.get("approval",[])
|
|
_dict["approval"] = _extract.get("approval",[])
|
|
|
|
+ _dict["products_original"] = _extract.get("product_attrs_original", {}).get("data",[])
|
|
|
|
+ _dict["products"] = _dict.get("products") if _dict.get("products") is not None else []
|
|
|
|
+ _dict["products"] = _dict["products"] if isinstance(_dict["products"], list) else json.loads(_dict["products"])
|
|
|
|
|
|
# Special-purpose bond (专项债) fields
|
|
# Special-purpose bond (专项债) fields
|
|
issue_details = _extract.get("debt_dic",{}).get("issue_details",[])
|
|
issue_details = _extract.get("debt_dic",{}).get("issue_details",[])
|
|
@@ -2680,7 +2684,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
list_rules.append(_rule)
|
|
list_rules.append(_rule)
|
|
|
|
|
|
def translate_dumplicate_rules(self,status_from,item,get_all=False,to_log=False,day_dis=7,table_name ="document_tmp",table_index="document_tmp_index"):
|
|
def translate_dumplicate_rules(self,status_from,item,get_all=False,to_log=False,day_dis=7,table_name ="document_tmp",table_index="document_tmp_index"):
|
|
- docchannel,project_code,project_name,tenderee,agency,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
|
|
|
|
+ docchannel,project_code,project_name,tenderee,agency,doctitle,doctitle_refine,win_tenderer,bidding_budget,win_bid_price,page_time,fingerprint,product = self.get_dump_columns(item)
|
|
current_date = getCurrent_date("%Y-%m-%d")
|
|
current_date = getCurrent_date("%Y-%m-%d")
|
|
if page_time=='':
|
|
if page_time=='':
|
|
page_time = current_date
|
|
page_time = current_date
|
|
@@ -2715,6 +2719,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
}
|
|
}
|
|
must_not_dict = {"docid":item.get("docid")}
|
|
must_not_dict = {"docid":item.get("docid")}
|
|
doctitle_refine_name = "doctitle"
|
|
doctitle_refine_name = "doctitle"
|
|
|
|
+ doctitle_refine = doctitle
|
|
else:
|
|
else:
|
|
_status = [201,300]
|
|
_status = [201,300]
|
|
base_dict = {
|
|
base_dict = {
|
|
@@ -2724,6 +2729,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
}
|
|
}
|
|
must_not_dict = {"docid":item.get("docid")}
|
|
must_not_dict = {"docid":item.get("docid")}
|
|
doctitle_refine_name = "doctitle"
|
|
doctitle_refine_name = "doctitle"
|
|
|
|
+ doctitle_refine = doctitle
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -5256,6 +5262,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
if item:
|
|
if item:
|
|
log("start dumplicate_comsumer_handle")
|
|
log("start dumplicate_comsumer_handle")
|
|
self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=False,upgrade=False)
|
|
self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=False,upgrade=False)
|
|
|
|
+ # self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=False)
|
|
return
|
|
return
|
|
|
|
|
|
def test_merge(self,list_docid_less,list_docid_greater):
|
|
def test_merge(self,list_docid_less,list_docid_greater):
|