|
@@ -409,8 +409,8 @@ class Dataflow():
|
|
extract_count += 1
|
|
extract_count += 1
|
|
if sub_docs_json is not None:
|
|
if sub_docs_json is not None:
|
|
sub_docs = json.loads(sub_docs_json)
|
|
sub_docs = json.loads(sub_docs_json)
|
|
- sub_docs.sort(key=lambda x:x.get("bidding_budget",0),reverse=True)
|
|
|
|
- sub_docs.sort(key=lambda x:x.get("win_bid_price",0),reverse=True)
|
|
|
|
|
|
+ sub_docs.sort(key=lambda x:float(x.get("bidding_budget",0)),reverse=True)
|
|
|
|
+ sub_docs.sort(key=lambda x:float(x.get("win_bid_price",0)),reverse=True)
|
|
# log("==%s"%(str(sub_docs)))
|
|
# log("==%s"%(str(sub_docs)))
|
|
for sub_docs in sub_docs:
|
|
for sub_docs in sub_docs:
|
|
for _key_sub_docs in sub_docs.keys():
|
|
for _key_sub_docs in sub_docs.keys():
|
|
@@ -2544,6 +2544,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
for _dict in list_dict:
|
|
for _dict in list_dict:
|
|
_docid = _dict.get(document_tmp_docid)
|
|
_docid = _dict.get(document_tmp_docid)
|
|
confidence = _dict["confidence"]
|
|
confidence = _dict["confidence"]
|
|
|
|
+ print("confidence",_docid,confidence)
|
|
if confidence>0.1:
|
|
if confidence>0.1:
|
|
if _docid not in set_docid:
|
|
if _docid not in set_docid:
|
|
base_list.append(_dict)
|
|
base_list.append(_dict)
|
|
@@ -2576,7 +2577,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
table_name = "document_tmp"
|
|
table_name = "document_tmp"
|
|
table_index = "document_tmp_index"
|
|
table_index = "document_tmp_index"
|
|
base_dict = {
|
|
base_dict = {
|
|
- "docchannel":item["docchannel"],
|
|
|
|
|
|
+ "docchannel":item.get("docchannel",52),
|
|
"status":[status_from[0]],
|
|
"status":[status_from[0]],
|
|
"page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]
|
|
"page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]
|
|
}
|
|
}
|
|
@@ -2603,186 +2604,186 @@ class Dataflow_dumplicate(Dataflow):
|
|
singleNum_keys = ["tenderee","win_tenderer"]
|
|
singleNum_keys = ["tenderee","win_tenderer"]
|
|
|
|
|
|
confidence = 100
|
|
confidence = 100
|
|
- self.appendRule(list_rules,{document_tmp_fingerprint:fingerprint},base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,{document_tmp_fingerprint:fingerprint},base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
confidence = 90
|
|
confidence = 90
|
|
_dict = {document_tmp_agency:agency,
|
|
_dict = {document_tmp_agency:agency,
|
|
"win_tenderer":win_tenderer,
|
|
"win_tenderer":win_tenderer,
|
|
"win_bid_price":win_bid_price}
|
|
"win_bid_price":win_bid_price}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {document_tmp_agency:agency,
|
|
_dict = {document_tmp_agency:agency,
|
|
"win_tenderer":win_tenderer,
|
|
"win_tenderer":win_tenderer,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {document_tmp_agency:agency,
|
|
_dict = {document_tmp_agency:agency,
|
|
"win_bid_price":win_bid_price,
|
|
"win_bid_price":win_bid_price,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {win_tenderer:win_tenderer,
|
|
_dict = {win_tenderer:win_tenderer,
|
|
"win_bid_price":win_bid_price,
|
|
"win_bid_price":win_bid_price,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"win_tenderer":win_tenderer,
|
|
"win_tenderer":win_tenderer,
|
|
"win_bid_price":win_bid_price}
|
|
"win_bid_price":win_bid_price}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"win_tenderer":win_tenderer,
|
|
"win_tenderer":win_tenderer,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"win_bid_price":win_bid_price,
|
|
"win_bid_price":win_bid_price,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"agency":agency,
|
|
"agency":agency,
|
|
"win_tenderer":win_tenderer}
|
|
"win_tenderer":win_tenderer}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"agency":agency,
|
|
"agency":agency,
|
|
"win_bid_price":win_bid_price}
|
|
"win_bid_price":win_bid_price}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"agency":agency,
|
|
"agency":agency,
|
|
"bidding_budget":bidding_budget}
|
|
"bidding_budget":bidding_budget}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
confidence=85
|
|
confidence=85
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"agency":agency
|
|
"agency":agency
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"project_codes":project_code
|
|
"project_codes":project_code
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"project_name":project_name
|
|
"project_name":project_name
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
if getLength(product)>0:
|
|
if getLength(product)>0:
|
|
l_p = product.split(",")
|
|
l_p = product.split(",")
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"product":l_p[0]
|
|
"product":l_p[0]
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"win_tenderer":win_tenderer
|
|
"win_tenderer":win_tenderer
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"win_bid_price":win_bid_price
|
|
"win_bid_price":win_bid_price
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
"bidding_budget":bidding_budget
|
|
"bidding_budget":bidding_budget
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"tenderee":tenderee,
|
|
_dict = {"tenderee":tenderee,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"agency":agency,
|
|
_dict = {"agency":agency,
|
|
"project_codes":project_code
|
|
"project_codes":project_code
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"agency":agency,
|
|
_dict = {"agency":agency,
|
|
"project_name":project_name
|
|
"project_name":project_name
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_codes":project_code,
|
|
_dict = {"project_codes":project_code,
|
|
"project_name":project_name
|
|
"project_name":project_name
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_codes":project_code,
|
|
_dict = {"project_codes":project_code,
|
|
"win_tenderer":win_tenderer
|
|
"win_tenderer":win_tenderer
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_codes":project_code,
|
|
_dict = {"project_codes":project_code,
|
|
"win_bid_price":win_bid_price
|
|
"win_bid_price":win_bid_price
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_codes":project_code,
|
|
_dict = {"project_codes":project_code,
|
|
"bidding_budget":bidding_budget
|
|
"bidding_budget":bidding_budget
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_codes":project_code,
|
|
_dict = {"project_codes":project_code,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_name":project_name,
|
|
_dict = {"project_name":project_name,
|
|
"win_tenderer":win_tenderer
|
|
"win_tenderer":win_tenderer
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_name":project_name,
|
|
_dict = {"project_name":project_name,
|
|
"win_bid_price":win_bid_price
|
|
"win_bid_price":win_bid_price
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_name":project_name,
|
|
_dict = {"project_name":project_name,
|
|
"bidding_budget":bidding_budget
|
|
"bidding_budget":bidding_budget
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"project_name":project_name,
|
|
_dict = {"project_name":project_name,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
"win_bid_price":win_bid_price
|
|
"win_bid_price":win_bid_price
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
"bidding_budget":bidding_budget
|
|
"bidding_budget":bidding_budget
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
_dict = {"win_tenderer":win_tenderer,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"win_bid_price":win_bid_price,
|
|
_dict = {"win_bid_price":win_bid_price,
|
|
"bidding_budget":bidding_budget
|
|
"bidding_budget":bidding_budget
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"win_bid_price":win_bid_price,
|
|
_dict = {"win_bid_price":win_bid_price,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
_dict = {"bidding_budget":bidding_budget,
|
|
_dict = {"bidding_budget":bidding_budget,
|
|
doctitle_refine_name:doctitle_refine
|
|
doctitle_refine_name:doctitle_refine
|
|
}
|
|
}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
confidence=80
|
|
confidence=80
|
|
_dict = {doctitle_refine_name:doctitle_refine}
|
|
_dict = {doctitle_refine_name:doctitle_refine}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
_dict = {"project_codes":project_code}
|
|
_dict = {"project_codes":project_code}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
confidence=70
|
|
confidence=70
|
|
_dict = {"project_name":project_name}
|
|
_dict = {"project_name":project_name}
|
|
- self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
|
|
|
|
|
|
+ self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
|
|
|
|
|
|
return list_rules,table_name,table_index
|
|
return list_rules,table_name,table_index
|
|
|
|
|
|
@@ -2933,11 +2934,20 @@ class Dataflow_dumplicate(Dataflow):
|
|
|
|
|
|
_dict = {}
|
|
_dict = {}
|
|
#更新公共属性
|
|
#更新公共属性
|
|
|
|
+
|
|
|
|
+ _replace_replace = False
|
|
|
|
+ v = project_dict.get(document_district,"")
|
|
|
|
+ if not (v is None or v=="" or v=="[]" or v=="未知"):
|
|
|
|
+ _replace_replace = True
|
|
for k,v in project_dict.items():
|
|
for k,v in project_dict.items():
|
|
|
|
+ if not _replace_replace:
|
|
|
|
+ if k in [document_district,document_city,document_province,document_area]:
|
|
|
|
+ continue
|
|
if v is None or v=="" or v=="[]" or v=="未知":
|
|
if v is None or v=="" or v=="[]" or v=="未知":
|
|
continue
|
|
continue
|
|
if k in (project_project_dynamics,project_product,project_project_codes,project_docids):
|
|
if k in (project_project_dynamics,project_product,project_project_codes,project_docids):
|
|
continue
|
|
continue
|
|
|
|
+ _dict[k] = v
|
|
for _proj in projects:
|
|
for _proj in projects:
|
|
_proj.update(_dict)
|
|
_proj.update(_dict)
|
|
for _proj in projects:
|
|
for _proj in projects:
|
|
@@ -3505,12 +3515,13 @@ class Dataflow_dumplicate(Dataflow):
|
|
TermQuery(project_agency,agency)]
|
|
TermQuery(project_agency,agency)]
|
|
list_query.append([_query,1])
|
|
list_query.append([_query,1])
|
|
|
|
|
|
- if tenderee!="" and bidding_budget>0:
|
|
|
|
|
|
+ print(bidding_budget,"bidding_budget",type(bidding_budget))
|
|
|
|
+ if tenderee!="" and float(bidding_budget)>0:
|
|
_query = [TermQuery(project_tenderee,tenderee),
|
|
_query = [TermQuery(project_tenderee,tenderee),
|
|
TermQuery(project_bidding_budget,bidding_budget)]
|
|
TermQuery(project_bidding_budget,bidding_budget)]
|
|
list_query.append([_query,2])
|
|
list_query.append([_query,2])
|
|
|
|
|
|
- if bidding_budget>0 and win_bid_price>0:
|
|
|
|
|
|
+ if float(bidding_budget)>0 and float(win_bid_price)>0:
|
|
_query = [TermQuery(project_bidding_budget,bidding_budget),
|
|
_query = [TermQuery(project_bidding_budget,bidding_budget),
|
|
TermQuery(project_win_bid_price,win_bid_price)]
|
|
TermQuery(project_win_bid_price,win_bid_price)]
|
|
list_query.append([_query,2])
|
|
list_query.append([_query,2])
|
|
@@ -3540,12 +3551,12 @@ class Dataflow_dumplicate(Dataflow):
|
|
should_q_cod]
|
|
should_q_cod]
|
|
list_query.append([_query,2])
|
|
list_query.append([_query,2])
|
|
|
|
|
|
- if win_tenderer!="" and win_bid_price>0:
|
|
|
|
|
|
+ if win_tenderer!="" and float(win_bid_price)>0:
|
|
_query = [TermQuery(project_win_tenderer,win_tenderer),
|
|
_query = [TermQuery(project_win_tenderer,win_tenderer),
|
|
TermQuery(project_win_bid_price,win_bid_price)]
|
|
TermQuery(project_win_bid_price,win_bid_price)]
|
|
list_query.append([_query,2])
|
|
list_query.append([_query,2])
|
|
|
|
|
|
- if win_tenderer!="" and bidding_budget>0:
|
|
|
|
|
|
+ if win_tenderer!="" and float(bidding_budget)>0:
|
|
_query = [TermQuery(project_win_tenderer,win_tenderer),
|
|
_query = [TermQuery(project_win_tenderer,win_tenderer),
|
|
TermQuery(project_bidding_budget,bidding_budget)]
|
|
TermQuery(project_bidding_budget,bidding_budget)]
|
|
list_query.append([_query,2])
|
|
list_query.append([_query,2])
|
|
@@ -3824,8 +3835,8 @@ class Dataflow_dumplicate(Dataflow):
|
|
base_list = []
|
|
base_list = []
|
|
set_docid = set()
|
|
set_docid = set()
|
|
|
|
|
|
- list_rules,table_name,table_index = self.translate_dumplicate_rules(flow_dumplicate_status_from,item,get_all=get_all,to_log=True)
|
|
|
|
-
|
|
|
|
|
|
+ list_rules,table_name,table_index = self.translate_dumplicate_rules(flow_dumplicate_status_from,item,get_all=get_all,to_log=False)
|
|
|
|
+ print("len_rules",len(list_rules),table_name,table_index)
|
|
list_rules.sort(key=lambda x:x["confidence"],reverse=True)
|
|
list_rules.sort(key=lambda x:x["confidence"],reverse=True)
|
|
_i = 0
|
|
_i = 0
|
|
step = 5
|
|
step = 5
|
|
@@ -4121,7 +4132,7 @@ if __name__ == '__main__':
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
# df_dump.start_flow_dumplicate()
|
|
# df_dump.start_flow_dumplicate()
|
|
a = time.time()
|
|
a = time.time()
|
|
- df_dump.test_dumplicate(237450072)
|
|
|
|
|
|
+ df_dump.test_dumplicate(316399675)
|
|
# df_dump.test_merge([292315564],[287890754])
|
|
# df_dump.test_merge([292315564],[287890754])
|
|
# df_dump.flow_remove_project_tmp()
|
|
# df_dump.flow_remove_project_tmp()
|
|
print("takes",time.time()-a)
|
|
print("takes",time.time()-a)
|