|
@@ -363,9 +363,12 @@ class Dataflow():
|
|
|
list_must_queries.append(BoolQuery(should_queries=l_s))
|
|
|
elif k in set_nested:
|
|
|
_v = v
|
|
|
- if k!="" and k=="bidding_budget" or k=="win_bid_price":
|
|
|
- _v = float(_v)
|
|
|
- list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
+ if k!="":
|
|
|
+ if k=="bidding_budget" or k=="win_bid_price":
|
|
|
+ _v = float(_v)
|
|
|
+ list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
+ else:
|
|
|
+ list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
elif k in set_term:
|
|
|
list_must_queries.append(TermQuery(k,v))
|
|
|
elif k in set_phrase:
|
|
@@ -384,9 +387,12 @@ class Dataflow():
|
|
|
list_must_no_queries.append(BoolQuery(should_queries=l_s))
|
|
|
elif k in set_nested:
|
|
|
_v = v
|
|
|
- if k!="" and k=="bidding_budget" or k=="win_bid_price":
|
|
|
- _v = float(_v)
|
|
|
- list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
+ if k!="":
|
|
|
+ if k=="bidding_budget" or k=="win_bid_price":
|
|
|
+ _v = float(_v)
|
|
|
+ list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
+ else:
|
|
|
+ list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
|
|
|
elif k in set_term:
|
|
|
list_must_no_queries.append(TermQuery(k,v))
|
|
|
elif k in set_range:
|
|
@@ -2601,7 +2607,8 @@ class Dataflow_dumplicate(Dataflow):
|
|
|
"query":_query,
|
|
|
"singleNum_keys":[],
|
|
|
"contain_keys":[],
|
|
|
- "multiNum_keys":[]}
|
|
|
+ "multiNum_keys":[],
|
|
|
+ "_dict":_dict}
|
|
|
list_rules.append(_rule)
|
|
|
|
|
|
def translate_dumplicate_rules(self,status_from,item,get_all=False,to_log=False,day_dis=7):
|
|
@@ -3990,6 +3997,7 @@ class Dataflow_dumplicate(Dataflow):
|
|
|
must_not_q = [TermQuery("docid",a) for a in list(set_docid)[-100:]]
|
|
|
_query = BoolQuery(should_queries=[_rule["query"] for _rule in list_rules[_i:_i+step]],
|
|
|
must_not_queries=must_not_q)
|
|
|
+
|
|
|
_rule = list_rules[_i]
|
|
|
confidence = _rule["confidence"]
|
|
|
singleNum_keys = _rule["singleNum_keys"]
|
|
@@ -4291,7 +4299,7 @@ if __name__ == '__main__':
|
|
|
df_dump = Dataflow_dumplicate(start_delete_listener=False)
|
|
|
# df_dump.start_flow_dumplicate()
|
|
|
a = time.time()
|
|
|
- df_dump.test_dumplicate(397720289
|
|
|
+ df_dump.test_dumplicate(400929607
|
|
|
)
|
|
|
# df_dump.test_merge([242672995,235300429,240009762
|
|
|
# ],[243240169,])
|