浏览代码

去重查询规则修复

luojiehua 1 年之前
父节点
当前提交
5bcc183e9c
共有 1 个文件被更改,包括 16 次插入和 8 次删除
  1. 16 8
      BaseDataMaintenance/maintenance/dataflow.py

+ 16 - 8
BaseDataMaintenance/maintenance/dataflow.py

@@ -363,9 +363,12 @@ class Dataflow():
                     list_must_queries.append(BoolQuery(should_queries=l_s))
             elif k in set_nested:
                 _v = v
-                if k!="" and k=="bidding_budget" or k=="win_bid_price":
-                    _v = float(_v)
-                    list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
+                if k!="":
+                    if k=="bidding_budget" or k=="win_bid_price":
+                        _v = float(_v)
+                        list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
+                    else:
+                        list_must_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
             elif k in set_term:
                 list_must_queries.append(TermQuery(k,v))
             elif k in set_phrase:
@@ -384,9 +387,12 @@ class Dataflow():
                     list_must_no_queries.append(BoolQuery(should_queries=l_s))
             elif k in set_nested:
                 _v = v
-                if k!="" and k=="bidding_budget" or k=="win_bid_price":
-                    _v = float(_v)
-                    list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
+                if k!="":
+                    if k=="bidding_budget" or k=="win_bid_price":
+                        _v = float(_v)
+                        list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
+                    else:
+                        list_must_no_queries.append(NestedQuery("sub_docs_json",TermQuery("sub_docs_json.%s"%k,_v)))
             elif k in set_term:
                 list_must_no_queries.append(TermQuery(k,v))
             elif k in set_range:
@@ -2601,7 +2607,8 @@ class Dataflow_dumplicate(Dataflow):
                  "query":_query,
                  "singleNum_keys":[],
                  "contain_keys":[],
-                 "multiNum_keys":[]}
+                 "multiNum_keys":[],
+                 "_dict":_dict}
         list_rules.append(_rule)
 
     def translate_dumplicate_rules(self,status_from,item,get_all=False,to_log=False,day_dis=7):
@@ -3990,6 +3997,7 @@ class Dataflow_dumplicate(Dataflow):
                     must_not_q = [TermQuery("docid",a) for a in list(set_docid)[-100:]]
                 _query = BoolQuery(should_queries=[_rule["query"] for _rule in list_rules[_i:_i+step]],
                                    must_not_queries=must_not_q)
+
                 _rule = list_rules[_i]
                 confidence = _rule["confidence"]
                 singleNum_keys = _rule["singleNum_keys"]
@@ -4291,7 +4299,7 @@ if __name__ == '__main__':
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     a = time.time()
-    df_dump.test_dumplicate(397720289
+    df_dump.test_dumplicate(400929607
                             )
     # df_dump.test_merge([242672995,235300429,240009762
     #                     ],[243240169,])