瀏覽代碼

采购意向去重规则

znj 1 月之前
父節點
當前提交
a8960794a4
共有 2 個文件被更改,包括 52 次插入和 6 次删除
  1. +9 -2
      BaseDataMaintenance/maintenance/dataflow.py
  2. +43 -4
      BaseDataMaintenance/maxcompute/documentDumplicate.py

+ 9 - 2
BaseDataMaintenance/maintenance/dataflow.py

@@ -2275,8 +2275,14 @@ class Dataflow_dumplicate(Dataflow):
         _dict["punish"] = _extract.get("punish",{})
         _dict["approval"] = _extract.get("approval",[])
 
+        # 专项债字段
         issue_details = _extract.get("debt_dic",{}).get("issue_details",[])
         _dict["is_special_bonds"] = 1 if _dict.get(document_tmp_docchannel)==302 and _dict.get(document_tmp_web_source_name)=='专项债券信息网' and issue_details else 0
+        # 采购意向字段
+        if _dict.get("docchannel")==114:
+            _dict["demand_info"] = _extract.get("demand_info",{}).get("data",[])
+        else:
+            _dict["demand_info"] = []
         return _dict
 
     def dumplicate_fianl_check(self,base_list,b_log=False):
@@ -2915,14 +2921,15 @@ class Dataflow_dumplicate(Dataflow):
         self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         # 专项债
         if item.get("is_special_bonds")==1:
+            confidence = 90
             _dict = {doctitle_refine_name: doctitle_refine,
                      document_tmp_web_source_name:"专项债券信息网"}
-            base_dict = {
+            tmp_base_dict = {
                 "docchannel": item["docchannel"],
                 "status": [201, 450],
                 # "page_time": [timeAdd(page_time, -365), timeAdd(page_time, 365)]
             }
-            self.appendRule(list_rules, _dict, base_dict, must_not_dict, confidence, item, b_log=to_log)
+            self.appendRule(list_rules, _dict, tmp_base_dict, must_not_dict, confidence, item, b_log=to_log)
 
 
         confidence=70

+ 43 - 4
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -783,7 +783,7 @@ def check_money(bidding_budget_less,bidding_budget_greater,
                 win_bid_price_less,win_bid_price_greater,
                 moneys_less,moneys_greater,
                 moneys_attachment_less,moneys_attachment_greater):
-
+    # print('bidding_budget_less',bidding_budget_less,'bidding_budget_greater',bidding_budget_greater)
     bidding_budget_less_source = bidding_budget_less
     bidding_budget_greater_source = bidding_budget_greater
     win_bid_price_less_source = win_bid_price_less
@@ -816,9 +816,11 @@ def check_money(bidding_budget_less,bidding_budget_greater,
 
 
         if budget_less!=budget_greater:
-
             if min(budget_less,budget_greater)>0:
-                if max(budget_less,budget_greater)/min(budget_less,budget_greater)==10000:
+                # if max(budget_less,budget_greater)/min(budget_less,budget_greater)==10000:
+                # 金额单位错误,对比时为一万倍,考虑部分小数点后的数,9999<x<10001
+                if (max(budget_less,budget_greater)/min(budget_less,budget_greater)>9999 and max(budget_less,budget_greater)/min(budget_less,budget_greater)<10001)\
+                        or (max(bidding_budget_less_source,bidding_budget_greater_source)/min(bidding_budget_less_source,bidding_budget_greater_source)>9999 and max(bidding_budget_less_source,bidding_budget_greater_source)/min(bidding_budget_less_source,bidding_budget_greater_source)<10001):
                     budget_is_same = True
             if budget_less>10000 and budget_greater>10000 and round(budget_less/10000,2)==round(budget_greater/10000,2):
                 budget_is_same = True
@@ -842,7 +844,9 @@ def check_money(bidding_budget_less,bidding_budget_greater,
         if price_less!=price_greater:
 
             if min(price_less,price_greater)>0:
-                if max(price_less,price_greater)/min(price_less,price_greater)==10000:
+                # if max(price_less,price_greater)/min(price_less,price_greater)==10000:
+                if (max(price_less,price_greater)/min(price_less,price_greater)>9999 and max(price_less,price_greater)/min(price_less,price_greater)<10001)\
+                        or (max(win_bid_price_less_source,win_bid_price_greater_source)/min(win_bid_price_less_source,win_bid_price_greater_source)>9999 and max(win_bid_price_less_source,win_bid_price_greater_source)/min(win_bid_price_less_source,win_bid_price_greater_source)<10001):
                     price_is_same = True
             if price_less>10000 and price_greater>10000 and round(price_less/10000,2)==round(price_greater/10000,2):
                 price_is_same = True
@@ -1336,6 +1340,41 @@ def check_dumplicate_rule(document_less,document_greater,min_counts,b_log=False,
     elif project_codes_greater is None:
         project_codes_greater = []
 
+    # 采购意向去重
+    if docchannel_greater==docchannel_less==114:
+        sign = True
+        demand_info_less = document_less.get("demand_info",[])
+        demand_info_greater = document_greater.get("demand_info",[])
+        # if demand_info_less and not demand_info_greater:
+        #     sign = False
+        # elif not demand_info_less and demand_info_greater:
+        #     sign = False
+        # elif demand_info_less and demand_info_greater:
+        if demand_info_less and demand_info_greater:
+            # 重新确定demand_info的数量排序,按大小排序
+            if len(demand_info_greater)<len(demand_info_less):
+                _demand_info_less = demand_info_greater
+                _demand_info_greater = demand_info_less
+                demand_info_less = _demand_info_less
+                demand_info_greater = _demand_info_greater
+            for item1 in demand_info_less:
+                get_same = False
+                for item2 in demand_info_greater:
+                    if check_doctitle(re.sub("\s","",item1.get("project_name","").strip()),re.sub("\s","",item2.get("project_name","").strip())) and \
+                            check_money(float(item1.get("budget",0) if item1.get("budget",0) else 0),float(item2.get("budget",0) if item2.get("budget",0) else 0),0,0,[],[],[],[]) and \
+                            (item1.get("order_begin", "")[:7]==item2.get("order_begin", "")[:7] or \
+                                item1.get("order_end", "")[:7]==item2.get("order_end", "")[:7]):
+                        get_same = True
+                        break
+                if not get_same:
+                    sign = False
+                    break
+        if not sign:
+            return 0
+        else:
+            if len(demand_info_greater)==len(demand_info_less):# demand_info完全相同
+                return 1
+
     # 专项债去重
     if is_special_bonds_greater==is_special_bonds_less==1:
         detail_link_less = detail_link_less.strip() if detail_link_less else ""