فهرست منبع

处罚公告去重规则排除,oracle数据同步增加几个original字段

luojiehua 4 ماه پیش
والد
کامیت
584813abfc
2 فایلهای تغییر یافته به همراه 46 افزوده شده و 36 حذف شده
  1. 42 36
      BaseDataMaintenance/maintenance/dataflow.py
  2. 4 0
      BaseDataMaintenance/model/oracle/GongGaoTemp.py

+ 42 - 36
BaseDataMaintenance/maintenance/dataflow.py

@@ -4038,42 +4038,48 @@ class Dataflow_dumplicate(Dataflow):
 
         bidclose_time = page_time
         web_source_name = item.get(document_tmp_web_source_name,"")
+        docchannel = item.get(document_tmp_docchannel,"0")
 
-
-
-        if len(page_time)>0:
-            l_page_time = timeAdd(page_time,days=-90)
-            dict_time = item.get("dict_time",{})
-            for k,v in dict_time.items():
-                if v is not None and len(v)>0:
-                    if l_page_time>v:
-                        has_before = True
-                    if v>page_time:
-                        has_after = True
-                    if k==document_tmp_time_bidclose:
-                        bidclose_time = v
-
-        set_web_source = {"中国招标投标公共服务平台","比地招标"}
-
-        if web_source_name in set_web_source and bidclose_time<page_time:
-            return False
-
-        log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
-        if has_before:
-            _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
-                               must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
-            if not has_after:
-                log("check page_time false %s==%s-%s"%(l_page_time,k,v))
-
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                       SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
-            if item.get(document_web_source_name,"")=="中国政府采购网":
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                                                                    SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
+        try:
+            docchannel = int(docchannel)
+        except:
+            docchannel = 0
+
+        if docchannel<200:
+
+            if len(page_time)>0:
+                l_page_time = timeAdd(page_time,days=-90)
+                dict_time = item.get("dict_time",{})
+                for k,v in dict_time.items():
+                    if v is not None and len(v)>0:
+                        if l_page_time>v:
+                            has_before = True
+                        if v>page_time:
+                            has_after = True
+                        if k==document_tmp_time_bidclose:
+                            bidclose_time = v
+
+            set_web_source = {"中国招标投标公共服务平台","比地招标"}
+
+            if web_source_name in set_web_source and bidclose_time<page_time:
+                return False
+
+            log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
+            if has_before:
+                _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
+                                   must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
+                if not has_after:
+                    log("check page_time false %s==%s-%s"%(l_page_time,k,v))
+
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                           SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
+                if item.get(document_web_source_name,"")=="中国政府采购网":
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                                                                        SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
 
         return True
 
@@ -4515,7 +4521,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(576859812
+    df_dump.test_dumplicate(578681000
                             )
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061

+ 4 - 0
BaseDataMaintenance/model/oracle/GongGaoTemp.py

@@ -67,6 +67,10 @@ class GongGaoTemp(BaseModel):
         new_dict["publishtime"] = publishtime
         if "docchannel" in new_dict:
             new_dict["original_docchannel"] = new_dict["docchannel"]
+        new_dict["original_area"] = new_dict.get("area","")
+        new_dict["original_province"] = new_dict.get("province","")
+        new_dict["original_city"] = new_dict.get("city","")
+        new_dict["original_district"] = new_dict.get("district","")
         return new_dict
 
     def setValue(self,k,v,isColumn=False):