Преглед изворног кода

优化监控,拟在建增加竣工时间的逻辑

luojiehua пре 2 година
родитељ
комит
d8191c6465

+ 4 - 4
BaseDataMaintenance/dataMonitor/data_monitor.py

@@ -451,10 +451,10 @@ class BaseDataMonitor():
         rows,next_token,total_count_lastday_dump,is_all_succeed = self.ots_client.search("document_tmp","document_tmp_index",
                                                                                     SearchQuery(query,None,True),
                                                                                     columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
-        if total_count_lastday_dump/total_count_lastday<0.2:
-            _msg = "公告去重报警,%s入库公告数:%d,其中去重数:%d,去重率:%.2f"%(last_date,total_count_lastday,total_count_lastday_dump,total_count_lastday_dump/total_count_lastday)
-            sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS)
-            # sendEmail(smtp_host,smtp_username,smtp_password,self.recieviers,_msg)
+        # if total_count_lastday_dump/total_count_lastday<0.2:
+        #     _msg = "公告去重报警,%s入库公告数:%d,其中去重数:%d,去重率:%.2f"%(last_date,total_count_lastday,total_count_lastday_dump,total_count_lastday_dump/total_count_lastday)
+        #     sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS)
+        #     # sendEmail(smtp_host,smtp_username,smtp_password,self.recieviers,_msg)
 
         # if total_count_to_dump>2000:
         #     _msg = "公告去重报警,待去重数量:%s"%(str(total_count_to_dump))

+ 60 - 49
BaseDataMaintenance/maintenance/dataflow.py

@@ -409,8 +409,8 @@ class Dataflow():
             extract_count += 1
         if sub_docs_json is not None:
             sub_docs = json.loads(sub_docs_json)
-            sub_docs.sort(key=lambda x:x.get("bidding_budget",0),reverse=True)
-            sub_docs.sort(key=lambda x:x.get("win_bid_price",0),reverse=True)
+            sub_docs.sort(key=lambda x:float(x.get("bidding_budget",0)),reverse=True)
+            sub_docs.sort(key=lambda x:float(x.get("win_bid_price",0)),reverse=True)
             # log("==%s"%(str(sub_docs)))
             for sub_docs in sub_docs:
                 for _key_sub_docs in sub_docs.keys():
@@ -2544,6 +2544,7 @@ class Dataflow_dumplicate(Dataflow):
         for _dict in list_dict:
             _docid = _dict.get(document_tmp_docid)
             confidence = _dict["confidence"]
+            print("confidence",_docid,confidence)
             if confidence>0.1:
                 if _docid not in set_docid:
                     base_list.append(_dict)
@@ -2576,7 +2577,7 @@ class Dataflow_dumplicate(Dataflow):
             table_name = "document_tmp"
             table_index = "document_tmp_index"
             base_dict = {
-                "docchannel":item["docchannel"],
+                "docchannel":item.get("docchannel",52),
                 "status":[status_from[0]],
                 "page_time":[timeAdd(page_time,-2),timeAdd(page_time,2)]
             }
@@ -2603,186 +2604,186 @@ class Dataflow_dumplicate(Dataflow):
         singleNum_keys = ["tenderee","win_tenderer"]
 
         confidence = 100
-        self.appendRule(list_rules,{document_tmp_fingerprint:fingerprint},base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,{document_tmp_fingerprint:fingerprint},base_dict,must_not_dict,confidence,item,b_log=to_log)
         confidence = 90
         _dict = {document_tmp_agency:agency,
                  "win_tenderer":win_tenderer,
                  "win_bid_price":win_bid_price}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {document_tmp_agency:agency,
                  "win_tenderer":win_tenderer,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {document_tmp_agency:agency,
                  "win_bid_price":win_bid_price,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {win_tenderer:win_tenderer,
                  "win_bid_price":win_bid_price,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "win_tenderer":win_tenderer,
                  "win_bid_price":win_bid_price}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "win_tenderer":win_tenderer,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  "win_bid_price":win_bid_price,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "agency":agency,
                  "win_tenderer":win_tenderer}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "agency":agency,
                  "win_bid_price":win_bid_price}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  "agency":agency,
                  "bidding_budget":bidding_budget}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         confidence=85
         _dict = {"tenderee":tenderee,
                  "agency":agency
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "project_codes":project_code
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"tenderee":tenderee,
                  "project_name":project_name
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         if getLength(product)>0:
             l_p = product.split(",")
             _dict = {"tenderee":tenderee,
                      "product":l_p[0]
                      }
-            self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+            self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  "win_tenderer":win_tenderer
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  "win_bid_price":win_bid_price
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  "bidding_budget":bidding_budget
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"tenderee":tenderee,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"agency":agency,
                  "project_codes":project_code
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"agency":agency,
                  "project_name":project_name
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_codes":project_code,
                  "project_name":project_name
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_codes":project_code,
                  "win_tenderer":win_tenderer
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_codes":project_code,
                  "win_bid_price":win_bid_price
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_codes":project_code,
                  "bidding_budget":bidding_budget
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_codes":project_code,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_name":project_name,
                  "win_tenderer":win_tenderer
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_name":project_name,
                  "win_bid_price":win_bid_price
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_name":project_name,
                  "bidding_budget":bidding_budget
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"project_name":project_name,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"win_tenderer":win_tenderer,
                  "win_bid_price":win_bid_price
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"win_tenderer":win_tenderer,
                  "bidding_budget":bidding_budget
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"win_tenderer":win_tenderer,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"win_bid_price":win_bid_price,
                  "bidding_budget":bidding_budget
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"win_bid_price":win_bid_price,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         _dict = {"bidding_budget":bidding_budget,
                  doctitle_refine_name:doctitle_refine
                  }
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         confidence=80
         _dict = {doctitle_refine_name:doctitle_refine}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
         _dict = {"project_codes":project_code}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         confidence=70
         _dict = {"project_name":project_name}
-        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item)
+        self.appendRule(list_rules,_dict,base_dict,must_not_dict,confidence,item,b_log=to_log)
 
         return list_rules,table_name,table_index
 
@@ -2933,11 +2934,20 @@ class Dataflow_dumplicate(Dataflow):
 
         _dict = {}
         #更新公共属性
+
+        _replace_replace = False
+        v = project_dict.get(document_district,"")
+        if not (v is None or v=="" or v=="[]" or v=="未知"):
+            _replace_replace = True
         for k,v in project_dict.items():
+            if not _replace_replace:
+                if k in [document_district,document_city,document_province,document_area]:
+                    continue
             if v is None or v=="" or v=="[]" or v=="未知":
                 continue
             if k in (project_project_dynamics,project_product,project_project_codes,project_docids):
                 continue
+            _dict[k] = v
         for _proj in projects:
             _proj.update(_dict)
         for _proj in projects:
@@ -3505,12 +3515,13 @@ class Dataflow_dumplicate(Dataflow):
                       TermQuery(project_agency,agency)]
             list_query.append([_query,1])
 
-        if tenderee!="" and bidding_budget>0:
+        print(bidding_budget,"bidding_budget",type(bidding_budget))
+        if tenderee!="" and float(bidding_budget)>0:
             _query = [TermQuery(project_tenderee,tenderee),
                                              TermQuery(project_bidding_budget,bidding_budget)]
             list_query.append([_query,2])
 
-        if bidding_budget>0 and win_bid_price>0:
+        if float(bidding_budget)>0 and float(win_bid_price)>0:
             _query = [TermQuery(project_bidding_budget,bidding_budget),
                       TermQuery(project_win_bid_price,win_bid_price)]
             list_query.append([_query,2])
@@ -3540,12 +3551,12 @@ class Dataflow_dumplicate(Dataflow):
                       should_q_cod]
             list_query.append([_query,2])
 
-        if win_tenderer!="" and win_bid_price>0:
+        if win_tenderer!="" and float(win_bid_price)>0:
             _query = [TermQuery(project_win_tenderer,win_tenderer),
                                              TermQuery(project_win_bid_price,win_bid_price)]
             list_query.append([_query,2])
 
-        if win_tenderer!="" and bidding_budget>0:
+        if win_tenderer!="" and float(bidding_budget)>0:
             _query = [TermQuery(project_win_tenderer,win_tenderer),
                       TermQuery(project_bidding_budget,bidding_budget)]
             list_query.append([_query,2])
@@ -3824,8 +3835,8 @@ class Dataflow_dumplicate(Dataflow):
             base_list = []
             set_docid = set()
 
-            list_rules,table_name,table_index = self.translate_dumplicate_rules(flow_dumplicate_status_from,item,get_all=get_all,to_log=True)
-
+            list_rules,table_name,table_index = self.translate_dumplicate_rules(flow_dumplicate_status_from,item,get_all=get_all,to_log=False)
+            print("len_rules",len(list_rules),table_name,table_index)
             list_rules.sort(key=lambda x:x["confidence"],reverse=True)
             _i = 0
             step = 5
@@ -4121,7 +4132,7 @@ if __name__ == '__main__':
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     a = time.time()
-    df_dump.test_dumplicate(237450072)
+    df_dump.test_dumplicate(316399675)
     # df_dump.test_merge([292315564],[287890754])
     # df_dump.flow_remove_project_tmp()
     print("takes",time.time()-a)

+ 14 - 4
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -15,6 +15,8 @@ from BaseDataMaintenance.common.Utils import article_limit
 from BaseDataMaintenance.common.documentFingerprint import getFingerprint
 from BaseDataMaintenance.model.postgres.document_extract import *
 
+import sys
+sys.setrecursionlimit(1000000)
 
 
 class ActiveMQListener():
@@ -538,11 +540,13 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
         def on_message(self, headers):
             try:
+                log("get message")
                 message_id = headers.headers["message-id"]
                 body = headers.body
-                log("get message %s crtime:%s"%(message_id,json.loads(body)["crtime"]))
+                log("get message %s crtime:%s"%(message_id,json.loads(body).get("crtime","")))
                 self._func(_dict={"frame":headers,"conn":self.conn},result_queue=None)
             except Exception as e:
+                traceback.print_exc()
                 pass
 
         def on_error(self, headers):
@@ -728,6 +732,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
     def comsumer_handle(self,_dict,result_queue):
         try:
+            log("start handle")
             frame = _dict["frame"]
             conn = _dict["conn"]
             message_id = frame.headers["message-id"]
@@ -744,9 +749,14 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             _dochtmlcon = item.get(document_tmp_dochtmlcon,"")
 
             if len(_dochtmlcon)>200000:
-                _soup = BeautifulSoup(_dochtmlcon,"lxml")
-                _soup = article_limit(_soup,200000)
-                _dochtmlcon = str(_soup)
+                try:
+                    _soup = BeautifulSoup(_dochtmlcon,"lxml")
+                    _soup = article_limit(_soup,200000)
+                    _dochtmlcon = str(_soup)
+                except Exception as e:
+                    traceback.print_exc()
+                    ackMsg(conn,message_id,subscription)
+                    return
 
 
             dhtml.setValue(document_tmp_dochtmlcon,_dochtmlcon,True)

+ 44 - 6
BaseDataMaintenance/maintenance/proposedBuilding/DataSynchronization.py

@@ -19,13 +19,16 @@ class DataSynchronization():
         self.isDone = False
         self.proposedBuilding_table = "proposedBuilding_tmp"
         self.proposedBuilding_table_index = "proposedBuilding_tmp_index"
+
+        self.designed_project_table = "designed_project"
+        self.designed_project_table_index = "designed_project_index"
         self.ots_client = getConnect_ots()
 
     def producer(self,task_queue):
         '''
         :return:生产数据
         '''
-        ots_client = getConnect_ots()
+        ots_client = self.ots_client
 
         bool_query = BoolQuery(must_queries=[ExistsQuery("crtime")])
 
@@ -56,14 +59,13 @@ class DataSynchronization():
 
         def _handle(_proposed,result_queue,ots_client):
 
-            print(_proposed)
 
             #修改designed_project
             _time = time.time()
-            _project_dict = _proposed.toDesigned_project(ots_client)
-            log("toDesigned_project takes %.2fs"%(time.time()-_time))
 
             try:
+                _project_dict = _proposed.toDesigned_project(ots_client)
+                log("toDesigned_project takes %.2fs"%(time.time()-_time))
                 _time = time.time()
                 if _project_dict is not None:
                     #更新数据
@@ -75,6 +77,7 @@ class DataSynchronization():
                 log("update designed takes %.2fs"%(time.time()-_time))
             except Exception as e:
                 log("comsumer failed cause of %s"%(str(e)))
+                log("proposed:%s"%(str(_proposed)))
                 log(traceback.format_exc())
 
 
@@ -106,10 +109,45 @@ class DataSynchronization():
 
         self.comsumer(task_queue)
 
+    def turn_stage(self):
+        '''
+        根据latest_service_time更新stage
+        :return:
+        '''
+
+        ots_client = self.ots_client
+        current_date = getCurrent_date("%Y-%m-%d")
+
+        bool_query = BoolQuery(must_queries=[RangeQuery("latest_service_time",range_to=current_date)],
+                               must_not_queries=[TermQuery("progress","竣工阶段")])
+
+        columns = ["progress"]
+
+        rows, next_token, total_count, is_all_succeed = ots_client.search(self.designed_project_table, self.designed_project_table_index,
+                                                                          SearchQuery(bool_query ,sort=Sort(sorters=[FieldSort("crtime",SortOrder.DESC)]), limit=100, get_total_count=True),
+                                                                          ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
+        list_data = getRow_ots(rows)
+        print("total_count",total_count)
+        for _data in list_data:
+            _proposed = designed_project(_data)
+            _proposed.setValue("progress","竣工阶段",True)
+            _proposed.update_row(ots_client)
+        _count = len(list_data)
+        while next_token:
+            rows, next_token, total_count, is_all_succeed = ots_client.search(self.designed_project_table, self.designed_project_table_index,
+                                                                              SearchQuery(bool_query ,next_token=next_token, limit=100, get_total_count=True),
+                                                                              ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
+            list_data = getRow_ots(rows)
+            for _data in list_data:
+                _proposed = designed_project(_data)
+                _proposed.setValue("progress","竣工阶段",True)
+                _proposed.update_row(ots_client)
+            _count += len(list_data)
 
     def scheduler(self):
         _scheduler = BlockingScheduler()
         _scheduler.add_job(self.maxcompute2ots,"cron",minute="*/1")
+        _scheduler.add_job(self.turn_stage,"cron",hour="*/5")
         _scheduler.start()
 
 def startSychro():
@@ -119,7 +157,7 @@ def startSychro():
 if __name__=="__main__":
     ds = DataSynchronization()
     # ds.scheduler()
-    ds.maxcompute2ots()
-
+    # ds.maxcompute2ots()
+    ds.turn_stage()
 
 

+ 1 - 1
BaseDataMaintenance/maintenance/tyc_company/remove_processed.py

@@ -45,7 +45,7 @@ class Tyc_company_maintenance():
 
     def start_remove_processed(self):
         _schedule = BlockingScheduler()
-        _schedule.add_job(self.remove_processed,"cron",second="*/5")
+        # _schedule.add_job(self.remove_processed,"cron",second="*/5")
         _schedule.add_job(self.remove_comsumer,"cron",second="*/5")
         _schedule.start()
 

+ 1 - 1
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -283,7 +283,7 @@ class f_get_extractCount(object):
                         if  win_tenderer=="":
                             win_tenderer = _role["role_text"]
                         if "role_money" in _role:
-                            if str(_role["role_money"]["money"])!='' and float(_role["role_money"]["money"])>0:
+                            if str(_role["role_money"].get("money",""))!='' and float(_role["role_money"].get("money",""))>0:
                                 extract_count += 1
                                 if win_bid_price=="":
                                     win_bid_price = str(float(_role["role_money"]["money"]))

+ 58 - 23
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -1528,31 +1528,66 @@ def generate_common_properties(list_docs):
 
 
     _find = False
-    for _key in [document_district,document_city,document_province,document_area]:
-        area_dict = {}
-        for _doc in list_docs:
+    dict_count = {}
+    for _doc in list_docs:
+        for _key in [document_district,document_city,document_province,document_area]:
             loc = _doc.get(_key,"未知")
             if loc not in ('全国','未知',"0"):
-                if loc not in area_dict:
-                    area_dict[loc] = 0
-                area_dict[loc] += 1
-        list_loc = []
-        for k,v in area_dict.items():
-            list_loc.append([k,v])
-        list_loc.sort(key=lambda x:x[1],reverse=True)
-        if len(list_loc)>0:
-            project_dict[document_district] = _doc.get(document_district)
-            project_dict[document_city] = _doc.get(document_city)
-            project_dict[document_province] = _doc.get(document_province)
-            project_dict[document_area] = _doc.get(document_area)
-            _find = True
-            break
-    if not _find:
-        if len(list_docs)>0:
-            project_dict[document_district] = list_docs[0].get(document_district)
-            project_dict[document_city] = list_docs[0].get(document_city)
-            project_dict[document_province] = list_docs[0].get(document_province)
-            project_dict[document_area] = list_docs[0].get(document_area)
+                if loc not in dict_count:
+                    dict_count[loc] = 0
+                dict_count[loc] += 1
+    list_loc = []
+    for _doc in list_docs:
+        _d = {"count":0}
+        for _key in [document_district,document_city,document_province,document_area]:
+            loc = _doc.get(_key,"未知")
+            _d[_key] = loc
+            _d["count"] += dict_count.get(loc,0)
+            if _key==document_district and loc not in ("全国","未知",""):
+                _d["count"] += 1
+            if _key==document_city and loc not in ("全国","未知",""):
+                _d["count"] += 1
+            if _key==document_province and loc not in ("全国","未知",""):
+                _d["count"] += 1
+            if _key==document_area and loc not in ("全国","未知",""):
+                _d["count"] += 1
+        list_loc.append(_d)
+    list_loc.sort(key=lambda x:x.get("count",0),reverse=True)
+    if len(list_loc)>0:
+        project_dict[document_district] = _doc.get(document_district)
+        project_dict[document_city] = _doc.get(document_city)
+        project_dict[document_province] = _doc.get(document_province)
+        project_dict[document_area] = _doc.get(document_area)
+        _find = True
+    # print(dict_count)
+    # print(len(list_docs))
+    # print("list_loc",list_loc,project_dict)
+    #会导致省市错乱
+    # for _key in [document_district,document_city,document_province,document_area]:
+    #     area_dict = {}
+    #     for _doc in list_docs:
+    #         loc = _doc.get(_key,"未知")
+    #         if loc not in ('全国','未知',"0"):
+    #             if loc not in area_dict:
+    #                 area_dict[loc] = 0
+    #             area_dict[loc] += 1
+    #     list_loc = []
+    #     for k,v in area_dict.items():
+    #         list_loc.append([k,v])
+    #     list_loc.sort(key=lambda x:x[1],reverse=True)
+    #     if len(list_loc)>0:
+    #         project_dict[document_district] = _doc.get(document_district)
+    #         project_dict[document_city] = _doc.get(document_city)
+    #         project_dict[document_province] = _doc.get(document_province)
+    #         project_dict[document_area] = _doc.get(document_area)
+    #         _find = True
+    #         break
+    # if not _find:
+    #     if len(list_docs)>0:
+    #         project_dict[document_district] = list_docs[0].get(document_district)
+    #         project_dict[document_city] = list_docs[0].get(document_city)
+    #         project_dict[document_province] = list_docs[0].get(document_province)
+    #         project_dict[document_area] = list_docs[0].get(document_area)
 
 
 

Разлика у датотеци није приказана због њене велике величине
+ 12 - 5
BaseDataMaintenance/model/ots/proposedBuilding_tmp.py


Неке датотеке нису приказане због велике количине промена