Browse source

Merge remote-tracking branch 'origin/master'

fangjiasheng 1 year ago
parent
commit
b541534c3d

+ 5 - 12
BaseDataMaintenance/common/ERNIE_utils.py

@@ -19,23 +19,17 @@ def get_access_token():
     return response.json().get("access_token")
 
 def main():
-    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + get_access_token()
+    # _token = get_access_token()
+    _token = "24.93c9d66ffc94ffaef6c6c9d35770a5f5.2592000.1701242081.282335-37357318"
+    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + _token
 
     payload = json.dumps({
         "messages": [
             {
                 "role": "user",
                 "content": '''
-                名称: 亚低温治疗仪
-
-品牌:GSZ
-
-规格型号:233
-
-数量:1台
-
-单价: 170000.00元
-以上的GSZ是什么牌子
+                假设分类是建筑建材-建筑涂料的相关产品词“面漆”
+                请拓展其相关行业产品词,列举30个
                 '''
             }
         ]
@@ -49,5 +43,4 @@ def main():
     print(response.text)
 
 if __name__ == '__main__':
-    print(get_access_token())
     main()
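
For reference, a minimal standalone sketch of the wenxinworkshop call that main() makes, assuming get_access_token() returns a valid token; reading the reply from a "result" field is an assumption about the response shape, not something shown in this file:

    import json
    import requests

    def ask_ernie(prompt, access_token, timeout=60):
        # Same chat/completions endpoint as main(); the token rides as a query parameter.
        url = ("https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/"
               "chat/completions?access_token=" + access_token)
        payload = json.dumps({"messages": [{"role": "user", "content": prompt}]})
        headers = {"Content-Type": "application/json"}
        resp = requests.post(url, headers=headers, data=payload, timeout=timeout)
        return resp.json().get("result", "")  # assumed field name for the answer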

+ 19 - 3
BaseDataMaintenance/dataMonitor/data_monitor.py

@@ -490,15 +490,31 @@ class BaseDataMonitor():
                                                                             SearchQuery(query,None,True),
                                                                             columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
 
+
         if total_count>=1000:
-            _cmd = 'cat %s | grep -c "%s.*upgrate True save"'%(flow_dumplicate_log_path,self.get_last_tenmin_time())
+            _cmd = 'cat %s | grep -c "%s.*merge_project whole_time"'%(flow_dumplicate_log_path,self.get_last_tenmin_time())
             process_count = self.cmd_execute(_cmd)
             atAll = False
             if process_count=="":
                 process_count = 0
+
+            query = BoolQuery(must_queries=[
+                RangeQuery("status",flow_dumplicate_status_from[1]),
+                RangeQuery("opertime",self.get_last_tenmin_time())
+            ])
+
+            rows,next_token,total_count_oper,is_all_succeed = self.ots_client.search("document_tmp","document_tmp_index",
+                                                                                SearchQuery(query,None,True),
+                                                                                columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
+
             if int(process_count)==0:
-                atAll = True
-            _msg = "数据流报警:待去重公告数为:%d,最近十分钟去重数为:%s"%(total_count,str(process_count))
+                if total_count_oper==0:
+                    atAll = True
+                _cmd = "echo `tail %s -c 10000k` > %s"%(flow_dumplicate_log_path,flow_dumplicate_log_path)
+                self.cmd_execute(_cmd)
+            # if int(process_count)>0 and int(process_count)<100:
+            #     self.cmd_execute("ps -ef | grep dumplicate | grep -v grep|cut -c 9-15|xargs kill -9")
+            _msg = "数据流报警:待去重公告数为:%d,最近十分钟日志去重数为:%s,ots去重数为:%s"%(total_count,str(process_count),str(total_count_oper))
             sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=atAll)
             # sendEmail(smtp_host,smtp_username,smtp_password,self.recieviers,_msg)
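
A rough pure-Python equivalent of the grep count above (a hypothetical helper for illustration only; the monitor itself shells out through cmd_execute), assuming get_last_tenmin_time() returns a timestamp prefix that appears verbatim in matching log lines:

    import re

    def count_recent_merges(log_path, tenmin_prefix):
        # Mirrors: cat <log> | grep -c "<prefix>.*merge_project whole_time"
        pattern = re.compile(re.escape(tenmin_prefix) + r".*merge_project whole_time")
        with open(log_path, "r", encoding="utf8", errors="ignore") as f:
            return sum(1 for line in f if pattern.search(line))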
 

+ 3 - 2
BaseDataMaintenance/dataSource/interface.py

@@ -20,7 +20,7 @@ DEFAULT_TIMEOUT = 3000
 import traceback
 import base64
 
-def getAttachDealInterface(_data,_type,path="",restry=1,kwargs={},url=interface_url,timeout=DEFAULT_TIMEOUT):
+def getAttachDealInterface(_data,_type,path="",restry=1,kwargs={},url=interface_url,timeout=DEFAULT_TIMEOUT,session=None):
     _succeed = False
     _html = ""
     swf_images = []
@@ -35,7 +35,8 @@ def getAttachDealInterface(_data,_type,path="",restry=1,kwargs={},url=interface_
             if len(kwargs.keys())>0:
                 _json.update(kwargs)
             headers = {"Content-Type":"application/json"}
-            _resp = requests.post(url,data=_json,timeout=timeout)
+            with requests.Session() as session:
+                _resp = session.post(url,data=_json,timeout=timeout)
 
             if _resp.status_code==200:
                 _result = json.loads(_resp.content.decode())
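
The with-block above opens a fresh Session even though a session argument was just added to the signature. A minimal sketch of reusing a caller-supplied session when one is given (a variation for illustration, not the code in this commit):

    import requests

    def post_with_optional_session(url, data, timeout, session=None):
        # Reuse the caller's Session if provided, otherwise open a throwaway one.
        if session is not None:
            return session.post(url, data=data, timeout=timeout)
        with requests.Session() as _session:
            return _session.post(url, data=data, timeout=timeout)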

+ 8 - 6
BaseDataMaintenance/dataSource/setttings.py

@@ -13,8 +13,10 @@ mysql_host = "rm-bp1quo50k0q2ok73gi.mysql.rds.aliyuncs.com"
 mysql_port = 3306
 # mysql_user = "bxkc_read"
 # mysql_pass = "bxkc_20RE18AD"
-mysql_user = "bxkc"
-mysql_pass = "M$7UdmVToY*N@ITU"
+# mysql_user = "bxkc"
+# mysql_pass = "x$nWk2ED5w=Lu2Yz"
+mysql_user = "bxkc_id"
+mysql_pass = "Dh52kE&8Q*&vem*B"
 mysql_db = "bxkc"
 
 test_mysql_host = "192.168.2.170"
@@ -43,10 +45,10 @@ oracle_host = "192.168.0.150"
 oracle_port = 1522
 # oracle_user = "bxkc_data_readonly"
 # oracle_pass = "P7WUrgcz0@#j8pjg"
-oracle_user = "bxkc_write"
-oracle_pass = "aBrTKNl9SaPk@Yy3"
-# oracle_user = "bxkc_db"
-# oracle_pass = "TKVF#3idC4UQlDVy"
+# oracle_user = "bxkc_write"
+# oracle_pass = "aBrTKNl9SaPk@Yy3"
+oracle_user = "bxkc_db"
+oracle_pass = "xb9F#24Hd#5rStr9"
 oracle_db = "yanphone"
 
 ots_AccessKeyId = 'LTAI5tFuoxHm8Uxrr5nT8wTZ'

+ 1 - 1
BaseDataMaintenance/fixDoc_to_queue_extract.py

@@ -8,4 +8,4 @@ from BaseDataMaintenance.maintenance.dataflow_mq import fixDoc_to_queue_extract,
 
 if __name__ == '__main__':
     # fixDoc_to_queue_extract()
-    fixDoc_to_queue_init(filename="/data/python/flow_init_check/flow_init_2023-08-30.xlsx")
+    fixDoc_to_queue_init(filename="/data/python/flow_init_check/flow_init_2023-12-04.xlsx")

+ 20 - 7
BaseDataMaintenance/maintenance/2.py

@@ -10,13 +10,26 @@ from BaseDataMaintenance.common.Utils import article_limit
 
 import codecs
 
+def getAttachPath(filemd5,_dochtmlcon):
+    _soup = BeautifulSoup(_dochtmlcon,"lxml")
+
+    list_mark = ["data","filelink"]
+    for _mark in list_mark:
+        _find = _soup.find("a",attrs={_mark:filemd5})
+        filelink = ""
+        if _find is None:
+            _find = _soup.find("img",attrs={_mark:filemd5})
+            if _find is not None:
+                filelink = _find.attrs.get("src","")
+        else:
+            filelink = _find.attrs.get("href","")
+        if filelink.find("bidizhaobiao")>=0:
+            _path = filelink.split("/file")
+            if len(_path)>1:
+                return _path[1]
+
 if __name__ == '__main__':
 
     text = codecs.open("C:\\Users\\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
-    content = str(BeautifulSoup(text).find("div"))
-    _soup = BeautifulSoup(content,"lxml")
-    print(len(str(_soup)))
-    _soup = article_limit(_soup,100)
-
-    print(len(str(_soup)))
-    print(str(_soup))
+    filemd5='61393b5ef3d460b3714eb9667682144f'
+    print(getAttachPath(filemd5,text))
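
A worked example of what getAttachPath() returns, with a made-up filemd5 and link (both hypothetical):

    html = '<a data="0123456789abcdef" href="http://www.bidizhaobiao.com/file/2023/12/x.pdf">attachment</a>'
    print(getAttachPath("0123456789abcdef", html))
    # -> "/2023/12/x.pdf": the <a> tag matches on its "data" attribute, the link
    #    contains "bidizhaobiao", and the part after "/file" is returned.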

+ 0 - 0
BaseDataMaintenance/maintenance/3.py


+ 117 - 54
BaseDataMaintenance/maintenance/dataflow.py

@@ -254,7 +254,7 @@ class Dataflow():
                     _data_base64 = base64.b64encode(open(localpath,"rb").read())
                     #调用接口处理结果
                     start_time = time.time()
-                    _success,_html,swf_images = getAttachDealInterface(_data_base64,_filetype)
+                    _success,_html,swf_images = getAttachDealInterface(_data_base64,_filetype,kwargs={"timeout":600})
                     if _success:
                         log("process filemd5:%s of type:%s with size:%.3fM download:%ds recognize takes %ds,ret_size:%d"%(filemd5,_filetype,round(_size/1024/1024,4),time_download,time.time()-start_time,len(_html)))
                     else:
@@ -1796,6 +1796,7 @@ class Dataflow_attachment(Dataflow):
 
     def __init__(self):
         Dataflow.__init__(self)
+        self.process_list_thread = []
 
     def flow_attachment_process(self):
         self.process_comsumer()
@@ -1808,22 +1809,32 @@ class Dataflow_attachment(Dataflow):
         log("attachment_process alive:%d total:%d"%(alive_count,len(self.process_list_thread)))
 
     def process_comsumer(self):
-        self.process_list_thread = []
-        thread_count = 60
+        if len(self.process_list_thread)==0:
+            thread_count = 60
 
-        for i in range(thread_count):
-            self.process_list_thread.append(Thread(target=self.process_comsumer_handle))
+            for i in range(thread_count):
+                self.process_list_thread.append(Thread(target=self.process_comsumer_handle))
 
-        for t in self.process_list_thread:
-            t.start()
+            for t in self.process_list_thread:
+                t.start()
 
-        for t in self.process_list_thread:
-            t.join()
+        while 1:
+            failed_count = 0
+            for _i in range(len(self.process_list_thread)):
+                t = self.process_list_thread[_i]
+                if not t.is_alive():
+                    failed_count += 1
+                    self.process_list_thread[_i] = Thread(target=self.process_comsumer_handle)
+                    self.process_list_thread[_i].start()
+            if failed_count>0:
+                log("attachment failed %d"%(failed_count))
+            time.sleep(5)
 
 
     def process_comsumer_handle(self):
         while 1:
             _flag = False
+            log("attachment handle:%s"%str(threading.get_ident()))
             try:
                 item = self.queue_attachment_ocr.get(True,timeout=0.2)
                 log("attachment get doc:%s"%(str(item.get("item",{}).get("docid"))))
@@ -2214,7 +2225,7 @@ class Dataflow_dumplicate(Dataflow):
         _dict["project_name"] = _extract.get("name","")
         _dict["dict_time"] = self.get_dict_time(_extract)
 
-    def dumplicate_fianl_check(self,base_list):
+    def dumplicate_fianl_check(self,base_list,b_log=False):
         the_group = base_list
         the_group.sort(key=lambda x:x["confidence"],reverse=True)
 
@@ -2232,17 +2243,16 @@ class Dataflow_dumplicate(Dataflow):
                 continue
             for _j in range(min(_i,10)):
                 _dict2 = base_list[_j]
-                _prob = self.dumplicate_check(_dict1,_dict2,_dict2.get("min_counts",10),b_log=False)
-                # print("_prob:",_prob)
+                _prob = self.dumplicate_check(_dict1,_dict2,_dict1.get("min_counts",10),b_log=b_log)
+                print("_prob:",_prob)
                 if _prob<=0.1:
                     _pass = False
                     break
-            log("checking index:%d"%(_i))
+            log("checking index:%d %s %.2f"%(_i,str(_pass),_prob))
             _index = _i
             if not _pass:
                 _index -= 1
                 break
-
         if _index>=1:
             # #对重复入库的进行去重
             # _l = the_group[:_index+1]
@@ -2258,7 +2268,7 @@ class Dataflow_dumplicate(Dataflow):
             return the_group[:_index+1]
         return []
 
-    def dumplicate_check(self,_dict1,_dict2,min_counts,b_log=True):
+    def dumplicate_check(self,_dict1,_dict2,min_counts,b_log=False):
         document_less = _dict1
         docid_less = _dict1["docid"]
         docchannel_less = document_less["docchannel"]
@@ -2370,7 +2380,7 @@ class Dataflow_dumplicate(Dataflow):
             same_count += 1
         if getLength(project_name_less)>0 and project_name_less==project_name_greater:
             same_count += 1
-        if getLength(doctitle_refine_less)>0 and doctitle_refine_less==doctitle_refine_greater:
+        if getLength(doctitle_refine_less)>0 and (doctitle_refine_less==doctitle_refine_greater or doctitle_refine_less in doctitle_refine_greater or doctitle_refine_greater in doctitle_refine_less):
             same_count += 1
         base_prob = 0
         if min_counts<3:
@@ -2849,7 +2859,7 @@ class Dataflow_dumplicate(Dataflow):
 
     def flow_dumpcate_comsumer(self):
         from multiprocessing import Process
-        process_count = 2
+        process_count = 3
         thread_count = 20
         list_process = []
         def start_thread():
@@ -3543,7 +3553,7 @@ class Dataflow_dumplicate(Dataflow):
         if tenderee!="" and len(list_product)>0:
             _query = [TermQuery(project_tenderee,tenderee),
                       should_q_product]
-            list_query.append([_query,2])
+            list_query.append([_query,1])
 
         if tenderee!="" and project_name!="":
             _query = [TermQuery(project_tenderee,tenderee),
@@ -3553,7 +3563,7 @@ class Dataflow_dumplicate(Dataflow):
         if tenderee!="" and agency!="":
             _query = [TermQuery(project_tenderee,tenderee),
                       TermQuery(project_agency,agency)]
-            list_query.append([_query,1])
+            list_query.append([_query,0])
 
         if tenderee!="" and float(bidding_budget)>0:
             _query = [TermQuery(project_tenderee,tenderee),
@@ -3574,12 +3584,12 @@ class Dataflow_dumplicate(Dataflow):
         if agency!="" and win_tenderer!="":
             _query = [TermQuery(project_agency,agency),
                       TermQuery(project_win_tenderer,win_tenderer)]
-            list_query.append([_query,2])
+            list_query.append([_query,0])
 
         if agency!="" and len(list_product)>0:
             _query = [TermQuery(project_agency,agency),
                       should_q_product]
-            list_query.append([_query,2])
+            list_query.append([_query,1])
 
         if win_tenderer!="" and len(list_code)>0:
             _query = [TermQuery(project_win_tenderer,win_tenderer),
@@ -3608,7 +3618,7 @@ class Dataflow_dumplicate(Dataflow):
         if len(list_code)>0:
             _query = [
                       should_q_code]
-            list_query.append([_query,1])
+            list_query.append([_query,2])
 
             _query = [
                 should_q_cod]
@@ -3623,11 +3633,11 @@ class Dataflow_dumplicate(Dataflow):
         if len(list_product)>0 and should_q_area is not None:
             _query = [should_q_area,
                       should_q_product]
-            list_query.append([_query,1])
+            list_query.append([_query,0])
 
         generate_time = time.time()-_time
         whole_time = time.time()-whole_time_start
-        log("projects merge rules whole_time:%.3f prepare_time:%.3f log_time:%.3f generate_time:%.3f"%(whole_time,prepare_time,log_time,generate_time))
+        # log("projects merge rules whole_time:%.3f prepare_time:%.3f log_time:%.3f generate_time:%.3f"%(whole_time,prepare_time,log_time,generate_time))
         return list_query
 
 
@@ -3649,6 +3659,7 @@ class Dataflow_dumplicate(Dataflow):
             must_not_q = []
             for _uuid in list(set_uuid):
                 must_not_q.append(TermQuery("uuid",_uuid))
+                print("must_not_q uuid:%s"%(_uuid))
 
 
             projects_merge_count = 0
@@ -3675,13 +3686,25 @@ class Dataflow_dumplicate(Dataflow):
                 bidding_budget = _proj.get(project_bidding_budget,-1)
                 win_tenderer = _proj.get(project_win_tenderer,"")
                 win_bid_price = _proj.get(project_win_bid_price,-1)
+                _dynamic = _proj.get(project_project_dynamics,"[]")
+                is_yanshou = False
+                list_dynamic = json.loads(_dynamic)
+                for _d in list_dynamic:
+                    _title = _d.get("doctitle","")
+                    if re.search("验收公[示告]",_title) is not None:
+                        is_yanshou = True
+                        break
 
                 province = _proj.get(project_province,"")
                 city = _proj.get(project_city,"")
                 district = _proj.get(project_district,"")
 
-                page_time_less = timeAdd(page_time,-150)
-                page_time_greater = timeAdd(page_time,120)
+                if is_yanshou:
+                    page_time_less = timeAdd(page_time,-750)
+                    page_time_greater = timeAdd(page_time,720)
+                else:
+                    page_time_less = timeAdd(page_time,-450)
+                    page_time_greater = timeAdd(page_time,420)
                 sub_project_q = TermQuery(project_sub_project_name,sub_project_name) if sub_project_name.replace("Project","")!="" else None
                 _time = time.time()
                 list_must_query = self.getMerge_rules(page_time,project_codes,project_name,tenderee,agency,product,sub_project_name,bidding_budget,win_tenderer,win_bid_price,province,city,district)
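
A hedged sketch of the window selection introduced above: scan the project's dynamics for an acceptance notice and widen the merge window if one is found. It assumes page_time is a "YYYY-MM-DD" string and substitutes datetime arithmetic for the project's timeAdd helper so the snippet stands alone:

    import json
    import re
    from datetime import datetime, timedelta

    def merge_window(page_time, dynamics_json):
        # Wider window when any linked announcement title looks like 验收公示/验收公告.
        is_yanshou = any(re.search("验收公[示告]", d.get("doctitle", "")) is not None
                         for d in json.loads(dynamics_json or "[]"))
        before, after = (750, 720) if is_yanshou else (450, 420)
        base = datetime.strptime(page_time, "%Y-%m-%d")
        return ((base - timedelta(days=before)).strftime("%Y-%m-%d"),
                (base + timedelta(days=after)).strftime("%Y-%m-%d"))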
@@ -3693,14 +3716,14 @@ class Dataflow_dumplicate(Dataflow):
                 search_table_index = "project2_index_formerge"
                 project_cls = Project
 
-                print("page_time,min_date",page_time,min_date)
-                if page_time>=min_date:
-                    search_table = "project2_tmp"
-                    search_table_index = "project2_tmp_index"
-                    project_cls = Project_tmp
+                # print("page_time,min_date",page_time,min_date)
+                # if page_time>=min_date:
+                #     search_table = "project2_tmp"
+                #     search_table_index = "project2_tmp_index"
+                #     project_cls = Project_tmp
 
 
-                _step = 4
+                _step = 2
                 _begin = 0
                 must_queries = []
 
@@ -3709,22 +3732,26 @@ class Dataflow_dumplicate(Dataflow):
                     must_queries = [RangeQuery(project_page_time,page_time_less,page_time_greater,True,True),
                                 ]
 
-                print("page_time_less,page_time_greater",page_time,page_time_less,page_time_greater)
                 #sub_project_name非必要条件
                 # if sub_project_q is not None:
                 #     must_queries.append(sub_project_q)
 
                 projects_prepare_time += time.time()-_time
                 _time = time.time()
+                sort_type = SortOrder.DESC
                 while _begin<len(list_must_query):
+                    if sort_type==SortOrder.DESC:
+                        sort_type=SortOrder.ASC
+                    elif sort_type==SortOrder.ASC:
+                        sort_type=SortOrder.DESC
                     list_should_q = []
-                    _limit = 20
+                    _limit = 10
                     for must_q,_count in list_must_query[_begin:_begin+_step]:
                         must_q1 = list(must_q)
                         must_q1.extend(must_queries)
                         list_should_q.append(BoolQuery(must_queries=must_q1))
 
-                        # _limit += _count*5
+                        _limit += _count*5
                     _query = BoolQuery(
                                        should_queries=list_should_q,
                                        must_not_queries=must_not_q[:100]
@@ -3734,7 +3761,7 @@ class Dataflow_dumplicate(Dataflow):
                     #                                                                     columns_to_get=ColumnsToGet(column_names=[project_uuid,project_docids,project_zhao_biao_page_time,project_zhong_biao_page_time,project_page_time,project_area,project_province,project_city,project_district,project_info_type,project_industry,project_qcodes,project_project_name,project_project_code,project_project_codes,project_project_addr,project_tenderee,project_tenderee_addr,project_tenderee_phone,project_tenderee_contact,project_agency,project_agency_phone,project_agency_contact,project_sub_project_name,project_sub_project_code,project_bidding_budget,project_win_tenderer,project_win_bid_price,project_win_tenderer_manager,project_win_tenderer_phone,project_second_tenderer,project_second_bid_price,project_second_tenderer_manager,project_second_tenderer_phone,project_third_tenderer,project_third_bid_price,project_third_tenderer_manager,project_third_tenderer_phone,project_procurement_system,project_bidway,project_dup_data,project_docid_number,project_project_dynamics,project_product,project_moneysource,project_service_time,project_time_bidclose,project_time_bidopen,project_time_bidstart,project_time_commencement,project_time_completion,project_time_earnest_money_start,project_time_earnest_money_end,project_time_get_file_end,project_time_get_file_start,project_time_publicity_end,project_time_publicity_start,project_time_registration_end,project_time_registration_start,project_time_release,project_dup_docid,project_info_source,project_nlp_enterprise,project_nlp_enterprise_attachment],return_type=ColumnReturnType.SPECIFIED))
 
                     rows,next_token,total_count,is_all_succeed = self.ots_client_merge.search(search_table,search_table_index,
-                                                                                              SearchQuery(_query,limit=_limit),
+                                                                                              SearchQuery(_query,sort=Sort(sorters=[FieldSort(project_page_time,sort_type)]),limit=_limit),
                                                                                               columns_to_get=ColumnsToGet(column_names=check_columns,return_type=ColumnReturnType.SPECIFIED))
                     list_data = getRow_ots(rows)
 
@@ -3829,8 +3856,9 @@ class Dataflow_dumplicate(Dataflow):
             list_projects = self.merge_projects(list_projects,b_log)
             # log("merge projects takes:%.3f"%(time.time()-_time))
 
+
             _time = time.time()
-            dumplicate_document_in_merge(list_projects)
+            list_merge_dump = dumplicate_document_in_merge(list_projects,dup_docid[:-1])
             # log("dumplicate document %d takes:%.3f"%(len(list_projects),time.time()-_time))
 
             _time = time.time()
@@ -3838,7 +3866,7 @@ class Dataflow_dumplicate(Dataflow):
             # log("json projects takes:%.3f"%(time.time()-_time))
             if b_log:
                 log("project_json:%s"%project_json)
-            return project_json
+            return project_json,list_merge_dump
         except Exception as e:
             raise RuntimeError("error on dumplicate")
 
@@ -3858,13 +3886,29 @@ class Dataflow_dumplicate(Dataflow):
             else:
                 if _save==1:
                     set_fingerprint.add(fingerprint_less)
-        print("_fingerprint",_fingerprint)
-        print(set_fingerprint)
         if _fingerprint in set_fingerprint:
             return True
         return False
 
 
+    def check_page_time(self,item):
+        page_time = item.get(document_page_time,"")
+        has_before = False
+        has_after = False
+        if len(page_time)>0:
+            l_page_time = timeAdd(page_time,days=-90)
+            dict_time = item.get("dict_time",{})
+            for k,v in dict_time.items():
+                if v is not None and len(v)>0:
+                    if l_page_time>v:
+                        has_before = True
+                    if v>page_time:
+                        has_after = True
+        if not has_after and has_before:
+            log("check page_time false %s==%s-%s"%(l_page_time,k,v))
+            return False
+        return True
+
 
     def dumplicate_comsumer_handle(self,item,result_queue,ots_client,get_all=False,upgrade=True):
         try:
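
A quick worked example of the new check_page_time guard with hypothetical values, assuming document_page_time resolves to the "page_time" column:

    item = {
        "page_time": "2023-12-04",                    # document date (hypothetical)
        "dict_time": {"time_bidopen": "2023-01-01"},  # extracted date, >90 days earlier
    }
    # l_page_time becomes "2023-09-05"; "2023-09-05" > "2023-01-01" sets has_before,
    # no extracted date is later than page_time, so the guard returns False and the
    # document is not written back with save=1.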
@@ -3901,9 +3945,10 @@ class Dataflow_dumplicate(Dataflow):
 
 
 
+            b_log = False if upgrade else True
             _time = time.time()
             # log("%d start final check with length:%d"%(item["docid"],len(base_list)))
-            final_list = self.dumplicate_fianl_check(base_list)
+            final_list = self.dumplicate_fianl_check(base_list,b_log)
 
             exist_finterprint = self.is_exist_fingerprint(final_list,item.get(document_tmp_docid),item.get(document_tmp_fingerprint),table_name)
             # log("%d final_check takes:%.2f"%(item["docid"],time.time()-_time))
@@ -3929,7 +3974,7 @@ class Dataflow_dumplicate(Dataflow):
             remove_list = []
 
 
-            if len(final_list)==0 or best_docid==item.get(document_tmp_docid):
+            if self.check_page_time(item) and (len(final_list)==0 or best_docid==item.get(document_tmp_docid)):
                 dtmp.setValue(document_tmp_save,1,True)
                 # dtmp.setValue(document_tmp_merge_uuid,self.merge_document(item,flow_dumplicate_status_to),True)
                 dmp_docid = ",".join([str(a) for a in list(dup_docid)])
@@ -3953,26 +3998,25 @@ class Dataflow_dumplicate(Dataflow):
 
             list_docids = list(dup_docid)
             list_docids.append(best_docid)
-            b_log = False if upgrade else True
 
             if item.get(document_update_document)=="true":
                 dtmp.setValue(document_tmp_save,1,True)
 
+            list_merge_dump = []
             if exist_finterprint and dtmp.getProperties().get(document_tmp_save)==0:
                 log("exist_finterprint %s"%(str(item.get(document_tmp_docid))))
                 dtmp.setValue(document_tmp_projects,"[]",True)
             else:
-                dtmp.setValue(document_tmp_projects,self.merge_document_real(item,list_docids,table_name,dtmp.getProperties().get(document_tmp_save),flow_dumplicate_status_to,b_log),True)
+                project_json,list_merge_dump = self.merge_document_real(item,list_docids,table_name,dtmp.getProperties().get(document_tmp_save),flow_dumplicate_status_to,b_log)
+                dtmp.setValue(document_tmp_projects,project_json,True)
             log("upgrate %s save:%s:docid:%d,final_list:%d,rules:%d,best_docid:%s,dmp_docid:%s"%(str(upgrade),dtmp.getProperties().get(document_tmp_save),item.get(document_tmp_docid),len(final_list),len(list_rules),str(best_docid),dmp_docid))
 
             if upgrade:
-                if table_name=="document_tmp":
-                    self.changeSaveStatus(remove_list)
-
                 # print(dtmp.getProperties())
                 dtmp.setValue(document_tmp_dup_docid,dmp_docid,True)
                 dtmp.setValue(document_tmp_best_docid,best_docid,True)
                 _flag = dtmp.update_row(self.ots_client)
+
                 if not _flag:
                     for i in range(10):
                         list_proj_json = dtmp.getProperties().get(document_tmp_projects)
@@ -3981,6 +4025,11 @@ class Dataflow_dumplicate(Dataflow):
                             dtmp.setValue(document_tmp_projects,json.dumps(list_proj[:len(list_proj)//2]),True)
                             if dtmp.update_row(self.ots_client):
                                 break
+                if table_name=="document_tmp":
+                    self.changeSaveStatus(remove_list)
+                    self.changeSaveStatus(list_merge_dump)
+
+
 
 
             # log("dump takes %.2f"%(time.time()-start_time))
@@ -4051,7 +4100,7 @@ class Dataflow_dumplicate(Dataflow):
 
     def start_flow_dumplicate(self):
         schedule = BlockingScheduler()
-        schedule.add_job(self.flow_dumplicate,"cron",second="*/10")
+        schedule.add_job(self.flow_dumplicate,"cron",second="*/5")
         schedule.add_job(self.flow_dumpcate_comsumer,"cron",second="*/30")
         schedule.add_job(self.bdm.monitor_dumplicate,"cron",minute="*/10")
         schedule.add_job(self.flow_remove,"cron",hour="20")
@@ -4061,13 +4110,25 @@ class Dataflow_dumplicate(Dataflow):
 
     def changeSaveStatus(self,list_dict):
         for _dict in list_dict:
-            if _dict.get(document_tmp_save,1)==1:
-                _d = {"partitionkey":_dict["partitionkey"],
-                      "docid":_dict["docid"],
+            if isinstance(_dict,dict):
+                if _dict.get(document_tmp_save,1)==1:
+                    _d = {"partitionkey":_dict["partitionkey"],
+                          "docid":_dict["docid"],
+                          document_tmp_save:0
+                          }
+                    _d_tmp = Document_tmp(_d)
+                    if _d_tmp.exists_row(self.ots_client):
+                        _d_tmp.update_row(self.ots_client)
+            elif isinstance(_dict,int):
+                _d = {"partitionkey":_dict%500+1,
+                      "docid":_dict,
                       document_tmp_save:0
                       }
                 _d_tmp = Document_tmp(_d)
-                _d_tmp.update_row(self.ots_client)
+                if _d_tmp.fix_columns(self.ots_client,["status"],True):
+                    if _d_tmp.getProperties().get("status")==1:
+                        _d_tmp.setValue("status",0,True)
+                        _d_tmp.update_row(self.ots_client)
 
 
 
@@ -4175,8 +4236,10 @@ if __name__ == '__main__':
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     a = time.time()
-    df_dump.test_dumplicate(339737931)
-    # df_dump.test_merge([292315564],[287890754])
+    df_dump.test_dumplicate(386161571
+                            )
+    # df_dump.test_merge([385521167
+    #                     ],[385521113])
     # df_dump.flow_remove_project_tmp()
     print("takes",time.time()-a)
     # df_dump.fix_doc_which_not_in_project()

+ 279 - 95
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -18,6 +18,7 @@ from BaseDataMaintenance.model.postgres.document_extract import *
 import sys
 sys.setrecursionlimit(1000000)
 
+from multiprocessing import Process
 
 class ActiveMQListener():
 
@@ -40,6 +41,32 @@ class ActiveMQListener():
 
 class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
 
+    class AttachmentMQListener():
+
+        def __init__(self,conn,_func,_idx,*args,**kwargs):
+            self.conn = conn
+            self._func = _func
+            self._idx = _idx
+
+        def on_error(self, headers):
+            log("===============")
+            log('received an error %s' % str(headers.body))
+
+        def on_message(self, headers):
+            try:
+                log("get message of idx:%s"%(str(self._idx)))
+                message_id = headers.headers["message-id"]
+                body = headers.body
+                _dict = {"frame":headers,"conn":self.conn}
+                self._func(_dict=_dict)
+            except Exception as e:
+                traceback.print_exc()
+                pass
+
+
+        def __del__(self):
+            self.conn.disconnect()
+
     def __init__(self):
         Dataflow_attachment.__init__(self)
 
@@ -47,18 +74,47 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
         self.mq_attachment = "/queue/dataflow_attachment"
         self.mq_attachment_failed = "/queue/dataflow_attachment_failed"
         self.mq_extract = "/queue/dataflow_extract"
-        self.comsumer_count = 120
+
+        self.queue_attachment_ocr = Queue()
+        self.queue_attachment_not_ocr = Queue()
+        self.comsumer_count = 90
         self.retry_comsumer_count = 10
         self.retry_times = 5
         self.list_attachment_comsumer = []
-        for _i in range(self.comsumer_count):
-            listener_attachment = ActiveMQListener(getConnect_activateMQ(),self.queue_attachment)
-            createComsumer(listener_attachment,self.mq_attachment)
-            self.list_attachment_comsumer.append(listener_attachment)
+
+        # for _i in range(self.comsumer_count):
+        #     listener_attachment = self.AttachmentMQListener(getConnect_activateMQ(),self.queue_attachment)
+        #     createComsumer(listener_attachment,self.mq_attachment)
+        #     self.list_attachment_comsumer.append(listener_attachment)
+
         self.attach_pool = ConnectorPool(10,30,getConnection_postgres)
+        self.redis_pool = ConnectorPool(10,30,getConnect_redis_doc)
         self.conn_mq = getConnect_activateMQ()
         self.pool_mq = ConnectorPool(10,30,getConnect_activateMQ)
 
+        self.session = None
+
+        listener_p = Process(target=self.start_attachment_listener)
+        listener_p.start()
+
+
+
+    def start_attachment_listener(self):
+        for _i in range(self.comsumer_count):
+            listener_attachment = self.AttachmentMQListener(getConnect_activateMQ(),self.attachment_listener_handler,_i)
+            createComsumer(listener_attachment,self.mq_attachment)
+            self.list_attachment_comsumer.append(listener_attachment)
+
+        while 1:
+            for i in range(len(self.list_attachment_comsumer)):
+                if self.list_attachment_comsumer[i].conn.is_connected():
+                    continue
+                else:
+                    listener = self.AttachmentMQListener(getConnect_activateMQ(),self.attachment_listener_handler,i)
+                    createComsumer(listener,self.mq_attachment)
+                    self.list_attachment_comsumer[i] = listener
+            time.sleep(5)
+
     def monitor_listener(self):
         for i in range(len(self.list_attachment_comsumer)):
             if self.list_attachment_comsumer[i].conn.is_connected():
@@ -77,7 +133,7 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
         if attachment_size<100 and failed_attachment_size>0:
             list_comsumer = []
             for _i in range(self.retry_comsumer_count):
-                listener_attachment = ActiveMQListener(getConnect_activateMQ(),self.queue_attachment)
+                listener_attachment = self.AttachmentMQListener(getConnect_activateMQ(),self.attachment_listener_handler,_i)
                 list_comsumer.append(listener_attachment)
                 createComsumer(listener_attachment,self.mq_attachment_failed)
             while 1:
@@ -88,7 +144,34 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
             for _c in list_comsumer:
                 _c.conn.disconnect()
 
+    def attachment_listener_handler(self,_dict):
+        try:
+            frame = _dict["frame"]
+            conn = _dict["conn"]
+            message_id = frame.headers["message-id"]
+            item = json.loads(frame.body)
+            page_attachments = json.loads(item.get(document_tmp_attachment_path,"[]"))
+            _dochtmlcon = item.get(document_tmp_dochtmlcon,"")
+
+            if len(page_attachments)==0:
+                newitem ={"item":item,"list_attach":[],"message_id":message_id,"conn":conn}
+            else:
+                list_fileMd5 = []
+                for _atta in page_attachments:
+                    list_fileMd5.append(_atta.get(document_tmp_attachment_path_filemd5))
+
+                list_attach = self.getAttachments(list_fileMd5,_dochtmlcon)
+
+                newitem = {"item":item,"list_attach":list_attach,"message_id":message_id,"conn":conn}
+
+
+            log("attachment get doc:%s"%(str(newitem.get("item",{}).get("docid"))))
+            self.attachment_recognize(newitem,None)
+
+            log("attachment get doc:%s succeed"%(str(newitem.get("item",{}).get("docid"))))
 
+        except Exception as e:
+            traceback.print_exc()
 
     def rec_attachments_by_interface(self,list_attach,_dochtmlcon,save=True):
         try:
@@ -171,7 +254,6 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
 
 
 
-
             _to_ack = False
             if not _succeed and _retry_times<self.retry_times:
                 item[document_tmp_status] = random.randint(*flow_attachment_status_failed_to)
@@ -263,10 +345,12 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                     attach.setValue(attachment_status,_ots_attach.getProperties().get(attachment_status,""))
                     attach.setValue(attachment_filetype,_ots_attach.getProperties().get(attachment_filetype,""))
                     attach.setValue(attachment_classification,_ots_attach.getProperties().get(attachment_classification,""))
-                    if attach.exists(self.attach_pool):
-                        attach.update_row(self.attach_pool)
-                    else:
-                        attach.insert_row(self.attach_pool)
+                    # if attach.exists(self.attach_pool):
+                    #     attach.update_row(self.attach_pool)
+                    # else:
+                    #     attach.insert_row(self.attach_pool)
+                    self.putAttach_json_toRedis(filemd5,attach.getProperties())
+
 
                     try:
                         if os.exists(localpath):
@@ -293,11 +377,12 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                     _ots_attach = attachment(attach.getProperties_ots())
                     _ots_attach.update_row(self.ots_client)
 
-                    #更新postgres
-                    if attach.exists(self.attach_pool):
-                        attach.update_row(self.attach_pool)
-                    else:
-                        attach.insert_row(self.attach_pool)
+                    # #更新postgres
+                    # if attach.exists(self.attach_pool):
+                    #     attach.update_row(self.attach_pool)
+                    # else:
+                    #     attach.insert_row(self.attach_pool)
+                    self.putAttach_json_toRedis(filemd5,attach.getProperties())
 
 
                     if local_exists:
@@ -314,7 +399,7 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
 
                 # _data_base64 = base64.b64encode(open(localpath,"rb").read())
                 # _success,_html,swf_images = getAttachDealInterface(_data_base64,_filetype)
-                _success,_html,swf_images,classification = getAttachDealInterface(None,_filetype,path=localpath)
+                _success,_html,swf_images,classification = getAttachDealInterface(None,_filetype,path=localpath,session=self.session)
                 log("process filemd5:%s %s of type:%s with size:%.3fM download:%ds recognize takes %ds,ret_size:%d"%(filemd5,str(_success),_filetype,round(_size/1024/1024,4),time_download,time.time()-start_time,len(_html)))
                 if _success:
                     if len(_html)<5:
@@ -372,11 +457,12 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                 _ots_attach = attachment(attach.getProperties_ots())
                 _ots_attach.update_row(self.ots_client) #线上再开放更新
 
-                #更新postgres
-                if attach.exists(self.attach_pool):
-                    attach.update_row(self.attach_pool)
-                else:
-                    attach.insert_row(self.attach_pool)
+                # #更新postgres
+                # if attach.exists(self.attach_pool):
+                #     attach.update_row(self.attach_pool)
+                # else:
+                #     attach.insert_row(self.attach_pool)
+                self.putAttach_json_toRedis(filemd5,attach.getProperties())
 
 
                 if local_exists:
@@ -403,24 +489,69 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
 
 
 
-    def flow_attachment(self):
-        self.flow_attachment_producer()
-        self.flow_attachment_producer_comsumer()
+    # def flow_attachment(self):
+    #     self.flow_attachment_producer()
+    #     self.flow_attachment_producer_comsumer()
 
     def getAttachPath(self,filemd5,_dochtmlcon):
         _soup = BeautifulSoup(_dochtmlcon,"lxml")
 
-        _find = _soup.find("a",attrs={"data":filemd5})
-        filelink = ""
-        if _find is None:
-            _find = _soup.find("img",attrs={"data":filemd5})
-            if _find is not None:
-                filelink = _find.attrs.get("src","")
-        else:
-            filelink = _find.attrs.get("href","")
-        _path = filelink.split("/file")
-        if len(_path)>1:
-            return _path[1]
+        list_mark = ["data","filelink"]
+        for _mark in list_mark:
+            _find = _soup.find("a",attrs={_mark:filemd5})
+            filelink = ""
+            if _find is None:
+                _find = _soup.find("img",attrs={_mark:filemd5})
+                if _find is not None:
+                    filelink = _find.attrs.get("src","")
+            else:
+                filelink = _find.attrs.get("href","")
+            if filelink.find("bidizhaobiao")>=0:
+                _path = filelink.split("/file")
+                if len(_path)>1:
+                    return _path[1]
+
+
+    def getAttach_json_fromRedis(self,filemd5):
+        db = self.redis_pool.getConnector()
+        try:
+
+            _key = "attach-%s"%(filemd5)
+            _attach_json = db.get(_key)
+            return _attach_json
+        except Exception as e:
+            log("getAttach_json_fromRedis error %s"%(str(e)))
+        finally:
+            try:
+                if db.connection.check_health():
+                    self.redis_pool.putConnector(db)
+            except Exception as e:
+                pass
+        return None
+
+    def putAttach_json_toRedis(self,filemd5,extract_dict):
+
+        db = self.redis_pool.getConnector()
+        try:
+            new_dict = {}
+            for k,v in extract_dict.items():
+                if not isinstance(v,set):
+                    new_dict[k] = v
+            _key = "attach-%s"%(filemd5)
+            _extract_json = db.set(str(_key),json.dumps(new_dict))
+            db.expire(_key,3600*3)
+            return _extract_json
+        except Exception as e:
+            log("putExtract_json_toRedis error%s"%(str(e)))
+            traceback.print_exc()
+        finally:
+            try:
+                if db.connection.check_health():
+                    self.redis_pool.putConnector(db)
+            except Exception as e:
+                pass
+
+
 
     def getAttachments(self,list_filemd5,_dochtmlcon):
         conn = self.attach_pool.getConnector()
@@ -432,15 +563,26 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                     to_find_md5.append(_filemd5)
 
             conditions = ["filemd5 in ('%s')"%("','".join(to_find_md5))]
-            list_attachment =  Attachment_postgres.select_rows(conn,Attachment_postgres,"attachment",conditions)
-            log("select localpath database %d/%d"%(len(list_attachment),len(to_find_md5)))
+            list_attachment = []
+
             set_md5 = set()
-            for _attach in list_attachment:
-                set_md5.add(_attach.getProperties().get(attachment_filemd5))
-            list_not_in_md5 = []
+            # list_attachment =  Attachment_postgres.select_rows(conn,Attachment_postgres,"attachment",conditions)
+            # for _attach in list_attachment:
+            #     set_md5.add(_attach.getProperties().get(attachment_filemd5))
+
+            for _filemd5 in to_find_md5:
+                _json = self.getAttach_json_fromRedis(_filemd5)
+
+                if _json is not None:
+                    set_md5.add(_filemd5)
+                    list_attachment.append(Attachment_postgres(json.loads(_json)))
+
+            log("select localpath database %d/%d"%(len(set_md5),len(to_find_md5)))
+
             for _filemd5 in to_find_md5:
+
                 if _filemd5 not in set_md5:
-                    list_not_in_md5.append(_filemd5)
+
                     _path = self.getAttachPath(_filemd5,_dochtmlcon)
 
 
@@ -452,15 +594,18 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                             log("getAttachments find in ots:%s"%(_filemd5))
                             list_attachment.append(Attachment_postgres(_attach_ots.getProperties()))
                     else:
-                        if _path[0]=="/":
-                            _path = _path[1:]
-                        _filetype = _path.split(".")[-1]
-                        _attach = {attachment_filemd5:_filemd5,
-                                   attachment_filetype:_filetype,
-                                   attachment_status:20,
-                                   attachment_path:"%s/%s"%(_filemd5[:4],_path),
-                                   attachment_crtime:getCurrent_date(format="%Y-%m-%d %H:%M:%S")}
-                        list_attachment.append(Attachment_postgres(_attach))
+                        log("getAttachments search in path:%s"%(_filemd5))
+                        if _path:
+                            log("getAttachments find in path:%s"%(_filemd5))
+                            if _path[0]=="/":
+                                _path = _path[1:]
+                            _filetype = _path.split(".")[-1]
+                            _attach = {attachment_filemd5:_filemd5,
+                                       attachment_filetype:_filetype,
+                                       attachment_status:20,
+                                       attachment_path:"%s/%s"%(_filemd5[:4],_path),
+                                       attachment_crtime:getCurrent_date(format="%Y-%m-%d %H:%M:%S")}
+                            list_attachment.append(Attachment_postgres(_attach))
 
 
 
@@ -485,9 +630,17 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
 
     def flow_attachment_producer_comsumer(self):
         log("start flow_attachment comsumer")
-        mt = MultiThreadHandler(self.queue_attachment,self.comsumer_handle,None,10,1,restart=True)
+        mt = MultiThreadHandler(self.queue_attachment,self.comsumer_handle,None,10,1,need_stop=False,restart=True)
         mt.run()
 
+    def flow_attachment_process(self):
+        self.process_comsumer()
+
+        # p = Process(target = self.process_comsumer)
+        # p.start()
+        # p.join()
+
+
     def set_queue(self,_dict):
         list_attach = _dict.get("list_attach")
         to_ocr = False
@@ -526,34 +679,43 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
         current_date = getCurrent_date(format="%Y-%m-%d")
         last_date = timeAdd(current_date,-2,format="%Y-%m-%d")
         sql = " delete from attachment where crtime<='%s 00:00:00' "%(last_date)
-        conn = getConnection_postgres()
-        cursor = conn.cursor()
-        cursor.execute(sql)
-        conn.commit()
-        conn.close()
+        conn = self.attach_pool.getConnector()
+        try:
+            cursor = conn.cursor()
+            cursor.execute(sql)
+            conn.commit()
+            self.attach_pool.putConnector(conn)
+        except Exception as e:
+            conn.close()
+
 
 
     def start_flow_attachment(self):
         schedule = BlockingScheduler()
-        schedule.add_job(self.flow_attachment_process,"cron",second="*/20")
-        schedule.add_job(self.flow_attachment,"cron",second="*/10")
-        schedule.add_job(self.monitor_attachment_process,"cron",second="*/10")
-        schedule.add_job(self.remove_attachment_postgres,"cron",hour="6")
+        # schedule.add_job(self.flow_attachment_process,"cron",second="*/20")
+        # schedule.add_job(self.flow_attachment,"cron",second="*/10")
+        # schedule.add_job(self.flow_attachment_producer,"cron",second="*/10")
+        # schedule.add_job(self.flow_attachment_producer_comsumer,"cron",second="*/10")
+        # schedule.add_job(self.monitor_listener,"cron",minute="*/1")
+
+
+        # schedule.add_job(self.monitor_attachment_process,"cron",second="*/10")
+        # schedule.add_job(self.remove_attachment_postgres,"cron",hour="6")
         schedule.add_job(self.process_failed_attachment,"cron",minute="*/10")
-        schedule.add_job(self.monitor_listener,"cron",minute="*/1")
         schedule.start()
 
 class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
     class ExtractListener():
 
-        def __init__(self,conn,_func,*args,**kwargs):
+        def __init__(self,conn,_func,_idx,*args,**kwargs):
             self.conn = conn
             self._func = _func
+            self._idx = _idx
 
         def on_message(self, headers):
             try:
-                log("get message")
+                log("get message of idx:%d"%(self._idx))
                 message_id = headers.headers["message-id"]
                 body = headers.body
                 log("get message %s crtime:%s"%(message_id,json.loads(body).get("crtime","")))
@@ -568,7 +730,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
         def __del__(self):
             self.conn.disconnect()
 
-    def __init__(self):
+    def __init__(self,create_listener=True):
         Dataflow_extract.__init__(self)
 
         self.industy_url = "http://127.0.0.1:15000/industry_extract"
@@ -590,23 +752,43 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             current_weight += self.extract_interfaces[_i][1]
             self.extract_interfaces[_i][1] = current_weight/self.whole_weight
 
-        self.comsumer_count = 40
-        self.pool_postgres = ConnectorPool(10,self.comsumer_count,getConnection_postgres)
+        self.comsumer_count = 50
+        # self.pool_postgres = ConnectorPool(10,self.comsumer_count,getConnection_postgres)
         self.pool_redis_doc = ConnectorPool(10,self.comsumer_count,getConnect_redis_doc)
         self.conn_mq = getConnect_activateMQ()
         self.pool_mq = ConnectorPool(10,30,getConnect_activateMQ)
         self.block_url = RLock()
         self.url_count = 0
+        self.session = None
 
 
         self.list_extract_comsumer = []
+
+        # for _i in range(self.comsumer_count):
+        #     listener_extract = self.ExtractListener(getConnect_activateMQ(),self.comsumer_handle,_i)
+        #     createComsumer(listener_extract,self.mq_extract)
+        #     self.list_extract_comsumer.append(listener_extract)
+
+        if create_listener:
+            listener_p = Process(target=self.start_extract_listener)
+            listener_p.start()
+
+    def start_extract_listener(self):
+
         for _i in range(self.comsumer_count):
-            listener_extract = self.ExtractListener(getConnect_activateMQ(),self.comsumer_handle)
+            listener_extract = self.ExtractListener(getConnect_activateMQ(),self.comsumer_handle,_i)
             createComsumer(listener_extract,self.mq_extract)
             self.list_extract_comsumer.append(listener_extract)
 
-
-
+        while 1:
+            for _i in range(len(self.list_extract_comsumer)):
+                if self.list_extract_comsumer[_i].conn.is_connected():
+                    continue
+                else:
+                    listener = self.ExtractListener(getConnect_activateMQ(),self.comsumer_handle,_i)
+                    createComsumer(listener,self.mq_extract)
+                    self.list_extract_comsumer[_i] = listener
+            time.sleep(5)
 
     def monitor_listener(self):
         for i in range(len(self.list_extract_comsumer)):
@@ -618,14 +800,14 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 self.list_extract_comsumer[i] = listener
 
     def getExtract_url(self):
-        _url_num = 0
-        with self.block_url:
-            self.url_count += 1
-            self.url_count %= self.whole_weight
-            _url_num = self.url_count
-
-        # _r = random.random()
-        _r = _url_num/self.whole_weight
+        # _url_num = 0
+        # with self.block_url:
+        #     self.url_count += 1
+        #     self.url_count %= self.whole_weight
+        #     _url_num = self.url_count
+
+        _r = random.random()
+        # _r = _url_num/self.whole_weight
         for _i in range(len(self.extract_interfaces)):
             if _r<=self.extract_interfaces[_i][1]:
                 return self.extract_interfaces[_i][0]
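
getExtract_url now draws random.random() against the cumulative weights prepared in __init__ instead of round-robining a counter. An equivalent standalone sketch using random.choices, with made-up URLs and weights:

    import random

    extract_interfaces = [               # (url, weight) pairs; values are hypothetical
        ("http://127.0.0.1:15030/test", 3),
        ("http://127.0.0.1:15031/test", 1),
    ]

    def get_extract_url():
        urls = [u for u, _ in extract_interfaces]
        weights = [w for _, w in extract_interfaces]
        return random.choices(urls, weights=weights, k=1)[0]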
@@ -636,7 +818,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
         # _url = self.extract_interfaces[_i]
         _url = self.getExtract_url()
         log("extract_url:%s"%(str(_url)))
-        resp = requests.post(_url,json=json,headers=headers,timeout=10*60)
+        with requests.Session() as session:
+            resp = session.post(_url,json=json,headers=headers,timeout=10*60)
         return resp
 
 
@@ -666,18 +849,21 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
 
         from BaseDataMaintenance.java.MQInfo import getQueueSize
-        extract_failed_size = getQueueSize("dataflow_extract_failed")
-        extract_size = getQueueSize("dataflow_extract")
-        log("extract_failed_size %s extract_size %s"%(str(extract_failed_size),str(extract_size)))
-        if extract_failed_size>0 and extract_size<100:
-            failed_listener = self.ExtractListener(getConnect_activateMQ(),_handle)
-            createComsumer(failed_listener,self.mq_extract_failed)
-            while 1:
-                extract_failed_size = getQueueSize("dataflow_extract_failed")
-                if extract_failed_size==0:
-                    break
-                time.sleep(10)
-            failed_listener.conn.disconnect()
+        try:
+            extract_failed_size = getQueueSize("dataflow_extract_failed")
+            extract_size = getQueueSize("dataflow_extract")
+            log("extract_failed_size %s extract_size %s"%(str(extract_failed_size),str(extract_size)))
+            if extract_failed_size>0 and extract_size<100:
+                failed_listener = self.ExtractListener(getConnect_activateMQ(),_handle,1)
+                createComsumer(failed_listener,self.mq_extract_failed)
+                while 1:
+                    extract_failed_size = getQueueSize("dataflow_extract_failed")
+                    if extract_failed_size==0:
+                        break
+                    time.sleep(10)
+                failed_listener.conn.disconnect()
+        except Exception as e:
+            traceback.print_exc()
 
     def flow_extract(self,):
         self.comsumer()
@@ -762,8 +948,6 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
             html_len = len(_dochtmlcon)
             if html_len>50000:
-                if int(item.get("docid"))==329546490:
-                    save(item,"329546490.pk")
                 log("docid %s dochtmlcon too long len %d "%(str(item.get("docid")),html_len))
                 try:
                     _dochtmlcon = re.sub("<html>|</html>|<body>|</body>", "", _dochtmlcon)
@@ -972,8 +1156,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
         schedule = BlockingScheduler()
         schedule.add_job(self.flow_extract_producer,"cron",second="*/20")
         schedule.add_job(self.process_extract_failed,"cron",minute="*/5")
-        schedule.add_job(self.delete_document_extract,"cron",hour="*/5")
-        schedule.add_job(self.monitor_listener,"cron",minute="*/5")
+        # schedule.add_job(self.delete_document_extract,"cron",hour="*/5")
+        # schedule.add_job(self.monitor_listener,"cron",minute="*/5")
         schedule.start()
 
 from multiprocessing import RLock

+ 1 - 1
BaseDataMaintenance/maintenance/dataflow_settings.py

@@ -1,6 +1,6 @@
 
 
-flow_process_count = 3000
+flow_process_count = 600
 
 flow_attachment_status_from = [0,10]
 flow_attachment_status_failed_to = [0,0]

+ 131 - 104
BaseDataMaintenance/maintenance/enterprise/enterprise2Redis.py

@@ -17,12 +17,13 @@ from BaseDataMaintenance.dataSource.pool import ConnectorPool
 from BaseDataMaintenance.common.multiThread import MultiThreadHandler
 import pandas as pd
 from BaseDataMaintenance.maintenance.dataflow_settings import *
-
+from elasticsearch import Elasticsearch
 
 # flow_enterprise2redis_path = "/data/python/flow_enterprise2redis.log"
 log_enterprise2redis_create_time_path = "/data/python/enterprise2redis_createTime.log"
 
 
+
 # 线上流程 enterprise Redis表维护
 class enterprise2Redis():
 
@@ -57,100 +58,108 @@ class enterprise2Redis():
             finally:
                 pool_db.putConnector(_db)
 
-        ots_client = getConnect_ots()
-        bool_query = BoolQuery(must_queries=[
-            RangeQuery("create_time", range_from=last_create_time, range_to=now_time),
-            RangeQuery("status", range_from=201, range_to=301),
-        ])
-        rows, next_token, total_count, is_all_succeed = ots_client.search("enterprise", "enterprise_index",
-                                                                          SearchQuery(bool_query, get_total_count=True),
-                                                                          columns_to_get=ColumnsToGet(
-                                                                              return_type=ColumnReturnType.NONE))
-
-        if total_count > 0:
-            column_list = ["nicknames", "history_names", 'tyc_id', 'legal_person', 'reg_capital', 'found_date',
-                           'credit_code', 'tax_number', 'reg_number', 'org_number']
-            all_rows = []
-            first_query = False
-            # 第一次查询
-            while not first_query:
-                try:
-                    rows, next_token, total_count, is_all_succeed = ots_client.search(
-                        "enterprise", "enterprise_index",
-                        SearchQuery(
-                            bool_query,
-                            limit=100,
-                            sort=Sort(sorters=[FieldSort('bidi_id', SortOrder.DESC)]),
-                            get_total_count=True),
-                            ColumnsToGet(return_type=ColumnReturnType.SPECIFIED,
-                                     column_names=column_list)
-                        )
-                    first_query = True
-                except:
-                    print("~ first_query ots error ~")
-            # follow-up queries via next_token
-            if first_query:
-                while next_token:
-                    try:
-                        rows, next_token, total_count, is_all_succeed = ots_client.search(
-                            "enterprise", "enterprise_index",
-                            SearchQuery(
-                                bool_query,
-                                next_token=next_token,
-                                limit=100,
-                                get_total_count=True),
-                            ColumnsToGet(return_type=ColumnReturnType.SPECIFIED,
-                                         column_names=column_list)
-                        )
-                        all_rows.extend(rows)
-
-                    except:
-                        print("~ ots query error, try again ~")
-
-            # clean and filter
-            # legal_name_list = []
-            # not_legal_name_list = []
-            legal_name_num = 0
-            not_legal_name_num = 0
-            name_sign_list = []
-            for row in all_rows:
-                # index fields
-                index_field = row[1]
-                row_dict = dict((item[0], item[1]) for item in index_field)
-                name = ""
-                num = 0
-                for key,value in row_dict.items():
-                    if key=='nicknames':
-                        name = value
-                    else:
-                        if len(str(value).replace("-",""))>1:
-                            num += 1
-                isLegal = isLegalNewName(name)
-                if isLegal>=0:
-                    if num>=1 and len(name)>4:
-                        legal_name_num += 1
-                        name_sign_list.append((name,1))
-                # elif num>=1:
-                #     pass
-                # else:
-                #     not_legal_name_num += 1
-                #     # name_sign_list.append((name,0))
-                #     pass
-
-            pool_db = ConnectorPool(10, 30, getConnect_redis_baseline)
-            # _start_time = time.time()
-            task_queue = Queue()
-            for legal_name in name_sign_list:
-                task_queue.put(legal_name)
-                if task_queue.qsize() >= 100 * 10000:
-                    _mt = MultiThreadHandler(task_queue, handle, None, 30)
-                    _mt.run()
-            if task_queue.qsize() >= 0:
+        # es_url = "http://es-cn-lbj3cjmy3000djxak.elasticsearch.aliyuncs.com"  # intranet endpoint
+        es_url = "http://es-cn-lbj3cjmy3000djxak.public.elasticsearch.aliyuncs.com" # public endpoint
+        es_client = Elasticsearch([es_url],
+                                  http_auth=('elastic', 'WWBu9#1HWHo$$gJm'),
+                                  port=9200)
+        body = {
+            "_source": ["name", "history_names", 'legal_person', 'reg_capital', 'credit_code', 'tax_number',
+                        'reg_number', 'org_number',
+                        "zhao_biao_number", "zhong_biao_number", "dai_li_number", "bid_number"],
+            'query': {  # query clause
+                "bool": {
+                    'must': [
+                        {'range': {"update_time": {
+                            "gte": last_create_time,  # >= 大于等于
+                            "lt": now_time  # < 小于
+                        }}},
+                        # {'range': {"create_time": {
+                        #     "gte": last_create_time,  # >= 大于等于
+                        #     "lt": now_time  # < 小于
+                        # }}},
+                        {'range': {"status": {
+                            "gte": 201,  # >= 大于等于
+                            "lt": 301  # < 小于
+                        }}}
+                    ]
+                }
+            },
+            "sort": [
+                {"create_time": "desc"}
+            ]
+        }
+
+        query = es_client.search(index='enterprise', body=body, scroll='10m', size=5000)
+        scroll_id = query['_scroll_id']  # scroll cursor used to page through the full ES result set
+        query_result = query['hits']['hits']
+        result = query_result
+        while len(query_result) > 0:
+            try:
+                query_scroll = es_client.scroll(scroll_id=scroll_id, scroll='10m')
+                scroll_id = query_scroll['_scroll_id']
+                query_result = query_scroll['hits']['hits']
+                if len(query_result) > 0:
+                    result += query_result
+                else:
+                    break
+            except:
+                pass
+
+        es_client.clear_scroll(scroll_id=scroll_id)
+
+        legal_name_num = 0
+        not_legal_name_num = 0
+        add_redis_list = []
+        for item in result:
+            item = item['_source']
+            name = item['name']
+            history_names = item.get("history_names", "")
+            legal_person = item.get("legal_person", "")
+            reg_capital = item.get("reg_capital", "")
+            credit_code = item.get("credit_code", "")
+            tax_number = item.get("tax_number", "")
+            reg_number = item.get("reg_number", "")
+            org_number = item.get("org_number", "")
+            zhao_biao_number = item.get("zhao_biao_number", 0)
+            zhong_biao_number = item.get("zhong_biao_number", 0)
+            dai_li_number = item.get("dai_li_number", 0)
+            bid_number = item.get("bid_number", 0)
+
+            num = 0
+            for business in [history_names, legal_person, reg_capital, credit_code, tax_number, reg_number, org_number]:
+                if len(str(business).replace("-", "")) > 1:
+                    num += 1
+            isLegal = isLegalNewName(name)
+            if isLegal >= 0:
+                if num >= 1 and len(name) > 4:
+                    legal_name_num += 1
+                    _json = {"have_business": 1, "zhao_biao_number": zhao_biao_number,
+                             "zhong_biao_number": zhong_biao_number,
+                             "dai_li_number": dai_li_number, "bid_number": bid_number}
+                    _json = json.dumps(_json, ensure_ascii=False)
+                    add_redis_list.append((name, _json))
+                elif num == 0 and bid_number > 0 and len(name) > 4:
+                    legal_name_num += 1
+                    _json = {"have_business": 0, "zhao_biao_number": zhao_biao_number,
+                             "zhong_biao_number": zhong_biao_number,
+                             "dai_li_number": dai_li_number, "bid_number": bid_number}
+                    _json = json.dumps(_json, ensure_ascii=False)
+                    add_redis_list.append((name, _json))
+
+        pool_db = ConnectorPool(10, 30, getConnect_redis_baseline)
+        # _start_time = time.time()
+        task_queue = Queue()
+        for legal_name in add_redis_list:
+            task_queue.put(legal_name)
+            if task_queue.qsize() >= 100 * 10000:
                 _mt = MultiThreadHandler(task_queue, handle, None, 30)
                 _mt.run()
-            return legal_name_num,not_legal_name_num
+        if task_queue.qsize() >= 0:
+            _mt = MultiThreadHandler(task_queue, handle, None, 30)
+            _mt.run()
 
-        return 0,0
+        return legal_name_num, not_legal_name_num
 
     # Redis: add legal and illegal entities
     def monitor_enterprise2redis(self):
@@ -201,7 +210,8 @@ class enterprise2Redis():
         def handle(item, result_queue):
             _db = pool_db.getConnector()
             try:
-                _db.set(item, 0)
+                # _db.set(item, 0)
+                _db.delete(item)
             except Exception as e:
                 traceback.print_exc()
             finally:
@@ -234,30 +244,40 @@ class enterprise2Redis():
 
 # legality check for new entity names
 def isLegalNewName(enterprise_name):
-    # head_character_list = ["[",'【',"(",'(']
-    # tail_character_list = ["]",'】',")",')']
     # check the beginning of the name
     if re.search("^[\da-zA-Z][^\da-zA-Z]|"
                  "^[^\da-zA-Z\u4e00-\u9fa5\[【((]|"
                  "^[\[【((].{,1}[\]】))]|"
                  "^[0〇]|"
-                 "^(20[0-2][0-9]|[0-2]?[0-9]年|[0-1]?[0-9]月|[0-3]?[0-9]日)",enterprise_name):
+                 "^(20[0-2][0-9]|[0-2]?[0-9]年|[0-1]?[0-9]月|[0-3]?[0-9]日)", enterprise_name):
         return -1
-    if len(re.findall("[\u4e00-\u9fa5]",enterprise_name))<2:
+    if len(re.findall("[\u4e00-\u9fa5]", enterprise_name)) < 2:
         return -1
-    if len(re.findall("[\u4e00-\u9fa5]",enterprise_name))/len(enterprise_name) < 0.5:
+    if len(re.findall("[\u4e00-\u9fa5]", enterprise_name)) / len(enterprise_name) < 0.5:
         return -1
-    if re.search("╳|*|\*|×|xx|XX",enterprise_name):
+    if re.search("╳|*|\*|×|xx|XX|\s", enterprise_name):
         return -1
-    if re.search("个人|个体|测试",enterprise_name):
+    if re.search("[区市镇乡县洲州路街]$", enterprise_name) and not re.search("(超市|门市|保护区|园区|景区|校区|社区|服务区|工区|小区|集市|花市|夜市|学区|旅游区|矿区|林区|度假区|示范区|菜市)$", enterprise_name):
         return -1
-    if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)",enterprise_name) and not re.search("^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)",enterprise_name):
+    if re.search("^个人|^个体|测试$", enterprise_name):
         return -1
-    if re.search("\d{1,2}:\d{2}(:\d{2})?|(rar|xlsx|zip|png|jpg|swf|docx|txt|pdf|PDF|doc|xls|bmp|&?nbsp)",enterprise_name):
+    if re.search("个人|个体", enterprise_name):
+        _split = re.split("个人|个体", enterprise_name)
+        if len(_split[0]) <= 5:
+            return -1
+    if re.search("测试", enterprise_name) and len(enterprise_name) < 8:
         return -1
-    if re.search("(招标|代理)(人|机构)|联系(人|方式)|中标|评[标审选委]|候选|第.名|^(项目|业主)",enterprise_name):
+    if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
+            "^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)", enterprise_name):
         return -1
-    if re.search("[a-zA-Z\d一二三四五六七八九十]{1,2}(包|标段?)|第.批",enterprise_name):
+    if re.search("\d{1,2}:\d{2}(:\d{2})?|(rar|xlsx|zip|png|jpg|swf|docx|txt|pdf|PDF|doc|xls|bmp|&?nbsp)",
+                 enterprise_name):
+        return -1
+    if re.search("(招标|代理)(人|机构)|联系(人|方式)|中标|候选|第.名|^(项目|业主)", enterprise_name):
+        return -1
+    if re.search("评[标选委审]", enterprise_name) and not re.search("评[标选委审].{0,2}中心", enterprise_name):
+        return -1
+    if re.search("[a-zA-Z\d一二三四五六七八九十]{1,2}(标段?)|第.批$", enterprise_name):
         return 0
     return 1
 
@@ -281,6 +301,13 @@ if __name__ == '__main__':
     # now_time = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(now_time))
     # legal_name_num,not_legal_name_num = em.update2redis(last_create_time=last_create_time,now_time=now_time)
 
+    # delete entities from the online Redis
+    # _enterprise2Redis = enterprise2Redis()
+    # drop_list = ["个体工商户"]
+    # _enterprise2Redis.fix_up_redis(drop_list)
+
+    # e = enterprise2Redis()
+    # e.monitor_enterprise2redis()
     pass
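
The update2redis rewrite above swaps the OTS range query for an Elasticsearch scroll. Below is a minimal sketch of that scroll loop, assuming the pre-8.x elasticsearch-py client style used in this diff (body dict, scroll keyword); the host, credentials and field names are placeholders, not production values:

from elasticsearch import Elasticsearch

es_client = Elasticsearch(["http://localhost"], http_auth=("elastic", "<password>"), port=9200)

body = {
    "_source": ["name", "bid_number"],
    "query": {"bool": {"must": [{"range": {"status": {"gte": 201, "lt": 301}}}]}},
    "sort": [{"create_time": "desc"}],
}
page = es_client.search(index="enterprise", body=body, scroll="10m", size=5000)
scroll_id = page["_scroll_id"]
hits = page["hits"]["hits"]
result = list(hits)
while hits:                                   # keep scrolling until a page comes back empty
    page = es_client.scroll(scroll_id=scroll_id, scroll="10m")
    scroll_id = page["_scroll_id"]
    hits = page["hits"]["hits"]
    result.extend(hits)
es_client.clear_scroll(scroll_id=scroll_id)   # release the server-side scroll context
print(len(result), "rows pulled")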
 
 

+ 21 - 5
BaseDataMaintenance/maintenance/preproject/fillColumns.py

@@ -20,7 +20,10 @@ class PreprojectFill():
 
         def comsumer_handle(_row,result_queue):
             if _row.get(preproject_uuid) is None:
-                _row[preproject_uuid] = uuid4().hex
+                _preproject = Preproject(_row)
+                # delete rows that have no uuid
+                _preproject.delete_row(self.ots_client)
+                return
             if _row.get(preproject_has_bidfile) is None:
                 json_docids = _row.get(preproject_json_docids)
                 if json_docids is not None:
@@ -41,16 +44,27 @@ class PreprojectFill():
             _preproject = Preproject(_row)
             _preproject.update_row(self.ots_client)
 
+
         _mul = MultiThreadHandler(self.task_queue,comsumer_handle,None,10)
         _mul.run()
 
 
 
     def fill_producer(self):
-        q1 = BoolQuery(should_queries=[WildcardQuery("uuid","*"),
-                                          RangeQuery("has_bidfile",0)])
+        # rows that have a uuid: fill in the 'has_bidfile' column
+        q1 = BoolQuery(must_queries=[
+            ExistsQuery("uuid"),
+            BoolQuery(must_not_queries=[
+                ExistsQuery("has_bidfile")
+            ])
+        ])
+        # rows without a uuid: used to delete the row
+        q2 = BoolQuery(must_not_queries=[
+                ExistsQuery("uuid")
+            ])
         columns = ["uuid","has_bidfile","json_docids"]
-        query = BoolQuery(must_not_queries=[q1])
+        query = BoolQuery(should_queries=[q1,
+                                          q2])
         rows,next_token,total_count,is_all_succeed = self.ots_client.search("preproject","preproject_index",
                                                                             SearchQuery(query,sort=Sort(sorters=[FieldSort("uuid")]),get_total_count=True,limit=100),
                                                                             ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
@@ -66,7 +80,9 @@ class PreprojectFill():
                                                                                 ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
 
     def fill_contact_producer(self):
-        q1 = BoolQuery(must_queries=[TermQuery("status",1),
+        q1 = BoolQuery(must_queries=[
+            TermQuery("status",1),
+            ExistsQuery("uuid")
                                        ])
         columns = ["status",preproject_tenderee,preproject_last_tenderee_contact,preproject_last_tenderee_phone,preproject_last_win_tenderer,preproject_last_win_tenderer_contact,preproject_last_win_tenderer_phone]
         query = q1
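
The producer above now splits rows by column existence instead of a wildcard query. A condensed sketch of that ExistsQuery/BoolQuery construction with the tablestore SDK follows; the endpoint and credentials are placeholders, and the tuple-style return value matches the older SDK already used throughout this repo:

from tablestore import (OTSClient, BoolQuery, ExistsQuery, SearchQuery, Sort,
                        FieldSort, ColumnsToGet, ColumnReturnType)

ots_client = OTSClient("<endpoint>", "<access_key_id>", "<access_key_secret>", "<instance>")

# rows that already have a uuid but still miss has_bidfile -> fill the column
q_fill = BoolQuery(must_queries=[
    ExistsQuery("uuid"),
    BoolQuery(must_not_queries=[ExistsQuery("has_bidfile")])
])
# rows without any uuid -> candidates for deletion
q_drop = BoolQuery(must_not_queries=[ExistsQuery("uuid")])
query = BoolQuery(should_queries=[q_fill, q_drop])

rows, next_token, total_count, is_all_succeed = ots_client.search(
    "preproject", "preproject_index",
    SearchQuery(query, sort=Sort(sorters=[FieldSort("uuid")]), get_total_count=True, limit=100),
    ColumnsToGet(["uuid", "has_bidfile", "json_docids"], ColumnReturnType.SPECIFIED))
print(total_count)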

+ 1 - 1
BaseDataMaintenance/maintenance/product/product_parameter.py

@@ -199,7 +199,7 @@ class Product_Attachment_Processor():
             list_product = list(set(list_product))
             dp = Document_product(item)
             if attachments is None or attachments=="" or len(list_product)==0:
-                dp.setValue(DOCUMENT_PRODUCT_PARAMETER_STATUS,parameter_status_no_bidfile)
+                dp.setValue(DOCUMENT_PRODUCT_PARAMETER_STATUS,parameter_status_no_bidfile,True)
                 dp.update_row(self.ots_client)
                 return
             list_attachment = json.loads(attachments)

+ 37 - 4
BaseDataMaintenance/maintenance/proposedBuilding/DataSynchronization.py

@@ -37,7 +37,7 @@ class DataSynchronization():
         columns = ["uuid","crtime","json_list_group"]
 
         rows, next_token, total_count, is_all_succeed = ots_client.search(self.proposedBuilding_table, self.proposedBuilding_table_index,
-                                                                          SearchQuery(bool_query ,sort=Sort(sorters=[FieldSort("crtime",SortOrder.DESC)]), limit=100, get_total_count=True),
+                                                                          SearchQuery(bool_query ,sort=Sort(sorters=[FieldSort("crtime",SortOrder.ASC)]), limit=100, get_total_count=True),
                                                                           ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
         list_data = getRow_ots(rows)
         for _data in list_data:
@@ -196,11 +196,43 @@ class DataSynchronization():
         mt=MultiThreadHandler(task_queue,_handle,None,30)
         mt.run()
 
+    def drop_data(self):
+        ots_client = getConnect_ots()
+
+        bool_query = BoolQuery(must_queries=[ExistsQuery("crtime")])
+
+        task_queue = queue.Queue()
+
+        rows, next_token, total_count, is_all_succeed = ots_client.search(self.proposedBuilding_table, self.proposedBuilding_table_index,
+                                                                          SearchQuery(bool_query ,sort=Sort(sorters=[FieldSort("crtime",SortOrder.ASC)]), limit=100, get_total_count=True),
+                                                                          ColumnsToGet(return_type=ColumnReturnType.SPECIFIED))
+        list_data = getRow_ots(rows)
+        for _data in list_data:
+            _proposed = proposedBuilding_tmp(_data)
+            task_queue.put(_proposed,True)
+            print(total_count,task_queue.qsize())
+        _count = len(list_data)
+        while next_token:
+            rows, next_token, total_count, is_all_succeed = ots_client.search(self.proposedBuilding_table, self.proposedBuilding_table_index,
+                                                                              SearchQuery(bool_query ,next_token=next_token, limit=100, get_total_count=True),
+                                                                              ColumnsToGet(return_type=ColumnReturnType.SPECIFIED))
+            list_data = getRow_ots(rows)
+            for _data in list_data:
+                _proposed = proposedBuilding_tmp(_data)
+                task_queue.put(_proposed,True)
+                print(total_count,task_queue.qsize())
+
+        def _handle(item,result_queue):
+            item.delete_row(ots_client)
+
+        mt = MultiThreadHandler(task_queue,_handle,None,30)
+        mt.run()
+
     def scheduler(self):
         from BaseDataMaintenance.maintenance.major_project.unionDocument import MajorUnion
         mu = MajorUnion()
         _scheduler = BlockingScheduler()
-        _scheduler.add_job(self.maxcompute2ots,"cron",minute="*/8")
+        _scheduler.add_job(self.maxcompute2ots,"cron",minute="*/1")
         _scheduler.add_job(self.turn_stage,"cron",hour="*/5")
         _scheduler.add_job(mu.comsumer,"cron",minute="*/8")
         _scheduler.start()
@@ -215,8 +247,9 @@ def startSychro():
 if __name__=="__main__":
     ds = DataSynchronization()
     # # ds.scheduler()
-    # # ds.maxcompute2ots()
+    # ds.maxcompute2ots()
     # ds.turn_stage()
-    ds.fix_progress()
+    # ds.fix_progress()
+    ds.drop_data()
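
drop_data above follows the usual full-scan shape: page through the index with next_token, queue every row, then delete from a worker pool. A generic sketch of that shape is given below; search_page and delete_row are hypothetical hooks standing in for the project's OTS helpers:

import queue
from concurrent.futures import ThreadPoolExecutor

def scan_and_delete(search_page, delete_row, workers=30):
    """search_page(token) -> (rows, next_token); delete_row(row) removes one row."""
    task_queue = queue.Queue()
    rows, next_token = search_page(None)          # first page
    for row in rows:
        task_queue.put(row)
    while next_token:                             # follow the cursor until exhausted
        rows, next_token = search_page(next_token)
        for row in rows:
            task_queue.put(row)
    with ThreadPoolExecutor(max_workers=workers) as pool:
        while not task_queue.empty():
            pool.submit(delete_row, task_queue.get())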
 
 

+ 368 - 0
BaseDataMaintenance/maintenance/test_speed.py

@@ -0,0 +1,368 @@
+
+
+import re
+
+
+s = '''
+2023-11-22 15:44:41,364 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:44:41,369 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:44:41,567 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:41,599 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:41,641 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:41,686 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:41,777 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:44:41,814 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:41,890 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:41,902 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:42,482 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:42,244 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:44:42,596 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:44:42,639 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:44:43,052 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:44:43,340 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:44:43,353 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:44:43,450 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:44:43,563 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:43,596 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:43,617 - BaseDataMaintenance.common.Utils - INFO - get message of idx:23
+2023-11-22 15:44:43,655 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:44:43,711 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:44,064 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:44:44,066 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:44:44,080 - BaseDataMaintenance.common.Utils - INFO - get message of idx:32
+2023-11-22 15:44:44,318 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:44,329 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:44,513 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:44:44,681 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:44,772 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:44,870 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:44:44,964 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:44:44,986 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:44,990 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:45,017 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:44:45,142 - BaseDataMaintenance.common.Utils - INFO - get message of idx:43
+2023-11-22 15:44:45,183 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:45,285 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:44:45,691 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:45,983 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:44:46,100 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:46,159 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:44:46,193 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:44:46,242 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:44:46,468 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:46,539 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:46,621 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:44:46,660 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:46,703 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:44:46,775 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:46,820 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:46,876 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:46,894 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:47,161 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:44:47,180 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:47,209 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:47,480 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:47,543 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:47,649 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:47,664 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:44:48,101 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:44:48,003 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:44:48,302 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:48,339 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:44:48,432 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:44:48,472 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:48,479 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:48,511 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:48,534 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:44:48,544 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:48,562 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:44:48,620 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:44:48,661 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:48,789 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:48,854 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:48,913 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:44:49,277 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:49,288 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:44:49,302 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:49,306 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:44:49,331 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:49,494 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:44:49,513 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:49,579 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:49,884 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:44:50,070 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:44:50,199 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:44:50,292 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:50,406 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:44:50,490 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:44:50,514 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:50,541 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:50,642 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:44:50,880 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:50,991 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:51,049 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:51,066 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:44:51,127 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:44:51,175 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:51,597 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:51,691 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:51,734 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:44:51,884 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:51,952 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:52,141 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:44:51,991 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:52,098 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:44:52,113 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:52,155 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:44:52,171 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:52,174 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:52,209 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:52,431 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:44:52,574 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:44:52,713 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:44:52,727 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:44:52,777 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:44:52,798 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:44:52,810 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:44:52,820 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:44:52,824 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:52,906 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:44:52,926 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:44:52,927 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:44:53,000 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:53,171 - BaseDataMaintenance.common.Utils - INFO - get message of idx:42
+2023-11-22 15:44:53,189 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:44:53,392 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:44:53,555 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:53,623 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:53,826 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:44:53,953 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:53,956 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:53,997 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:44:54,083 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:44:54,042 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:44:54,222 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:54,225 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:54,284 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:54,341 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:44:54,347 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:54,393 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:54,394 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:54,466 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:44:54,486 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:44:54,520 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:54,689 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:54,620 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:44:54,687 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:44:54,701 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:54,736 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:54,752 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:54,813 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:44:54,854 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:44:54,921 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:55,107 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:44:55,210 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:44:55,256 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:44:55,299 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:44:55,778 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:44:55,906 - BaseDataMaintenance.common.Utils - INFO - get message of idx:23
+2023-11-22 15:44:55,959 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:44:56,038 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:44:56,065 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:44:56,072 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:44:56,189 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:56,205 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:44:56,207 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:44:56,364 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:56,275 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:56,341 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:56,357 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:56,379 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:56,406 - BaseDataMaintenance.common.Utils - INFO - get message of idx:42
+2023-11-22 15:44:56,417 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:56,424 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:44:56,440 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:44:56,444 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:56,498 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:44:56,512 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:44:56,596 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:56,626 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:44:56,707 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:56,712 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:44:56,836 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:57,058 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:44:57,086 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:44:57,136 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:57,347 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:44:57,375 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:57,451 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:44:57,609 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:44:57,645 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:44:57,724 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:44:57,726 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:44:57,764 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:44:57,816 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:44:57,893 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:44:57,917 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:44:58,044 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:44:58,128 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:44:58,240 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:44:58,258 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:44:58,459 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:44:58,497 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:44:58,498 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:44:58,709 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:44:58,593 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:44:58,603 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:44:58,612 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:44:58,627 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:44:58,648 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:44:58,712 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:44:58,763 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:44:58,821 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:44:58,825 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:44:58,949 - BaseDataMaintenance.common.Utils - INFO - get message of idx:23
+2023-11-22 15:44:58,972 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:44:58,973 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:44:59,185 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:44:59,231 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:44:59,261 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:44:59,278 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:44:59,381 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:44:59,410 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:44:59,489 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:44:59,504 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:44:59,759 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:44:59,762 - BaseDataMaintenance.common.Utils - INFO - get message of idx:42
+2023-11-22 15:44:59,787 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:44:59,821 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:44:59,919 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:44:59,956 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:45:00,021 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:45:00,063 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:45:00,161 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:45:00,189 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:45:00,525 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:45:00,724 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:45:00,727 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:45:00,728 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:45:00,745 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:45:00,847 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:45:00,861 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:45:00,876 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:45:00,884 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:45:00,970 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:45:01,228 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:45:01,238 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:45:01,254 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:45:01,267 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:45:01,539 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:45:01,560 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:45:01,636 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:45:01,667 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:45:01,673 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:45:01,685 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:45:01,686 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:45:01,689 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:45:01,720 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:45:01,739 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:45:01,740 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:45:01,921 - BaseDataMaintenance.common.Utils - INFO - get message of idx:23
+2023-11-22 15:45:01,995 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:45:02,082 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:45:02,090 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:45:02,193 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:45:02,310 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:45:02,440 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:45:02,458 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:45:02,496 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:45:02,531 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:45:02,588 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:45:02,608 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:45:02,717 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:45:02,763 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:45:02,817 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:45:02,841 - BaseDataMaintenance.common.Utils - INFO - get message of idx:42
+2023-11-22 15:45:02,878 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:45:02,931 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:45:03,051 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:45:03,123 - BaseDataMaintenance.common.Utils - INFO - get message of idx:32
+2023-11-22 15:45:03,127 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:45:03,078 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:45:03,229 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:45:03,302 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:45:03,358 - BaseDataMaintenance.common.Utils - INFO - get message of idx:26
+2023-11-22 15:45:03,417 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:45:03,545 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:45:03,808 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:45:03,897 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:45:03,925 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:45:03,955 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:45:04,033 - BaseDataMaintenance.common.Utils - INFO - get message of idx:36
+2023-11-22 15:45:04,065 - BaseDataMaintenance.common.Utils - INFO - get message of idx:24
+2023-11-22 15:45:04,072 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:45:04,072 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:45:04,096 - BaseDataMaintenance.common.Utils - INFO - get message of idx:1
+2023-11-22 15:45:04,099 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:45:04,102 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:45:04,134 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:45:04,148 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+2023-11-22 15:45:04,208 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:45:04,330 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:45:04,335 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:45:04,392 - BaseDataMaintenance.common.Utils - INFO - get message of idx:3
+2023-11-22 15:45:04,614 - BaseDataMaintenance.common.Utils - INFO - get message of idx:10
+2023-11-22 15:45:04,827 - BaseDataMaintenance.common.Utils - INFO - get message of idx:22
+2023-11-22 15:45:04,992 - BaseDataMaintenance.common.Utils - INFO - get message of idx:34
+2023-11-22 15:45:05,001 - BaseDataMaintenance.common.Utils - INFO - get message of idx:40
+2023-11-22 15:45:05,090 - BaseDataMaintenance.common.Utils - INFO - get message of idx:39
+2023-11-22 15:45:05,091 - BaseDataMaintenance.common.Utils - INFO - get message of idx:16
+2023-11-22 15:45:05,190 - BaseDataMaintenance.common.Utils - INFO - get message of idx:28
+2023-11-22 15:45:05,214 - BaseDataMaintenance.common.Utils - INFO - get message of idx:49
+2023-11-22 15:45:05,229 - BaseDataMaintenance.common.Utils - INFO - get message of idx:12
+2023-11-22 15:45:05,400 - BaseDataMaintenance.common.Utils - INFO - get message of idx:11
+2023-11-22 15:45:05,259 - BaseDataMaintenance.common.Utils - INFO - get message of idx:7
+2023-11-22 15:45:05,320 - BaseDataMaintenance.common.Utils - INFO - get message of idx:38
+2023-11-22 15:45:05,370 - BaseDataMaintenance.common.Utils - INFO - get message of idx:13
+2023-11-22 15:45:05,566 - BaseDataMaintenance.common.Utils - INFO - get message of idx:17
+2023-11-22 15:45:05,540 - BaseDataMaintenance.common.Utils - INFO - get message of idx:8
+2023-11-22 15:45:05,585 - BaseDataMaintenance.common.Utils - INFO - get message of idx:14
+2023-11-22 15:45:05,620 - BaseDataMaintenance.common.Utils - INFO - get message of idx:47
+2023-11-22 15:45:05,646 - BaseDataMaintenance.common.Utils - INFO - get message of idx:42
+2023-11-22 15:45:06,029 - BaseDataMaintenance.common.Utils - INFO - get message of idx:35
+2023-11-22 15:45:06,087 - BaseDataMaintenance.common.Utils - INFO - get message of idx:9
+2023-11-22 15:45:06,125 - BaseDataMaintenance.common.Utils - INFO - get message of idx:29
+2023-11-22 15:45:06,175 - BaseDataMaintenance.common.Utils - INFO - get message of idx:21
+2023-11-22 15:45:06,325 - BaseDataMaintenance.common.Utils - INFO - get message of idx:20
+2023-11-22 15:45:06,326 - BaseDataMaintenance.common.Utils - INFO - get message of idx:25
+2023-11-22 15:45:06,343 - BaseDataMaintenance.common.Utils - INFO - get message of idx:32
+2023-11-22 15:45:06,447 - BaseDataMaintenance.common.Utils - INFO - get message of idx:5
+2023-11-22 15:45:06,451 - BaseDataMaintenance.common.Utils - INFO - get message of idx:23
+2023-11-22 15:45:06,596 - BaseDataMaintenance.common.Utils - INFO - get message of idx:48
+2023-11-22 15:45:06,597 - BaseDataMaintenance.common.Utils - INFO - get message of idx:2
+2023-11-22 15:45:06,616 - BaseDataMaintenance.common.Utils - INFO - get message of idx:37
+2023-11-22 15:45:06,655 - BaseDataMaintenance.common.Utils - INFO - get message of idx:18
+2023-11-22 15:45:06,795 - BaseDataMaintenance.common.Utils - INFO - get message of idx:33
+2023-11-22 15:45:06,873 - BaseDataMaintenance.common.Utils - INFO - get message of idx:41
+2023-11-22 15:45:06,895 - BaseDataMaintenance.common.Utils - INFO - get message of idx:44
+2023-11-22 15:45:06,949 - BaseDataMaintenance.common.Utils - INFO - get message of idx:27
+2023-11-22 15:45:06,961 - BaseDataMaintenance.common.Utils - INFO - get message of idx:0
+2023-11-22 15:45:06,968 - BaseDataMaintenance.common.Utils - INFO - get message of idx:30
+2023-11-22 15:45:06,981 - BaseDataMaintenance.common.Utils - INFO - get message of idx:19
+2023-11-22 15:45:07,061 - BaseDataMaintenance.common.Utils - INFO - get message of idx:45
+2023-11-22 15:45:07,139 - BaseDataMaintenance.common.Utils - INFO - get message of idx:31
+2023-11-22 15:45:07,172 - BaseDataMaintenance.common.Utils - INFO - get message of idx:4
+
+'''
+
+set_a = set()
+_c = 0
+for a in re.split("\n",s):
+    a = a.strip()
+    if a=="":
+        continue
+    b = a.split(":")[-1]
+    _c += 1
+    print(b)
+    set_a.add(b)
+print(len(set_a),_c)
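
The snippet above tallies how many distinct consumer indices appear in the captured log. An equivalent version with collections.Counter that also exposes the busiest indices is sketched below; it reuses the `s` log blob defined above:

from collections import Counter

def tally(log_text):
    counts = Counter(line.rsplit(":", 1)[-1].strip()
                     for line in log_text.splitlines() if line.strip())
    return len(counts), sum(counts.values()), counts.most_common(5)

distinct, total, busiest = tally(s)   # `s` is the log blob defined above
print(distinct, total, busiest)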

+ 3 - 1
BaseDataMaintenance/maxcompute/1.py

@@ -2084,7 +2084,9 @@ if __name__ == '__main__':
     # _str1 = "SXXY-ZBP-GG-2020002"
     # _str2 = "SXXY-ZBP-GG-2020002"
     # print(getSimilarityOfString(_str1,_str2))
-    print(check_doctitle("南京市秦淮新河沿线泰山公寓、天虹山庄、福润雅居南区小区环境综合整治","(雨花台区)秦淮新河沿线泰山公寓、天虹山庄、福润雅居南区小区环境综合整治勘察设计"))
+    # print(check_doctitle("南京市秦淮新河沿线泰山公寓、天虹山庄、福润雅居南区小区环境综合整治","(雨花台区)秦淮新河沿线泰山公寓、天虹山庄、福润雅居南区小区环境综合整治勘察设计"))
+    print(type({52,101,118,119,120}))
+    print((1 if 1==1 else 2) + (1 if 1==1 else 2))
     # print(check_product(None,None))
     # print(check_code("4451020073383382206021325","4451020073383382206021322"))
     # print(check_money("550.0","440.0","",""))

+ 11 - 3
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -889,7 +889,7 @@ code_pattern = re.compile("[A-Za-z0-9\-\(\)()【】\.-]+")
 num_pattern = re.compile("^\d+(?:\.\d+)?$")
 num1_pattern = re.compile("[一二三四五六七八九A-Za-z]+")
 location_pattern = re.compile("[^\[【\(]{1,2}[市区镇县村路]")
-building_pattern = "工程招标代理|工程设计|工程造价咨询|施工图设计文件审查|咨询|环评|设计|施工监理|施工|监理|EPC|epc|总承包|水土保持|选址论证|勘界|勘察|预算编制|预算审核|设备类|第?[\((]?[一二三四五六七八九1-9][)\)]?[次批]"
+building_pattern = "工程招标代理|工程设计|暂停|继续|工程造价咨询|施工图设计文件审查|咨询|环评|设计|施工监理|施工|监理|EPC|epc|总承包|水土保持|选址论证|勘界|勘察|预算编制|预算审核|设备类|第?[\((]?[一二三四五六七八九1-9][)\)]?[次批]"
 date_pattern = re.compile("\d{2,4}[\-\./年]\d{1,2}[\-\./月]\d{1,2}")
 def check_doctitle(doctitle_refind_less, doctitle_refind_greater, codes_less=[], code_greater=[]):
     if code_greater is None:
@@ -990,10 +990,18 @@ def check_product(product_less,product_greater,split_char=","):
 
         _product_l = product_less.split(split_char)
         _product_g = product_greater.split(split_char)
+        same_count = 0
+        if len(_product_g)>len(_product_l):
+            a = _product_g
+            _product_g = _product_l
+            _product_l = a
         for _l in _product_l:
             for _g in _product_g:
                 if getSimilarityOfString(_l,_g)>=0.8:
-                    return True
+                    same_count += 1
+                    break
+        if same_count/len(_product_l)>0.5:
+            return True
         return False
     return True
 
@@ -1024,7 +1032,7 @@ def check_time(json_time_less,json_time_greater):
             if getLength(v)>0:
                 v1 = time_greater.get(k,"")
                 if getLength(v1)>0:
-                    if v!=v1:
+                    if v[:10]!=v1[:10]:
                         return False
     return True
 

+ 205 - 84
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -1707,12 +1707,33 @@ def generate_packages_properties(list_docs):
     for _doc in list_docs:
         _dict = {}
         sub_docs = _doc.get("sub_docs")
+
+
         if sub_docs is not None:
             for _d in sub_docs:
                 sub_project_code = _d.get(project_sub_project_code,"")
                 sub_project_name = _d.get(project_sub_project_name,"")
                 win_tenderer = _d.get(project_win_tenderer,"")
                 win_bid_price = _d.get(project_win_bid_price,"")
+
+                if sub_project_name=="Project":
+
+                    win_exists = False
+                    win_price_exists = False
+                    win_sum = 0
+                    for _d1 in sub_docs:
+                        if _d1.get(project_sub_project_name,"")=="Project":
+                            continue
+                        if _d1.get(project_win_tenderer,"")==win_tenderer:
+                            win_exists = True
+                        if _d1.get(project_win_tenderer,"")==win_tenderer and _d1.get(project_win_bid_price,"")!="":
+                            win_sum += float(_d1.get(project_win_bid_price,0))
+                        if _d1.get(project_win_bid_price,"")==win_bid_price:
+                            win_price_exists = True
+                    if win_exists and (win_price_exists or win_bid_price=="" or float(win_bid_price)==0 or float(win_bid_price)==win_sum):
+                        continue
+
+
                 _key = "%s-%s-%s-%s"%(sub_project_code,sub_project_name,win_tenderer,win_bid_price)
                 if _key in set_key:
                     continue
@@ -2106,10 +2127,11 @@ def dumplicate_projects(list_projects,b_log=False):
     :return:
     '''
     appendKeyvalueCount(list_projects)
-    list_projects.sort(key=lambda x:x.get(project_page_time,""))
+    list_projects.sort(key=lambda x:str(x.get(project_page_time,"")))
     list_projects.sort(key=lambda x:x.get("keyvaluecount",0),reverse=True)
     cluster_projects = list_projects[:50]
     _count = 10
+    print("dumplicate projects rest",len(cluster_projects))
     while _count>0:
         _count -= 1
         _update = False
@@ -2140,7 +2162,7 @@ def dumplicate_projects(list_projects,b_log=False):
             break
         cluster_projects = list_p
 
-
+    print("dumplicate projects rest",len(cluster_projects))
     return cluster_projects
 
 def update_projects_by_project(project_dict,projects):
@@ -2148,7 +2170,7 @@ def update_projects_by_project(project_dict,projects):
     _dict = {}
     # update the shared attributes
     for k,v in project_dict.items():
-        if k in (project_project_dynamics,project_page_time,project_product,project_project_codes,project_docids,project_uuid,project_nlp_enterprise,project_nlp_enterprise_attachment):
+        if k in (project_project_dynamics,project_page_time,project_sub_project_name,project_product,project_project_codes,project_docids,project_uuid,project_nlp_enterprise,project_nlp_enterprise_attachment):
             continue
         for _proj in projects:
             if k not in _proj:
@@ -2162,10 +2184,15 @@ def update_projects_by_project(project_dict,projects):
                     elif isinstance(_v,(int,float)):
                         if _v==0:
                             _dict[k] = v
+
     for _proj in projects:
         _proj.update(_dict)
-        if _proj.get(project_page_time,"")<project_dict.get(project_page_time,""):
+        if str(_proj.get(project_page_time,""))<str(project_dict.get(project_page_time,"")):
             _proj[project_page_time] = project_dict.get(project_page_time,"")
+        if project_dict.get(project_sub_project_name) is not None and project_dict.get(project_sub_project_name) not in {"","Project"}:
+            if not (_proj.get(project_sub_project_name) is not None and _proj.get(project_sub_project_name) not in {"","Project"}):
+                _proj[project_sub_project_name] = project_dict.get(project_sub_project_name)
+
 
     # concatenate attributes
     append_dict = {}
@@ -2228,7 +2255,7 @@ def update_projects_by_project(project_dict,projects):
     list_dynamics = []
     for k,v in dict_dynamic.items():
         list_dynamics.append(v)
-    list_dynamics.sort(key=lambda x:x.get(document_page_time,""))
+    list_dynamics.sort(key=lambda x:str(x.get(document_page_time,"")))
 
     append_dict[project_project_dynamics] = json.dumps(list_dynamics[:100],ensure_ascii=False)
 
@@ -2277,7 +2304,7 @@ def check_time_merge(json_time_less,json_time_greater,b_log,set_time_key=set([pr
                 if getLength(v)>0:
                     v1 = time_greater.get(k,"")
                     if getLength(v1)>0:
-                        _dis = getTimeStamp(v)-getTimeStamp(v1)
+                        _dis = getTimeStamp(v[:10])-getTimeStamp(v1[:10])
                         if _dis>86400*5 or _dis<-86400*5:
                             if b_log:
                                 log("check time failed %s-%s-%s"%(str(k),str(v),str(v1)))
@@ -2370,13 +2397,13 @@ def check_zhaozhong_page_time_merge(zhao_biao_page_time,zhong_biao_page_time,zha
         if getLength(bidopen_to_merge)>0 and bidopen_to_merge>zhong_biao_page_time_to_merge:
             zhong_biao_page_time_to_merge = bidopen_to_merge
 
-    if (getLength(zhong_biao_page_time)>0 and getLength(zhao_biao_page_time_to_merge)>0 and zhong_biao_page_time<zhao_biao_page_time_to_merge) or (len(zhong_biao_page_time_to_merge)>0 and len(zhao_biao_page_time)>0 and zhong_biao_page_time_to_merge<zhao_biao_page_time):
+    if (getLength(zhong_biao_page_time)>0 and getLength(zhao_biao_page_time_to_merge)>0 and zhong_biao_page_time<zhao_biao_page_time_to_merge) or (getLength(zhong_biao_page_time_to_merge)>0 and getLength(zhao_biao_page_time)>0 and zhong_biao_page_time_to_merge<zhao_biao_page_time):
         if b_log:
             log("check zhaobiao zhongbiao page_time failed %s=%s===%s=%s"%(str(zhao_biao_page_time),str(zhong_biao_page_time),str(zhao_biao_page_time_to_merge),str(zhong_biao_page_time_to_merge)))
         return -1
     return 1
 
-def check_sub_project_name_merge(sub_project_name,sub_project_name_to_merge,b_log):
+def check_sub_project_name_merge(sub_project_name,sub_project_name_to_merge,project_dynamics,project_dynamics_to_merge,b_log,package_number_pattern = re.compile("((包|标[段号的包]|分?包|包组|项目)编?号?[::]?[\((]?[0-9A-Za-z一二三四五六七八九十]{1,4})|(第?[0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包))")):
     #check sub_project_name
     sub_project_name = str(sub_project_name).replace("Project","")
     sub_project_name_to_merge = str(sub_project_name_to_merge).replace("Project","")
@@ -2387,6 +2414,42 @@ def check_sub_project_name_merge(sub_project_name,sub_project_name_to_merge,b_lo
                 log("check sub_project_name failed %s===%s"%(str(sub_project_name),str(sub_project_name_to_merge)))
             return -1
         return 1
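+    # When the explicit sub project names are inconclusive, fall back to package numbers parsed from the dynamic document titles:
+    # disjoint marker sets mean different packages (-1), overlapping sets mean the same package (1).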
+    if project_dynamics is not None and project_dynamics_to_merge is not None:
+        try:
+            project_dynamics = json.loads(project_dynamics)
+            project_dynamics_to_merge = json.loads(project_dynamics_to_merge)
+            set_title_name = set()
+            set_title_name_to_merge = set()
+            for _d in project_dynamics:
+                _title1 = _d.get(document_doctitle,"")
+                _title_name = None
+                _title_name_search = re.search(package_number_pattern,_title1)
+                if _title_name_search is not None:
+                    _title_name = _title_name_search.group()
+                    _title_name = re.sub("[^0-9A-Za-z一二三四五六七八九十]",'',_title_name)
+                    if _title_name!="":
+                        set_title_name.add(_title_name)
+
+            for _dm in project_dynamics_to_merge:
+
+                _title2 = _dm.get(document_doctitle,"")
+                _title_name = None
+                _title_name_search = re.search(package_number_pattern,_title2)
+                if _title_name_search is not None:
+                    _title_name = _title_name_search.group()
+                    _title_name = re.sub("[^0-9A-Za-z一二三四五六七八九十]",'',_title_name)
+                    if _title_name!="":
+                        set_title_name_to_merge.add(_title_name)
+            if len(set_title_name)>0 and len(set_title_name_to_merge)>0:
+                if len(set_title_name&set_title_name_to_merge)==0:
+                    if b_log:
+                        log("check sub_project_name title set failed %s===%s"%(str(set_title_name),str(set_title_name_to_merge)))
+                    return -1
+                else:
+                    return 1
+        except Exception as e:
+            traceback.print_exc()
+
     return 0
 
 def check_roles_merge(enterprise,enterprise_to_merge,tenderee,tenderee_to_merge,agency,agency_to_merge,win_tenderer,win_tenderer_to_merge,b_log):
@@ -2395,7 +2458,7 @@ def check_roles_merge(enterprise,enterprise_to_merge,tenderee,tenderee_to_merge,
         if tenderee in enterprise_to_merge or tenderee_to_merge in enterprise:
             pass
         else:
-            if getSimilarityOfString(tenderee,tenderee_to_merge)==1:
+            if getSimilarityOfString(re.sub("[省市]",'',tenderee),re.sub("[省市]",'',tenderee_to_merge))==1:
                 pass
             else:
                 if b_log:
@@ -2403,18 +2466,21 @@ def check_roles_merge(enterprise,enterprise_to_merge,tenderee,tenderee_to_merge,
                 return -1
     _set2 = set([a for a in [agency,agency_to_merge] if a!=""])
     if len(_set2)>1:
-        if getSimilarityOfString(agency,agency_to_merge)==1:
+        if agency in enterprise_to_merge or agency_to_merge in enterprise:
             pass
         else:
-            if b_log:
-                log("check agency failed %s===%s"%(str(agency),str(agency_to_merge)))
-            return -1
+            if getSimilarityOfString(re.sub("[省市]",'',agency),re.sub("[省市]",'',agency_to_merge))==1:
+                pass
+            else:
+                if b_log:
+                    log("check agency failed %s===%s"%(str(agency),str(agency_to_merge)))
+                return -1
     _set3 = set([a for a in [win_tenderer,win_tenderer_to_merge] if a!=""])
     if len(_set3)>1:
         if win_tenderer in enterprise_to_merge or win_tenderer_to_merge in enterprise:
             pass
         else:
-            if getSimilarityOfString(win_tenderer,win_tenderer_to_merge)==1:
+            if getSimilarityOfString(re.sub("[省市]",'',win_tenderer),re.sub("[省市]",'',win_tenderer_to_merge))==1:
                 pass
             else:
                 if b_log:
@@ -2445,16 +2511,23 @@ def check_money_merge(bidding_budget,bidding_budget_to_merge,win_bid_price,win_b
         return -1
 
     _set1 = set([a for a in [win_bid_price,win_bid_price_to_merge] if a>0])
+
     if len(_set1)>1:
         if b_log:
             log("check win_bid_price failed %s===%s"%(str(win_bid_price),str(win_bid_price_to_merge)))
         return -1
     #check money
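+    # Both sides carrying the same non-zero budget while neither has a win price yet counts as a positive money signal.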
+    if len(_set)==1 and len(_set1)==0:
+        if (bidding_budget>0 and bidding_budget_to_merge>0):
+            return 1
+
 
     if len(_set)==1 and len(_set1)==1:
         max_win_bid_price = max(_set1)
         max_bidding_budget = max(_set)
         radio = max_win_bid_price/max_bidding_budget
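+        # If both budgets or both win prices agree across the two projects, accept immediately; the ratio checks below only apply otherwise.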
+        if (bidding_budget>0 and bidding_budget_to_merge>0) or (win_bid_price>0 and win_bid_price_to_merge>0):
+            return 1
         #allow the win price to exceed the budget by up to 10%
         if max_win_bid_price>max_bidding_budget*(1.1):
             if b_log:
@@ -2464,9 +2537,8 @@ def check_money_merge(bidding_budget,bidding_budget_to_merge,win_bid_price,win_b
             if radio<0.3:
                 if b_log:
                     log("check money failed radio<0.3 %s===%s"%(str(max(_set1)),str(max(_set))))
-                return -1
-        if (bidding_budget>0 and bidding_budget_to_merge>0) or (win_bid_price>0 and win_bid_price_to_merge>0):
-            return 1
+                return 0
+                # return -1
     return 0
 
 def check_project_codes_merge(list_code,list_code_to_merge,b_log):
@@ -2475,6 +2547,8 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
     has_similar = False
     for _c in list_code[:100]:
         for _c1 in list_code_to_merge[:100]:
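+            # Normalize full-width brackets so visually identical codes compare as equal.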
+            _c = str(_c).replace("【","[").replace("】","]")
+            _c1 = str(_c1).replace("【","[").replace("】","]")
             _simi = getSimilarityOfString(_c,_c1,3)
             if _simi==1:
                 has_same = True
@@ -2493,7 +2567,7 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
     return 0
 
 
-def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=False,simple_check=False):
+def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*300,return_prob=False,simple_check=False):
     docids = _proj.get(project_docids,"")
     page_time = _proj.get(project_page_time,"")
     project_codes = _proj.get(project_project_codes,"")
@@ -2511,6 +2585,8 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
 
     project_dynamics = _proj.get(project_project_dynamics)
 
+
+
     enterprise = _proj.get("enterprise")
     if enterprise is None:
         try:
@@ -2541,8 +2617,16 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
     zhao_biao_page_time_to_merge = _dict.get(project_zhao_biao_page_time,"")
     zhong_biao_page_time_to_merge = _dict.get(project_zhong_biao_page_time,"")
 
+
     project_dynamics_to_merge = _dict.get(project_project_dynamics)
 
+    is_few = False
+    if (0 if project_codes=="" else 1) + (0 if project_name=="" else 1) + (0 if bidding_budget<0 else 1) +(0 if tenderee=="" else 1) + (0 if win_bid_price<0 else 1) + (0 if win_tenderer=="" else 1)<=1:
+        is_few = True
+    if (0 if project_codes_to_merge=="" else 1) + (0 if project_name_to_merge=="" else 1) + (0 if bidding_budget_to_merge<0 else 1) +(0 if tenderee_to_merge=="" else 1) + (0 if win_bid_price_to_merge<0 else 1) + (0 if win_tenderer_to_merge=="" else 1)<=1:
+        is_few = True
+
+
     list_code_to_merge = [a for a in project_codes_to_merge.split(",") if a!='']
     if project_code_to_merge!="":
         list_code_to_merge.append(project_code_to_merge)
@@ -2571,30 +2655,45 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
             return False,0
         return False
 
+
     #event check - money
     _money_check = check_money_merge(bidding_budget,bidding_budget_to_merge,win_bid_price,win_bid_price_to_merge,b_log)
     check_dict[_money_check] += 1
-    if check_dict[-1]>0:
-        if return_prob:
-            return False,0
-        return False
     prob_count += _money_check
 
     #party check - roles
     _roles_check = check_roles_merge(enterprise,enterprise_to_merge,tenderee,tenderee_to_merge,agency,agency_to_merge,win_tenderer,win_tenderer_to_merge,b_log)
     check_dict[_roles_check] += 1
-    if check_dict[-1]>0:
-        if return_prob:
-            return False,0
-        return False
+
     prob_count += _roles_check
-    _product_check = check_product_merge(product,product_to_merge,b_log)
 
-    prob_count += _product_check*2
+
+    _product_check = check_product_merge(product,product_to_merge,b_log)
     _project_name_check = check_project_name_merge(project_name,project_name_to_merge,b_log)
-    prob_count += _project_name_check
     _title_check = check_dynamics_title_merge(project_dynamics,project_dynamics_to_merge,b_log)
-    prob_count += _title_check
+
+    #event check - project codes
+    _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
+    check_dict[_codes_check] += 1
+
+    prob_count += _codes_check
+
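+    # Sparse projects only merge on a code match; otherwise they need a matching dynamic title,
+    # overlapping enterprises (when both sides list any) and no conflicting product.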
+    if is_few:
+        if _codes_check!=1:
+            if _title_check!=1:
+                if return_prob:
+                    return False,0
+                return False
+            if len(enterprise)>0 and len(enterprise_to_merge)>0:
+                if len(enterprise & enterprise_to_merge)==0:
+                    if return_prob:
+                        return False,0
+                    return False
+            if _product_check==-1:
+                if return_prob:
+                    return False,0
+                return False
+
     min_count = 2
     if product=="" or product_to_merge=="":
         min_count = 1
@@ -2604,12 +2703,12 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
             log("project_name,project_name_to_merge %s %s"%(project_name,project_name_to_merge))
             log("product,product_to_merge %s %s"%(product,product_to_merge))
             log("check _project_name_check+_product_check+_title_check<2 failed %d %s,%s,%s"%(_project_name_check+_product_check+_title_check,str(_project_name_check),str(_product_check),str(_title_check)))
-        if return_prob:
-            return False,0
-        return False
+        # if return_prob:
+        #     return False,0
+        # return False
+        prob_count += -1
     else:
-        check_dict[1] += 1
-        check_dict[1] += 1
+        prob_count += 2
 
     if simple_check:
         if return_prob:
@@ -2617,23 +2716,19 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
             return True,_prob
         return True
 
-    #event check - project codes
-    _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
-    check_dict[_codes_check] += 1
-    if check_dict[-1]>0:
-        if return_prob:
-            return False,0
-        return False
-    prob_count += _codes_check
 
     #time check - other dates
     _time_check = check_time_merge(_proj,_dict,b_log)
     check_dict[_time_check] += 1
 
     #sub package number check
-    _sub_project_name_check = check_sub_project_name_merge(sub_project_name,sub_project_name_to_merge,b_log)
+    _sub_project_name_check = check_sub_project_name_merge(sub_project_name,sub_project_name_to_merge,project_dynamics,project_dynamics_to_merge,b_log)
+    if docids==docids_to_merge and _sub_project_name_check==-1:
+        if return_prob:
+            return False,0
+        return False
     check_dict[_sub_project_name_check] += 1
-    prob_count += _sub_project_name_check
+    prob_count += _sub_project_name_check*3
 
     #time check - page time
     _page_time_check = check_page_time_merge(page_time,page_time_to_merge,b_log,time_limit)
@@ -2642,18 +2737,23 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
 
     _prob = prob_count/8
 
+    if b_log:
+        log("check %s-%s result%s"%(docids,docids_to_merge,str(check_dict)))
     if _prob<0.15:
         if b_log:
-            log("prob less than 0.15")
+            log("prob less than 0.15 prob_count:%d"%(prob_count))
         if return_prob:
             return False,_prob
         return False
 
-    if b_log:
-        log("check %s-%s result%s"%(docids,docids_to_merge,str(check_dict)))
+
     if check_dict[-1]>0:
         if check_dict[-1]==1:
-            if (_codes_check==1 and _roles_check==1 and _product_check==1) or (_roles_check==1 and _money_check==1 and _product_check==1):
+            if _roles_check==-1:
+                if return_prob:
+                    return False,0
+                return False
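+            # A role conflict is never overridden; any other single failed check can be, provided enough strong signals (codes, roles, product, money, name or title) agree.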
+            if (_codes_check==1 and _roles_check==1 and _product_check==1 and _money_check>=0) or (_roles_check==1 and _money_check==1 and _product_check==1) or (_money_check==1 and _product_check==1 and _codes_check==1) or (_money_check>=0 and _roles_check==1 and _codes_check==1 and (_title_check==1 or _project_name_check==1 or _product_check==1)):
                 if return_prob:
                     return True,_prob
                 return True
@@ -2859,18 +2959,20 @@ def get_page_time_dis(page_time,n_page_time):
 
 def check_page_time_dup(page_time,n_page_time):
     _dis = get_page_time_dis(page_time,n_page_time)
-    if _dis>=0 and _dis<=10:
+    if _dis>=0 and _dis<=20:
         return True
     return False
 
 
-def dumplicate_document_in_merge(list_projects):
+def dumplicate_document_in_merge(list_projects,dup_docid):
     '''
     deduplicate documents when merging projects
     :param list_projects:
     :return:
     '''
 
+    dup_docid = set(dup_docid)
+    set_dup_total = set()
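+    # Docids already flagged as duplicates upstream are skipped, and every docid flagged here is returned to the caller.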
     for _proj in list_projects:
         try:
             docids = _proj.get(project_docids,"")
@@ -2882,48 +2984,65 @@ def dumplicate_document_in_merge(list_projects):
             _time = time.time()
             for _d in list_dynamics:
                 docid = _d.get(document_docid)
+                doctitle = _d.get(document_doctitle,"")
+                title_search = re.search("[一二三四五六七八九十1-9]+(?:次|标|包)",doctitle)
                 if str(docid) not in set_docids:
                     continue
+
+                if docid in dup_docid:
+                    continue
                 _status = _d.get(document_status,201)
                 is_multipack = _d.get("is_multipack",True)
                 extract_count = _d.get(document_tmp_extract_count,0)
                 docchannel = _d.get(document_docchannel,0)
                 page_time = _d.get(document_page_time,"")
-                if _status>=401 and _status<=450:
-                    set_dup_docid.add(str(docid))
-                # if docchannel>0:
-                #     if docchannel in dict_channel_proj:
-                #         n_d = dict_channel_proj[docchannel]
-                #         n_docid = n_d.get(document_docid)
-                #         n_is_multipack = n_d.get("is_multipack",True)
-                #         n_extract_count = n_d.get(document_tmp_extract_count,0)
-                #         n_page_time = n_d.get(document_page_time,"")
-                #         if docid==n_docid:
-                #             continue
-                #         if not check_page_time_dup(page_time,n_page_time):
-                #             continue
-                #
-                #         if extract_count>n_extract_count:
-                #             n_d[document_status] = 401
-                #             set_dup_docid.add(str(n_docid))
-                #             dict_channel_proj[docchannel] = _d
-                #         elif extract_count==n_extract_count:
-                #             if int(n_docid)>int(docid):
-                #                 n_d[document_status] = 401
-                #                 set_dup_docid.add(str(n_docid))
-                #                 dict_channel_proj[docchannel] = _d
-                #             elif int(n_docid)<int(docid):
-                #                 _d[document_status] = 401
-                #                 set_dup_docid.add(str(docid))
-                #         else:
-                #             _d[document_status] = 401
-                #             set_dup_docid.add(str(docid))
-                #         if not is_multipack and not n_is_multipack:
-                #             pass
-                #     else:
-                #         dict_channel_proj[docchannel] = _d
+                # if _status>=401 and _status<=450:
+                #     print(":1",docid)
+                #     set_dup_docid.add(str(docid))
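+                # Channel-level dedup: only single-package documents with extract_count>5 on channels {52,101,118,119,120} are compared;
+                # candidates must be close in page_time and carry the same 次/标/包 marker in the title (or none),
+                # and the document with the richer extraction (ties broken toward the smaller docid) is kept while the other is set to status 401.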
+                if docchannel in {52,101,118,119,120} and extract_count>5:
+                    if docchannel in dict_channel_proj:
+                        n_d = dict_channel_proj[docchannel]
+                        n_docid = n_d.get(document_docid)
+                        n_is_multipack = n_d.get("is_multipack",True)
+                        n_extract_count = n_d.get(document_tmp_extract_count,0)
+                        n_page_time = n_d.get(document_page_time,"")
+                        n_doctitle = n_d.get(document_doctitle,"")
+                        if docid==n_docid:
+                            continue
+                        if not check_page_time_dup(page_time,n_page_time):
+                            continue
+                        if is_multipack or n_is_multipack:
+                            continue
+                        n_title_search = re.search("[一二三四五六七八九十1-9]+(?:次|标|包)",n_doctitle)
+                        if title_search is None and n_title_search is None:
+                            pass
+                        elif title_search is not None and n_title_search is not None and str(title_search.group())==str(n_title_search.group()):
+                            pass
+                        else:
+                            continue
+
+                        if extract_count>n_extract_count:
+                            n_d[document_status] = 401
+                            set_dup_docid.add(str(n_docid))
+                            dict_channel_proj[docchannel] = _d
+                        elif extract_count==n_extract_count:
+                            if int(n_docid)>int(docid):
+                                n_d[document_status] = 401
+                                set_dup_docid.add(str(n_docid))
+                                dict_channel_proj[docchannel] = _d
+                            elif int(n_docid)<int(docid):
+                                _d[document_status] = 401
+                                set_dup_docid.add(str(docid))
+                        else:
+                            _d[document_status] = 401
+                            set_dup_docid.add(str(docid))
+                        if not is_multipack and not n_is_multipack:
+                            pass
+                    else:
+                        dict_channel_proj[docchannel] = _d
 
             set_docids = set_docids-set_dup_docid
+            set_dup_total |= set_dup_docid
             if len(set_docids)==0:
                 log("projects set_docids length is zero %s"%(docids))
             else:
@@ -2932,8 +3051,10 @@ def dumplicate_document_in_merge(list_projects):
             _proj[project_docid_number] = len(set_docids)
             _proj[project_dup_docid] = ",".join(list(set_dup_docid))
             # log("dumplicate_document docid%s dynamic %d takes%.3f"%(str(docid),len(list_dynamics),time.time()-_time))
+
         except Exception as e:
             traceback.print_exc()
+    return list(set_dup_total)
 
 @annotate('string,string->string')
 class f_dumplicate_projects(BaseUDAF):

+ 7 - 7
BaseDataMaintenance/model/ots/BaseModel.py

@@ -48,13 +48,6 @@ class BaseModel():
                 _list.append((_key,_v))
         return _list
 
-    def getPrimaryKey_turple(self):
-        _list = []
-        for _key in self.getPrimary_keys():
-            _list.append((_key,self.getProperties().get(_key)))
-        return _list
-
-
     @staticmethod
     def search(ots_client,table_name,key_tuple,columns_to_get):
         try:
@@ -74,6 +67,13 @@ class BaseModel():
             traceback.print_exc()
             log("get row failed, http_status:%d, error_code:%s, error_message:%s, request_id:%s" % (str(e.get_http_status()), e.get_error_code(), e.get_error_message(), e.get_request_id()))
 
+
+    def getPrimaryKey_turple(self):
+        _list = []
+        for _key in self.getPrimary_keys():
+            _list.append((_key,self.getProperties().get(_key)))
+        return _list
+
     def fix_columns(self,ots_client,columns_to_fix,_flag):
         _dict = self.search(ots_client,self.table_name,self.getPrimaryKey_turple(),columns_to_fix)
         if _dict is not None:

+ 26 - 8
BaseDataMaintenance/model/ots/designed_project.py

@@ -21,13 +21,28 @@ class designed_project(BaseModel):
         for _spid in spids.split(","):
             should_q.append(TermQuery("spids",_spid))
 
-        bool_query = BoolQuery(should_queries=should_q)
-        columns = ["docids"]
-        rows, next_token, total_count, is_all_succeed = ots_client.search("designed_project", "designed_project_index",
-                                                                          SearchQuery(bool_query, limit=100,get_total_count=True),
-                                                                          ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
-        list_dict = getRow_ots(rows)
-        return list_dict
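+        # Run the should-terms in batches of 20 so each BoolQuery stays small, then deduplicate the combined hits by id.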
+        _begin = 0
+        _step = 20
+        list_dict = []
+        while 1:
+            _end = _begin +_step
+            bool_query = BoolQuery(should_queries=should_q[_begin:_end])
+            columns = ["status"]
+            rows, next_token, total_count, is_all_succeed = ots_client.search("designed_project", "designed_project_index",
+                                                                              SearchQuery(bool_query, limit=100,get_total_count=True),
+                                                                              ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
+            list_dict.extend(getRow_ots(rows))
+            _begin = _end
+            if _begin>=len(should_q):
+                break
+        list_dict_final = []
+        set_id = set()
+        for _dict in list_dict:
+            if _dict.get("id","") in set_id:
+                continue
+            list_dict_final.append(_dict)
+            set_id.add(_dict.get("id",""))
+        return list_dict_final
 
     def getAttribute_turple(self):
         _list = []
@@ -51,7 +66,10 @@ class designed_project(BaseModel):
         if len(list_dict)>0:
             for _dict in list_dict[1:]:
                 _designed_delete = designed_project(_dict)
-                _designed_delete.delete_row(ots_client)
+
+                _designed_delete.setValue("status","404",True)
+                _designed_delete.update_project(ots_client)
+                # _designed_delete.delete_row(ots_client)
 
             _designed_update = designed_project(list_dict[0])
             properties = _designed_update.getProperties()

+ 130 - 1
BaseDataMaintenance/model/ots/document.py

@@ -592,8 +592,137 @@ def delete_documents():
     print("delete count:%d"%_count)
 
 
+def turn_document_docchannel():
+    from BaseDataMaintenance.dataSource.source import getConnect_ots
+    from BaseDataMaintenance.common.multiThread import MultiThreadHandler
+    import queue
+    from threading import Thread
+    import json
+    task_queue = queue.Queue()
+    from BaseDataMaintenance.model.ots.attachment import attachment_filemd5,attachment_file_title,attachment_file_link
+    ots_client = getConnect_ots()
+    def producer(task_queue,ots_client):
+
+        bool_query = BoolQuery(
+            must_queries=[
+                TermQuery("web_source_no","DX007520-7"),
+                # TermQuery("docid",363793104)
+                # MatchPhraseQuery("doctitle","珠海城市职业技术学院2022年05月至2022年06月政府采购意向"),
+                # BoolQuery(should_queries=[
+                #                           # TermQuery("tenderee","山西利民工业有限责任公司"),
+                #                           # MatchPhraseQuery("doctitle","中国电信"),
+                #                           # MatchPhraseQuery("doctextcon","中国电信"),
+                #                           # MatchPhraseQuery("attachmenttextcon","中国电信")]),
+                #                           # RangeQuery(document_status,88,120,True,True),
+                #                           RangeQuery("page_time","2022-03-24","2022-03-25",True,False),
+                #                           ExistsQuery
+                #                                  #,TermQuery(document_docid,171146519)
+                #                                  ]
+                # )
+            ],
+            # must_not_queries=[WildcardQuery("DX004354*")]
+        )
+
+        # bool_query = BoolQuery(
+        #     # must_queries=[
+        #     #     RangeQuery("crtime","2023-08-30 15:00:00","2023-08-30 23:59:59"),
+        #     #     NestedQuery("page_attachments",ExistsQuery("page_attachments.fileMd5"))
+        #     # ],
+        #     # must_not_queries=[WildcardQuery("attachmenttextcon","*")],
+        #     should_queries=[
+        #         NestedQuery("sub_docs_json",TermQuery("sub_docs_json.win_tenderer","个体工商户")),
+        #         NestedQuery("sub_docs_json",TermQuery("sub_docs_json.win_tenderer","机械设备")),
+        #     ]
+        #
+        # )
+
+        rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
+                                                                       SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
+                                                                       columns_to_get=ColumnsToGet(["detail_link"],return_type=ColumnReturnType.SPECIFIED))
+        list_data = getRow_ots(rows)
+        print(total_count)
+        _count = len(list_data)
+        for _data in list_data:
+            _document = Document(_data)
+            task_queue.put(_document)
+        while next_token:
+            rows,next_token,total_count,is_all_succeed = ots_client.search("document","document_index",
+                                                                           SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
+                                                                           columns_to_get=ColumnsToGet(["detail_link"],return_type=ColumnReturnType.SPECIFIED))
+            list_data = getRow_ots(rows)
+            _count += len(list_data)
+            print("%d/%d"%(_count,total_count))
+            for _data in list_data:
+                _document = Document(_data)
+                task_queue.put(_document)
+
+        # docids = [223820830,224445409]
+        # for docid in docids:
+        #     _dict = {document_docid:int(docid),
+        #              document_partitionkey:int(docid)%500+1,
+        #              }
+        #     task_queue.put(Document(_dict))
+        # import pandas as pd
+        # df = pd.read_excel("G:\\20221212error.xlsx")
+        # for docid in df["docid"]:
+        #     _dict = {document_docid:int(docid),
+        #              document_partitionkey:int(docid)%500+1,
+        #              }
+        #     task_queue.put(Document(_dict))
+        log("task_queue size:%d"%(task_queue.qsize()))
+
+    def _handle(item,result_queue,ots_client):
+        #change attach value
+        # list_attachment = json.loads(item.getProperties().get(document_attachment_path))
+        # print("docid",item.getProperties().get(document_docid))
+        # for attach in list_attachment:
+        #
+        #     filemd5 = attach.get(document_attachment_path_filemd5,"")
+        #     _document_html = item.getProperties().get(document_dochtmlcon,"")
+        #
+        #     _file_title = item.getTitleFromHtml(filemd5,_document_html)
+        #     filelink = item.getSourceLinkFromHtml(filemd5,_document_html)
+        #     attach[document_attachment_path_fileTitle] = _file_title
+        #     attach[document_attachment_path_fileLink] = filelink
+        #
+        # item.setValue(document_attachment_path,json.dumps(list_attachment,ensure_ascii=False),True)
+        # item.all_columns.remove(document_dochtmlcon)
+
+        #change status
+        # item.setValue(document_docchannel,item.getProperties().get(document_original_docchannel),True)
+        # item.setValue(document_status,random.randint(151,171),True)
+        # item.setValue(document_area,"华南",True)
+        # item.setValue(document_province,"广东",True)
+        # item.setValue(document_city,"珠海",True)
+        # item.setValue(document_district,"金湾区",True)
+        # item.setValue(document_status,1,True)
+        # print(item.getProperties())
+        # item.update_row(ots_client)
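+        # Re-label documents whose detail_link contains "/012002002/": docchannel and original_docchannel are both set to 101.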
+        detail_link = item.getProperties().get("detail_link","")
+        if "/012002002/" in detail_link:
+            partitionkey = item.getProperties().get("partitionkey")
+            docid = item.getProperties().get("docid")
+            _dict = {document_partitionkey:partitionkey,
+                     document_docid:docid,
+                     document_docchannel:101,
+                     document_original_docchannel:101}
+            doc = Document(_dict)
+            doc.update_row(ots_client)
+            print(_dict)
+
+        # log("update %d status done"%(item.getProperties().get(document_docid)))
+        pass
+
+
+    t_producer = Thread(target=producer,kwargs={"task_queue":task_queue,"ots_client":ots_client})
+    t_producer.start()
+    t_producer.join()
+    mt = MultiThreadHandler(task_queue,_handle,None,30,ots_client=ots_client)
+    mt.run()
+
 if __name__=="__main__":
     # turn_extract_status()
-    turn_document_status()
+    # turn_document_status()
     # drop_extract2()
     # fixDocumentHtml()
+    turn_document_docchannel()

+ 31 - 14
BaseDataMaintenance/model/ots/document_tmp.py

@@ -243,7 +243,6 @@ def turn_extract_status():
     mt.run()
 
 
-
 def turn_document_tmp_status():
     from BaseDataMaintenance.dataSource.source import getConnect_ots
     from BaseDataMaintenance.common.multiThread import MultiThreadHandler
@@ -253,13 +252,22 @@ def turn_document_tmp_status():
     task_queue = queue.Queue()
     from BaseDataMaintenance.model.ots.attachment import attachment_filemd5,attachment_file_title,attachment_file_link
     ots_client = getConnect_ots()
-    def producer(task_queue,ots_client):
 
+    def producer1(task_queue,ots_client):
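+        # producer1 feeds docids from a newline-separated string `a` that must be defined elsewhere (it is not part of this diff).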
+        for l_a in a.split("\n"):
+            l_a = l_a.strip()
+            if l_a !="":
+                task_queue.put(Document_tmp({document_tmp_partitionkey:int(l_a)%500+1,
+                                             document_tmp_docid:int(l_a),
+                                             document_tmp_status:66}))
+
+    def producer(task_queue,ots_client):
 
         bool_query = BoolQuery(
             must_queries=[
-                TermQuery("fingerprint","md5=2cc044b81ec13acddcc970b71b780365")
-                # RangeQuery("status",66,71),
+                # TermQuery("fingerprint","md5=2cc044b81ec13acddcc970b71b780365")
+                TermQuery("save",1),
+                RangeQuery("status",72),
                 # BoolQuery(should_queries=[
                 #                           # TermQuery("tenderee","山西利民工业有限责任公司"),
                 #                           # MatchPhraseQuery("doctitle","中国电信"),
@@ -272,16 +280,16 @@ def turn_document_tmp_status():
                 #                                  ]
                 # )
             ],
-            must_not_queries=[
-                TermQuery("docid",288599518)
-                # ExistsQuery("status"),
-                # ExistsQuery("page_time"),
-                              ]
+            # must_not_queries=[
+            #     TermQuery("docid",288599518)
+            #     # ExistsQuery("status"),
+            #     # ExistsQuery("page_time"),
+            #                   ]
         )
 
         rows,next_token,total_count,is_all_succeed = ots_client.search("document_tmp","document_tmp_index",
                                                                        SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("docid",SortOrder.DESC)]),limit=100,get_total_count=True),
-                                                                       columns_to_get=ColumnsToGet(["doctitle"],return_type=ColumnReturnType.SPECIFIED))
+                                                                       columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
         list_data = getRow_ots(rows)
         print(total_count)
         # print(list_data)
@@ -292,7 +300,7 @@ def turn_document_tmp_status():
         while next_token:
             rows,next_token,total_count,is_all_succeed = ots_client.search("document_tmp","document_tmp_index",
                                                                            SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
-                                                                           columns_to_get=ColumnsToGet(["doctitle"],return_type=ColumnReturnType.SPECIFIED))
+                                                                           columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
             list_data = getRow_ots(rows)
             _count += len(list_data)
             print("%d/%d"%(_count,total_count))
@@ -342,13 +350,22 @@ def turn_document_tmp_status():
         # json.loads(_extract_json)
         # item.setValue(document_tmp_status,71,True)
         # item.setValue(document_tmp_save,1,True)
-        print(item.getProperties())
+        # if item.exists_row(ots_client):
+        #     item.update_row(ots_client)
+        # print(item.getProperties())
         # item.update_row(ots_client)
         # log("update %d status done"%(item.getProperties().get(document_tmp_docid)))
-        item.delete_row(ots_client)
+        # item.delete_row(ots_client)
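+        # Look up this document's status in the main "document" table; if it was marked as a duplicate (>=401),
+        # reset the tmp record to status 66 so the row is re-processed.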
+        from BaseDataMaintenance.model.ots.document import Document
+
+        Doc = Document(item.getProperties())
+        if Doc.fix_columns(ots_client,["status"],True):
+            if Doc.getProperties().get("status",0)>=401:
+                print(Doc.getProperties().get("docid"),"redo")
+                item.setValue("status",66,True)
+                item.update_row(ots_client)
         pass
 
-
     t_producer = Thread(target=producer,kwargs={"task_queue":task_queue,"ots_client":ots_client})
     t_producer.start()
     t_producer.join()

File diff view truncated because it is too large
+ 134 - 73
BaseDataMaintenance/model/ots/proposedBuilding_tmp.py


Not all files are shown because too many files were changed in this diff