Эх сурвалжийг харах

优化oracle重启的问题,不需要重启程序

luojiehua 2 жил өмнө
parent
commit
a9cd07f6f1

+ 1 - 1
BaseDataMaintenance/maintenance/dataflow.py

@@ -4034,7 +4034,7 @@ if __name__ == '__main__':
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
     a = time.time()
-    df_dump.test_dumplicate(292444835)
+    df_dump.test_dumplicate(122943660)
     print("takes",time.time()-a)
     # df_dump.fix_doc_which_not_in_project()
     # df_dump.delete_projects_by_document(16288036)

+ 2 - 3
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -1088,11 +1088,10 @@ class Dataflow_init(Dataflow):
                     _obj.delete_row(conn_oracle)
                 else:
                     log("send_msg_error111:%s,%d"%(ots_dict.get("uuid"),len(ots_dict.get("dochtmlcon",""))))
-
+            self.pool_oracle.putConnector(conn_oracle)
         except Exception as e:
             traceback.print_exc()
-        finally:
-            self.pool_oracle.putConnector(conn_oracle)
+
 
     def ots2mq(self):
         try:

+ 32 - 14
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -2444,7 +2444,7 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
         return 1
     return 0
 
-def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=False):
+def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=False,simple_check=False):
     docids = _proj.get(project_docids,"")
     page_time = _proj.get(project_page_time,"")
     project_codes = _proj.get(project_project_codes,"")
@@ -2538,14 +2538,6 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
             return False,0
         return False
 
-    #事件判断-编号
-    _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
-    check_dict[_codes_check] += 1
-    if check_dict[-1]>0:
-        if return_prob:
-            return False,0
-        return False
-
     _product_check = check_product_merge(product,product_to_merge,b_log)
 
 
@@ -2553,8 +2545,11 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
 
     _title_check = check_dynamics_title_merge(project_dynamics,project_dynamics_to_merge,b_log)
 
+    min_count = 2
+    if product=="" or product_to_merge=="":
+        min_count = 1
     #事件判断--产品和名称、标题需要满足两个个
-    if _project_name_check+_product_check+_title_check<2:
+    if _project_name_check+_product_check+_title_check<min_count:
         if b_log:
             log("project_name,project_name_to_merge %s %s"%(project_name,project_name_to_merge))
             log("product,product_to_merge %s %s"%(product,product_to_merge))
@@ -2566,6 +2561,19 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
         check_dict[1] += 1
         check_dict[1] += 1
 
+    if simple_check:
+        if return_prob:
+            _prob = check_dict[1]/(check_dict[-1]+check_dict[0]+check_dict[1])
+            return True,_prob
+        return True
+
+    #事件判断-编号
+    _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
+    check_dict[_codes_check] += 1
+    if check_dict[-1]>0:
+        if return_prob:
+            return False,0
+        return False
 
     #时间判断-其他时间
     _time_check = check_time_merge(_proj,_dict,b_log)
@@ -2641,7 +2649,7 @@ class f_group_merge_projects(BaseUDAF):
                 for _j in range(_i+1,len(_group)):
                     _p_uuid,_,_p = _group[_i]
                     _pp_uuid,_,_pp = _group[_j]
-                    if check_merge_rule(_p,_pp,False):
+                    if check_merge_rule(_p,_pp,False,simple_check=True):
                         list_group_pair.append([_p_uuid,_pp_uuid])
             if len(list_group_pair)>0:
                 list_group_data.append(list_group_pair)
@@ -2973,18 +2981,28 @@ class f_check_projects_by_num(BaseUDTF):
                 if _num>=len_start and _num<=len_end:
                     self.forward(json.dumps(_proj,ensure_ascii=False))
 
-@annotate('string->string')
+@annotate('string->string,string')
 class f_check_projects_by_time(BaseUDTF):
 
     def process(self,json_projects):
         if json_projects is not None:
             list_projects = json.loads(json_projects)
+            _type = ""
             for _proj in list_projects:
                 zhaobiao = _proj.get(project_zhao_biao_page_time)
                 zhongbiao = _proj.get(project_zhong_biao_page_time)
-                if (zhongbiao is None or zhongbiao=="") and zhaobiao is not None and zhaobiao!="":
+                if getLength(zhaobiao)>0 and getLength(zhongbiao)>0:
+                    _type = "招中标"
+                elif getLength(zhaobiao)>0 and getLength(zhongbiao)==0:
+                    _type = "招标"
+                elif getLength(zhaobiao)==0 and getLength(zhongbiao)>0:
+                    _type = "中标"
+                else:
+                    _type = "其他"
+                self.forward(json.dumps(_proj,ensure_ascii=False),_type)
+                # if (zhongbiao is None or zhongbiao=="") and zhaobiao is not None and zhaobiao!="":
                 # if zhaobiao is not None and zhongbiao is not None and zhaobiao!="" and zhongbiao!="":
-                    self.forward(json.dumps(_proj,ensure_ascii=False))
+                #     self.forward(json.dumps(_proj,ensure_ascii=False))
 
 @annotate('string->string,string,double')
 class f_extract_year_win_and_price(BaseUDTF):

+ 1 - 1
BaseDataMaintenance/test/ab.py

@@ -10,7 +10,7 @@ from BaseDataMaintenance.dataSource.source import getConnect_ots
 
 import json
 ots_client = getConnect_ots()
-docid = 290544305
+docid = 291120546
 a = {"name":"双腿链条索具&DSL-WLL3TON\载荷3t"}
 test_str = json.dumps(a,ensure_ascii=False)
 print(test_str)