Forráskód Böngészése

数据遗漏检查后自动同步

luojiehua 6 hónapja
szülő
commit
0c2f1ef452

+ 2 - 0
.idea/encodings.xml

@@ -2,8 +2,10 @@
 <project version="4">
   <component name="Encoding">
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/attachmentProcessTime.xlsx" charset="GBK" />
+    <file url="file://$PROJECT_DIR$/BaseDataMaintenance/chat/chatUtil.py" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/dataSource/searchPaddle.py" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/maintenance/attachment/2022-01-18_183521_export11.xlsx" charset="GBK" />
+    <file url="file://$PROJECT_DIR$/BaseDataMaintenance/maintenance/gpt_extract.py" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/maintenance/product/select_product_exclude_name_from_tw_prod.csv" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/maintenance/product/select_product_product_name_exclude_name.csv" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/maintenance/product/update_product.csv" charset="GBK" />

+ 2 - 3
BaseDataMaintenance/common/Utils.py

@@ -720,7 +720,6 @@ def getMultipleFactor(unit):
     MultipleFactor = {"兆":Decimal(1000000000000),"亿":Decimal(100000000),"万":Decimal(10000),"仟":Decimal(1000),"千":Decimal(1000),"佰":Decimal(100),"百":Decimal(100),"拾":Decimal(10),"十":Decimal(10),"元":Decimal(1),"圆":Decimal(1),"角":round(Decimal(0.1),1),"分":round(Decimal(0.01),2)}
     return MultipleFactor.get(unit)
 
-
 def getUnifyMoney(money):
     '''
     @summary:将中文金额字符串转换为数字金额
@@ -735,9 +734,9 @@ def getUnifyMoney(money):
     money = re.sub("[,,]","",money)
     money = re.sub("[^0-9.零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]","",money)
     result = Decimal(0)
-    chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖","一","二","三","四","五","六","七","八","九"]
+    chnDigits = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖"]
     # chnFactorUnits = ["兆", "亿", "万", "仟", "佰", "拾","圆","元","角","分"]
-    chnFactorUnits = ["圆", "元","兆", "亿", "万", "仟", "佰", "拾", "角", "分", '十', '百', '千']
+    chnFactorUnits = ["兆", "亿", "万", "仟", '千', "佰", '百', "拾", '十',"圆", "元", "角", "分"]  # 20240611 修复大写提取错误 '陆拾陆亿伍千柒佰零叁万肆千叁佰陆拾伍元' Decimal('11607430365')
 
     LowMoneypattern = re.compile("^[\d,]+(\.\d+)?$")
     BigMoneypattern = re.compile("^零?(?P<BigMoney>[%s])$"%("".join(chnDigits)))

+ 9 - 0
BaseDataMaintenance/dataMonitor/data_monitor.py

@@ -20,6 +20,8 @@ from BaseDataMaintenance.common.multiThread import MultiThreadHandler
 
 from BaseDataMaintenance.maintenance.dataflow_settings import *
 
+from BaseDataMaintenance.maintenance.dataflow_mq import fixDoc_to_queue_extract,fixDoc_to_queue_init
+
 import pandas as pd
 
 
@@ -200,6 +202,13 @@ class BaseDataMonitor():
                 sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=True)
                 # sendEmail(smtp_host,smtp_username,smtp_password,self.recieviers,_msg)
 
+            _count = fixDoc_to_queue_init(check_filename)
+            if _count>0:
+                _msg = "数据遗漏检查%d条公告已重新同步"%(_count)
+                sentMsgToDD(_msg,ACCESS_TOKEN_DATAWORKS,atAll=True)
+                df_data.to_excel("%s_bak.xlsx"%check_filename)
+
+
 
 
         except Exception as e:

+ 1 - 1
BaseDataMaintenance/fixDoc_to_queue_extract.py

@@ -8,4 +8,4 @@ from BaseDataMaintenance.maintenance.dataflow_mq import fixDoc_to_queue_extract,
 
 if __name__ == '__main__':
     # fixDoc_to_queue_extract()
-    fixDoc_to_queue_init(filename="/data/python/flow_init_check/flow_init_2024-08-13.xlsx")
+    fixDoc_to_queue_init(filename="/data/python/flow_init_check/flow_init_2024-12-02.xlsx")

+ 71 - 61
BaseDataMaintenance/maintenance/dataflow.py

@@ -3177,74 +3177,84 @@ class Dataflow_dumplicate(Dataflow):
 
 
         #更新私有属性
-        for _pp in list_package_properties:
-
-            flag_update = False
-            sub_project_name = _pp.get(project_sub_project_name,"")
-            if sub_project_name=="Project":
-                sub_project_name = ""
-            win_tenderer = _pp.get(project_win_tenderer,"")
-            win_bid_price = _pp.get(project_win_bid_price,0)
-            bidding_budget = _pp.get(project_bidding_budget,0)
-            if win_tenderer!="" and bidding_budget!=0:
-                _key = "%s-%s-%s"%(sub_project_name,str(win_tenderer),str(bidding_budget))
-                if _key in dict_package:
-                    if self.is_same_package(_pp,dict_package[_key]):
-                        ud = self.getUpdate_dict(_pp)
-                        self.set_project_uuid(ud,dict_package[_key].get("uuid"))
-                        dict_package[_key].update(ud)
-                        flag_update = True
-                        continue
-            if win_tenderer!="" and  win_bid_price!=0:
-                _key = "%s-%s-%s"%(sub_project_name,win_tenderer,str(win_bid_price))
-                if _key in dict_package:
-                    if self.is_same_package(_pp,dict_package[_key]):
-                        ud = self.getUpdate_dict(_pp)
-                        self.set_project_uuid(ud,dict_package[_key].get("uuid"))
-                        dict_package[_key].update(ud)
-                        flag_update = True
-                        continue
-            if win_tenderer!="":
-                _key = "%s-%s"%(sub_project_name,win_tenderer)
-                if _key in dict_package:
-                    if self.is_same_package(_pp,dict_package[_key]):
-                        ud = self.getUpdate_dict(_pp)
-                        self.set_project_uuid(ud,dict_package[_key].get("uuid"))
-                        dict_package[_key].update(ud)
-                        flag_update = True
-                        continue
-            if bidding_budget!=0:
-                _key = "%s-%s"%(sub_project_name,str(bidding_budget))
-                if _key in dict_package:
-                    if self.is_same_package(_pp,dict_package[_key]):
-                        ud = self.getUpdate_dict(_pp)
-                        self.set_project_uuid(ud,dict_package[_key].get("uuid"))
-                        dict_package[_key].update(ud)
-                        flag_update = True
-                        continue
-            if not flag_update:
-                _pp.update(project_dict)
-                projects.append(_pp)
+        if len(projects)==1 and len(list_package_properties)==1:
+            _pp = list_package_properties[0]
+            pp = projects[0]
+            ud = self.getUpdate_dict(_pp)
+            self.set_project_uuid(ud,pp.get("uuid"))
+            pp.update(_pp)
+        else:
 
+            for _pp in list_package_properties:
 
-                _counts = 0
+                flag_update = False
+                sub_project_name = _pp.get(project_sub_project_name,"")
+                if sub_project_name=="Project":
+                    sub_project_name = ""
+                win_tenderer = _pp.get(project_win_tenderer,"")
+                win_bid_price = _pp.get(project_win_bid_price,0)
+                bidding_budget = _pp.get(project_bidding_budget,0)
                 if win_tenderer!="" and bidding_budget!=0:
                     _key = "%s-%s-%s"%(sub_project_name,str(win_tenderer),str(bidding_budget))
-                    dict_package[_key] = _pp
-                _counts += 1
+                    if _key in dict_package:
+                        if self.is_same_package(_pp,dict_package[_key]):
+                            ud = self.getUpdate_dict(_pp)
+                            self.set_project_uuid(ud,dict_package[_key].get("uuid"))
+                            dict_package[_key].update(ud)
+                            flag_update = True
+                            continue
                 if win_tenderer!="" and  win_bid_price!=0:
                     _key = "%s-%s-%s"%(sub_project_name,win_tenderer,str(win_bid_price))
-                    dict_package[_key] = _pp
-                    _counts +=1
-                if _counts==0:
-                    if win_tenderer!="":
-                        _key = "%s-%s"%(sub_project_name,win_tenderer)
+                    if _key in dict_package:
+                        if self.is_same_package(_pp,dict_package[_key]):
+                            ud = self.getUpdate_dict(_pp)
+                            self.set_project_uuid(ud,dict_package[_key].get("uuid"))
+                            dict_package[_key].update(ud)
+                            flag_update = True
+                            continue
+                if win_tenderer!="":
+                    _key = "%s-%s"%(sub_project_name,win_tenderer)
+                    if _key in dict_package:
+                        if self.is_same_package(_pp,dict_package[_key]):
+                            ud = self.getUpdate_dict(_pp)
+                            self.set_project_uuid(ud,dict_package[_key].get("uuid"))
+                            dict_package[_key].update(ud)
+                            flag_update = True
+                            continue
+                if bidding_budget!=0:
+                    _key = "%s-%s"%(sub_project_name,str(bidding_budget))
+                    if _key in dict_package:
+                        if self.is_same_package(_pp,dict_package[_key]):
+                            ud = self.getUpdate_dict(_pp)
+                            self.set_project_uuid(ud,dict_package[_key].get("uuid"))
+                            dict_package[_key].update(ud)
+                            flag_update = True
+                            continue
+                if not flag_update:
+                    _pp.update(project_dict)
+                    projects.append(_pp)
+
+
+                    _counts = 0
+                    if win_tenderer!="" and bidding_budget!=0:
+                        _key = "%s-%s-%s"%(sub_project_name,str(win_tenderer),str(bidding_budget))
                         dict_package[_key] = _pp
-                        _counts += 1
-                    if bidding_budget!=0:
-                        _key = "%s-%s"%(sub_project_name,str(bidding_budget))
+                    _counts += 1
+                    if win_tenderer!="" and  win_bid_price!=0:
+                        _key = "%s-%s-%s"%(sub_project_name,win_tenderer,str(win_bid_price))
                         dict_package[_key] = _pp
-                        _counts += 1
+                        _counts +=1
+                    if _counts==0:
+                        if win_tenderer!="":
+                            _key = "%s-%s"%(sub_project_name,win_tenderer)
+                            dict_package[_key] = _pp
+                            _counts += 1
+                        if bidding_budget!=0:
+                            _key = "%s-%s"%(sub_project_name,str(bidding_budget))
+                            dict_package[_key] = _pp
+                            _counts += 1
+
+
 
 
 
@@ -4487,7 +4497,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(457656095
+    df_dump.test_dumplicate(562889387
                             )
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061

+ 19 - 2
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -1951,25 +1951,42 @@ current_path = os.path.abspath(os.path.dirname(__file__))
 def fixDoc_to_queue_init(filename=""):
     import pandas as pd
     from BaseDataMaintenance.model.oracle.GongGaoTemp import dict_oracle2ots
+    from BaseDataMaintenance.model.oracle.TouSuTemp import dict_oracle2ots as dict_oracle2ots_tousu
     if filename=="":
         filename = os.path.join(current_path,"check.xlsx")
     df = pd.read_excel(filename)
     if "docchannel" in dict_oracle2ots:
         dict_oracle2ots.pop("docchannel")
     row_name = ",".join(list(dict_oracle2ots.keys()))
+
+    list_tousu_keys = []
+    for k,v in dict_oracle2ots_tousu.items():
+        if str(k).isupper():
+            list_tousu_keys.append(k)
+    row_name_tousu = ",".join(list(list_tousu_keys))
     conn = getConnection_oracle()
     cursor = conn.cursor()
     _count = 0
     for uuid,tablename,_exists,_toolong in zip(df["uuid"],df["tablename"],df["exists"],df["tolong"]):
         if _exists==0 and _toolong==0:
             _count += 1
+            is_tousu = False
+            if tablename in ('bxkc.t_wei_fa_ji_lu_temp','bxkc.t_tou_su_chu_li_temp','bxkc.t_qi_ta_shi_xin_temp'):
+                is_tousu = True
             _source = str(tablename).replace("_TEMP","")
-            sql = " insert into %s(%s) select %s from %s where id='%s' "%(tablename,row_name,row_name,_source,uuid)
-            cursor.execute(sql)
+            if is_tousu:
+                _source = str(tablename).replace("_temp","")
+            _rowname = row_name_tousu if is_tousu else row_name
+
+            sql = " insert into %s(%s) select %s from %s where id='%s' "%(tablename,_rowname,_rowname,_source,uuid)
             log("%d:%s"%(_count,sql))
+            cursor.execute(sql)
+
     conn.commit()
     conn.close()
 
+    return _count
+
 if __name__ == '__main__':
     # di = Dataflow_init()
     # di.start_dataflow_init()

+ 11 - 12
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -2757,21 +2757,20 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*300,return_prob=Fa
 
     prob_count += _codes_check
 
-    if is_few:
-        if _codes_check!=1:
-            if _title_check!=1:
-                if return_prob:
-                    return False,0
-                return False
-            if len(enterprise)>0 and len(enterprise_to_merge)>0:
-                if len(enterprise & enterprise_to_merge)==0:
-                    if return_prob:
-                        return False,0
-                    return False
-            if _product_check==-1:
+    if _codes_check!=1:
+        if _title_check!=1:
+            if return_prob:
+                return False,0
+            return False
+        if len(enterprise)>0 and len(enterprise_to_merge)>0:
+            if len(enterprise & enterprise_to_merge)==0:
                 if return_prob:
                     return False,0
                 return False
+        if _product_check==-1:
+            if return_prob:
+                return False,0
+            return False
 
     min_count = 2
     if product=="" or product_to_merge=="":