@@ -3802,7 +3802,7 @@ class Dataflow_dumplicate(Dataflow):
 
             list_dynamic = json.loads(_dynamic)
             for _d in list_dynamic:
                 _title = _d.get("doctitle","")
-                if re.search("验收公[示告]",_title) is not None:
+                if re.search("验收公[示告]|验收结果",_title) is not None or _d.get("docchannel")==122:
                     is_yanshou = True
                     break
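For reference, a minimal standalone sketch of the widened acceptance check: the pattern now also matches 验收结果 ("acceptance result") in titles, and docchannel 122 is treated as an acceptance dynamic regardless of title. The sample entries below are hypothetical.

import re

def is_acceptance(list_dynamic):
    # Mirrors the loop above: a dynamic counts as acceptance when its
    # title matches the widened pattern or its docchannel is 122.
    for _d in list_dynamic:
        _title = _d.get("doctitle", "")
        if re.search("验收公[示告]|验收结果", _title) is not None or _d.get("docchannel") == 122:
            return True
    return False

print(is_acceptance([{"doctitle": "XX项目验收结果公告"}]))                 # True
print(is_acceptance([{"doctitle": "XX项目招标公告", "docchannel": 122}]))  # True
print(is_acceptance([{"doctitle": "XX项目招标公告"}]))                     # False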
@@ -4038,42 +4038,48 @@ class Dataflow_dumplicate(Dataflow):
 
 
        bidclose_time = page_time
        web_source_name = item.get(document_tmp_web_source_name,"")
+        docchannel = item.get(document_tmp_docchannel,"0")
-
-
-        if len(page_time)>0:
-            l_page_time = timeAdd(page_time,days=-90)
-            dict_time = item.get("dict_time",{})
-            for k,v in dict_time.items():
-                if v is not None and len(v)>0:
-                    if l_page_time>v:
-                        has_before = True
-                    if v>page_time:
-                        has_after = True
-                    if k==document_tmp_time_bidclose:
-                        bidclose_time = v
-
-        set_web_source = {"中国招标投标公共服务平台","比地招标"}
-
-        if web_source_name in set_web_source and bidclose_time<page_time:
-            return False
-
-        log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
-        if has_before:
-            _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
-                               must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
-            if not has_after:
-                log("check page_time false %s==%s-%s"%(l_page_time,k,v))
-
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                                                                    SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
-            if item.get(document_web_source_name,"")=="中国政府采购网":
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                                                                    SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
+        try:
+            docchannel = int(docchannel)
+        except:
+            docchannel = 0
+
+        if docchannel<200:
+
+            if len(page_time)>0:
+                l_page_time = timeAdd(page_time,days=-90)
+                dict_time = item.get("dict_time",{})
+                for k,v in dict_time.items():
+                    if v is not None and len(v)>0:
+                        if l_page_time>v:
+                            has_before = True
+                        if v>page_time:
+                            has_after = True
+                        if k==document_tmp_time_bidclose:
+                            bidclose_time = v
+
+            set_web_source = {"中国招标投标公共服务平台","比地招标"}
+
+            if web_source_name in set_web_source and bidclose_time<page_time:
+                return False
+
+            log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
+            if has_before:
+                _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
+                                   must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
+                if not has_after:
+                    log("check page_time false %s==%s-%s"%(l_page_time,k,v))
+
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                                                                        SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
+                if item.get(document_web_source_name,"")=="中国政府采购网":
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                                                                        SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
 
        return True
 
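The new gate can be read in isolation as follows. This is a minimal sketch, assuming docchannel is stored as a string, that page_time and the dict_time values are "YYYY-MM-DD" strings (so lexicographic comparison equals date order), and with timeAdd replaced by datetime arithmetic; the downstream OTS title lookup and the bidclose check are omitted.

import datetime

def page_time_flags(item):
    # Normalize docchannel: it may arrive as a string such as "101".
    try:
        docchannel = int(item.get("docchannel", "0"))
    except (TypeError, ValueError):
        docchannel = 0

    has_before = has_after = False
    if docchannel < 200:  # only ordinary channels get the date sanity check
        page_time = item.get("page_time", "")
        if page_time:
            # 90 days before the publish date; ISO date strings compare like dates
            l_page_time = str(datetime.date.fromisoformat(page_time) - datetime.timedelta(days=90))
            for k, v in item.get("dict_time", {}).items():
                if v:
                    has_before |= l_page_time > v  # some date long before publication
                    has_after |= v > page_time     # some date after publication
    return has_before, has_after

print(page_time_flags({"docchannel": "101", "page_time": "2024-06-01",
                       "dict_time": {"time_bidclose": "2023-01-01"}}))  # (True, False)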
@@ -4285,6 +4291,65 @@ class Dataflow_dumplicate(Dataflow):
        mt.run()
 
 
+    def send_daily_check_data(self):
+        import datetime
+        def get_download_url(bucket, ObjectName, timeout):
+            url = ""
+            exist = bucket.object_exists(ObjectName)
+            if exist:
+                get_url = False
+                for i in range(3):
+                    try:
+                        url = bucket.sign_url('GET', ObjectName, timeout)
+                        url = url.replace("-internal", "")  # strip the internal-network marker from the address
+                        get_url = True
+                    except:
+                        pass
+                    if get_url:
+                        break
+            return url
+
+        file_timeout = 60 * 60 * 24 * 5  # download links stay valid for 5 days
+        # get yesterday's date
+        date = str(datetime.date.today() - datetime.timedelta(days=1))
+        oss_path = 'tmp_document_quality_data/'
+        object_path = oss_path + date + '/'
+        msg = "每日数据质量检查结果(报警):"
+
+        csv_name = "数据质量监控检查结果.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket,ObjectName,file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "公告重复量大的编号.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "公告附件重复量大的编号.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "附件识别异常的站源.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "报名时间,截止时间在发布时间之前的公告.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        atMobiles = ['18813973429']  # 维阵
+        ACCESS_TOKEN_DATAWORKS = "https://oapi.dingtalk.com/robot/send?access_token=9489f01c4ab9f0c3f87e2ff5c3e35eb9fb0d17afb6244de4683596df1111daea"
+        sentMsgToDD(msg,ACCESS_TOKEN_DATAWORKS,atMobiles=atMobiles)
+
 
    def start_flow_dumplicate(self):
        schedule = BlockingScheduler()
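sentMsgToDD is a project-internal helper; for readers without the codebase, a minimal stand-in built on DingTalk's documented robot webhook (a JSON "text" message with @-mentions) could look like the sketch below. The webhook URL and mobile number in the usage comment are placeholders.

import json
import urllib.request

def sent_msg_to_dd(msg, webhook_url, atMobiles=None):
    # DingTalk robot webhook: POST a JSON "text" message; atMobiles lists
    # the phone numbers to @-mention in the group chat.
    payload = {
        "msgtype": "text",
        "text": {"content": msg},
        "at": {"atMobiles": atMobiles or [], "isAtAll": False},
    }
    req = urllib.request.Request(
        webhook_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return resp.read()

# sent_msg_to_dd("每日数据质量检查结果(报警):...",
#                "https://oapi.dingtalk.com/robot/send?access_token=<token>",
#                atMobiles=["<mobile>"])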
@@ -4292,6 +4357,7 @@ class Dataflow_dumplicate(Dataflow):
        schedule.add_job(self.flow_dumpcate_comsumer,"cron",second="*/30")
        schedule.add_job(self.bdm.monitor_dumplicate,"cron",minute="*/10")
        schedule.add_job(self.flow_remove,"cron",hour="20")
+        schedule.add_job(self.send_daily_check_data,"cron",hour='9', minute='10')
        schedule.add_job(self.flow_remove_project_tmp,"cron",hour="20")
        schedule.add_job(self.fix_doc_which_not_in_project,"cron",minute="*/10")
        schedule.start()
 
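The new job uses the same APScheduler cron style as its neighbours; a self-contained example of that scheduling pattern, with a stub in place of the real report method, is:

from apscheduler.schedulers.blocking import BlockingScheduler

def send_daily_check_data():
    print("assemble and send the daily data-quality report")

schedule = BlockingScheduler()
# Fire once a day at 09:10, matching hour='9', minute='10' above.
schedule.add_job(send_daily_check_data, "cron", hour="9", minute="10")
schedule.start()  # blocks the current thread and runs jobs on schedule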
@@ -4339,7 +4405,7 @@ class Dataflow_dumplicate(Dataflow):
        list_dict = getRow_ots(rows)
 
        for item in list_dict:
-            self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=False)
+            self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=True)
        return
 
    def test_merge(self,list_docid_less,list_docid_greater):
@@ -4515,7 +4581,7 @@ if __name__ == '__main__':
    # test_attachment_interface()
    df_dump = Dataflow_dumplicate(start_delete_listener=False)
    # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(576859812
+    df_dump.test_dumplicate(583564377
                            )
    # compare_dumplicate_check()
    # df_dump.test_merge([391898061