Эх сурвалжийг харах

Merge remote-tracking branch 'origin/master'

fangjiasheng 4 сар өмнө
parent
commit
d6e339a17c

+ 98 - 1
BaseDataMaintenance/common/Utils.py

@@ -661,7 +661,104 @@ def load(path):
     with open(path, 'rb') as f:
         object1 = pickle.load(f)
         return object1
-    
+
+
def uniform_num(num):
    """Normalize a number token to Arabic digits.

    Supported inputs: ASCII digits (a single leading zero such as "05" is
    stripped), Chinese numerals up to two characters ("三", "十五", "二十",
    "二十一"), and the Roman-numeral characters Ⅰ-Ⅶ.  Anything else is
    returned unchanged.

    :param num: str, raw number token
    :return: str, normalized number
    """
    d1 = {'一': '1', '二': '2', '三': '3', '四': '4', '五': '5', '六': '6', '七': '7', '八': '8', '九': '9', '十': '10'}
    d3 = {'Ⅰ': '1', 'Ⅱ': '2', 'Ⅲ': '3', 'Ⅳ': '4', 'Ⅴ': '5', 'Ⅵ': '6', 'Ⅶ': '7'}
    if num.isdigit():
        # strip a single leading zero: "05" -> "5" (longer numbers untouched)
        if re.search(r'^0[\d]$', num):
            num = num[1:]
        return num
    elif re.search(r'^[一二三四五六七八九十]+$', num):
        _digit = re.search(r'^[一二三四五六七八九十]+$', num).group(0)
        if len(_digit) == 1:
            num = d1[_digit]
        elif len(_digit) == 2 and _digit[0] == '十':
            num = '1' + d1[_digit[1]]            # 十五 -> 15
        elif len(_digit) == 2 and _digit[1] == '十':
            num = d1[_digit[0]] + '0'            # 二十 -> 20
        elif len(_digit) == 3 and _digit[1] == '十':
            num = d1[_digit[0]] + d1[_digit[2]]  # 二十一 -> 21
    elif re.search('[ⅠⅡⅢⅣⅤⅥⅦ]', num):
        num = re.search('[ⅠⅡⅢⅣⅤⅥⅦ]', num).group(0)
        num = d3[num]
    return num


def _extract_package_code(package_name):
    """Return the normalized letter/number code found in *package_name*, or ''.

    The eight recognizers below are tried strictly in priority order (same
    order as the original branch chain); the first match wins.  Each pattern
    is searched exactly once.
    """
    num = '([0-9]{1,4}|[一二三四五六七八九十]{1,4}|[ⅠⅡⅢⅣⅤⅥⅦ]{1,4})'
    # 1. whole string is a long (>=5 chars) letter/digit code, e.g. "SHX-ZB-07"
    ser = re.search('^[a-zA-Z0-9-]{5,}$', package_name)
    if ser:
        return ser.group(0).upper()
    # 2. "A包2标段" style: letter + 包 + number + 标
    ser = re.search('(?P<eng>[a-zA-Z])包[:)]?第?(?P<num>%s)标段?' % num, package_name)
    if ser:
        return ser.group('eng').upper() + uniform_num(ser.group('num'))
    # 3. "第A2标段" style: optional code + number + 标/包 suffix
    ser = re.search('第?(?P<eng>[0-9a-zA-Z-]{1,4})?(?P<num>%s)(标[段号的包项]?|合同[包段]|([分子]?[包标]))' % num, package_name)
    if ser is None:
        # 4. "包号:2" style: 标/包/项目 prefix + optional code + number
        ser = re.search('(标[段号的包项]?|项目|子项目?|([分子]?包|包[组件号]))编?号?[::]?(?P<eng>[0-9a-zA-Z-]{1,4})?(?P<num>%s)' % num, package_name)
    if ser:
        _char = ser.group('eng')
        _digit = uniform_num(ser.group('num'))
        return (_char.upper() if _char else '') + _digit
    # 5. "包号:A" style: 标/包 prefix + letter code
    ser = re.search('(标[段号的包项]|([分子]?包|包[组件号]))编?号?[::]?(?P<eng>[a-zA-Z-]{1,5})', package_name)
    if ser:
        return ser.group('eng').upper()
    # 6. "A标段" style: letter code + 标/包 suffix
    ser = re.search('(?P<eng>[a-zA-Z]{1,4})(标[段号的包项]|([分子]?[包标]|包[组件号]))', package_name)
    if ser:
        return ser.group('eng').upper()
    # 7. whole string is a bare number
    ser = re.search('^%s$' % num, package_name)
    if ser:
        return uniform_num(ser.group(0))
    # 8. whole string is a short bare letter/digit code
    ser = re.search('^[a-zA-Z0-9-]+$', package_name)
    if ser:
        return ser.group(0).upper()
    return ''


def uniform_package_name(package_name):
    """Normalize a package (lot) identifier.

    Numbers are converted to Arabic digits, letters are upper-cased, and a
    leading work-type keyword (施工/监理/监测/勘察/设计/劳务) is kept at the
    front of the result, e.g. "包Ⅱ" -> "2", "A包2标段" -> "A2".  When nothing
    recognizable is found, the original string is returned unchanged.

    :param package_name: str, raw package number text
    :return: str, normalized package name
    """
    package_name_raw = package_name
    # drop file-extension noise and 4-digit years
    # NOTE(review): 'docs' in this alternation can never match because 'doc'
    # precedes it — kept as-is to preserve behavior.
    package_name = re.sub(r'pdf|doc|docs|xlsx|rar|\d{4}年', ' ', package_name)
    package_name = package_name.replace('标段(包)', '标段').replace('№', '')
    package_name = re.sub(r'\[|【', '', package_name)
    # a leading work-type keyword is preserved at the front of the result
    kw = re.search('(施工|监理|监测|勘察|设计|劳务)', package_name)
    name = kw.group(0) if kw else ""
    name += _extract_package_code(package_name)
    if name == "":
        return package_name_raw
    if name.isdigit():
        name = str(int(name))  # strip leading zeros from purely numeric codes
    return name
 
 def getIndexOfWord_fool(word):
     

+ 41 - 8
BaseDataMaintenance/dataSource/interface.py

@@ -73,7 +73,32 @@ ACCESS_TOKEN_SUANFA = "https://oapi.dingtalk.com/robot/send?access_token=eec7d42
 ACCESS_TOKEN_DATAWORKS = "https://oapi.dingtalk.com/robot/send?access_token=9489f01c4ab9f0c3f87e2ff5c3e35eb9fb0d17afb6244de4683596df1111daea"
 
 
-def sentMsgToDD(msg,access_token=ACCESS_TOKEN_SUANFA,atAll=False):
+# def sentMsgToDD(msg,access_token=ACCESS_TOKEN_SUANFA,atAll=False):
+#     timestamp = str(round(time.time() * 1000))
+#     secret = 'SECb1c5d36f73fb7cd36f91c71cb05441a7bbdad872e051234a626c7d7ceba6ee6a'
+#     secret_enc = secret.encode('utf-8')
+#     string_to_sign = '{}\n{}'.format(timestamp, secret)
+#     string_to_sign_enc = string_to_sign.encode('utf-8')
+#     hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
+#     sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))
+#     # print(timestamp)
+#     # print(sign)
+#
+#        #导入依赖库
+#     headers={'Content-Type': 'application/json'}   #定义数据类型
+#     webhook = "%s&timestamp=%s&sign=%s"%(access_token,timestamp,sign)
+#     #定义要发送的数据
+#     #"at": {"atMobiles": "['"+ mobile + "']"
+#     data = {
+#         "msgtype": "text",
+#         "text": {"content": msg},
+#         "isAtAll": False,
+#         "at":{"isAtAll": atAll}
+#     }
+#     res = requests.post(webhook, data=json.dumps(data), headers=headers)   #发送post请求
+#     # print(res.status_code)
+
+def sentMsgToDD(msg,access_token=ACCESS_TOKEN_SUANFA,atAll=False,atMobiles=[]):
     timestamp = str(round(time.time() * 1000))
     secret = 'SECb1c5d36f73fb7cd36f91c71cb05441a7bbdad872e051234a626c7d7ceba6ee6a'
     secret_enc = secret.encode('utf-8')
@@ -89,17 +114,25 @@ def sentMsgToDD(msg,access_token=ACCESS_TOKEN_SUANFA,atAll=False):
     webhook = "%s&timestamp=%s&sign=%s"%(access_token,timestamp,sign)
     #定义要发送的数据
     #"at": {"atMobiles": "['"+ mobile + "']"
-    data = {
-        "msgtype": "text",
-        "text": {"content": msg},
-        "isAtAll": False,
-        "at":{"isAtAll": atAll}
-    }
+    if atMobiles: # at 特定人群手机号
+        data = {
+            "msgtype": "text",
+            "text": {"content": msg},
+            "isAtAll": False,
+            "at": {'atMobiles':atMobiles
+                    ,"isAtAll": False}
+        }
+    else:
+        data = {
+            "msgtype": "text",
+            "text": {"content": msg},
+            "isAtAll": False,
+            "at":{"isAtAll": atAll}
+        }
     res = requests.post(webhook, data=json.dumps(data), headers=headers)   #发送post请求
     # print(res.status_code)
 
 
-
 if __name__=="__main__":
     # print(getAttachDealInterface(base64.b64encode(open("F://Workspace2016/BaseDataMaintenance/BaseDataMaintenance/maintenance/attachment/readme.md","rb").read()),"pdf"))
     # sentMsgToDD("测试消息")

+ 132 - 0
BaseDataMaintenance/interface/project_merge_interface.py

@@ -0,0 +1,132 @@
+
+
# Real-time project-merge HTTP interface: wraps Dataflow_dumplicate's
# merge step behind a small Flask endpoint (see the route defined below).

from BaseDataMaintenance.maintenance.dataflow import Dataflow_dumplicate,log

import time


# Shared dataflow instance used by every request; the delete listener is
# not needed for merge-only usage.
flow = Dataflow_dumplicate(start_delete_listener=False)

from BaseDataMaintenance.common.Utils import uniform_package_name
import json
import re
+
def merge_document_interface(item,b_log=False):
    '''
    Real-time project merge: build a lightweight project dict from the
    announcement fields in *item* and try to merge it into existing projects.

    :param item: dict of announcement fields (page_time, tenderee, ...)
    :param b_log: bool, whether the merge step emits debug logging
    :return: uuid (str) of the first merged project, or None when nothing merged
    :raises RuntimeError: when normalization or merging fails (original
        exception attached as the cause)
    '''
    try:

        _proj = {
            "page_time":item.get("page_time"),
            "project_codes":item.get("project_codes"),
            "project_name":item.get("project_name"),
            "tenderee":item.get("tenderee"),
            "agency":item.get("agency"),
            "product":item.get("product"),
            "sub_project_name":item.get("sub_project_name"),
            "bidding_budget":item.get("bidding_budget"),
            "win_tenderer":item.get("win_tenderer"),
            "win_bid_price":item.get("win_bid_price"),
            "province":item.get("province"),
            "city":item.get("city"),
            "district":item.get("district"),
            "zhao_biao_page_time":item.get("zhao_biao_page_time"),
            "zhong_biao_page_time":item.get("zhong_biao_page_time"),
            "enterprise":item.get("enterprise"),
            "detail_link":item.get("detail_link"),
            "doctitle":item.get("doctitle"),

        }

        # strip administrative suffixes so region names compare consistently
        if _proj.get("province"):
            _proj["province"] = re.sub("省","",str(_proj["province"]))
        if _proj.get("city"):
            if len(str(_proj["city"]))>2:
                _proj["city"] = re.sub("市","",str(_proj["city"]))
        if _proj.get("district"):
            if len(str(_proj["district"]))>2:
                _proj["district"] = re.sub("区|县|镇","",str(_proj["district"]))

        # normalize the package number, e.g. "包Ⅱ" -> "2"
        _proj["sub_project_name"] = uniform_package_name(_proj["sub_project_name"])

        # "enterprise" arrives as a comma-separated string; repack it into the
        # JSON structure the merge step expects
        enterprise = _proj.get("enterprise","")
        list_enterprise = enterprise.split(",") if enterprise else []
        _proj["enterprise"] = json.dumps({"nlp_enterprise":list_enterprise},ensure_ascii=False)

        list_projects = flow.merge_projects([_proj],b_log=b_log)
        if len(list_projects)>0:
            uuids = list_projects[0].get("uuid","")
            if uuids:
                l_uuid = uuids.split(",")
                if l_uuid:
                    return l_uuid[0]
    except Exception as e:
        # chain the original error so the real cause stays visible to callers
        raise RuntimeError("error on dumplicate") from e
+
+
import os

# Pin the GPU visible to any model loaded by the dataflow in this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from flask import Flask,request,jsonify
# Flask application hosting the /project_merge endpoint.
app = Flask(__name__)
+
+
@app.route("/project_merge",methods=["POST"])
def embedding():
    """POST /project_merge: merge the JSON payload into existing projects.

    Returns {"success": bool, "project_uuid": str} on success, or
    {"success": False, "msg": str} when merging raised.
    NOTE(review): the function name "embedding" looks copied from another
    interface; it is kept because Flask uses it as the endpoint name.
    """
    response = {"success": True}
    try:
        payload = request.json
        merged_uuid = merge_document_interface(payload)
        response["project_uuid"] = merged_uuid if merged_uuid is not None else ""
    except Exception as e:
        response["success"] = False
        response["msg"] = str(e)

    return jsonify(response)
+
def start_project_merge_server():
    """Start the blocking Flask HTTP server exposing /project_merge."""
    # Flask documents `port` as an int; the original passed the string
    # "15010" and relied on implicit coercion.
    app.run(host="0.0.0.0",port=15010,debug=False)
+
if __name__ == '__main__':

    # start_project_merge_server()

    # Manual smoke test: sample payload posted to a locally running server
    # (start one first via start_project_merge_server()).
    _proj = {
        "page_time":"2025-01-14",
        "project_codes":"SHX-ZB-2024-01013-07",
        "project_name":"泗洪县2025年财政资金定期(含大额存单)存储金融机构采购项目(一期)",
        "tenderee":"泗洪县财政局",
        "agency":"中天志远咨询有限公司",
        "product":"存储金融机构,财政资金定期(含大额存单)存储金融机构",
        "sub_project_name":"8",
        "bidding_budget":0,
        "win_tenderer":"中国农业银行股份有限公司泗洪县支行",
        "win_bid_price":50000000,
        "province":"江苏",
        "city":"宿迁",
        "district":"泗洪",
        "zhao_biao_page_time":"",
        "zhong_biao_page_time":"2025-01-14",
        "enterprise":"中国银行股份有限公司泗洪支行,南京银行股份有限公司泗洪支行",
        "detail_link":"",
        "doctitle":"泗洪县2025年财政资金定期(含大额存单)存储金融机构采购项目(一期)",

    }
    import requests
    resp = requests.post("http://localhost:15010/project_merge",json=_proj)
    print(resp.content.decode("utf-8"))

    #
    # print(merge_document_interface(_proj,b_log=True))

+ 104 - 38
BaseDataMaintenance/maintenance/dataflow.py

@@ -3802,7 +3802,7 @@ class Dataflow_dumplicate(Dataflow):
                 list_dynamic = json.loads(_dynamic)
                 for _d in list_dynamic:
                     _title = _d.get("doctitle","")
-                    if re.search("验收公[示告]",_title) is not None:
+                    if re.search("验收公[示告]|验收结果",_title) is not None or _d.get("docchannel")==122:
                         is_yanshou = True
                         break
 
@@ -4038,42 +4038,48 @@ class Dataflow_dumplicate(Dataflow):
 
         bidclose_time = page_time
         web_source_name = item.get(document_tmp_web_source_name,"")
+        docchannel = item.get(document_tmp_docchannel,"0")
 
-
-
-        if len(page_time)>0:
-            l_page_time = timeAdd(page_time,days=-90)
-            dict_time = item.get("dict_time",{})
-            for k,v in dict_time.items():
-                if v is not None and len(v)>0:
-                    if l_page_time>v:
-                        has_before = True
-                    if v>page_time:
-                        has_after = True
-                    if k==document_tmp_time_bidclose:
-                        bidclose_time = v
-
-        set_web_source = {"中国招标投标公共服务平台","比地招标"}
-
-        if web_source_name in set_web_source and bidclose_time<page_time:
-            return False
-
-        log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
-        if has_before:
-            _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
-                               must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
-            if not has_after:
-                log("check page_time false %s==%s-%s"%(l_page_time,k,v))
-
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                       SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
-            if item.get(document_web_source_name,"")=="中国政府采购网":
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
-                                                                                    SearchQuery(_query,get_total_count=True,limit=1))
-                if total_count>0:
-                    return False
+        try:
+            docchannel = int(docchannel)
+        except:
+            docchannel = 0
+
+        if docchannel<200:
+
+            if len(page_time)>0:
+                l_page_time = timeAdd(page_time,days=-90)
+                dict_time = item.get("dict_time",{})
+                for k,v in dict_time.items():
+                    if v is not None and len(v)>0:
+                        if l_page_time>v:
+                            has_before = True
+                        if v>page_time:
+                            has_after = True
+                        if k==document_tmp_time_bidclose:
+                            bidclose_time = v
+
+            set_web_source = {"中国招标投标公共服务平台","比地招标"}
+
+            if web_source_name in set_web_source and bidclose_time<page_time:
+                return False
+
+            log("check page_time has_before %s has_after %s"%(str(has_before),str(has_after)))
+            if has_before:
+                _query = BoolQuery(must_queries=[MatchPhraseQuery(document_doctitle,item.get(document_doctitle,""))],
+                                   must_not_queries=[TermQuery(document_docid,item.get(document_docid,0))])
+                if not has_after:
+                    log("check page_time false %s==%s-%s"%(l_page_time,k,v))
+
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                           SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
+                if item.get(document_web_source_name,"")=="中国政府采购网":
+                    rows,next_token,total_count,is_all_succeed = self.ots_client.search("document","document_index",
+                                                                                        SearchQuery(_query,get_total_count=True,limit=1))
+                    if total_count>0:
+                        return False
 
         return True
 
@@ -4285,6 +4291,65 @@ class Dataflow_dumplicate(Dataflow):
             mt.run()
 
 
+    def send_daily_check_data(self):
+        import datetime
+        def get_download_url(bucket, ObjectName, timeout):
+            url = ""
+            exist = bucket.object_exists(ObjectName)
+            if exist:
+                get_url = False
+                for i in range(3):
+                    try:
+                        url = bucket.sign_url('GET', ObjectName, timeout)
+                        url = url.replace("-internal", "")  # 替换地址里的内网标识
+                        get_url = True
+                    except:
+                        pass
+                    if get_url:
+                        break
+            return url
+
+        file_timeout = 60 * 60 * 24 * 5 # 文件下载链接保存 5 天
+        # 获取昨天的日期
+        date = str(datetime.date.today() - datetime.timedelta(days=1))
+        oss_path = 'tmp_document_quality_data/'
+        object_path = oss_path + date + '/'
+        msg = "每日数据质量检查结果(报警):"
+
+        csv_name = "数据质量监控检查结果.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket,ObjectName,file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "公告重复量大的编号.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "公告附件重复量大的编号.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "附件识别异常的站源.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        csv_name = "报名时间,截止时间在发布时间之前的公告.xlsx"
+        ObjectName = object_path + csv_name
+        url = get_download_url(self.bucket, ObjectName, file_timeout)
+        if url:
+            msg += "\n文件名:\"%s\",链接:%s" % (csv_name, url)
+
+        atMobiles = ['18813973429'] # 维阵
+        ACCESS_TOKEN_DATAWORKS = "https://oapi.dingtalk.com/robot/send?access_token=9489f01c4ab9f0c3f87e2ff5c3e35eb9fb0d17afb6244de4683596df1111daea"
+        sentMsgToDD(msg,ACCESS_TOKEN_DATAWORKS,atMobiles=atMobiles)
+
 
     def start_flow_dumplicate(self):
         schedule = BlockingScheduler()
@@ -4292,6 +4357,7 @@ class Dataflow_dumplicate(Dataflow):
         schedule.add_job(self.flow_dumpcate_comsumer,"cron",second="*/30")
         schedule.add_job(self.bdm.monitor_dumplicate,"cron",minute="*/10")
         schedule.add_job(self.flow_remove,"cron",hour="20")
+        schedule.add_job(self.send_daily_check_data,"cron",hour='9', minute='10')
         schedule.add_job(self.flow_remove_project_tmp,"cron",hour="20")
         schedule.add_job(self.fix_doc_which_not_in_project,"cron",minute="*/10")
         schedule.start()
@@ -4339,7 +4405,7 @@ class Dataflow_dumplicate(Dataflow):
         list_dict = getRow_ots(rows)
 
         for item in list_dict:
-            self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=False)
+            self.dumplicate_comsumer_handle(item,None,self.ots_client,get_all=True,upgrade=True)
             return
 
     def test_merge(self,list_docid_less,list_docid_greater):
@@ -4515,7 +4581,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(576859812
+    df_dump.test_dumplicate(583564377
                             )
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061

+ 5 - 0
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -654,6 +654,11 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
                             _attach_pg = Attachment_postgres(_attach_ots.getProperties())
                             _attach_pg.setValue("ots_exists",True,True)
                             list_attachment.append(_attach_pg)
+                        else:
+                            log("getAttachments status None find in ots:%s"%(_filemd5))
+                            _attach_pg = Attachment_postgres(_attach_ots.getProperties())
+                            _attach_pg.setValue("ots_exists",True,True)
+                            list_attachment.append(_attach_pg)
 
                     else:
                         log("getAttachments search in path:%s"%(_filemd5))

+ 19 - 1
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -3111,8 +3111,10 @@ def update_document_from_dynamic(_proj):
 def to_project_json(projects):
 
     list_proj = []
+    used_uuid = set()
     for _proj in projects:
         _uuid = _proj.get(project_uuid,"")
+
         update_uuid = _proj.get("update_uuid","")
         _project_uuid = _proj.get("project_uuid","")
         if "enterprise" in _proj:
@@ -3123,7 +3125,12 @@ def to_project_json(projects):
             list_update_uuid.append(_project_uuid)
         list_update_uuid = list(set(list_update_uuid))
         if len(list_uuid)>0:
-            _proj["keep_uuid"] = list_uuid[0]
+            _proj["keep_uuid"] = ""
+            for _uuid in list_uuid:
+                if _uuid not in used_uuid:
+                    used_uuid.add(_uuid)
+                    _proj["keep_uuid"] = list_uuid[0]
+
             _proj["delete_uuid"] = ",".join(list_uuid[1:])
             list_update_uuid.extend(list_uuid[1:])
             _proj["update_uuid"] = ",".join(list_update_uuid)
@@ -3144,6 +3151,17 @@ def to_project_json(projects):
         if "project_uuid" in _proj:
             _proj.pop("project_uuid")
         update_document_from_dynamic(_proj)
+    for _proj in projects:
+        delete_uuid = _proj.get("delete_uuid","")
+        _find = False
+        l_uuid = []
+        for _uuid in delete_uuid.split(","):
+            if _uuid in used_uuid:
+                _find = True
+            else:
+                l_uuid.append(_uuid)
+        if _find:
+            _proj["delete_uuid"] = ",".join(l_uuid)
     return json.dumps(list_proj,cls=MyEncoder,ensure_ascii=False)
 
 def get_page_time_dis(page_time,n_page_time):

+ 4 - 0
BaseDataMaintenance/model/oracle/GongGaoTemp.py

@@ -67,6 +67,10 @@ class GongGaoTemp(BaseModel):
         new_dict["publishtime"] = publishtime
         if "docchannel" in new_dict:
             new_dict["original_docchannel"] = new_dict["docchannel"]
+        new_dict["original_area"] = new_dict.get("area","")
+        new_dict["original_province"] = new_dict.get("province","")
+        new_dict["original_city"] = new_dict.get("city","")
+        new_dict["original_district"] = new_dict.get("district","")
         return new_dict
 
     def setValue(self,k,v,isColumn=False):

+ 8 - 0
BaseDataMaintenance/start_project_merge_server.py

@@ -0,0 +1,8 @@
+
+
# Thin launcher script for the project-merge HTTP service.
# NOTE(review): `sys` and `os` appear unused here — kept pending confirmation
# that nothing imports them via this module.
import sys
import os
from BaseDataMaintenance.interface.project_merge_interface import start_project_merge_server

if __name__ == '__main__':
    # Blocks forever serving /project_merge on port 15010.
    start_project_merge_server()