Bläddra i källkod

项目合并接口

luojiehua 4 månader sedan
förälder
incheckning
f408beeb94

+ 98 - 1
BaseDataMaintenance/common/Utils.py

@@ -661,7 +661,104 @@ def load(path):
     with open(path, 'rb') as f:
         object1 = pickle.load(f)
         return object1
-    
+
+
def uniform_num(num):
    """Convert a small ordinal to Arabic digits.

    Handles three notations:

    * digit strings: returned as-is, except that a two-character value with
      a leading zero loses the zero ('07' -> '7');
    * Chinese numerals made of 一..十, for the shapes X, 十X, X十 and X十Y
      (i.e. 1-99);
    * the Roman-numeral characters Ⅰ..Ⅶ: the first one found is converted.

    :param num: string holding the ordinal
    :return: the Arabic-digit string, or ``num`` unchanged when it is not in
        one of the supported shapes
    """
    chinese_digits = {'一': '1', '二': '2', '三': '3', '四': '4', '五': '5',
                      '六': '6', '七': '7', '八': '8', '九': '9', '十': '10'}
    roman_digits = {'Ⅰ': '1', 'Ⅱ': '2', 'Ⅲ': '3', 'Ⅳ': '4', 'Ⅴ': '5', 'Ⅵ': '6', 'Ⅶ': '7'}

    if num.isdigit():
        # Only a single padding zero in front of one digit is stripped.
        if re.search(r'^0[\d]$', num):
            return num[1:]
        return num

    chinese = re.search('^[一二三四五六七八九十]+$', num)
    if chinese:
        text = chinese.group(0)
        if len(text) == 1:
            return chinese_digits[text]
        if len(text) == 2 and text[0] == '十' and text[1] != '十':
            # 十X -> 1X
            return '1' + chinese_digits[text[1]]
        if len(text) == 2 and text[1] == '十' and text[0] != '十':
            # X十 -> X0
            return chinese_digits[text[0]] + '0'
        if len(text) == 3 and text[1] == '十' and '十' not in (text[0], text[2]):
            # X十Y -> XY
            return chinese_digits[text[0]] + chinese_digits[text[2]]
        # Any other combination (e.g. 十十, 一二) is not a number this
        # function understands; leave it untouched rather than emit garbage.
        return num

    roman = re.search('[ⅠⅡⅢⅣⅤⅥⅦ]', num)
    if roman:
        return roman_digits[roman.group(0)]
    return num
+
def uniform_package_name(package_name):
    """Normalize a bid package / lot identifier.

    Numbers are unified to Arabic digits, letters are upper-cased, and the
    first work-type keyword found (施工, 监理, 监测, 勘察, 设计, 劳务) is placed
    in front of the extracted code. Examples: ``A包2标段`` -> ``A2``,
    ``包Ⅱ`` -> ``2``, ``a包`` -> ``A``.

    :param package_name: package identifier as a string
    :return: the normalized identifier; the original value unchanged when no
        rule matches or when the input is empty / not a string
    """
    # Callers pass values read straight from request payloads, so tolerate
    # None and other non-string values instead of failing inside re.sub.
    if not isinstance(package_name, str) or not package_name:
        return package_name

    # Strip file-extension fragments and "<yyyy>年" noise before matching.
    # NOTE(review): the 'docs' alternative can never match because 'doc'
    # precedes it; possibly 'docx' was intended. Pattern kept as-is.
    cleaned = re.sub(r'pdf|doc|docs|xlsx|rar|\d{4}年', ' ', package_name)
    cleaned = cleaned.replace('标段(包)', '标段').replace('№', '')
    cleaned = re.sub(r'\[|【', '', cleaned)

    kw = re.search('(施工|监理|监测|勘察|设计|劳务)', cleaned)
    name = (kw.group(0) if kw else "") + _package_code(cleaned)

    if name == "":
        return package_name
    if name.isdigit():
        # Drop leading zeros of purely numeric codes.
        name = str(int(name))
    return name


def _package_code(text):
    """Return the code extracted from ``text`` by the first matching rule, or ""."""
    num = r'(?P<num>([0-9]{1,4}|[一二三四五六七八九十]{1,4}|[ⅠⅡⅢⅣⅤⅥⅦ]{1,4}))'
    eng = r'(?P<eng>[0-9a-zA-Z-]{1,4})?'

    # Rule 1: the whole text is an ASCII code of five or more characters.
    found = re.search(r'^[a-zA-Z0-9-]{5,}$', text)
    if found:
        return found.group(0).upper()

    # Rule 2: "<letter>包<number>标", e.g. "A包2标段".
    found = re.search(r'(?P<eng>[a-zA-Z])包[:)]?第?' + num + r'标段?', text)
    if found:
        return found.group('eng').upper() + uniform_num(found.group('num'))

    # Rules 3 and 4: optional alphanumeric prefix plus a number, placed
    # before (rule 3) or after (rule 4) the 标/包/项目 marker.
    numbered_rules = (
        r'第?' + eng + num + r'(标[段号的包项]?|合同[包段]|([分子]?[包标]))',
        r'(标[段号的包项]?|项目|子项目?|([分子]?包|包[组件号]))编?号?[::]?' + eng + num,
    )
    for pattern in numbered_rules:
        found = re.search(pattern, text)
        if found:
            prefix = found.group('eng') or ''
            return prefix.upper() + uniform_num(found.group('num'))

    # Rules 5 and 6: letters only, after (rule 5) or before (rule 6) the marker.
    lettered_rules = (
        r'(标[段号的包项]|([分子]?包|包[组件号]))编?号?[::]?(?P<eng>[a-zA-Z-]{1,5})',
        r'(?P<eng>[a-zA-Z]{1,4})(标[段号的包项]|([分子]?[包标]|包[组件号]))',
    )
    for pattern in lettered_rules:
        found = re.search(pattern, text)
        if found:
            return found.group('eng').upper()

    # Rule 7: the whole text is a bare number in any supported notation.
    found = re.search(r'^([0-9]{1,4}|[一二三四五六七八九十]{1,4}|[ⅠⅡⅢⅣⅤⅥⅦ]{1,4})$', text)
    if found:
        return uniform_num(found.group(0))

    # Rule 8: any remaining pure-ASCII code.
    found = re.search(r'^[a-zA-Z0-9-]+$', text)
    if found:
        return found.group(0).upper()
    return ""
 
 def getIndexOfWord_fool(word):
     

+ 132 - 0
BaseDataMaintenance/interface/project_merge_interface.py

@@ -0,0 +1,132 @@
+
+
+from BaseDataMaintenance.maintenance.dataflow import Dataflow_dumplicate,log
+
+import time
+
+
+flow = Dataflow_dumplicate(start_delete_listener=False)  # module-level instance created at import time; merge_document_interface calls flow.merge_projects on it
+
+from BaseDataMaintenance.common.Utils import uniform_package_name
+import json
+import re
+
+
def merge_document_interface(item,b_log=False):
    """Merge one announcement into the project set in real time.

    The known project fields are copied from ``item``, the region names and
    the package name are normalized, the comma-separated ``enterprise`` value
    is wrapped as a JSON ``{"nlp_enterprise": [...]}`` string, and the record
    is handed to ``flow.merge_projects``.

    :param item: dict-like object (must support ``.get``) carrying the
        announcement fields
    :param b_log: forwarded to ``flow.merge_projects``
    :return: the first uuid of the first merged project, or ``None`` when the
        merge yields no project or no uuid
    :raises RuntimeError: on any failure; the original exception is attached
        as ``__cause__``
    """
    field_names = (
        "page_time",
        "project_codes",
        "project_name",
        "tenderee",
        "agency",
        "product",
        "sub_project_name",
        "bidding_budget",
        "win_tenderer",
        "win_bid_price",
        "province",
        "city",
        "district",
        "zhao_biao_page_time",
        "zhong_biao_page_time",
        "enterprise",
        "detail_link",
        "doctitle",
    )
    try:
        _proj = {field: item.get(field) for field in field_names}

        # Strip administrative suffixes; names of two characters or fewer are
        # kept intact for city and district.
        if _proj.get("province"):
            _proj["province"] = re.sub("省","",str(_proj["province"]))
        if _proj.get("city"):
            if len(str(_proj["city"]))>2:
                _proj["city"] = re.sub("市","",str(_proj["city"]))
        if _proj.get("district"):
            if len(str(_proj["district"]))>2:
                _proj["district"] = re.sub("区|县|镇","",str(_proj["district"]))

        # A missing/empty package name is left as-is: the normalizer runs
        # regexes on its argument and cannot take None.
        sub_project_name = _proj.get("sub_project_name")
        if sub_project_name:
            _proj["sub_project_name"] = uniform_package_name(str(sub_project_name))

        enterprise = _proj.get("enterprise","")
        list_enterprise = enterprise.split(",") if enterprise else []
        _proj["enterprise"] = json.dumps({"nlp_enterprise":list_enterprise},ensure_ascii= False)

        list_projects = flow.merge_projects([_proj],b_log=b_log)
        if len(list_projects)>0:
            uuids = list_projects[0].get("uuid","")
            if uuids:
                l_uuid = uuids.split(",")
                if l_uuid:
                    return l_uuid[0]
        return None
    except Exception as e:
        # Keep the public exception type and message, but chain the cause so
        # the real failure is visible in tracebacks.
        raise RuntimeError("error on dumplicate") from e
+
+
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # NOTE(review): presumably pins the process to GPU index 1; it is set after the Dataflow import and instantiation earlier in this module - confirm that is early enough
+
+from flask import Flask,request,jsonify
+app = Flask(__name__)  # Flask application on which the /project_merge route below is registered
+
+
@app.route("/project_merge",methods=["POST"])
def embedding():
    """Handle POST /project_merge.

    Passes the request's JSON body to ``merge_document_interface`` and
    replies with ``{"success": true, "project_uuid": ...}`` (empty string
    when no uuid was produced) or, on any exception,
    ``{"success": false, "msg": <error text>}``.
    """
    try:
        merged_uuid = merge_document_interface(request.json)
    except Exception as e:
        # Failures are reported in the payload rather than as an HTTP error.
        return jsonify({"success": False, "msg": str(e)})

    payload = {"success": True}
    payload["project_uuid"] = "" if merged_uuid is None else merged_uuid
    return jsonify(payload)
+
def start_project_merge_server():
    """Start the Flask app on all interfaces, port 15010, with debug off."""
    # Flask documents ``port`` as an int; a string only works on versions
    # that coerce it internally.
    app.run(host="0.0.0.0",port=15010,debug=False)
+
if __name__ == '__main__':
    # Manual smoke test: post one sample announcement to a server that is
    # already listening on localhost:15010 and print the raw reply.
    # (To serve instead, call start_project_merge_server(); to bypass HTTP,
    # call merge_document_interface(sample_project, b_log=True).)
    import requests

    sample_project = dict(
        page_time="2025-01-14",
        project_codes="SHX-ZB-2024-01013-07",
        project_name="泗洪县2025年财政资金定期(含大额存单)存储金融机构采购项目(一期)",
        tenderee="泗洪县财政局",
        agency="中天志远咨询有限公司",
        product="存储金融机构,财政资金定期(含大额存单)存储金融机构",
        sub_project_name="8",
        bidding_budget=0,
        win_tenderer="中国农业银行股份有限公司泗洪县支行",
        win_bid_price=50000000,
        province="江苏",
        city="宿迁",
        district="泗洪",
        zhao_biao_page_time="",
        zhong_biao_page_time="2025-01-14",
        enterprise="中国银行股份有限公司泗洪支行,南京银行股份有限公司泗洪支行",
        detail_link="",
        doctitle="泗洪县2025年财政资金定期(含大额存单)存储金融机构采购项目(一期)",
    )

    response = requests.post("http://localhost:15010/project_merge",json=sample_project)
    reply_text = response.content.decode("utf-8")
    print(reply_text)
+
+

+ 8 - 0
BaseDataMaintenance/start_project_merge_server.py

@@ -0,0 +1,8 @@
+
+
+import sys
+import os
+from BaseDataMaintenance.interface.project_merge_interface import start_project_merge_server
+
+if __name__ == '__main__':  # run only when this file is executed as a script, not when imported
+    start_project_merge_server()  # imported above from BaseDataMaintenance.interface.project_merge_interface