Pārlūkot izejas kodu

Redis公司实体过滤规则修改

znj 6 mēneši atpakaļ
vecāks
revīzija
bd9facf410

+ 69 - 10
BaseDataMaintenance/maintenance/enterprise/enterprise2Redis.py

@@ -85,7 +85,7 @@ class enterprise2Redis():
                 }
             },
             "sort": [
-                {"create_time": "desc"}
+                {"update_time": "desc"}
             ]
         }
 
@@ -277,14 +277,16 @@ def isLegalNewName(enterprise_name):
     if re.search("^.?测试|测试.?$", enterprise_name):
         return -1
     if re.search("个人|个体", enterprise_name):
-        #  按照字数过滤,比如剔除个体工商户这些字眼之后还有6个字以上的,可能是有用的 2024/12/5新增
-        _name = re.sub("(个人|个体).?[工商户]*|[\(\)(){}\{\}\[\]【】]","",enterprise_name)
-        if len(_name)<=6:
-            return -1
-
-        # _split = re.split("个人|个体", enterprise_name)
-        # if len(_split[0]) <= 5:
-        #     return -1
+        if re.search("(个人|个体).?工商户",enterprise_name):
+            #  按照字数过滤,比如剔除个体工商户这些字眼之后还有6个字以上的,可能是有用的 2024/12/5新增
+            _name = re.sub("(个人|个体).?[工商户]*|[\(\)(){}\{\}\[\]【】]","",enterprise_name)
+            if len(re.findall("[\u4e00-\u9fa5]", _name))<=4:
+                return -1
+        else:
+            _split = re.split("个人|个体", enterprise_name)
+            if len(_split[0]) <= 5:
+                return -1
+
     if (re.search("测试", enterprise_name) and len(enterprise_name) < 8) or len(re.findall("测试", enterprise_name))>1:
         return -1
     if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
@@ -301,7 +303,61 @@ def isLegalNewName(enterprise_name):
         return 0
     return 1
 
-
+def test1():  # Manual check: run one hard-coded search hit through the name / credit_code filter and print the resulting (name, json) pairs.
+    legal_name_num = 0
+    add_redis_list = []
+    result =[{'_source': {'reg_number': '230602601147025', 'org_number': 'MADB2NAN6', 'update_time': '2024-08-01 07:28:14', 'credit_code': '92230602MADB2NAN6N', 'name': '大庆萨尔图区若飞物资经销处(个体工商户)'}, '_score': None, 'sort': ['2024-08-01 07:28:14'], '_index': 'enterprise_v3', '_type': '_doc', '_id': '大庆萨尔图区若飞物资经销处(个体工商户)'}]
+    for item in result:
+        item = item['_source']
+        name = item['name']
+        # history_names = item.get("history_names", "")
+        # legal_person = item.get("legal_person", "")
+        # reg_capital = item.get("reg_capital", "")
+        credit_code = item.get("credit_code", "")
+        credit_code = re.sub("\s", "", credit_code)  # strip all whitespace
+        credit_code = credit_code if re.search("^[\dA-Z]{2}\d{6}[\dA-Z]{10}$", credit_code) else ""  # keep only values matching the 18-character pattern, otherwise blank it
+        # tax_number = item.get("tax_number", "")
+        # tax_number = re.sub("\s","",tax_number)
+        # tax_number = tax_number if len(tax_number)>=15 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",tax_number) else ""
+        # reg_number = item.get("reg_number", "")
+        # reg_number = re.sub("\s","",reg_number)
+        # reg_number = reg_number if len(reg_number)>=10 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",reg_number) else ""
+        # org_number = item.get("org_number", "") # deprecated: the unified social credit code replaces the organization code
+        # org_number = org_number if not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",org_number) else ""
+        zhao_biao_number = item.get("zhao_biao_number", 0)
+        zhao_biao_number = zhao_biao_number if zhao_biao_number else 0  # missing or falsy counts (e.g. None) become 0
+        zhong_biao_number = item.get("zhong_biao_number", 0)
+        zhong_biao_number = zhong_biao_number if zhong_biao_number else 0
+        dai_li_number = item.get("dai_li_number", 0)
+        dai_li_number = dai_li_number if dai_li_number else 0
+        bid_number = item.get("bid_number", 0)
+        bid_number = bid_number if bid_number else 0
+
+        num = 0  # count of non-empty business identifiers found for this entity
+        for business in [credit_code]:  # for newly added entities only credit_code is checked
+            business = re.sub("\s-", "", str(business))  # NOTE(review): removes a whitespace char followed by "-"; a character class [\s-] may have been intended - confirm
+            business = re.sub("^nan$", "", business)  # a value that is exactly "nan" counts as empty
+            if len(business) > 1:
+                num += 1
+        isLegal = isLegalNewName(name)
+        if isLegal >= 0:  # isLegalNewName returns -1 for rejected names
+            if num >= 1 and len(name) > 4:  # has a valid credit code and a name longer than 4 characters
+                legal_name_num += 1
+                _json = {"have_business": 1, "zhao_biao_number": zhao_biao_number,
+                         "zhong_biao_number": zhong_biao_number,
+                         "dai_li_number": dai_li_number, "bid_number": bid_number,
+                         "credit_code": credit_code}
+                _json = json.dumps(_json, ensure_ascii=False)
+                add_redis_list.append((name, _json))
+            elif num == 0 and bid_number > 0 and len(name) > 4:  # no credit code, but a positive bid_number
+                legal_name_num += 1
+                _json = {"have_business": 0, "zhao_biao_number": zhao_biao_number,
+                         "zhong_biao_number": zhong_biao_number,
+                         "dai_li_number": dai_li_number, "bid_number": bid_number,
+                         "credit_code": credit_code}
+                _json = json.dumps(_json, ensure_ascii=False)
+                add_redis_list.append((name, _json))
+    print(add_redis_list)
 
 if __name__ == '__main__':
 
@@ -328,6 +384,9 @@ if __name__ == '__main__':
 
     # e = enterprise2Redis()
     # e.monitor_enterprise2redis()
+
+    # print(isLegalNewName('大庆萨尔图区若飞物资经销处(个体工商户)'))
+
     pass