瀏覽代碼

Redis实体表流程更新

znj 11 月之前
父節點
當前提交
24c1888499
共有 1 個文件被更改,包括 22 次插入和 12 次刪除
  1. 22 12
      BaseDataMaintenance/maintenance/enterprise/enterprise2Redis.py

+ 22 - 12
BaseDataMaintenance/maintenance/enterprise/enterprise2Redis.py

@@ -64,8 +64,7 @@ class enterprise2Redis():
                                   http_auth=('elastic', 'WWBu9#1HWHo$$gJm'),
                                   port=9200)
         body = {
-            "_source": ["name", "history_names", 'legal_person', 'reg_capital', 'credit_code', 'tax_number',
-                        'reg_number', 'org_number',
+            "_source": ["name", 'credit_code',
                         "zhao_biao_number", "zhong_biao_number", "dai_li_number", "bid_number"],
             'query': {  # 查询命令
                 "bool": {
@@ -114,23 +113,34 @@ class enterprise2Redis():
         for item in result:
             item = item['_source']
             name = item['name']
-            history_names = item.get("history_names", "")
-            legal_person = item.get("legal_person", "")
-            reg_capital = item.get("reg_capital", "")
+            # history_names = item.get("history_names", "")
+            # legal_person = item.get("legal_person", "")
+            # reg_capital = item.get("reg_capital", "")
             credit_code = item.get("credit_code", "")
             credit_code = re.sub("\s","",credit_code)
             credit_code = credit_code if re.search("^[\dA-Z]{2}\d{6}[\dA-Z]{10}$",credit_code) else ""
-            tax_number = item.get("tax_number", "")
-            reg_number = item.get("reg_number", "")
-            org_number = item.get("org_number", "")
+            # tax_number = item.get("tax_number", "")
+            # tax_number = re.sub("\s","",tax_number)
+            # tax_number = tax_number if len(tax_number)>=15 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",tax_number) else ""
+            # reg_number = item.get("reg_number", "")
+            # reg_number = re.sub("\s","",reg_number)
+            # reg_number = reg_number if len(reg_number)>=10 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",reg_number) else ""
+            # org_number = item.get("org_number", "") # 已弃用,统一社会信用代码取代组织机构代码
+            # org_number = org_number if not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",org_number) else ""
             zhao_biao_number = item.get("zhao_biao_number", 0)
+            zhao_biao_number = zhao_biao_number if zhao_biao_number else 0
             zhong_biao_number = item.get("zhong_biao_number", 0)
+            zhong_biao_number = zhong_biao_number if zhong_biao_number else 0
             dai_li_number = item.get("dai_li_number", 0)
+            dai_li_number = dai_li_number if dai_li_number else 0
             bid_number = item.get("bid_number", 0)
+            bid_number = bid_number if bid_number else 0
 
             num = 0
-            for business in [history_names, legal_person, reg_capital, credit_code, tax_number, reg_number, org_number]:
-                if len(str(business).replace("-", "")) > 1:
+            for business in [credit_code]: # 新增实体只判断credit_code
+                business = re.sub("\s-","",str(business))
+                business = re.sub("^nan$","",business)
+                if len(business) > 1:
                     num += 1
             isLegal = isLegalNewName(name)
             if isLegal >= 0:
@@ -263,13 +273,13 @@ def isLegalNewName(enterprise_name):
         return -1
     if re.search("[区市镇乡县洲州路街]$", enterprise_name) and not re.search("(超市|门市|保护区|园区|景区|校区|社区|服务区|工区|小区|集市|花市|夜市|学区|旅游区|矿区|林区|度假区|示范区|菜市)$", enterprise_name):
         return -1
-    if re.search("^个人|^个体|测试$", enterprise_name):
+    if re.search("^.?(个人|个体|测试)|(个人|个体|测试).?$", enterprise_name):
         return -1
     if re.search("个人|个体", enterprise_name):
         _split = re.split("个人|个体", enterprise_name)
         if len(_split[0]) <= 5:
             return -1
-    if re.search("测试", enterprise_name) and len(enterprise_name) < 8:
+    if (re.search("测试", enterprise_name) and len(enterprise_name) < 8) or len(re.findall("测试", enterprise_name))>1:
         return -1
     if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
             "^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)", enterprise_name):