소스 검색

优化产品处理流匹配规则

luojiehua 1 년 전
부모
커밋
8882d3ea05

+ 13 - 3
BaseDataMaintenance/maintenance/product/productUtils.py

@@ -74,7 +74,7 @@ def check_brand(source,target):
         return True
 
 SPECS_CHECK_SET = set([i for i in 'abcdefghijklmnopqrstuvwxyz']) | set([i for i in '0123456789.']) | set([i for i in 'IⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ'])
-SPECS_PATTERN = re.compile("[^%s]"%("".join(list(SPECS_CHECK_SET))))
+NOT_SPECS_PATTERN = re.compile("[^%s]"%("".join(list(SPECS_CHECK_SET))))
 
 def has_same_specs_count(source, target):
 
@@ -103,6 +103,16 @@ def has_same_specs_count(source, target):
 
     return True
 
+def is_legal_specs(specs):
+    '''
+    check if the specs consists only of characters in SPECS_CHECK_SET
+    :param specs: raw specs value; it is converted with str() before checking
+    :return: False if specs is None or an empty string; otherwise True only
+             when every character, as written or lower-cased, is a member
+             of SPECS_CHECK_SET
+    '''
+    if specs is None or specs=="":
+        return False
+    # Test each character as written before falling back to its lower-case
+    # form: str.lower() maps the Roman numerals Ⅰ-Ⅻ to ⅰ-ⅻ, which are not in
+    # SPECS_CHECK_SET, so lower-casing the whole string up front would reject
+    # the very numerals the set is meant to allow.
+    for s in str(specs):
+        if s not in SPECS_CHECK_SET and s.lower() not in SPECS_CHECK_SET:
+            return False
+    return True
+
+
 def check_specs(source,target):
     '''
     check if the source specs is the same as the target
@@ -114,8 +124,8 @@ def check_specs(source,target):
     source = str(source).lower()
     target = str(target).lower()
 
-    source = re.sub(SPECS_PATTERN,'',source)
-    target = re.sub(SPECS_PATTERN,'',target)
+    source = re.sub(NOT_SPECS_PATTERN,'',source)
+    target = re.sub(NOT_SPECS_PATTERN,'',target)
 
     if source==target and len(source)>0:
         return True

+ 28 - 2
BaseDataMaintenance/maintenance/product/products.py

@@ -215,6 +215,7 @@ class Product_Manager(Product_Dict_Manager):
 
             if specs is not None and specs!="":
                 specs_vector = request_embedding(specs)
+                log("getting sepcs %s"%(specs))
                 if specs_vector is not None:
                     Coll,_ = self.get_collection(SPECS_GRADE)
                     search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
@@ -225,8 +226,11 @@ class Product_Manager(Product_Dict_Manager):
                         ots_name = _search.entity.get("standard_name")
                         ots_parent_id = _search.entity.get("ots_parent_id")
 
+                        log("checking %s and %s"%(specs,ots_name))
                         if is_similar(specs,ots_name):
+                            log("is_similar")
                             if check_specs(specs,ots_name):
+                                log("check_specs succeed")
                                 new_specs = ots_name
 
                                 # to update the document_product_dict which is builded for search
@@ -254,6 +258,7 @@ class Product_Manager(Product_Dict_Manager):
                                             if _dpd.updateAlias(specs):
                                                 _dpd.update_row(self.ots_client)
                             else:
+                                log("check_specs failed")
                                 new_specs = clean_product_specs(specs)
                                 # insert into document_product_dict a new record
                                 # to update the document_product_dict which is builded for search
@@ -275,7 +280,27 @@ class Product_Manager(Product_Dict_Manager):
 
                         else:
                             # add new specs?
-                            pass
+                            log("not similar")
+                            if is_legal_specs(specs):
+                                log("is_legal_specs")
+                                new_specs = clean_product_specs(specs)
+                                # insert into document_product_dict a new record
+                                # to update the document_product_dict which is builded for search
+                                # add new specs
+                                if brand_ots_id is not None and name_ots_id is not None:
+                                    _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
+                                    _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
+                                          DOCUMENT_PRODUCT_DICT_NAME:new_specs,
+                                          DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
+                                          DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
+                                          DOCUMENT_PRODUCT_DICT_STATUS:1,
+                                          DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
+                                          DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                          DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                          }
+                                    _dpd = Document_product_dict(_d)
+                                    _dpd.update_row(self.ots_client)
+
 
         # judge if the product matches the standard product
         if name_ots_id is not None:
@@ -444,8 +469,9 @@ class Product_Manager(Product_Dict_Manager):
 
     def start_processing(self):
         scheduler = BlockingScheduler()
-        scheduler.add_job(self.producer,"cron",minute="*/1")
+        scheduler.add_job(self.producer,"cron",second="*/20")
         scheduler.add_job(self.comsumer,"cron",minute="*/1")
+        scheduler.add_job(self.embedding_comsumer,"cron",minute="*/1")
         scheduler.start()
 
 

+ 1 - 1
BaseDataMaintenance/model/ots/document_product.py

@@ -6,7 +6,7 @@ DOCUMENT_PRODUCT_ID = 'id'
 DOCUMENT_PRODUCT_NAME = 'name'
 DOCUMENT_PRODUCT_BRAND = 'brand'
 DOCUMENT_PRODUCT_SPECS = 'specs'
-DOCUMENT_PRODUCT_BRANDSPECS = 'brandSpecs'
+DOCUMENT_PRODUCT_BRANDSPECS = 'brand_specs'
 DOCUMENT_PRODUCT_FULL_NAME = 'full_name'
 DOCUMENT_PRODUCT_UNIT_PRICE = 'unit_price'
 DOCUMENT_PRODUCT_QUANTITY = 'quantity'

+ 1 - 1
BaseDataMaintenance/model/ots/document_product_tmp.py

@@ -6,7 +6,7 @@ DOCUMENT_PRODUCT_TMP_ID = 'id'
 DOCUMENT_PRODUCT_TMP_NAME = 'name'
 DOCUMENT_PRODUCT_TMP_BRAND = 'brand'
 DOCUMENT_PRODUCT_TMP_SPECS = 'specs'
-DOCUMENT_PRODUCT_TMP_BRANDSPECS = 'brandSpecs'
+DOCUMENT_PRODUCT_TMP_BRANDSPECS = 'brand_specs'
 DOCUMENT_PRODUCT_TMP_FULL_NAME = 'full_name'
 DOCUMENT_PRODUCT_TMP_UNIT_PRICE = 'unit_price'
 DOCUMENT_PRODUCT_TMP_QUANTITY = 'quantity'