Sfoglia il codice sorgente

修复redis连接问题,修复判定品牌规则,匹配规则

luojiehua 1 anno fa
parent
commit
9b19920cad

+ 33 - 16
BaseDataMaintenance/common/milvusUtil.py

@@ -1,11 +1,13 @@
 import traceback
 
-from pymilvus import connections,utility,FieldSchema,CollectionSchema,DataType,Collection
+from pymilvus import connections,utility,FieldSchema,CollectionSchema,DataType,Collection,Milvus
 
+current_host = None
 def init_milvus(host):
+    # Remember the host at module level so the retry paths in this module can
+    # call init_milvus(current_host) to re-open the "default" connection.
+    global current_host
+    current_host = host
     connections.connect("default",host=host,port=19530)
 
-
 def drop_embedding_collection(collection_name):
     print("drop collection:",collection_name)
     utility.drop_collection(collection_name)
@@ -22,25 +24,40 @@ def create_embedding_schema(collection_name,fields,index_name,index_params):
         coll.create_index(index_name,index_params=index_params)
 
 def getCollection(collection_name):
-    coll = Collection(collection_name)
-    print(collection_name,"num_entities",coll.num_entities)
-    return coll
+    # Open the named collection, print its entity count and return it.
+    # If anything raises, reconnect once with init_milvus(current_host) and
+    # repeat the same steps; an exception from that second attempt propagates.
+    try:
+        coll = Collection(collection_name)
+        print(collection_name,"num_entities",coll.num_entities)
+        return coll
+    except Exception as e:
+        init_milvus(current_host)
+        coll = Collection(collection_name)
+        print(collection_name,"num_entities",coll.num_entities)
+        return coll
 
 
-def insert_embedding(coll,entities):
-    coll.insert(entities)
-    coll.load()
-    print("num_entities",coll.num_entities)
+def insert_embedding(coll,entities,retry_times =3):
+    """
+    Insert ``entities`` into ``coll``, load the collection and print its entity count.
+
+    Each failed attempt prints the traceback and reconnects with
+    init_milvus(current_host) before the next try.
+
+    :param coll: collection object providing insert(), load() and num_entities
+    :param entities: data passed unchanged to coll.insert()
+    :param retry_times: maximum number of attempts
+    :return: None, both on success and when every attempt failed
+    """
+    for _ in range(retry_times):
+        try:
+            coll.insert(entities)
+            coll.load()
+            print("num_entities",coll.num_entities)
+            return
+        except Exception:
+            # report the failure instead of swallowing it, then reconnect before retrying
+            traceback.print_exc()
+            init_milvus(current_host)
+    # still best-effort (no exception raised), but make the exhausted case visible
+    print("insert_embedding failed after %d attempts"%(retry_times))
 
 
-def search_embedding(coll,index_name,vector,search_params,output_fields,limit=3):
-    list_result = []
-    result = coll.search(vector,index_name,search_params,top_k=limit,output_fields=output_fields,limit=limit)
-    for hits in result:
-        for hit in hits:
-            list_result.append(hit)
+def search_embedding(coll,index_name,vector,search_params,output_fields,limit=3,retry_times=3):
+    """
+    Run a vector search on ``coll`` and return all hits flattened into one list.
+
+    Each failed attempt prints the traceback and reconnects with
+    init_milvus(current_host) before the next try.
+
+    :param coll: collection object providing search()
+    :param index_name: passed to coll.search() as its second positional argument
+    :param vector: passed to coll.search() as its first positional argument
+    :param search_params: passed unchanged to coll.search()
+    :param output_fields: passed unchanged to coll.search()
+    :param limit: used for both the top_k and limit keyword arguments
+    :param retry_times: maximum number of attempts
+    :return: list of hits, or None when every attempt failed
+    """
+    # range() is required here: iterating over the int itself raises TypeError
+    for _ in range(retry_times):
+        try:
+            list_result = []
+            result = coll.search(vector,index_name,search_params,top_k=limit,output_fields=output_fields,limit=limit)
+            for hits in result:
+                for hit in hits:
+                    list_result.append(hit)
 
-    return list_result
+            return list_result
+        except Exception:
+            # report the failure instead of swallowing it, then reconnect before retrying
+            traceback.print_exc()
+            init_milvus(current_host)
+    # every attempt failed
+    return None
 
 if __name__ == '__main__':
     # drop_embedding_collection()

+ 5 - 0
BaseDataMaintenance/dataSource/source.py

@@ -158,6 +158,11 @@ def getConnect_redis_product():
                            db=8,password=REDIS_PASS)
     return db
 
+def getConnect_redis_product_pool():
+    """Build and return a redis ConnectionPool for db 8 with at most 40 connections."""
+    return redis.ConnectionPool(host=REDIS_HOST, port=REDIS_PORT,
+                                db=8,password=REDIS_PASS,max_connections=40)
+
 if __name__=="__main__":
     # solrQuery("document",{"q":"*:*"})
     # getConnect_mongodb()

+ 2 - 2
BaseDataMaintenance/maintenance/dataflow.py

@@ -2842,8 +2842,8 @@ class Dataflow_dumplicate(Dataflow):
 
     def flow_dumpcate_comsumer(self):
         from multiprocessing import Process
-        process_count = 3
-        thread_count = 30
+        process_count = 1
+        thread_count = 20
         list_process = []
         def start_thread():
             mt = MultiThreadHandler(self.queue_dumplicate,self.dumplicate_comsumer_handle,None,thread_count,1,need_stop=False,ots_client=self.ots_client)

+ 115 - 28
BaseDataMaintenance/maintenance/product/productUtils.py

@@ -6,23 +6,38 @@ import Levenshtein
 import re
 # 判断是不是入参字符串为全中文
 
-from BaseDataMaintenance.dataSource.source import getConnect_redis_product
+from BaseDataMaintenance.dataSource.source import getConnect_redis_product_pool
 from BaseDataMaintenance.dataSource.pool import ConnectorPool
 from BaseDataMaintenance.common.Utils import log
 from BaseDataMaintenance.common.documentFingerprint import getMD5
 from BaseDataMaintenance.common.milvusUtil import search_embedding
 
-pool_product = ConnectorPool(10,30,getConnect_redis_product)
 
+import redis
+pool_product = getConnect_redis_product_pool()
+import traceback
+from tablestore import *
 
-def get_milvus_search(coll,index_name,name,vector,search_params,output_fields,limit=3):
+from BaseDataMaintenance.model.ots.document_product_dict_interface import *
+from BaseDataMaintenance.model.ots.document_product_dict import *
+from BaseDataMaintenance.model.ots.document_product_tmp import *
+from BaseDataMaintenance.model.ots.enterprise import *
+from BaseDataMaintenance.maintenance.product.make_brand_pattern import get_area_set
+
+area_set = get_area_set()
+
+def get_embedding_search(coll,index_name,name,grade,vector,search_params,output_fields,limit=3):
 
     if name is None or name=="":
         return None
-    db = pool_product.getConnector()
+    db = redis.Redis(connection_pool=pool_product)
     try:
-        _md5 = getMD5(str(name))+"_milvus"
-        _search_list = db.get(_md5)
+        _md5 = getMD5(str(name))+"_milvus_%d"%(grade)
+        _search_list = None
+        try:
+            _search_list = db.get(_md5)
+        except Exception as e:
+            log("get redis data error")
         if _search_list is not None:
             return json.loads(_search_list)
         else:
@@ -38,50 +53,57 @@ def get_milvus_search(coll,index_name,name,vector,search_params,output_fields,li
                 for k in output_fields:
                     _d[k] = _search.entity.get(k)
                 final_list.append(_d)
-            db.set(_md5,json.dumps(final_list))
-            db.expire(_md5,2*60)
+            final_list = remove_repeat_item(final_list,k="standard_name")
+            try:
+                db.set(_md5,json.dumps(final_list))
+                db.expire(_md5,2*60)
+            except Exception as e:
+                log("set redis data error")
             return final_list
 
     except Exception as e:
-        log("getExtract_json_fromRedis error %s"%(str(e)))
+        traceback.print_exc()
         raise RuntimeError("get milvus search error")
-    finally:
-        try:
-            if db.connection.check_health():
-                pool_product.putConnector(db)
-        except Exception as e:
-            pass
     return None
 
 
-
-    return list_result
+def remove_repeat_item(list_result,k="standard_name"):
+    """
+    Return the items of ``list_result`` with repeats removed, keeping input order.
+
+    An item is a repeat when its value for key ``k`` is not None and an earlier
+    item already had that value. Items whose value is None are always kept.
+    """
+    seen_values = set()
+    kept_items = []
+    for entry in list_result:
+        value = entry.get(k)
+        is_repeat = value is not None and value in seen_values
+        if not is_repeat:
+            kept_items.append(entry)
+            seen_values.add(value)
+    return kept_items
 
 def get_embedding_request(sentence,retry_times=3):
+    # Return the embedding for `sentence`, using redis as a cache.
+    # The cache key is getMD5(str(sentence)) + "_embedding". On a cache miss the
+    # embedding comes from request_embedding() and is written back when not None.
+    # Returns None for a None/empty sentence. Any error outside the guarded redis
+    # calls prints a traceback and is re-raised as RuntimeError.
 
     if sentence is None or sentence=="":
         return None
-    db = pool_product.getConnector()
+    db = redis.Redis(connection_pool=pool_product)
 
     try:
         _md5 = getMD5(str(sentence))+"_embedding"
-        _embedding = db.get(_md5)
+        _embedding = None
+        # a failed cache read is only logged; it is then handled like a cache miss
+        try:
+            _embedding = db.get(_md5)
+        except Exception as e:
+            log("get redis data error")
         if _embedding is not None:
             return json.loads(_embedding)
         else:
             _embedding = request_embedding(sentence,retry_times=retry_times)
             if _embedding is not None:
-                db.set(_md5,json.dumps(_embedding))
+                # a failed cache write is only logged; the fresh embedding is still returned
+                try:
+                    db.set(_md5,json.dumps(_embedding))
+                except Exception as e:
+                    log("set redis data error")
             return _embedding
     except Exception as e:
-        log("getExtract_json_fromRedis error %s"%(str(e)))
+        traceback.print_exc()
         raise RuntimeError("get embedding request error")
-    finally:
-        try:
-            if db.connection.check_health():
-                pool_product.putConnector(db)
-        except Exception as e:
-            pass
     return None
 
 
@@ -235,6 +257,70 @@ def has_same_specs_count(source, target):
 
     return True
 
+def is_legal_brand(ots_client,brand):
+    """
+    Decide whether ``brand`` can be accepted as a brand name.
+
+    Checks run in this order:
+    1. if the text matches the "品牌:xxx" pattern, only the captured part is used;
+       None or a result shorter than 2 characters is rejected
+    2. rejected when document_product_dict_interface has a BRAND_GRADE row with
+       this name and action "delete"
+    3. rejected when document_product_dict has this name at NAME_GRADE or SPECS_GRADE
+    4. is_area_brand(): 1 accepts, 2 rejects, any other value continues
+    5. for lengths 8..99, accepted when an Enterprise row with this name exists,
+       its status is within 201..300 and it has a bid number > 0 or a tyc id
+    6. accepted when document_product_temp has at least 5 rows with this brand,
+       the brand matches none of the blacklist patterns and is at most 8 characters
+
+    :param ots_client: client used for the search() calls and Enterprise lookups
+    :param brand: candidate brand text
+    :return: True when one accepting rule matches, otherwise False
+    """
+    if brand is None:
+        # re.search raises TypeError on None, so reject it before matching
+        return False
+    _search = re.search("品牌[::;;](?P<brand>.{2,8}?)([.。、;::]|规格|型号|生产厂家|厂家)",brand)
+    if _search is not None:
+        brand = _search.groupdict().get("brand")
+    if brand is None or len(brand)<2:
+        return False
+    # check whether this brand exists in interface and action is delete
+    bool_query = BoolQuery(must_queries=[
+        TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_NAME,brand),
+        TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE,BRAND_GRADE),
+        TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION,"delete")
+    ])
+
+    rows,next_token,total_count,is_all_succeed = ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
+                                                                        SearchQuery(bool_query,get_total_count=True))
+    if total_count>0:
+        return False
+
+    # check whether this brand exists in dict and grade=name_grade or grade=specs_grade
+    bool_query = BoolQuery(must_queries=[
+        TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand),
+        BoolQuery(should_queries=[
+            TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,NAME_GRADE),
+            TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
+        ])
+
+    ])
+    rows,next_token,total_count,is_all_succeed = ots_client.search("document_product_dict","document_product_dict_index",
+                                                                        SearchQuery(bool_query,get_total_count=True))
+    if total_count>0:
+        return False
+
+    # check the area+brand type
+    _f = is_area_brand(brand,area_set)
+    if _f==1:
+        log("%s is_legal_brand True by is_area_brand"%(brand))
+        return True
+    elif _f==2:
+        return False
+
+    # check the company type
+    if len(brand)<100 and len(brand)>=8:
+        _d = {ENTERPRISE_NAME:brand}
+        _ent = Enterprise(_d)
+        if _ent.exists_row(ots_client):
+            _ent.fix_columns(ots_client,[ENTERPRISE_bid_number,ENTERPRISE_STATUS,ENTERPRISE_tyc_id],True)
+            if _ent.getProperties().get(ENTERPRISE_STATUS,0)>=201 and _ent.getProperties().get(ENTERPRISE_STATUS,0)<=300:
+                if _ent.getProperties().get(ENTERPRISE_bid_number,0)>0 or _ent.getProperties().get(ENTERPRISE_tyc_id,0):
+                    log("%s is_legal_brand True by Enterprise"%(brand))
+                    return True
+
+    # check the group count and char
+    bool_query = BoolQuery(must_queries=[
+        TermQuery(DOCUMENT_PRODUCT_TMP_BRAND,brand)
+    ])
+    rows,next_token,total_count,is_all_succeed = ots_client.search("document_product_temp","document_product_temp_index",
+                                                                        SearchQuery(bool_query,get_total_count=True))
+
+    if total_count>=5:
+        if re.search("详见|无|国产|null|其他|详细|废标|[0-9/]|品牌|文件|^见",brand) is None and len(brand)<=8:
+            log("%s is_legal_brand True by count"%(brand))
+            return True
+
+    # no accepting rule matched: return an explicit False instead of falling off the end
+    return False
+
 SPECS_PATTERN = re.compile("[^A-Za-z0-9-\\/()().]")
 def is_legal_specs(specs):
     if specs is None or specs=="":
@@ -360,4 +446,5 @@ if __name__ == '__main__':
     import Levenshtein
     print(Levenshtein.ratio('助听器','助行器'))
     a = "无锡贝尔森品牌"
-    print(clean_product_brand(a))
+    print(clean_product_brand(a))
+    print(is_legal_brand(getConnect_ots(),"液晶显示"))

+ 18 - 1
BaseDataMaintenance/maintenance/product/product_dict.py

@@ -49,6 +49,11 @@ class Product_Dict_Manager():
         self.Coll_brand = getCollection(self.collection_name_brand)
         self.Coll_specs = getCollection(self.collection_name_specs)
 
+        # self.pool_name = ConnectorPool(init_num=10,max_num=30,method_init=getCollection,collection_name=self.collection_name_name)
+        #
+        # self.pool_brand = ConnectorPool(init_num=10,max_num=30,method_init=getCollection,collection_name=self.collection_name_brand)
+        # self.pool_specs = ConnectorPool(init_num=10,max_num=30,method_init=getCollection,collection_name=self.collection_name_specs)
+
 
     def init_milvus(self):
         from pymilvus import connections,FieldSchema,DataType
@@ -808,7 +813,19 @@ def search_similar():
     df.to_excel("search_similar1.xlsx",columns=df_columns)
 
 
+def insert_interface_delete(name,grade):
+    """
+    Write one row with action "delete" for ``name`` at ``grade`` through
+    Document_product_dict_interface, using a fresh uuid4 hex id, status 1 and
+    the current time as create time.
+    """
+    client = getConnect_ots()
+    from uuid import uuid4
+    created_at = getCurrent_date(format="%Y-%m-%d %H:%M:%S")
+    row = {
+        DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:name,
+        DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
+        DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:grade,
+        DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
+        DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"delete",
+        DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:created_at,
+    }
+    Document_product_dict_interface(row).update_row(client)
 
 
 if __name__ == '__main__':
-    start_embedding_product_dict
+    # start_embedding_product_dict()
+    insert_interface_delete("液晶显示屏",4)

+ 270 - 420
BaseDataMaintenance/maintenance/product/products.py

@@ -40,7 +40,6 @@ class Product_Manager(Product_Dict_Manager):
 
         self.set_id = set()
 
-        self.area_set = get_area_set()
 
     def get_product_id(self,docid,name,brand,specs,unit_price,quantity):
         if name is None:
@@ -157,15 +156,16 @@ class Product_Manager(Product_Dict_Manager):
         brand_ots_id = None
         specs_ots_id = None
         if name is not None and name!="":
-            name_vector = request_embedding(name)
+            name_vector = get_embedding_request(name)
             if name_vector is not None:
                 Coll,_ = self.get_collection(NAME_GRADE)
-                search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
+
+                search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=60)
 
                 for _search in search_list:
-                    ots_id = _search.entity.get("standard_name_id")
-                    ots_name = _search.entity.get("standard_name")
-                    ots_parent_id = _search.entity.get("ots_parent_id")
+                    ots_id = _search.get("standard_name_id")
+                    ots_name = _search.get("standard_name")
+                    ots_parent_id = _search.get("ots_parent_id")
 
                     if is_similar(name,ots_name) or check_product(name,ots_name):
                         name_ots_id = ots_id
@@ -179,15 +179,15 @@ class Product_Manager(Product_Dict_Manager):
                         break
         if name_ots_id is None:
             for name in list_candidates:
-                name_vector = request_embedding(name)
+                name_vector = get_embedding_request(name)
                 if name_vector is not None:
                     Coll,_ = self.get_collection(NAME_GRADE)
-                    search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=20)
+                    search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=20)
 
                     for _search in search_list:
-                        ots_id = _search.entity.get("standard_name_id")
-                        ots_name = _search.entity.get("standard_name")
-                        ots_parent_id = _search.entity.get("ots_parent_id")
+                        ots_id = _search.get("standard_name_id")
+                        ots_name = _search.get("standard_name")
+                        ots_parent_id = _search.get("ots_parent_id")
 
                         if is_similar(name,ots_name) or check_product(name,ots_name):
                             name_ots_id = ots_id
@@ -203,101 +203,94 @@ class Product_Manager(Product_Dict_Manager):
 
             if brand is not None and brand!="":
 
-                #check ots
-                bool_query = BoolQuery(must_queries=[
-                    TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand),
-                    TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,BRAND_GRADE)
-                ])
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
-                                                                                    SearchQuery(bool_query,get_total_count=True))
-                if total_count>0:
-                    new_brand = brand
-                else:
-                    s_brand = brand
-                    l_brand = [brand]
-                    l_brand.append(clean_product_brand(s_brand))
-                    brand_ch = get_chinese_string(brand)
-                    l_brand.extend(brand_ch)
+                s_brand = brand
+                l_brand = [brand]
+                l_brand.append(clean_product_brand(s_brand))
+                brand_ch = get_chinese_string(brand)
+                l_brand.extend(brand_ch)
 
-                    _find = False
-                    for brand in l_brand:
+                _find = False
+                for brand in l_brand:
 
-                        brand_vector = request_embedding(brand)
-                        if brand_vector is not None:
-                            Coll,_ = self.get_collection(BRAND_GRADE)
-                            search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=60)
+                    brand_vector = get_embedding_request(brand)
+                    if brand_vector is not None:
+                        Coll,_ = self.get_collection(BRAND_GRADE)
+                        search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=60)
 
-                            # log("search brand %s"%(brand))
-                            for _search in search_list:
+                        # log("search brand %s"%(brand))
+                        for _search in search_list:
 
-                                ots_id = _search.entity.get("standard_name_id")
-                                ots_name = _search.entity.get("standard_name")
-                                ots_parent_id = _search.entity.get("ots_parent_id")
+                            ots_id = _search.get("standard_name_id")
+                            ots_name = _search.get("standard_name")
+                            ots_parent_id = _search.get("ots_parent_id")
 
-                                # log("check brand %s and %s"%(brand,ots_name))
-                                if is_similar(brand,ots_name) or check_brand(brand,ots_name):
+                            # log("check brand %s and %s"%(brand,ots_name))
+                            if is_similar(brand,ots_name) or check_brand(brand,ots_name):
 
-                                    # log("check brand similar succeed:%s and %s"%(brand,ots_name))
-                                    new_brand = ots_name
+                                # log("check brand similar succeed:%s and %s"%(brand,ots_name))
 
-                                    log("checking brand %s succeed %s"%(brand,new_brand))
-                                    # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
-
-                                    if name_ots_id is not None:
-                                        brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
-
-                                        _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
-                                                    DOCUMENT_PRODUCT_DICT_NAME:new_brand,
-                                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
-                                                    DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
-                                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
-                                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                    }
-                                        _dpd_brand = Document_product_dict(_d_brand)
-                                        # _dpd_brand.updateAlias(str(new_brand).lower())
-                                        if not _dpd_brand.exists_row(self.ots_client):
-                                            _dpd_brand.update_row(self.ots_client)
-
-                                        else:
-                                            pass
-                                            # #update alias
-                                            # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
-                                            # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
-                                            # if _flag:
-                                            #     if _dpd.updateAlias(brand):
-                                            #         _dpd.update_row(self.ots_client)
-
-                                    _find = True
-                                    break
-                                else:
-                                    # log("check brand similar failed:%s and %s"%(brand,ots_name))
-                                    # add new brand?
-                                    pass
-                            if _find:
-                                break
-                    if not _find:
-                        for brand in l_brand:
-                            if self.check_new_brand(brand):
-                                new_brand = clean_product_brand(brand)
-                                if new_brand=="":
+                                if ots_name==new_name:
                                     continue
-                                log("adding new brand %s"%(str(new_brand)))
-                                _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                            DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
-                                            }
-                                dpdi = Document_product_dict_interface(_d_brand)
-                                dpdi.update_row(self.ots_client)
+                                new_brand = ots_name
+
+                                log("checking brand %s succeed %s"%(brand,new_brand))
+                                # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
+
+                                if name_ots_id is not None:
+                                    brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
+
+                                    _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
+                                                DOCUMENT_PRODUCT_DICT_NAME:new_brand,
+                                                DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
+                                                DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
+                                                DOCUMENT_PRODUCT_DICT_STATUS:1,
+                                                DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
+                                                DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
+                                                DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                }
+                                    _dpd_brand = Document_product_dict(_d_brand)
+                                    # _dpd_brand.updateAlias(str(new_brand).lower())
+                                    if not _dpd_brand.exists_row(self.ots_client):
+                                        _dpd_brand.update_row(self.ots_client)
+
+                                    else:
+                                        pass
+                                        # #update alias
+                                        # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
+                                        # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
+                                        # if _flag:
+                                        #     if _dpd.updateAlias(brand):
+                                        #         _dpd.update_row(self.ots_client)
+
+                                _find = True
                                 break
+                            else:
+                                # log("check brand similar failed:%s and %s"%(brand,ots_name))
+                                # add new brand?
+                                pass
+                        if _find:
+                            break
+                if not _find:
+                    for brand in l_brand:
+                        if self.check_new_brand(brand):
+                            new_brand = clean_product_brand(brand)
+                            if new_brand=="":
+                                continue
+                            log("adding new brand %s"%(str(new_brand)))
+                            _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
+                                        }
+                            dpdi = Document_product_dict_interface(_d_brand)
+                            dpdi.update_row(self.ots_client)
+                            break
 
             if brand_ots_id is None:
                 _find = False
@@ -315,20 +308,17 @@ class Product_Manager(Product_Dict_Manager):
                         start_time = time.time()
                         # brand_vector = request_embedding(brand)
                         brand_vector = get_embedding_request(brand)
-                        log("get embedding for brand %s takes %.4fs"%(brand,time.time()-start_time))
+                        debug("get embedding for brand %s takes %.4fs"%(brand,time.time()-start_time))
                         if brand_vector is not None:
                             Coll,_ = self.get_collection(BRAND_GRADE)
                             start_time = time.time()
                             # search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=10)
-                            search_list = get_milvus_search(Coll,embedding_index_name,brand,[brand_vector],self.search_params,output_fields,limit=10)
-                            log("get search_list for brand %s takes %.4fs"%(brand,time.time()-start_time))
+                            search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=10)
+                            debug("get search_list for brand %s takes %.4fs"%(brand,time.time()-start_time))
                             # log("search brand %s"%(brand))
                             for _search in search_list:
 
 
-                                # ots_id = _search.entity.get("standard_name_id")
-                                # ots_name = _search.entity.get("standard_name")
-                                # ots_parent_id = _search.entity.get("ots_parent_id")
                                 ots_id = _search.get("standard_name_id")
                                 ots_name = _search.get("standard_name")
                                 ots_parent_id = _search.get("ots_parent_id")
@@ -336,7 +326,10 @@ class Product_Manager(Product_Dict_Manager):
                                 # log("check brand %s and %s"%(brand,ots_name))
                                 if is_similar(brand,ots_name,_radio=95) or check_brand(brand,ots_name):
                                     # log("check brand similar succeed:%s and %s"%(brand,ots_name))
+                                    if ots_name==new_name:
+                                        continue
                                     new_brand = ots_name
+
                                     log("checking brand %s succeed %s"%(brand,new_brand))
                                     # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
 
@@ -372,265 +365,172 @@ class Product_Manager(Product_Dict_Manager):
 
             if specs is not None and specs!="":
 
-                #check ots
-                bool_query = BoolQuery(must_queries=[
-                    TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs),
-                    TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
-                ])
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
-                                                                                    SearchQuery(bool_query,get_total_count=True))
-                if total_count>0:
-                    new_specs = specs
-
-                    if brand_ots_id is not None:
-                        # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
-                        specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
-
-                        _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
-                                    DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
-                                    DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                    }
-                        _dpd_specs = Document_product_dict(_d_specs)
-                        # _dpd_specs.updateAlias(str(new_specs).lower())
-                        if not _dpd_specs.exists_row(self.ots_client):
-                            _dpd_specs.update_row(self.ots_client)
-                            # user interface to add
-                else:
-                    debug("getting sepcs %s"%(specs))
-                    list_specs = []
-                    c_specs = clean_product_specs(specs)
-                    list_specs.append(c_specs)
-
-                    for s in re.split("[\u4e00-\u9fff]",specs):
-                        if s!="" and len(s)>4:
-                            list_specs.append(s)
-                    similar_flag = None
-                    _index = 0
-                    break_flag = False
-                    for c_specs in list_specs:
-                        if break_flag:
-                            break
-                        _index += 1
-                        specs_vector = request_embedding(c_specs)
 
-                        if specs_vector is not None:
-                            Coll,_ = self.get_collection(SPECS_GRADE)
-                            search_list = search_embedding(Coll,embedding_index_name,[specs_vector],self.search_params,output_fields,limit=60)
+                debug("getting sepcs %s"%(specs))
+                list_specs = []
+                c_specs = clean_product_specs(specs)
+                list_specs.append(c_specs)
 
-                            for _search in search_list:
-
-                                ots_id = _search.entity.get("standard_name_id")
-                                ots_name = _search.entity.get("standard_name")
-                                ots_parent_id = _search.entity.get("ots_parent_id")
-
-                                debug("checking specs %s and %s"%(specs,ots_name))
-                                if is_similar(specs,ots_name):
-                                    # log("specs is_similar")
-                                    if check_specs(c_specs,ots_name):
-                                        break_flag = True
-                                        new_specs = ots_name
-                                        log("check_specs %s succeed %s"%(specs,new_specs))
-
-                                        # to update the document_product_dict which is builded for search
-                                        if brand_ots_id is not None:
-                                            # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
-                                            specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
-
-                                            _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
-                                                        DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                                                        DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
-                                                        DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                                                        DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                                        DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                                                        DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                                        DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                        DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                        }
-                                            _dpd_specs = Document_product_dict(_d_specs)
-                                            # _dpd_specs.updateAlias(str(new_specs).lower())
-                                            if not _dpd_specs.exists_row(self.ots_client):
-                                                _dpd_specs.update_row(self.ots_client)
-                                                # user interface to add
-                                            else:
-                                                pass
-                                                # #update alias
-                                                # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
-                                                # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
-                                                # if _flag:
-                                                #     if _dpd.updateAlias(specs):
-                                                #         _dpd.update_row(self.ots_client)
-                                        break
-                                    else:
-                                        if _index == 1:
-                                            similar_flag = True
-
-                    if not break_flag and similar_flag:
-                        debug("check_specs failed")
-                        new_specs = clean_product_specs(specs)
-                        # insert into document_product_dict a new record
-                        # to update the document_product_dict which is builded for search
-                        # add new specs
-                        if brand_ots_id is not None and name_ots_id is not None and len(specs)<MAX_NAME_LENGTH:
-                            # _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
-                            # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
-                            #       DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                            #       DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(new_specs.lower()),
-                            #       DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                            #       DOCUMENT_PRODUCT_DICT_STATUS:1,
-                            #       DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                            #       DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                            #       DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                            #       }
-                            # _dpd = Document_product_dict(_d)
-                            # # _dpd.updateAlias(new_specs)
-                            # _dpd.update_row(self.ots_client)
-
-                            # user interface to add
-                            log("adding new specs %s"%(new_specs))
-                            _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(specs),
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                  DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
-                                  }
-                            _dpdi = Document_product_dict_interface(_d)
-                            _dpdi.update_row(self.ots_client)
-                    else:
-                        # add new specs?
-                        debug("specs not similar")
-                        if is_legal_specs(specs) and len(specs)<MAX_NAME_LENGTH:
-                            debug("is_legal_specs")
-                            new_specs = clean_product_specs(specs)
-                            # insert into document_product_dict a new record
-                            # to update the document_product_dict which is builded for search
-                            # add new specs
-                            if brand_ots_id is not None and name_ots_id is not None:
-                                _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
-
-                                # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
-                                #       DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                                #       DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
-                                #       DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                                #       DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                #       DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                                #       DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                #       DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                #       }
-                                # _dpd = Document_product_dict(_d)
-                                # _dpd.update_row(self.ots_client)
-
-                                log("adding new specs %s"%(new_specs))
-                                # user interface to add
-                                _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(new_specs.lower()),
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                      DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
-                                      }
-                                _dpdi = Document_product_dict_interface(_d)
-                                _dpdi.update_row(self.ots_client)
+                for s in re.split("[\u4e00-\u9fff]",specs):
+                    if s!="" and len(s)>4:
+                        list_specs.append(s)
+                similar_flag = None
+                _index = 0
+                break_flag = False
+                for c_specs in list_specs:
+                    if break_flag:
+                        break
+                    _index += 1
+                    specs_vector = get_embedding_request(c_specs)
+
+                    if specs_vector is not None:
+                        Coll,_ = self.get_collection(SPECS_GRADE)
+                        search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=60)
+
+                        for _search in search_list:
+
+                            ots_id = _search.get("standard_name_id")
+                            ots_name = _search.get("standard_name")
+                            ots_parent_id = _search.get("ots_parent_id")
+
+                            debug("checking specs %s and %s"%(specs,ots_name))
+                            if is_similar(specs,ots_name):
+                                # log("specs is_similar")
+                                if check_specs(c_specs,ots_name):
+                                    break_flag = True
+                                    new_specs = ots_name
+                                    log("check_specs %s succeed %s"%(specs,new_specs))
+
+                                    # to update the document_product_dict which is built for search
+                                    if brand_ots_id is not None:
+                                        # check whether a specs entry whose parent_id is brand_ots_id exists; insert one if it does not (the alias update for existing rows is currently commented out)
+                                        specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
+
+                                        _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
+                                                    DOCUMENT_PRODUCT_DICT_NAME:new_specs,
+                                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
+                                                    DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
+                                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
+                                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
+                                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
+                                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                    }
+                                        _dpd_specs = Document_product_dict(_d_specs)
+                                        # _dpd_specs.updateAlias(str(new_specs).lower())
+                                        if not _dpd_specs.exists_row(self.ots_client):
+                                            _dpd_specs.update_row(self.ots_client)
+                                            # user interface to add
+                                        else:
+                                            pass
+                                            # #update alias
+                                            # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
+                                            # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
+                                            # if _flag:
+                                            #     if _dpd.updateAlias(specs):
+                                            #         _dpd.update_row(self.ots_client)
+                                    break
+                                else:
+                                    if _index == 1:
+                                        similar_flag = True
+
+                # add new specs?
+                debug("specs not similar")
+                if is_legal_specs(specs) and len(specs)<MAX_NAME_LENGTH and len(specs)>=5:
+                    debug("is_legal_specs")
+                    new_specs = clean_product_specs(specs)
+                    # insert into document_product_dict a new record
+                    # to update the document_product_dict which is built for search
+                    # add new specs
+                    if brand_ots_id is not None and name_ots_id is not None:
+                        _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
+
+                        # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
+                        #       DOCUMENT_PRODUCT_DICT_NAME:new_specs,
+                        #       DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
+                        #       DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
+                        #       DOCUMENT_PRODUCT_DICT_STATUS:1,
+                        #       DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
+                        #       DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                        #       DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                        #       }
+                        # _dpd = Document_product_dict(_d)
+                        # _dpd.update_row(self.ots_client)
+
+                        log("adding new specs %s"%(new_specs))
+                        # user interface to add
+                        _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(new_specs.lower()),
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                              DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
+                              }
+                        _dpdi = Document_product_dict_interface(_d)
+                        _dpdi.update_row(self.ots_client)
         if specs_ots_id is None:
             _find = False
             for specs in list_candidates:
                 if _find:
                     break
-                bool_query = BoolQuery(must_queries=[
-                    TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs),
-                    TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
-                ])
-                rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
-                                                                                    SearchQuery(bool_query,get_total_count=True))
-                if total_count>0:
-                    new_specs = specs
-                    _find = True
-                    if brand_ots_id is not None:
-                        # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
-                        specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
-
-                        _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
-                                    DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
-                                    DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                    }
-                        _dpd_specs = Document_product_dict(_d_specs)
-                        # _dpd_specs.updateAlias(str(new_specs).lower())
-                        if not _dpd_specs.exists_row(self.ots_client):
-                            _dpd_specs.update_row(self.ots_client)
-                            # user interface to add
-                else:
-                    debug("getting sepcs %s"%(specs))
-                    list_specs = []
-                    c_specs = clean_product_specs(specs)
-                    list_specs.append(c_specs)
-
-                    for s in re.split("[\u4e00-\u9fff]",specs):
-                        if s!="" and len(s)>4:
-                            list_specs.append(s)
-                    similar_flag = None
-                    _index = 0
-                    for c_specs in list_specs:
-                        if _find:
-                            break
-                        _index += 1
-                        specs_vector = request_embedding(c_specs)
 
-                        if specs_vector is not None:
-                            Coll,_ = self.get_collection(SPECS_GRADE)
-                            search_list = search_embedding(Coll,embedding_index_name,[specs_vector],self.search_params,output_fields,limit=20)
+                debug("getting sepcs %s"%(specs))
+                list_specs = []
+                c_specs = clean_product_specs(specs)
+                list_specs.append(c_specs)
+
+                for s in re.split("[\u4e00-\u9fff]",specs):
+                    if s!="" and len(s)>4:
+                        list_specs.append(s)
+                similar_flag = None
+                _index = 0
+                for c_specs in list_specs:
+                    if _find:
+                        break
+                    _index += 1
+                    specs_vector = get_embedding_request(c_specs)
+
+                    if specs_vector is not None:
+                        Coll,_ = self.get_collection(SPECS_GRADE)
+                        search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=20)
 
-                            for _search in search_list:
-                                if _find:
-                                    break
+                        for _search in search_list:
+                            if _find:
+                                break
 
-                                ots_id = _search.entity.get("standard_name_id")
-                                ots_name = _search.entity.get("standard_name")
-                                ots_parent_id = _search.entity.get("ots_parent_id")
-
-                                debug("checking specs %s and %s"%(specs,ots_name))
-                                if is_similar(specs,ots_name):
-                                    # log("specs is_similar")
-                                    if check_specs(c_specs,ots_name):
-                                        break_flag = True
-                                        new_specs = ots_name
-                                        if brand_ots_id is not None:
-                                            # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
-                                            specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
-
-                                            _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
-                                                        DOCUMENT_PRODUCT_DICT_NAME:new_specs,
-                                                        DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
-                                                        DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
-                                                        DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                                        DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
-                                                        DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                                        DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                        DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                        }
-                                            _dpd_specs = Document_product_dict(_d_specs)
-                                            # _dpd_specs.updateAlias(str(new_specs).lower())
-                                            if not _dpd_specs.exists_row(self.ots_client):
-                                                _dpd_specs.update_row(self.ots_client)
-                                            _find = True
-                                            break
+                            ots_id = _search.get("standard_name_id")
+                            ots_name = _search.get("standard_name")
+                            ots_parent_id = _search.get("ots_parent_id")
+
+                            debug("checking specs %s and %s"%(specs,ots_name))
+                            if is_similar(specs,ots_name):
+                                # log("specs is_similar")
+                                if check_specs(c_specs,ots_name):
+                                    break_flag = True
+                                    new_specs = ots_name
+                                    if brand_ots_id is not None:
+                                        # check whether a specs entry whose parent_id is brand_ots_id exists; insert one if it does not
+                                        specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
+
+                                        _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
+                                                    DOCUMENT_PRODUCT_DICT_NAME:new_specs,
+                                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
+                                                    DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
+                                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
+                                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
+                                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
+                                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                                    }
+                                        _dpd_specs = Document_product_dict(_d_specs)
+                                        # _dpd_specs.updateAlias(str(new_specs).lower())
+                                        if not _dpd_specs.exists_row(self.ots_client):
+                                            _dpd_specs.update_row(self.ots_client)
+                                        _find = True
+                                        break
 
         # judge if the product matches the standard product
         if name_ots_id is not None:
@@ -681,7 +581,7 @@ class Product_Manager(Product_Dict_Manager):
                 is_legal_data = False
 
             if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE),(float,int)) and isinstance(win_bid_price,(float,int)):
-                if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price*10:
+                if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price*10 and win_bid_price>0:
                     is_legal_data = False
 
             if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE),(float,int)) and _product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE)>100000000:
@@ -737,65 +637,7 @@ class Product_Manager(Product_Dict_Manager):
         save_product_tmp.update_row(self.ots_client)
 
     def check_new_brand(self,brand):
-
-        _search = re.search("品牌[::;;](?P<brand>.{2,8}?)([.。、;::]|规格|型号|生产厂家|厂家)",brand)
-        if _search is not None:
-            brand = _search.groupdict().get("brand")
-        if brand is None or len(brand)<2:
-            return False
-        # check whether this brand exists in interface and action is delete
-        bool_query = BoolQuery(must_queries=[
-            TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_NAME,brand),
-            TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE,BRAND_GRADE),
-            TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION,"delete")
-        ])
-
-        rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
-                                                                            SearchQuery(bool_query,get_total_count=True))
-        if total_count>0:
-            return False
-
-        # check whether this brand exists in dict and grade=name_grade or grade=specs_grade
-        bool_query = BoolQuery(must_queries=[
-            TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand),
-            BoolQuery(should_queries=[
-                TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,NAME_GRADE),
-                TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
-            ])
-
-        ])
-        rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
-                                                                            SearchQuery(bool_query,get_total_count=True))
-        if total_count>0:
-            return False
-
-        # check the area+brand type
-        _f = is_area_brand(brand,self.area_set)
-        if _f==1:
-            return True
-        elif _f==2:
-            return False
-
-        # check the company type
-        _d = {ENTERPRISE_NAME:brand}
-        _ent = Enterprise(_d)
-        if _ent.exists_row(self.ots_client):
-            return True
-
-        # check the group count and char
-        bool_query = BoolQuery(must_queries=[
-            TermQuery(DOCUMENT_PRODUCT_TMP_BRAND,brand)
-        ])
-        rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
-                                                                            SearchQuery(bool_query,get_total_count=True))
-
-        if total_count>=5:
-            new_brand = re.sub("[^\u4e00-\u9fff]",'',brand)
-            if re.search("详见|无|国产|null|其他|详细|废标|[0-9/]|品牌|文件",brand) is None and len(brand)<=8:
-                return True
-
-        # extract the brand
-        # "品牌[::]?(<brand>.{2,5}([.。、::]|型号|生产厂家|厂家))"
+        return is_legal_brand(self.ots_client,brand)
 
 
 
@@ -1004,21 +846,24 @@ def fix_product_data():
     # delete the document_product row and set status to 1 on the document_product_temp record whose id equals its original id
     :return:
     '''
+    table_name = "document_product_temp"
+    table_index = "document_product_temp_index"
+    columns = [DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE]
     ots_client = getConnect_ots()
     bool_query = BoolQuery(must_queries=[
-        # RangeQuery("status",1)
-        TermQuery("docid",246032980)
+        RangeQuery("status",501),
+        # TermQuery("docid",246032980)
     ])
 
-    rows,next_token,total_count,is_all_succeed = ots_client.search("document_product","document_product_index",
+    rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
                                                                    SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
-                                                                   columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID],return_type=ColumnReturnType.SPECIFIED))
+                                                                   columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
 
     list_rows = getRow_ots(rows)
     while next_token:
-        rows,next_token,total_count,is_all_succeed = ots_client.search('document_product','document_product_index',
+        rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
                                                                        SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
-                                                                       columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID],return_type=ColumnReturnType.SPECIFIED))
+                                                                       columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
         list_rows.extend(getRow_ots(rows))
         print("%d/%d"%(len(list_rows),total_count))
         # if len(list_rows)>10000:
@@ -1071,9 +916,14 @@ def fix_product_data():
         dp.delete_row(ots_client)
 
     def handle(item,result_queue):
-        print("handle")
+        win_bid_price = item.get(DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE,1)
+        if win_bid_price==0:
+            dpt = Document_product_tmp(item)
+            dpt.setValue(DOCUMENT_PRODUCT_TMP_STATUS,1,True)
+            dpt.update_row(ots_client)
+
 
-    mt = MultiThreadHandler(task_queue,deleteAndReprocess,None,30,1)
+    mt = MultiThreadHandler(task_queue,handle,None,30,1)
     mt.run()
 
 def test_check_brand():

+ 1 - 0
BaseDataMaintenance/model/ots/document_product_tmp.py

@@ -25,6 +25,7 @@ DOCUMENT_PRODUCT_TMP_TENDEREE_CONTACT = 'tenderee_contact'
 DOCUMENT_PRODUCT_TMP_PROCUREMENT_SYSTEM = 'procurement_system'
 DOCUMENT_PRODUCT_TMP_BIDDING_BUDGET = 'bidding_budget'
 DOCUMENT_PRODUCT_TMP_WIN_TENDERER = 'win_tenderer'
+DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE = 'win_bid_price'
 DOCUMENT_PRODUCT_TMP_PROVINCE = 'province'
 DOCUMENT_PRODUCT_TMP_CITY = 'city'
 DOCUMENT_PRODUCT_TMP_DISTRICT = 'district'

+ 3 - 0
BaseDataMaintenance/model/ots/enterprise.py

@@ -6,6 +6,9 @@ import traceback
 
 
 ENTERPRISE_NAME = "name"
+ENTERPRISE_STATUS = "status"
+ENTERPRISE_bid_number = "bid_number"
+ENTERPRISE_tyc_id = "tyc_id"
 
 class Enterprise(BaseModel):