@@ -156,11 +156,31 @@ class Product_Manager(Product_Dict_Manager):
         brand_ots_id = None
         specs_ots_id = None
         if name is not None and name!="":
-            name_vector = get_embedding_request(name)
-            if name_vector is not None:
+            Coll,_ = self.get_collection(NAME_GRADE)
+
+            search_list = get_intellect_search(Coll,embedding_index_name,name,NAME_GRADE,self.search_params,output_fields,limit=10)
+
+            for _search in search_list:
+                ots_id = _search.get("standard_name_id")
+                ots_name = _search.get("ots_name")
+                standard_name = _search.get("standard_name")
+                ots_parent_id = _search.get("ots_parent_id")
+
+                if is_similar(name,ots_name) or check_product(name,ots_name):
+                    name_ots_id = ots_id
+                    new_name = standard_name
+
+                    log("checking name %s succeed %s"%(name,ots_name))
+                    # #update alias of name
+                    # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
+                    # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
+                    # if _flag and _dpd.updateAlias(name):
+                    #     _dpd.update_row(self.ots_client)
+                    break
+            if name_ots_id is None:
+                for name in list_candidates:
                     Coll,_ = self.get_collection(NAME_GRADE)
-
-                    search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=20)
+                    search_list = get_intellect_search(Coll,embedding_index_name,name,NAME_GRADE,self.search_params,output_fields,limit=10)

                     for _search in search_list:
                         ots_id = _search.get("standard_name_id")
@@ -168,42 +188,18 @@ class Product_Manager(Product_Dict_Manager):
                         standard_name = _search.get("standard_name")
                         ots_parent_id = _search.get("ots_parent_id")

-                        if is_similar(name,ots_name) or check_product(name,ots_name):
+                        if is_similar(name,ots_name,_radio=95):
+
+                            log("checking name %s succeed %s"%(name,ots_name))
                             name_ots_id = ots_id
                             new_name = standard_name

-                            log("checking name %s succeed %s"%(name,ots_name))
                             # #update alias of name
                             # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
                             # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                             # if _flag and _dpd.updateAlias(name):
                             #     _dpd.update_row(self.ots_client)
                             break
-            if name_ots_id is None:
-                for name in list_candidates:
-                    name_vector = get_embedding_request(name)
-                    if name_vector is not None:
-                        Coll,_ = self.get_collection(NAME_GRADE)
-                        search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=10)
-
-                        for _search in search_list:
-                            ots_id = _search.get("standard_name_id")
-                            ots_name = _search.get("ots_name")
-                            standard_name = _search.get("standard_name")
-                            ots_parent_id = _search.get("ots_parent_id")
-
-                            if is_similar(name,ots_name,_radio=95):
-
-                                log("checking name %s succeed %s"%(name,ots_name))
-                                name_ots_id = ots_id
-                                new_name = standard_name
-
-                                # #update alias of name
-                                # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
-                                # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
-                                # if _flag and _dpd.updateAlias(name):
-                                #     _dpd.update_row(self.ots_client)
-                                break
         if name_ots_id is not None:

             if brand is not None and brand!="":
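A sketch, outside the patch, of the name-matching flow the two hunks above implement: the per-name embedding round trip (get_embedding_request + get_embedding_search) is replaced by a single get_intellect_search call, with a looser second pass (_radio=95) over list_candidates when the first pass misses. The helpers and constants (get_intellect_search, is_similar, check_product, NAME_GRADE, embedding_index_name, output_fields) are taken from the diff; match_standard_name itself is illustrative and not a function in the patch.

def match_standard_name(pm, name, list_candidates):
    # pm is a Product_Manager; first pass: strict match on the extracted name
    Coll, _ = pm.get_collection(NAME_GRADE)
    for hit in get_intellect_search(Coll, embedding_index_name, name, NAME_GRADE,
                                    pm.search_params, output_fields, limit=10):
        ots_name = hit.get("ots_name")
        if is_similar(name, ots_name) or check_product(name, ots_name):
            return hit.get("standard_name_id"), hit.get("standard_name")
    # second pass: looser fuzzy threshold over the candidate names
    for cand in list_candidates:
        for hit in get_intellect_search(Coll, embedding_index_name, cand, NAME_GRADE,
                                        pm.search_params, output_fields, limit=10):
            if is_similar(cand, hit.get("ots_name"), _radio=95):
                return hit.get("standard_name_id"), hit.get("standard_name")
    return None, None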
@@ -214,14 +210,104 @@ class Product_Manager(Product_Dict_Manager):
                 brand_ch = get_chinese_string(brand)
                 l_brand.extend(brand_ch)

+                Coll,_ = self.get_collection(BRAND_GRADE)
+
                 _find = False
                 for brand in l_brand:
+                    search_list = get_intellect_search(Coll,embedding_index_name,brand,BRAND_GRADE,self.search_params,output_fields,limit=10)

-                    brand_vector = get_embedding_request(brand)
-                    if brand_vector is not None:
-                        Coll,_ = self.get_collection(BRAND_GRADE)
-                        search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=20)
+                    # log("search brand %s"%(brand))
+                    for _search in search_list:

+                        ots_id = _search.get("standard_name_id")
+                        ots_name = _search.get("ots_name")
+                        standard_name = _search.get("standard_name")
+                        ots_parent_id = _search.get("ots_parent_id")
+
+                        # log("check brand %s and %s"%(brand,ots_name))
+                        if is_similar(brand,ots_name) or check_brand(brand,ots_name):
+
+                            # log("check brand similar succeed:%s and %s"%(brand,ots_name))
+
+                            if ots_name==new_name:
+                                continue
+                            new_brand = standard_name
+
+                            log("checking brand %s succeed %s"%(brand,new_brand))
+                            # check whether a brand whose parent_id is name_ots_id already exists; insert one if not, otherwise update the alias
+
+                            if name_ots_id is not None:
+                                brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
+
+                                _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
+                                            DOCUMENT_PRODUCT_DICT_NAME:new_brand,
+                                            DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
+                                            DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
+                                            DOCUMENT_PRODUCT_DICT_STATUS:1,
+                                            DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
+                                            DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
+                                            DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                            DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                            }
+                                _dpd_brand = Document_product_dict(_d_brand)
+                                # _dpd_brand.updateAlias(str(new_brand).lower())
+                                if not _dpd_brand.exists_row(self.ots_client):
+                                    _dpd_brand.update_row(self.ots_client)
+
+                                else:
+                                    pass
+                                    # #update alias
+                                    # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
+                                    # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
+                                    # if _flag:
+                                    #     if _dpd.updateAlias(brand):
+                                    #         _dpd.update_row(self.ots_client)
+
+                            _find = True
+                            break
+                        else:
+                            # log("check brand similar failed:%s and %s"%(brand,ots_name))
+                            # add new brand?
+                            pass
+                    if _find:
+                        break
+                if not _find:
+                    for brand in l_brand:
+                        if self.check_new_brand(brand):
+                            new_brand = clean_product_brand(brand)
+                            if new_brand=="":
+                                continue
+                            log("adding new brand %s"%(str(new_brand)))
+                            _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
+                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
+                                        }
+                            dpdi = Document_product_dict_interface(_d_brand)
+                            dpdi.update_row(self.ots_client)
+                            break
+
+                if brand_ots_id is None:
+                    _find = False
+                    Coll,_ = self.get_collection(BRAND_GRADE)
+                    for brand in list_candidates:
+                        if _find:
+                            break
+                        l_brand = [brand]
+                        l_brand.append(clean_product_brand(brand))
+                        brand_ch = get_chinese_string(brand)
+                        l_brand.extend(brand_ch)
+
+                        for brand in l_brand:
+                            if _find:
+                                break
+
+                            search_list = get_intellect_search(Coll,embedding_index_name,brand,BRAND_GRADE,self.search_params,output_fields,limit=10)
                             # log("search brand %s"%(brand))
                             for _search in search_list:

@@ -231,10 +317,8 @@ class Product_Manager(Product_Dict_Manager):
                                 ots_parent_id = _search.get("ots_parent_id")

                                 # log("check brand %s and %s"%(brand,ots_name))
-                                if is_similar(brand,ots_name) or check_brand(brand,ots_name):
-
+                                if check_brand(brand,ots_name):
                                     # log("check brand similar succeed:%s and %s"%(brand,ots_name))
-
                                     if ots_name==new_name:
                                         continue
                                     new_brand = standard_name
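A side note on the brand fallback added above: when no dictionary brand matches and self.check_new_brand(brand) passes, the patch queues the brand into the document_product_dict_interface table with action "insert" instead of writing the dictionary directly. A condensed sketch of that record, with the field constants, helpers, and timestamp format taken from the diff; the wrapper function itself is hypothetical and assumes a downstream synchronizer folds the row into document_product_dict.

def queue_new_brand(ots_client, brand, name_ots_id):
    # build an "insert" action row for the dict-interface table
    new_brand = clean_product_brand(brand)
    if new_brand == "":
        return None
    _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID: uuid4().hex,
          DOCUMENT_PRODUCT_DICT_INTERFACE_NAME: new_brand,
          DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS: str(brand).lower(),
          DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE: BRAND_GRADE,
          DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS: 1,
          DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID: name_ots_id,
          DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME: getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
          DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME: getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
          DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION: "insert"}
    dpdi = Document_product_dict_interface(_d)
    dpdi.update_row(ots_client)
    return new_brand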
@@ -271,104 +355,6 @@ class Product_Manager(Product_Dict_Manager):

                                     _find = True
                                     break
-                                else:
-                                    # log("check brand similar failed:%s and %s"%(brand,ots_name))
-                                    # add new brand?
-                                    pass
-                            if _find:
-                                break
-                        if not _find:
-                            for brand in l_brand:
-                                if self.check_new_brand(brand):
-                                    new_brand = clean_product_brand(brand)
-                                    if new_brand=="":
-                                        continue
-                                    log("adding new brand %s"%(str(new_brand)))
-                                    _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
-                                                }
-                                    dpdi = Document_product_dict_interface(_d_brand)
-                                    dpdi.update_row(self.ots_client)
-                                    break
-
-                        if brand_ots_id is None:
-                            _find = False
-                            for brand in list_candidates:
-                                if _find:
-                                    break
-                                l_brand = [brand]
-                                l_brand.append(clean_product_brand(brand))
-                                brand_ch = get_chinese_string(brand)
-                                l_brand.extend(brand_ch)
-
-                                for brand in l_brand:
-                                    if _find:
-                                        break
-                                    start_time = time.time()
-                                    # brand_vector = request_embedding(brand)
-                                    brand_vector = get_embedding_request(brand)
-                                    debug("get embedding for brand %s takes %.4fs"%(brand,time.time()-start_time))
-                                    if brand_vector is not None:
-                                        Coll,_ = self.get_collection(BRAND_GRADE)
-                                        start_time = time.time()
-                                        # search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=10)
-                                        search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=10)
-                                        debug("get search_list for brand %s takes %.4fs"%(brand,time.time()-start_time))
-                                        # log("search brand %s"%(brand))
-                                        for _search in search_list:
-
-
-                                            ots_id = _search.get("standard_name_id")
-                                            ots_name = _search.get("ots_name")
-                                            standard_name = _search.get("standard_name")
-                                            ots_parent_id = _search.get("ots_parent_id")
-
-                                            # log("check brand %s and %s"%(brand,ots_name))
-                                            if is_similar(brand,ots_name,_radio=95):
-                                                # log("check brand similar succeed:%s and %s"%(brand,ots_name))
-                                                if ots_name==new_name:
-                                                    continue
-                                                new_brand = standard_name
-
-                                                log("checking brand %s succeed %s"%(brand,new_brand))
-                                                # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
-
-                                                if name_ots_id is not None:
-                                                    brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
-
-                                                    _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
-                                                                DOCUMENT_PRODUCT_DICT_NAME:new_brand,
-                                                                DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
-                                                                DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
-                                                                DOCUMENT_PRODUCT_DICT_STATUS:1,
-                                                                DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
-                                                                DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
-                                                                DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                                DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
-                                                                }
-                                                    _dpd_brand = Document_product_dict(_d_brand)
-                                                    # _dpd_brand.updateAlias(str(new_brand).lower())
-                                                    if not _dpd_brand.exists_row(self.ots_client):
-                                                        _dpd_brand.update_row(self.ots_client)
-
-                                                    else:
-                                                        pass
-                                                        # #update alias
-                                                        # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
-                                                        # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
-                                                        # if _flag:
-                                                        #     if _dpd.updateAlias(brand):
-                                                        #         _dpd.update_row(self.ots_client)
-
-                                                    _find = True
-                                                    break

             if specs is not None and specs!="":

@@ -1003,13 +989,36 @@ def test_check_brand():
             f.write(b+"\n")

 def test_match():
-    a = "-SL-10XL"
-    vector = request_embedding(get_milvus_standard_name(a))
+    a = "数字化医用X射线摄影系统(DR)"
+
+
+    # vector = request_embedding(get_milvus_standard_name(a))
+    vector = [get_embedding_request(b) for b in a]
     pm = Product_Manager()
-    Coll,_ = pm.get_collection(SPECS_GRADE)
+    _GRADE = NAME_GRADE
+    Coll,_ = pm.get_collection(_GRADE)
+    print(Coll.name)
+
     output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id"]
-    search_list = search_embedding(Coll,embedding_index_name,[vector],pm.search_params,output_fields,limit=20)
-    print(search_list)
+    # start_time = time.time()
+    # print(Coll.query(expr=" ots_id in ['75058b275a4c1d8ee38b58c5c5cce3bb'] ",output_fields=output_fields))
+    # print("cost",time.time()-start_time)
+    # print(Coll.compact())
+    # result = search_embedding(Coll,embedding_index_name,[vector],pm.search_params,output_fields,limit=20)
+    #
+    # final_list = []
+    # for _search in result:
+    #     _d = {}
+    #     for k in output_fields:
+    #         _d[k] = _search.entity.get(k)
+    #     final_list.append(_d)
+    # final_list = remove_repeat_item(final_list,k="ots_name")
+
+    start_time = time.time()
+    # final_list = get_embedding_search(Coll,embedding_index_name,a,_GRADE,vector,pm.search_params,output_fields,limit=5)
+    final_list = get_intellect_search(Coll,embedding_index_name,a,_GRADE,pm.search_params,output_fields,limit=10)
+    print("cost",time.time()-start_time)
+    print(final_list)


 def rebuild_milvus():
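The commented-out block kept in test_match above documents the post-processing that get_intellect_search is replacing: raw Milvus hits flattened to dicts over output_fields and then deduplicated by ots_name via remove_repeat_item. If that step is ever needed standalone, an order-preserving dedup can look like the sketch below; the function name and the k="ots_name" keyword mirror the diff, but this helper is illustrative and not the project's implementation.

def dedup_by_key(items, k="ots_name"):
    # keep the first occurrence of each items[i][k] value, preserving order
    seen = set()
    out = []
    for _d in items:
        v = _d.get(k)
        if v in seen:
            continue
        seen.add(v)
        out.append(_d)
    return out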
@@ -1148,6 +1157,121 @@ def move_document_product():
     mt = MultiThreadHandler(task_queue,_handle,None,30)
     mt.run()

+current_path = os.path.dirname(__file__)
+def delete_brands():
+    filename = os.path.join(current_path,"search_similar2_1.xlsx_brand_move.txt")
+
+    ots_client = getConnect_ots()
+    list_brand = []
+    with open(filename,"r",encoding="utf8") as f:
+        while 1:
+            brand = f.readline()
+            if not brand:
+                break
+            brand = brand.strip()
+            list_brand.append(brand)
+
+    pm = Product_Manager()
+    Coll,_ = pm.get_collection(BRAND_GRADE)
+
+    print(Coll.name)
+    Coll.compact()
+    _count = 0
+
+    task_queue = Queue()
+    for brand in list_brand:
+        _count += 1
+        task_queue.put(brand)
+        # if _count>=2:
+        #     break
+
+    def _handle(brand,result_queue):
+
+        bool_query = BoolQuery(must_queries=[
+            TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,BRAND_GRADE),
+            TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand)
+        ])
+
+        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
+                                                                       SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
+                                                                       ColumnsToGet(return_type=ColumnReturnType.NONE))
+        list_data = getRow_ots(rows)
+        _id = get_milvus_product_dict_id(brand)
+
+        while next_token:
+            rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
+                                                                           SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
+                                                                           ColumnsToGet(return_type=ColumnReturnType.NONE))
+            list_data.extend(getRow_ots(rows))
+        for _d in list_data:
+            dpd = Document_product_dict(_d)
+            dpd.delete_row(ots_client)
+        # print(Coll.query(expr=" ots_id in ['%s']"%(_id),output_fields=["ots_id","ots_name"]))
+        delete_counts = Coll.delete(expr=" ots_id in ['%s']"%(_id)).delete_count
+
+        log("brand %s total_count %d md5:%s delete_counts:%d"%(brand,total_count,_id,delete_counts))
+
+    mt = MultiThreadHandler(task_queue,_handle,None,30)
+    mt.run()
+
+
+
+def delete_specs():
+    filename = os.path.join(current_path,"search_similar2_1.xlsx_specs_move.txt")
+
+    ots_client = getConnect_ots()
+    list_brand = []
+    with open(filename,"r",encoding="utf8") as f:
+        while 1:
+            brand = f.readline()
+            if not brand:
+                break
+            brand = brand.strip()
+            list_brand.append(brand)
+
+    pm = Product_Manager()
+    Coll,_ = pm.get_collection(SPECS_GRADE)
+    print(Coll.name)
+    Coll.compact()
+
+    _count = 0
+    task_queue = Queue()
+
+    for specs in list_brand:
+        task_queue.put(specs)
+        _count += 1
+        # if _count>=2:
+        #     break
+
+    def _handle(specs,result_queue):
+
+        bool_query = BoolQuery(must_queries=[
+            TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE),
+            TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs)
+        ])
+
+        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
+                                                                       SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
+                                                                       ColumnsToGet(return_type=ColumnReturnType.NONE))
+        list_data = getRow_ots(rows)
+        _id = get_milvus_product_dict_id(specs)
+
+        while next_token:
+            rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
+                                                                           SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
+                                                                           ColumnsToGet(return_type=ColumnReturnType.NONE))
+            list_data.extend(getRow_ots(rows))
+        for _d in list_data:
+            dpd = Document_product_dict(_d)
+            dpd.delete_row(ots_client)
+        # print(Coll.query(expr=" ots_id in ['%s']"%(_id),output_fields=["ots_id","ots_name"]))
+        delete_counts = Coll.delete(expr=" ots_id in ['%s']"%(_id)).delete_count
+
+        log("specs %s total_count %d md5:%s delete_counts:%d"%(specs,total_count,_id,delete_counts))
+
+    mt = MultiThreadHandler(task_queue,_handle,None,30)
+    mt.run()
+    Coll.compact()



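delete_brands and delete_specs above differ only in the input file, the grade constant, and the loop variable; the OTS pagination and the Milvus delete inside their _handle closures are identical. A possible shared helper, sketched with the same calls the patch uses (the generic delete_dict_rows function itself is not in the patch and assumes the same module imports):

def delete_dict_rows(ots_client, Coll, grade, value):
    # delete all document_product_dict rows of this grade/name, then the matching Milvus entity
    bool_query = BoolQuery(must_queries=[
        TermQuery(DOCUMENT_PRODUCT_DICT_GRADE, grade),
        TermQuery(DOCUMENT_PRODUCT_DICT_NAME, value)
    ])
    rows, next_token, total_count, is_all_succeed = ots_client.search(
        Document_product_dict_table_name, Document_product_dict_table_name + "_index",
        SearchQuery(bool_query, sort=Sort(sorters=[FieldSort("status")]), limit=100, get_total_count=True),
        ColumnsToGet(return_type=ColumnReturnType.NONE))
    list_data = getRow_ots(rows)
    while next_token:
        rows, next_token, total_count, is_all_succeed = ots_client.search(
            Document_product_dict_table_name, Document_product_dict_table_name + "_index",
            SearchQuery(bool_query, next_token=next_token, limit=100, get_total_count=True),
            ColumnsToGet(return_type=ColumnReturnType.NONE))
        list_data.extend(getRow_ots(rows))
    for _d in list_data:
        Document_product_dict(_d).delete_row(ots_client)
    _id = get_milvus_product_dict_id(value)
    delete_counts = Coll.delete(expr=" ots_id in ['%s']" % (_id)).delete_count
    log("%s total_count %d md5:%s delete_counts:%d" % (value, total_count, _id, delete_counts))

Usage would mirror the existing _handle closures, e.g. delete_dict_rows(ots_client, Coll, BRAND_GRADE, brand) inside the MultiThreadHandler worker.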
@@ -1156,10 +1280,12 @@ def test():
     # pm.test()
     # fix_product_data()
     # test_check_brand()
-    # test_match()
+    test_match()
     # rebuild_milvus()

-    move_document_product()
+    # move_document_product()
+    # delete_brands()
+    # delete_specs()

 if __name__ == '__main__':
