|
@@ -164,13 +164,15 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
for _search in search_list:
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(name,ots_name) or check_product(name,ots_name):
|
|
|
name_ots_id = ots_id
|
|
|
- new_name = ots_name
|
|
|
+ new_name = standard_name
|
|
|
|
|
|
+ log("checking name %s succeed %s"%(name,ots_name))
|
|
|
# #update alias of name
|
|
|
# _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
|
|
|
# _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
@@ -186,12 +188,15 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
for _search in search_list:
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
- if is_similar(name,ots_name) or check_product(name,ots_name):
|
|
|
+ if is_similar(name,ots_name,_radio=95):
|
|
|
+
|
|
|
+ log("checking name %s succeed %s"%(name,ots_name))
|
|
|
name_ots_id = ots_id
|
|
|
- new_name = ots_name
|
|
|
+ new_name = standard_name
|
|
|
|
|
|
# #update alias of name
|
|
|
# _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
|
|
@@ -221,7 +226,8 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
for _search in search_list:
|
|
|
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
# log("check brand %s and %s"%(brand,ots_name))
|
|
@@ -231,7 +237,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
if ots_name==new_name:
|
|
|
continue
|
|
|
- new_brand = ots_name
|
|
|
+ new_brand = standard_name
|
|
|
|
|
|
log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
# judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
@@ -320,15 +326,16 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
# log("check brand %s and %s"%(brand,ots_name))
|
|
|
- if is_similar(brand,ots_name,_radio=95) or check_brand(brand,ots_name):
|
|
|
+ if is_similar(brand,ots_name,_radio=95):
|
|
|
# log("check brand similar succeed:%s and %s"%(brand,ots_name))
|
|
|
if ots_name==new_name:
|
|
|
continue
|
|
|
- new_brand = ots_name
|
|
|
+ new_brand = standard_name
|
|
|
|
|
|
log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
# judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
@@ -390,7 +397,8 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
for _search in search_list:
|
|
|
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
debug("checking specs %s and %s"%(specs,ots_name))
|
|
@@ -398,7 +406,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
# log("specs is_similar")
|
|
|
if check_specs(c_specs,ots_name):
|
|
|
break_flag = True
|
|
|
- new_specs = ots_name
|
|
|
+ new_specs = standard_name
|
|
|
log("check_specs %s succeed %s"%(specs,new_specs))
|
|
|
|
|
|
# to update the document_product_dict which is builded for search
|
|
@@ -502,7 +510,9 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
break
|
|
|
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
- ots_name = _search.get("standard_name")
|
|
|
+
|
|
|
+ ots_name = _search.get("ots_name")
|
|
|
+ standard_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
debug("checking specs %s and %s"%(specs,ots_name))
|
|
@@ -510,7 +520,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
# log("specs is_similar")
|
|
|
if check_specs(c_specs,ots_name):
|
|
|
break_flag = True
|
|
|
- new_specs = ots_name
|
|
|
+ new_specs = standard_name
|
|
|
if brand_ots_id is not None:
|
|
|
# judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
@@ -922,7 +932,6 @@ def fix_product_data():
|
|
|
dpt.setValue(DOCUMENT_PRODUCT_TMP_STATUS,1,True)
|
|
|
dpt.update_row(ots_client)
|
|
|
|
|
|
-
|
|
|
mt = MultiThreadHandler(task_queue,handle,None,30,1)
|
|
|
mt.run()
|
|
|
|
|
@@ -978,29 +987,119 @@ def test_check_brand():
|
|
|
f.write(b+"\n")
|
|
|
|
|
|
def test_match():
    """Print the Milvus search hits for one hard-coded sample string.

    The sample is embedded with ``request_embedding`` and searched against
    the collection that ``Product_Manager.get_collection(SPECS_GRADE)``
    returns, keeping at most 20 hits.
    """
    sample_text = "MFUSONE"
    query_vector = request_embedding(sample_text)

    manager = Product_Manager()
    collection, _unused = manager.get_collection(SPECS_GRADE)

    # Fields requested back from the search for every hit.
    wanted_fields = ['ots_id', 'ots_name', "ots_parent_id", "standard_name", "standard_name_id"]
    hits = search_embedding(
        collection,
        embedding_index_name,
        [query_vector],
        manager.search_params,
        wanted_fields,
        limit=20,
    )
    print(hits)
|
|
|
|
|
|
|
|
|
def rebuild_milvus():
    """Re-insert product-dictionary rows from OTS into the Milvus collections.

    Steps:
      1. Page through the ``document_product_dict_index`` search index of the
         ``document_product_dict`` table, 100 rows per request.
      2. De-duplicate the rows on ``(name, grade)``.
      3. For every remaining row, embed the normalised name and insert it
         into the collection ``Product_Dict_Manager.get_collection`` returns
         for the row's grade; then do the same for each standard alias.

    The inserts are spread over 5 processes, each of which runs a
    ``MultiThreadHandler`` over the shared task queue.
    """
    pdm = Product_Dict_Manager()
    from multiprocessing import Queue as PQueue

    bool_query = BoolQuery(must_queries=[
        RangeQuery(DOCUMENT_PRODUCT_DICT_GRADE,3)
    ])
    ots_client = getConnect_ots()

    # parent_id is read by insert_into_milvus below, so it has to be requested
    # explicitly: with ColumnReturnType.SPECIFIED only the listed columns come
    # back, and the original list omitted it (parent_id was always "").
    # NOTE(review): assumes parent_id is an attribute column, not part of the
    # primary key -- confirm against the table schema.
    column_names = [DOCUMENT_PRODUCT_DICT_GRADE,
                    DOCUMENT_PRODUCT_DICT_NAME,
                    DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS,
                    DOCUMENT_PRODUCT_DICT_PARENT_ID]

    def _search_page(search_query):
        # One request against the dict index; shared by the first page and
        # every follow-up page so both ask for the same columns.
        return ots_client.search("document_product_dict","document_product_dict_index",
                                 search_query,
                                 ColumnsToGet(column_names,return_type=ColumnReturnType.SPECIFIED))

    rows,next_token,total_count,is_all_succeed = _search_page(
        SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("grade")]),limit=100,get_total_count=True))
    list_data = getRow_ots(rows)
    while next_token:
        rows,next_token,total_count,is_all_succeed = _search_page(
            SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True))
        list_data.extend(getRow_ots(rows))
        print("%d/%d"%(len(list_data),total_count))

    # Keep only the first row seen for each (name, grade) pair.
    set_name_grade = set()
    task_queue = PQueue()
    for _data in list_data:
        name = _data.get(DOCUMENT_PRODUCT_DICT_NAME)
        grade = _data.get(DOCUMENT_PRODUCT_DICT_GRADE)
        _key = "%s--%d"%(name,grade)
        if _key not in set_name_grade:
            task_queue.put(_data)
            set_name_grade.add(_key)

    # Every queued item added exactly one key, so the set size is the task
    # count. multiprocessing.Queue.qsize() is avoided because it raises
    # NotImplementedError on platforms without sem_getvalue().
    log("rebuild milvus %d counts"%(len(set_name_grade)))

    def insert_into_milvus(item,result_queue):
        # Worker body: insert one dictionary row plus its aliases.
        name = item.get(DOCUMENT_PRODUCT_DICT_NAME,"")
        n_name = get_milvus_standard_name(name)

        name_id = get_milvus_product_dict_id(n_name)

        vector = request_embedding(n_name)
        parent_id = item.get(DOCUMENT_PRODUCT_DICT_PARENT_ID,"")
        grade = item.get(DOCUMENT_PRODUCT_DICT_GRADE)
        Coll,_ = pdm.get_collection(grade)
        standard_alias = item.get(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS,"")

        # NOTE(review): the indentation of the reviewed diff was lost; the
        # alias handling is assumed to sit under this same guard -- confirm.
        if vector is None or Coll is None:
            return

        # Column order presumably follows the collection schema
        # (id, name, standard name, standard name id, vector, parent id,
        # grade) -- verify against the collection definition.
        data = [[name_id],
                [name],
                [name],
                [name_id],
                [vector],
                [parent_id],
                [grade]]
        insert_embedding(Coll,data)

        if standard_alias is None or standard_alias=="":
            return

        list_alias = standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR)
        for _alias in list_alias:
            _alias = _alias.strip()
            # Skip blanks and the alias that merely repeats the name itself.
            if len(_alias)==0 or _alias==name:
                continue
            _id = get_document_product_dict_standard_alias_id(_alias)
            n_alias = get_milvus_standard_name(_alias)
            alias_vector = request_embedding(n_alias)
            # Same guard as for the main name: never insert a missing vector.
            if alias_vector is None:
                continue
            data = [[_id],
                    [_alias],
                    [name],
                    [name_id],
                    [alias_vector],
                    [parent_id],
                    [grade]]
            insert_embedding(Coll,data)

    def start_thread():
        # Per-process entry point: drain the shared queue.
        mt = MultiThreadHandler(task_queue,insert_into_milvus,None,5)
        mt.run()

    # NOTE(review): a nested function as Process target cannot be pickled, so
    # this relies on the "fork" start method -- confirm the deployment target.
    p_count = 5
    list_p = [Process(target=start_thread) for _ in range(p_count)]
    for p in list_p:
        p.start()
    for p in list_p:
        p.join()
|
|
|
+
|
|
|
+
|
|
|
def test():
    """Ad-hoc entry point: run whichever maintenance routine is uncommented.

    Only ``rebuild_milvus()`` is active; the other calls are kept as
    disabled alternatives.
    """
    # pm = Product_Manager()
    # pm.test()
    # fix_product_data()
    # test_check_brand()
    # test_match()
    rebuild_milvus()
|
|
|
+
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point. The commented-out lines are earlier manual
    # experiments that are kept for reference; only test() runs.

    # start_process_product()
    # print(getMD5('11936c56f2dd1426764e317ca2e8e1a7'+'&&鱼跃'))
    # print(Product_Manager.get_bid_filemd5s(155415770,getConnect_ots()))
    # name = "一"
    # ots_name = "一氧化碳分析仪"
    # print(is_similar(name,ots_name),check_product(name,ots_name))
    # print(is_legal_specs('SCM-A/SB(0.18D)'))
    test()
|