|
@@ -105,7 +105,7 @@ class Product_Dict_Manager():
|
|
|
must_queries=[RangeQuery(DOCUMENT_PRODUCT_DICT_GRADE,3,5,True,True)],
|
|
|
must_not_queries=[TermQuery(DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED,IS_SYNCHONIZED)])
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort(DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED)]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
|
|
|
|
|
@@ -114,7 +114,7 @@ class Product_Dict_Manager():
|
|
|
self.queue_product_dict.put(_d)
|
|
|
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
|
|
|
list_dict = getRow_ots(rows)
|
|
@@ -129,43 +129,14 @@ class Product_Dict_Manager():
|
|
|
def embedding_comsumer(self):
|
|
|
def handle(item,result_queue):
|
|
|
try:
|
|
|
- id = item.get(DOCUMENT_PRODUCT_DICT_ID)
|
|
|
name = str(item.get(DOCUMENT_PRODUCT_DICT_NAME))[:MAX_NAME_LENGTH]
|
|
|
- vector = request_embedding(name)
|
|
|
+ id = item.get(DOCUMENT_PRODUCT_DICT_ID)  # row key for the IS_SYNCHONIZED update further down; without this assignment `id` would be the builtin function
|
|
|
parent_id = item.get(DOCUMENT_PRODUCT_DICT_PARENT_ID)
|
|
|
grade = item.get(DOCUMENT_PRODUCT_DICT_GRADE)
|
|
|
Coll,_ = self.get_collection(grade)
|
|
|
standard_alias = item.get(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS,"")
|
|
|
- if vector is not None and Coll is not None:
|
|
|
-
|
|
|
- data = [[id],
|
|
|
- [name],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
-
|
|
|
-
|
|
|
- if standard_alias is not None and standard_alias!="":
|
|
|
- list_alias = standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR)
|
|
|
- for _alias in list_alias:
|
|
|
- _alias = _alias.strip()
|
|
|
- if len(_alias)==0:
|
|
|
- continue
|
|
|
- if _alias==name:
|
|
|
- continue
|
|
|
- _id = get_document_product_dict_standard_alias_id(_alias)
|
|
|
- vector = request_embedding(_alias)
|
|
|
- data = [[_id],
|
|
|
- [_alias],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
+
|
|
|
+ if insert_new_record_to_milvus(Coll,name,grade,parent_id,standard_alias):
|
|
|
|
|
|
_pd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:id,DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED})
|
|
|
_pd.update_row(self.ots_client)
|
|
@@ -194,12 +165,12 @@ class Product_Dict_Manager():
|
|
|
should_q
|
|
|
])
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_tmp_table_name,Document_product_tmp_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_tmp_table_name,Document_product_tmp_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
@@ -218,12 +189,12 @@ class Product_Dict_Manager():
|
|
|
for name in list_name:
|
|
|
bool_query = self.make_query(name,DOCUMENT_PRODUCT_NAME,TermQuery,len(name),5)
|
|
|
if bool_query is not None:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
@@ -245,12 +216,12 @@ class Product_Dict_Manager():
|
|
|
for name in list_name:
|
|
|
bool_query = self.make_query(name,DOCUMENT_PRODUCT_NAME,TermQuery,len(name),5)
|
|
|
if bool_query is not None:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
@@ -268,12 +239,12 @@ class Product_Dict_Manager():
|
|
|
else:
|
|
|
bool_query = self.make_query(name,DOCUMENT_PRODUCT_BRAND,TermQuery,len(name),5)
|
|
|
if bool_query is not None:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
@@ -292,12 +263,12 @@ class Product_Dict_Manager():
|
|
|
else:
|
|
|
bool_query = self.make_query(name,DOCUMENT_PRODUCT_SPECS,TermQuery,len(name),5)
|
|
|
if bool_query is not None:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
@@ -354,7 +325,7 @@ class Product_Dict_Manager():
|
|
|
TermQuery(term_columns,str(name)),
|
|
|
RangeQuery(DOCUMENT_PRODUCT_DICT_CREATE_TIME,None,str(create_time))
|
|
|
])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
|
|
|
SearchQuery(bool_query,get_total_count=True,limit=1),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
|
|
|
if total_count>0:
|
|
@@ -391,27 +362,17 @@ class Product_Dict_Manager():
|
|
|
TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE,grade),
|
|
|
RangeQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS,201,301)
|
|
|
])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_interface_table_name,Document_product_dict_interface_table_name+"_index",
|
|
|
SearchQuery(bool_query,get_total_count=True))
|
|
|
if total_count>0:
|
|
|
return
|
|
|
|
|
|
list_name = []
|
|
|
#update milvus
|
|
|
- vector = request_embedding(name)
|
|
|
Coll,_ = self.get_collection(grade)
|
|
|
- if vector is not None and Coll is not None:
|
|
|
- id = original_id
|
|
|
- data = [[id],
|
|
|
- [name],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
- list_name.append(name)
|
|
|
|
|
|
+ if insert_new_record_to_milvus(Coll,name,grade,parent_id,standard_alias):
|
|
|
+ list_name.append(name)
|
|
|
if standard_alias is not None and standard_alias!="":
|
|
|
list_alias = standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR)
|
|
|
for _alias in list_alias:
|
|
@@ -420,18 +381,9 @@ class Product_Dict_Manager():
|
|
|
continue
|
|
|
if _alias==name:
|
|
|
continue
|
|
|
- _id = get_document_product_dict_standard_alias_id(_alias)
|
|
|
- vector = request_embedding(_alias)
|
|
|
- data = [[_id],
|
|
|
- [_alias],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
list_name.append(_alias)
|
|
|
- time.sleep(3)
|
|
|
+ time.sleep(1)
|
|
|
+
|
|
|
|
|
|
#judge whether there exists records before this record created,if not process the history data
|
|
|
if not self.exists_records(name,grade,create_time):
|
|
@@ -468,70 +420,27 @@ class Product_Dict_Manager():
|
|
|
else:
|
|
|
new_id = original_id
|
|
|
|
|
|
- # update the milvus
|
|
|
- if not (len(new_name_set)==len(old_name_set) and len(new_name_set)==len(new_name_set&old_name_set)):
|
|
|
- Coll,_ = self.get_collection(grade)
|
|
|
- o_id = original_id
|
|
|
- expr = " ots_id in ['%s']"%o_id
|
|
|
- Coll.delete(expr)
|
|
|
|
|
|
- _alias = dpd.getProperties().get(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS)
|
|
|
- if _alias is not None and _alias!="":
|
|
|
- list_alias = _alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR)
|
|
|
- for _alias in list_alias:
|
|
|
- _alias = _alias.strip()
|
|
|
- if len(_alias)==0:
|
|
|
- continue
|
|
|
- if _alias==name:
|
|
|
- continue
|
|
|
- _id = get_document_product_dict_standard_alias_id(_alias)
|
|
|
- expr = " ots_id in ['%s']"%o_id
|
|
|
- Coll.delete(expr)
|
|
|
-
|
|
|
- list_name = []
|
|
|
- vector = request_embedding(name)
|
|
|
- if vector is not None and Coll is not None:
|
|
|
- id = new_id
|
|
|
- data = [[id],
|
|
|
- [name],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
- list_name.append(name)
|
|
|
-
|
|
|
- if standard_alias is not None and standard_alias!="":
|
|
|
- list_alias = standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR)
|
|
|
- for _alias in list_alias:
|
|
|
- _alias = _alias.strip()
|
|
|
- if len(_alias)==0:
|
|
|
- continue
|
|
|
- if _alias==name:
|
|
|
- continue
|
|
|
- _id = get_document_product_dict_standard_alias_id(_alias)
|
|
|
- vector = request_embedding(_alias)
|
|
|
- data = [[_id],
|
|
|
- [_alias],
|
|
|
- [name],
|
|
|
- [id],
|
|
|
- [vector],
|
|
|
- [parent_id],
|
|
|
- [grade]]
|
|
|
- insert_embedding(Coll,data)
|
|
|
- list_name.append(_alias)
|
|
|
- time.sleep(3)
|
|
|
+ Coll,_ = self.get_collection(grade)
|
|
|
|
|
|
- # process history
|
|
|
delete_names = list(old_name_set-new_name_set)
|
|
|
+ insert_names = list(new_name_set-old_name_set)
|
|
|
+ # update the milvus
|
|
|
+ if len(delete_names)>0:
|
|
|
+ for _name in delete_names:
|
|
|
+ delete_record_from_milvus(Coll,_name,"")
|
|
|
+ time.sleep(1)
|
|
|
+ if len(insert_names)>0:
|
|
|
+ insert_new_record_to_milvus(Coll,name,grade,parent_id,standard_alias)
|
|
|
+
|
|
|
+ # process history
|
|
|
if len(delete_names)>0:
|
|
|
self.process_history([old_name],grade,"update")
|
|
|
- insert_names = list(new_name_set-old_name_set)
|
|
|
if len(insert_names)>0:
|
|
|
self.process_history(insert_names,grade,"insert")
|
|
|
|
|
|
|
|
|
+
|
|
|
# update document_product_dict
|
|
|
_d = {DOCUMENT_PRODUCT_DICT_ID:new_id,
|
|
|
DOCUMENT_PRODUCT_DICT_NAME:name,
|
|
@@ -568,13 +477,13 @@ class Product_Dict_Manager():
|
|
|
bool_query = BoolQuery(must_queries=[
|
|
|
TermQuery(DOCUMENT_PRODUCT_DICT_PARENT_ID,parent_id)
|
|
|
])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort(DOCUMENT_PRODUCT_DICT_PARENT_ID)]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
|
|
@@ -600,25 +509,23 @@ class Product_Dict_Manager():
|
|
|
TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,grade)
|
|
|
])
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
if total_count==0:
|
|
|
return
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
list_data.extend(getRow_ots(rows))
|
|
|
|
|
|
#delete milvus records
|
|
|
Coll,_ = self.get_collection(grade)
|
|
|
- for _data in list_data:
|
|
|
- o_id = _data.get(DOCUMENT_PRODUCT_DICT_ID)
|
|
|
- expr = " ots_id in ['%s']"%o_id
|
|
|
- Coll.delete(expr)
|
|
|
- time.sleep(3)
|
|
|
+
|
|
|
+ delete_record_from_milvus(Coll,name,standard_alias)
|
|
|
+ time.sleep(1)
|
|
|
|
|
|
#process_history data
|
|
|
self.process_history([name],grade,"delete")
|
|
@@ -640,13 +547,13 @@ class Product_Dict_Manager():
|
|
|
bool_query = BoolQuery(must_queries=[
|
|
|
TermQuery(DOCUMENT_PRODUCT_DICT_PARENT_ID,id)
|
|
|
])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort(DOCUMENT_PRODUCT_DICT_PARENT_ID)]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
|
|
|
|
|
|
list_data = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
|
|
|
|
|
@@ -665,7 +572,7 @@ class Product_Dict_Manager():
|
|
|
RangeQuery("status",1,50,True,True)
|
|
|
])
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_interface_table_name,Document_product_dict_interface_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort(DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME)]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
list_data = getRow_ots(rows)
|
|
@@ -673,7 +580,7 @@ class Product_Dict_Manager():
|
|
|
self.queue_product_interface.put(_data)
|
|
|
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_dict_interface_table_name,Document_product_dict_interface_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
|
list_data = getRow_ots(rows)
|
|
@@ -749,7 +656,7 @@ def search_similar():
|
|
|
must_queries=[RangeQuery(DOCUMENT_PRODUCT_DICT_GRADE,5,5,True,True)]
|
|
|
)
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort(DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED)]),limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
|
|
|
|
|
@@ -758,7 +665,7 @@ def search_similar():
|
|
|
list_data.append(_d)
|
|
|
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
columns_to_get=ColumnsToGet(columns,ColumnReturnType.SPECIFIED))
|
|
|
list_dict = getRow_ots(rows)
|
|
@@ -767,7 +674,14 @@ def search_similar():
|
|
|
if len(list_data)>=100000:
|
|
|
break
|
|
|
log("product_dict embedding total_count:%d"%total_count)
|
|
|
+ set_key = set()
|
|
|
for _d in list_data:
|
|
|
+ name = _d.get(DOCUMENT_PRODUCT_DICT_NAME)
|
|
|
+ grade = _d.get(DOCUMENT_PRODUCT_DICT_GRADE)
|
|
|
+ _key = "%s-%d"%(name,grade)
|
|
|
+ if _key in set_key:
|
|
|
+ continue
|
|
|
+ set_key.add(_key)  # remember the "name-grade" key string; adding the set to itself raises TypeError (a set is unhashable)
|
|
|
task_queue.put(_d)
|
|
|
|
|
|
result_queue = Queue()
|
|
@@ -775,21 +689,27 @@ def search_similar():
|
|
|
def handle(item,result_queue):
|
|
|
id = item.get(DOCUMENT_PRODUCT_DICT_ID)
|
|
|
name = item.get(DOCUMENT_PRODUCT_DICT_NAME)
|
|
|
- vector = pdm.request_embedding(name)
|
|
|
+ vector = get_embedding_request(name)
|
|
|
parent_id = item.get(DOCUMENT_PRODUCT_DICT_PARENT_ID)
|
|
|
grade = item.get(DOCUMENT_PRODUCT_DICT_GRADE)
|
|
|
Coll,Coll_name = pdm.get_collection(grade)
|
|
|
- output_fields = ['ots_id','ots_name',"ots_parent_id"]
|
|
|
+ output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name"]
|
|
|
if vector is not None and Coll is not None:
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[vector],pdm.search_params,output_fields,limit=10)
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,name,grade,[vector],pdm.search_params,output_fields,limit=10)
|
|
|
for _item in search_list:
|
|
|
- ots_id = _item.id
|
|
|
- ots_name = _item.entity.get("ots_name")
|
|
|
- ots_parent_id = _item.entity.get("ots_parent_id")
|
|
|
+ ots_id = _item.get("id")
|
|
|
+ ots_name = _item.get("ots_name")
|
|
|
+ ots_parent_id = _item.get("ots_parent_id")
|
|
|
+ standard_name = _item.get("standard_name")
|
|
|
if name!=ots_name:
|
|
|
- if is_similar(name,ots_name):
|
|
|
- _d = {"source_id":id,"source_name":name,"grade":grade,"target_id":ots_id,"target_name":ots_name,"parent_id":parent_id,"target_parent_id":ots_parent_id}
|
|
|
- result_queue.put(_d)
|
|
|
+ if grade==4:
|
|
|
+ if is_similar(name,ots_name) or check_brand(name,ots_name):
|
|
|
+ _d = {"source_id":id,"source_name":name,"grade":grade,"target_id":ots_id,"target_name":ots_name,"parent_id":parent_id,"target_parent_id":ots_parent_id,"target_standard_name":standard_name}
|
|
|
+ result_queue.put(_d)
|
|
|
+ elif grade==5:
|
|
|
+ if is_similar(name,ots_name) and check_specs(name,ots_name):
|
|
|
+ _d = {"source_id":id,"source_name":name,"grade":grade,"target_id":ots_id,"target_name":ots_name,"parent_id":parent_id,"target_parent_id":ots_parent_id,"target_standard_name":standard_name}
|
|
|
+ result_queue.put(_d)
|
|
|
|
|
|
|
|
|
mt = MultiThreadHandler(task_queue,handle,result_queue,5,1)
|
|
@@ -817,6 +737,71 @@ def search_similar():
|
|
|
df.to_excel("search_similar1.xlsx",columns=df_columns)
|
|
|
|
|
|
|
|
|
+def insert_new_record_to_milvus(Coll,name,grade,parent_id,standard_alias):
+    """
+    Insert a product-dict name and its standard aliases into a Milvus collection.
+
+    The name is passed through get_milvus_standard_name before its id and its
+    embedding are computed; the rows themselves keep the name as given.
+
+    :param Coll: collection handed to insert_embedding; nothing is written when it is None
+    :param name: dictionary name to insert
+    :param grade: grade value stored in every inserted row
+    :param parent_id: parent id stored in every inserted row
+    :param standard_alias: aliases joined by DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR,
+                           or None / "" when there are none
+    :return: True when the row for ``name`` was inserted, False when the embedding
+             request returned None or ``Coll`` is None
+    """
+    n_name = get_milvus_standard_name(name)
+    name_id = get_milvus_product_dict_id(n_name)
+
+    vector = request_embedding(n_name)
+
+    log("insert name %s grade %d"%(name,grade))
+    if vector is None or Coll is None:
+        # explicit falsy result so callers that test the return value skip their follow-up work
+        return False
+
+    # one single-element list per column
+    # NOTE(review): the column order presumably mirrors the collection schema - confirm
+    data = [[name_id],
+            [name],
+            [name],
+            [name_id],
+            [vector],
+            [parent_id],
+            [grade]]
+    insert_embedding(Coll,data)
+
+    if standard_alias is not None and standard_alias!="":
+        for _alias in standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR):
+            _alias = _alias.strip()
+            # skip empty fragments and an alias that only repeats the name
+            if len(_alias)==0 or _alias==name:
+                continue
+            _id = get_document_product_dict_standard_alias_id(_alias)
+            alias_vector = request_embedding(get_milvus_standard_name(_alias))
+            if alias_vector is None:
+                # same guard as for the name itself: never hand a missing vector to insert_embedding
+                log("insert alias %s of name %s skipped: no embedding"%(_alias,name))
+                continue
+            # alias rows carry their own id and text but point back to the name and its id
+            data = [[_id],
+                    [_alias],
+                    [name],
+                    [name_id],
+                    [alias_vector],
+                    [parent_id],
+                    [grade]]
+            insert_embedding(Coll,data)
+    return True
|
|
|
+
|
|
|
+def delete_record_from_milvus(Coll,name,standard_alias):
+    """
+    Delete the Milvus rows of a product-dict name and of its standard aliases.
+
+    Rows are addressed through ``ots_id``: the name uses the id derived from its
+    get_milvus_standard_name form, every alias uses
+    get_document_product_dict_standard_alias_id.
+
+    :param Coll: collection to delete from; the call is a no-op when it is None
+    :param name: dictionary name whose row is deleted
+    :param standard_alias: aliases joined by DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR,
+                           or None / "" when only the name itself is to be deleted
+    :return: None
+    """
+    n_name = get_milvus_standard_name(name)
+    name_id = get_milvus_product_dict_id(n_name)
+
+    # the second value is the alias string, so it is labelled as such
+    log("delete name %s standard_alias %s"%(str(name),str(standard_alias)))
+
+    if Coll is None:
+        # insert_new_record_to_milvus treats Coll as possibly None as well; nothing to delete from
+        return
+
+    expr = " ots_id in ['%s']"%name_id
+    Coll.delete(expr)
+
+    if standard_alias is not None and standard_alias!="":
+        for _alias in standard_alias.split(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR):
+            _alias = _alias.strip()
+            # skip empty fragments and an alias that only repeats the name (already deleted above)
+            if len(_alias)==0 or _alias==name:
+                continue
+            _id = get_document_product_dict_standard_alias_id(_alias)
+
+            expr = " ots_id in ['%s']"%_id
+            Coll.delete(expr)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def dict_interface_delete(name,grade,ots_client = getConnect_ots()):
|
|
|
from uuid import uuid4
|
|
|
_d = {DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:name,
|
|
@@ -831,15 +816,16 @@ def dict_interface_delete(name,grade,ots_client = getConnect_ots()):
|
|
|
|
|
|
def interface_deletes():
|
|
|
a = '''
|
|
|
- MFUSONE
|
|
|
+ 眼科
|
|
|
'''
|
|
|
+ grade = 4
|
|
|
ots_client=getConnect_ots()
|
|
|
for s in re.split("[\n\s,.,。、]",a):
|
|
|
s = s.strip()
|
|
|
if s=="":
|
|
|
continue
|
|
|
print(s)
|
|
|
- dict_interface_delete(s,4,ots_client)
|
|
|
+ dict_interface_delete(s,grade,ots_client)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|