|
@@ -19,6 +19,11 @@ from random import randint
|
|
from BaseDataMaintenance.maintenance.product.product_dict import Product_Dict_Manager
|
|
from BaseDataMaintenance.maintenance.product.product_dict import Product_Dict_Manager
|
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
|
|
|
|
|
|
|
+import logging
|
|
|
|
+
|
|
|
|
+root = logging.getLogger()
|
|
|
|
+root.setLevel(logging.INFO)
|
|
|
|
+
|
|
class Product_Manager(Product_Dict_Manager):
|
|
class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
def __init__(self):
|
|
def __init__(self):
|
|
@@ -41,7 +46,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
unit_price = ""
|
|
unit_price = ""
|
|
else:
|
|
else:
|
|
unit_price = "%.2f"%float(unit_price)
|
|
unit_price = "%.2f"%float(unit_price)
|
|
- product_id = getMD5(str(docid))+str(name)+str(brand)+str(specs)+str(unit_price)+str(quantity)
|
|
|
|
|
|
+ product_id = getMD5(str(docid)+str(name)+str(brand)+str(specs)+str(unit_price)+str(quantity))
|
|
return product_id
|
|
return product_id
|
|
|
|
|
|
def producer(self,process_count=3000):
|
|
def producer(self,process_count=3000):
|
|
@@ -63,7 +68,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
list_id.append(_id)
|
|
list_id.append(_id)
|
|
self.process_queue.put(_d)
|
|
self.process_queue.put(_d)
|
|
while next_token:
|
|
while next_token:
|
|
- rows,next_token,total_count_is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
|
|
|
|
|
|
+ rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
|
|
list_data = getRow_ots(rows)
|
|
list_data = getRow_ots(rows)
|
|
@@ -99,7 +104,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
self.standardize(item)
|
|
self.standardize(item)
|
|
|
|
|
|
|
|
|
|
- def standardize(self,tmp_dict,output_fields = ['ots_id','ots_name',"ots_parent_id"]):
|
|
|
|
|
|
+ def standardize(self,tmp_dict,output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id"]):
|
|
'''
|
|
'''
|
|
Standardizes the product data
|
|
Standardizes the product data
|
|
通过匹配标准参数表进行标准化,匹配是非精确匹配,校验规则是?
|
|
通过匹配标准参数表进行标准化,匹配是非精确匹配,校验规则是?
|
|
@@ -121,9 +126,9 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
document_product_tmp = Document_product_tmp(tmp_dict)
|
|
document_product_tmp = Document_product_tmp(tmp_dict)
|
|
|
|
|
|
- name = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME)
|
|
|
|
- brand = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND)
|
|
|
|
- specs = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS)
|
|
|
|
|
|
+ name = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,"")
|
|
|
|
+ brand = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,"")
|
|
|
|
+ specs = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,"")
|
|
|
|
|
|
new_name = ""
|
|
new_name = ""
|
|
new_brand = ""
|
|
new_brand = ""
|
|
@@ -139,8 +144,8 @@ class Product_Manager(Product_Dict_Manager):
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
|
|
|
for _search in search_list:
|
|
for _search in search_list:
|
|
- ots_id = _search.entity.get("ots_id")
|
|
|
|
- ots_name = _search.entity.get("ots_name")
|
|
|
|
|
|
+ ots_id = _search.entity.get("standard_name_id")
|
|
|
|
+ ots_name = _search.entity.get("standard_name")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(name,ots_name):
|
|
if is_similar(name,ots_name):
|
|
@@ -161,8 +166,9 @@ class Product_Manager(Product_Dict_Manager):
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
|
|
|
for _search in search_list:
|
|
for _search in search_list:
|
|
- ots_id = _search.entity.get("ots_id")
|
|
|
|
- ots_name = _search.entity.get("ots_name")
|
|
|
|
|
|
+
|
|
|
|
+ ots_id = _search.entity.get("standard_name_id")
|
|
|
|
+ ots_name = _search.entity.get("standard_name")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(brand,ots_name):
|
|
if is_similar(brand,ots_name):
|
|
@@ -175,7 +181,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
_d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
|
|
_d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
|
|
DOCUMENT_PRODUCT_DICT_NAME:new_brand,
|
|
DOCUMENT_PRODUCT_DICT_NAME:new_brand,
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(brand,new_brand),
|
|
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(str(brand).lower(),str(new_brand).lower()),
|
|
DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
|
|
DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
|
|
DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
|
|
DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
|
|
@@ -204,8 +210,9 @@ class Product_Manager(Product_Dict_Manager):
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
|
|
|
for _search in search_list:
|
|
for _search in search_list:
|
|
- ots_id = _search.entity.get("ots_id")
|
|
|
|
- ots_name = _search.entity.get("ots_name")
|
|
|
|
|
|
+
|
|
|
|
+ ots_id = _search.entity.get("standard_name_id")
|
|
|
|
+ ots_name = _search.entity.get("standard_name")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(specs,ots_name):
|
|
if is_similar(specs,ots_name):
|
|
@@ -219,7 +226,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
_d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
_d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&"%(specs,new_specs),
|
|
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(str(specs).lower(),str(new_specs).lower()),
|
|
DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|