|
@@ -113,6 +113,54 @@ class Product_Manager(Product_Dict_Manager):
|
|
self.standardize(item)
|
|
self.standardize(item)
|
|
|
|
|
|
|
|
|
|
|
|
def match_specs(self, specs, output_fields=None):
    '''
    Resolve a raw specs string to a standard specs name.

    Lookup happens in two stages:

    1. Exact match: search the "document_product_dict" index for a row whose
       name equals ``specs`` and whose grade is ``SPECS_GRADE``. If at least
       one row exists, ``specs`` itself is returned unchanged.
    2. Embedding fallback: build a short list of candidate strings (the
       cleaned specs plus every run of more than 4 characters left after
       splitting ``specs`` on CJK ideographs), embed each candidate, search
       the SPECS_GRADE collection and return the first hit's
       ``standard_name`` that passes both ``is_similar`` and ``check_specs``.

    :param specs: raw specs text to standardize
    :param output_fields: entity fields requested from the embedding search.
        Defaults to the same field list ``standardize`` uses by default.
        NOTE(review): the previous code read a free name ``output_fields``
        that is only visible as a parameter of ``standardize``; confirm no
        module-level ``output_fields`` with different content was intended.
    :return: the matched standard specs name, or None when ``specs`` is
        empty or nothing matches
    '''
    # Nothing to look up for None / empty input.
    if not specs:
        return None

    if output_fields is None:
        output_fields = ['ots_id', 'ots_name', "ots_parent_id", "standard_name", "standard_name_id"]

    # Stage 1: exact term match on (name, grade) in the dictionary index.
    bool_query = BoolQuery(must_queries=[
        TermQuery(DOCUMENT_PRODUCT_DICT_NAME, specs),
        TermQuery(DOCUMENT_PRODUCT_DICT_GRADE, SPECS_GRADE)
    ])
    # Only the total count is needed; the other three results are ignored.
    _rows, _next_token, total_count, _is_all_succeed = self.ots_client.search(
        "document_product_dict", "document_product_dict_index",
        SearchQuery(bool_query, get_total_count=True))
    if total_count > 0:
        return specs

    # Stage 2: embedding-based fallback.
    debug("getting sepcs %s" % (specs))

    # Cleaned specs first, then each non-CJK fragment longer than 4 chars.
    # A fragment equal to an earlier candidate is skipped so the same
    # embedding request / search is not issued twice.
    candidates = [clean_product_specs(specs)]
    for fragment in re.split("[\u4e00-\u9fff]", specs):
        if len(fragment) > 4 and fragment not in candidates:
            candidates.append(fragment)

    for candidate in candidates:
        specs_vector = request_embedding(candidate)
        if specs_vector is None:
            # No vector for this candidate; try the next one.
            continue

        Coll, _ = self.get_collection(SPECS_GRADE)
        search_list = search_embedding(Coll, embedding_index_name, [specs_vector],
                                       self.search_params, output_fields, limit=60)

        for _search in search_list:
            ots_name = _search.entity.get("standard_name")

            debug("checking specs %s and %s" % (specs, ots_name))
            # Similarity is judged against the raw specs, while check_specs
            # is applied to the candidate that produced this hit.
            if is_similar(specs, ots_name) and check_specs(candidate, ots_name):
                return ots_name

    # No candidate produced an acceptable standard name.
    return None
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
def standardize(self,tmp_dict,output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id"]):
|
|
def standardize(self,tmp_dict,output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id"]):
|
|
'''
|
|
'''
|
|
Standardizes the product data
|
|
Standardizes the product data
|
|
@@ -138,16 +186,15 @@ class Product_Manager(Product_Dict_Manager):
|
|
name = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,"")
|
|
name = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,"")
|
|
brand = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,"")
|
|
brand = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,"")
|
|
specs = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,"")
|
|
specs = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,"")
|
|
|
|
+ parameters = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_PARAMETER,"")
|
|
|
|
+
|
|
|
|
+ list_candidates = [a for a in [name,brand,specs,parameters] if a!=""]
|
|
|
|
|
|
- max_len = max(len(name),len(brand),len(specs))
|
|
|
|
- max_len_str = name if len(name)==max_len else brand if len(brand)==max_len else specs
|
|
|
|
|
|
+ if brand=="" and parameters!="":
|
|
|
|
+ brand = parameters
|
|
|
|
+ if specs=="" and parameters!="":
|
|
|
|
+ specs = parameters
|
|
|
|
|
|
- if name=="" and max_len>=8:
|
|
|
|
- name = max_len_str
|
|
|
|
- if brand=="" and max_len>=8:
|
|
|
|
- brand = max_len_str
|
|
|
|
- if specs=="" and max_len>=8:
|
|
|
|
- specs = max_len_str
|
|
|
|
|
|
|
|
new_name = ""
|
|
new_name = ""
|
|
new_brand = ""
|
|
new_brand = ""
|
|
@@ -177,6 +224,28 @@ class Product_Manager(Product_Dict_Manager):
|
|
# if _flag and _dpd.updateAlias(name):
|
|
# if _flag and _dpd.updateAlias(name):
|
|
# _dpd.update_row(self.ots_client)
|
|
# _dpd.update_row(self.ots_client)
|
|
break
|
|
break
|
|
|
|
+ if name_ots_id is None:
|
|
|
|
+ for name in list_candidates:
|
|
|
|
+ name_vector = request_embedding(name)
|
|
|
|
+ if name_vector is not None:
|
|
|
|
+ Coll,_ = self.get_collection(NAME_GRADE)
|
|
|
|
+ search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
|
+
|
|
|
|
+ for _search in search_list:
|
|
|
|
+ ots_id = _search.entity.get("standard_name_id")
|
|
|
|
+ ots_name = _search.entity.get("standard_name")
|
|
|
|
+ ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
|
+
|
|
|
|
+ if is_similar(name,ots_name) or check_product(name,ots_name):
|
|
|
|
+ name_ots_id = ots_id
|
|
|
|
+ new_name = ots_name
|
|
|
|
+
|
|
|
|
+ # #update alias of name
|
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
|
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
|
+ # if _flag and _dpd.updateAlias(name):
|
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
|
+ break
|
|
if name_ots_id is not None:
|
|
if name_ots_id is not None:
|
|
|
|
|
|
if brand is not None and brand!="":
|
|
if brand is not None and brand!="":
|
|
@@ -277,7 +346,67 @@ class Product_Manager(Product_Dict_Manager):
|
|
dpdi.update_row(self.ots_client)
|
|
dpdi.update_row(self.ots_client)
|
|
break
|
|
break
|
|
|
|
|
|
|
|
+ if brand_ots_id is None:
|
|
|
|
+ _find = False
|
|
|
|
+ for brand in list_candidates:
|
|
|
|
+ if _find:
|
|
|
|
+ break
|
|
|
|
+ l_brand = [brand]
|
|
|
|
+ l_brand.append(clean_product_brand(s_brand))
|
|
|
|
+ brand_ch = get_chinese_string(brand)
|
|
|
|
+ l_brand.extend(brand_ch)
|
|
|
|
+
|
|
|
|
+ for brand in l_brand:
|
|
|
|
+ if _find:
|
|
|
|
+ break
|
|
|
|
+ brand_vector = request_embedding(brand)
|
|
|
|
+ if brand_vector is not None:
|
|
|
|
+ Coll,_ = self.get_collection(BRAND_GRADE)
|
|
|
|
+ search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=60)
|
|
|
|
+
|
|
|
|
+ # log("search brand %s"%(brand))
|
|
|
|
+ for _search in search_list:
|
|
|
|
|
|
|
|
+ ots_id = _search.entity.get("standard_name_id")
|
|
|
|
+ ots_name = _search.entity.get("standard_name")
|
|
|
|
+ ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
|
+
|
|
|
|
+ # log("check brand %s and %s"%(brand,ots_name))
|
|
|
|
+ if is_similar(brand,ots_name,_radio=95) or check_brand(brand,ots_name):
|
|
|
|
+ # log("check brand similar succeed:%s and %s"%(brand,ots_name))
|
|
|
|
+ new_brand = ots_name
|
|
|
|
+ log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
|
+ # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
|
|
+
|
|
|
|
+ if name_ots_id is not None:
|
|
|
|
+ brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
|
|
|
|
+
|
|
|
|
+ _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_NAME:new_brand,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
|
+ }
|
|
|
|
+ _dpd_brand = Document_product_dict(_d_brand)
|
|
|
|
+ # _dpd_brand.updateAlias(str(new_brand).lower())
|
|
|
|
+ if not _dpd_brand.exists_row(self.ots_client):
|
|
|
|
+ _dpd_brand.update_row(self.ots_client)
|
|
|
|
+
|
|
|
|
+ else:
|
|
|
|
+ pass
|
|
|
|
+ # #update alias
|
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
|
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
|
+ # if _flag:
|
|
|
|
+ # if _dpd.updateAlias(brand):
|
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
|
+
|
|
|
|
+ _find = True
|
|
|
|
+ break
|
|
|
|
|
|
if specs is not None and specs!="":
|
|
if specs is not None and specs!="":
|
|
|
|
|
|
@@ -432,10 +561,39 @@ class Product_Manager(Product_Dict_Manager):
|
|
}
|
|
}
|
|
_dpdi = Document_product_dict_interface(_d)
|
|
_dpdi = Document_product_dict_interface(_d)
|
|
_dpdi.update_row(self.ots_client)
|
|
_dpdi.update_row(self.ots_client)
|
|
|
|
+ if specs_ots_id is None:
|
|
|
|
+ _find = False
|
|
|
|
+ for specs in list_candidates:
|
|
|
|
+ if _find:
|
|
|
|
+ break
|
|
|
|
+ s = self.match_specs(specs)
|
|
|
|
+ if s is not None:
|
|
|
|
+ new_specs = s
|
|
|
|
+ if brand_ots_id is not None:
|
|
|
|
+ # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
|
+ specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
|
+
|
|
|
|
+ _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
|
+ DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
|
+ }
|
|
|
|
+ _dpd_specs = Document_product_dict(_d_specs)
|
|
|
|
+ # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
|
+ if not _dpd_specs.exists_row(self.ots_client):
|
|
|
|
+ _dpd_specs.update_row(self.ots_client)
|
|
|
|
+ _find = True
|
|
|
|
+ break
|
|
|
|
|
|
# judge if the product matches the standard product
|
|
# judge if the product matches the standard product
|
|
if name_ots_id is not None:
|
|
if name_ots_id is not None:
|
|
|
|
|
|
|
|
+ is_legal_data = True
|
|
#standard the product and same to document_product table
|
|
#standard the product and same to document_product table
|
|
_product = Document_product(tmp_dict)
|
|
_product = Document_product(tmp_dict)
|
|
docid = _product.getProperties().get(DOCUMENT_PRODUCT_DOCID)
|
|
docid = _product.getProperties().get(DOCUMENT_PRODUCT_DOCID)
|
|
@@ -445,11 +603,41 @@ class Product_Manager(Product_Dict_Manager):
|
|
unit_price = clean_product_unit_price(unit_price)
|
|
unit_price = clean_product_unit_price(unit_price)
|
|
quantity = clean_product_quantity(quantity)
|
|
quantity = clean_product_quantity(quantity)
|
|
|
|
|
|
|
|
+ total_price = _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)
|
|
|
|
+
|
|
_product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
|
|
_product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
|
|
_product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
|
|
_product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
|
|
- if isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)):
|
|
|
|
|
|
+
|
|
|
|
+ win_bid_price = _product.getProperties().get(DOCUMENT_PRODUCT_WIN_BID_PRICE)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ if isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)) and isinstance(total_price,(float,int)):
|
|
|
|
+ new_quantity = total_price/unit_price
|
|
|
|
+ if new_quantity!=quantity:
|
|
|
|
+ if new_quantity==total_price//unit_price:
|
|
|
|
+ quantity = int(new_quantity)
|
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
|
|
|
|
+ else:
|
|
|
|
+ is_legal_data = False
|
|
|
|
+ elif isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)):
|
|
total_price = float("%.2f"%(unit_price*quantity))
|
|
total_price = float("%.2f"%(unit_price*quantity))
|
|
_product.setValue(DOCUMENT_PRODUCT_TOTAL_PRICE,total_price,True)
|
|
_product.setValue(DOCUMENT_PRODUCT_TOTAL_PRICE,total_price,True)
|
|
|
|
+ elif isinstance(unit_price,(float,int)) and isinstance(total_price,(float,int)):
|
|
|
|
+ quantity = int(total_price//unit_price)
|
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
|
|
|
|
+ elif isinstance(quantity,(float,int)) and isinstance(total_price,(float,int)):
|
|
|
|
+ unit_price = float("%.2f"%(total_price/quantity))
|
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
|
|
|
|
+ elif isinstance(quantity,(float,int)) and quantity>10000:
|
|
|
|
+ is_legal_data = False
|
|
|
|
+
|
|
|
|
+ if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE),(float,int)) and isinstance(win_bid_price,(float,int)):
|
|
|
|
+ if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price:
|
|
|
|
+ is_legal_data = False
|
|
|
|
+
|
|
|
|
+ if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE),(float,int)) and _product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE)>100000000:
|
|
|
|
+ is_legal_data = False
|
|
|
|
+
|
|
|
|
|
|
new_id = self.get_product_id(docid,new_name,new_brand,new_specs,unit_price,quantity)
|
|
new_id = self.get_product_id(docid,new_name,new_brand,new_specs,unit_price,quantity)
|
|
|
|
|
|
@@ -481,7 +669,10 @@ class Product_Manager(Product_Dict_Manager):
|
|
if bid_filemd5s is not None:
|
|
if bid_filemd5s is not None:
|
|
_product.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
|
|
_product.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
|
|
|
|
|
|
- if self.dumplicate(_product):
|
|
|
|
|
|
+ if not is_legal_data:
|
|
|
|
+ _status = randint(501,550)
|
|
|
|
+
|
|
|
|
+ elif self.dumplicate(_product):
|
|
_status = randint(201,300)
|
|
_status = randint(201,300)
|
|
save_product_tmp.setValue(DOCUMENT_PRODUCT_TMP_NEW_ID,new_id,True)
|
|
save_product_tmp.setValue(DOCUMENT_PRODUCT_TMP_NEW_ID,new_id,True)
|
|
|
|
|