|
@@ -40,7 +40,6 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
self.set_id = set()
|
|
|
|
|
|
- self.area_set = get_area_set()
|
|
|
|
|
|
def get_product_id(self,docid,name,brand,specs,unit_price,quantity):
|
|
|
if name is None:
|
|
@@ -157,15 +156,16 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
brand_ots_id = None
|
|
|
specs_ots_id = None
|
|
|
if name is not None and name!="":
|
|
|
- name_vector = request_embedding(name)
|
|
|
+ name_vector = get_embedding_request(name)
|
|
|
if name_vector is not None:
|
|
|
Coll,_ = self.get_collection(NAME_GRADE)
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
+
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=60)
|
|
|
|
|
|
for _search in search_list:
|
|
|
- ots_id = _search.entity.get("standard_name_id")
|
|
|
- ots_name = _search.entity.get("standard_name")
|
|
|
- ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
+ ots_id = _search.get("standard_name_id")
|
|
|
+ ots_name = _search.get("standard_name")
|
|
|
+ ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(name,ots_name) or check_product(name,ots_name):
|
|
|
name_ots_id = ots_id
|
|
@@ -179,15 +179,15 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
break
|
|
|
if name_ots_id is None:
|
|
|
for name in list_candidates:
|
|
|
- name_vector = request_embedding(name)
|
|
|
+ name_vector = get_embedding_request(name)
|
|
|
if name_vector is not None:
|
|
|
Coll,_ = self.get_collection(NAME_GRADE)
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[name_vector],self.search_params,output_fields,limit=20)
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,name,NAME_GRADE,[name_vector],self.search_params,output_fields,limit=20)
|
|
|
|
|
|
for _search in search_list:
|
|
|
- ots_id = _search.entity.get("standard_name_id")
|
|
|
- ots_name = _search.entity.get("standard_name")
|
|
|
- ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
+ ots_id = _search.get("standard_name_id")
|
|
|
+ ots_name = _search.get("standard_name")
|
|
|
+ ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
if is_similar(name,ots_name) or check_product(name,ots_name):
|
|
|
name_ots_id = ots_id
|
|
@@ -203,101 +203,94 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
if brand is not None and brand!="":
|
|
|
|
|
|
- #check ots
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,BRAND_GRADE)
|
|
|
- ])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
- if total_count>0:
|
|
|
- new_brand = brand
|
|
|
- else:
|
|
|
- s_brand = brand
|
|
|
- l_brand = [brand]
|
|
|
- l_brand.append(clean_product_brand(s_brand))
|
|
|
- brand_ch = get_chinese_string(brand)
|
|
|
- l_brand.extend(brand_ch)
|
|
|
+ s_brand = brand
|
|
|
+ l_brand = [brand]
|
|
|
+ l_brand.append(clean_product_brand(s_brand))
|
|
|
+ brand_ch = get_chinese_string(brand)
|
|
|
+ l_brand.extend(brand_ch)
|
|
|
|
|
|
- _find = False
|
|
|
- for brand in l_brand:
|
|
|
+ _find = False
|
|
|
+ for brand in l_brand:
|
|
|
|
|
|
- brand_vector = request_embedding(brand)
|
|
|
- if brand_vector is not None:
|
|
|
- Coll,_ = self.get_collection(BRAND_GRADE)
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=60)
|
|
|
+ brand_vector = get_embedding_request(brand)
|
|
|
+ if brand_vector is not None:
|
|
|
+ Coll,_ = self.get_collection(BRAND_GRADE)
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=60)
|
|
|
|
|
|
- # log("search brand %s"%(brand))
|
|
|
- for _search in search_list:
|
|
|
+ # log("search brand %s"%(brand))
|
|
|
+ for _search in search_list:
|
|
|
|
|
|
- ots_id = _search.entity.get("standard_name_id")
|
|
|
- ots_name = _search.entity.get("standard_name")
|
|
|
- ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
+ ots_id = _search.get("standard_name_id")
|
|
|
+ ots_name = _search.get("standard_name")
|
|
|
+ ots_parent_id = _search.get("ots_parent_id")
|
|
|
|
|
|
- # log("check brand %s and %s"%(brand,ots_name))
|
|
|
- if is_similar(brand,ots_name) or check_brand(brand,ots_name):
|
|
|
+ # log("check brand %s and %s"%(brand,ots_name))
|
|
|
+ if is_similar(brand,ots_name) or check_brand(brand,ots_name):
|
|
|
|
|
|
- # log("check brand similar succeed:%s and %s"%(brand,ots_name))
|
|
|
- new_brand = ots_name
|
|
|
+ # log("check brand similar succeed:%s and %s"%(brand,ots_name))
|
|
|
|
|
|
- log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
- # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
|
-
|
|
|
- if name_ots_id is not None:
|
|
|
- brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
|
|
|
-
|
|
|
- _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_NAME:new_brand,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
- DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- }
|
|
|
- _dpd_brand = Document_product_dict(_d_brand)
|
|
|
- # _dpd_brand.updateAlias(str(new_brand).lower())
|
|
|
- if not _dpd_brand.exists_row(self.ots_client):
|
|
|
- _dpd_brand.update_row(self.ots_client)
|
|
|
-
|
|
|
- else:
|
|
|
- pass
|
|
|
- # #update alias
|
|
|
- # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
|
|
|
- # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
- # if _flag:
|
|
|
- # if _dpd.updateAlias(brand):
|
|
|
- # _dpd.update_row(self.ots_client)
|
|
|
-
|
|
|
- _find = True
|
|
|
- break
|
|
|
- else:
|
|
|
- # log("check brand similar failed:%s and %s"%(brand,ots_name))
|
|
|
- # add new brand?
|
|
|
- pass
|
|
|
- if _find:
|
|
|
- break
|
|
|
- if not _find:
|
|
|
- for brand in l_brand:
|
|
|
- if self.check_new_brand(brand):
|
|
|
- new_brand = clean_product_brand(brand)
|
|
|
- if new_brand=="":
|
|
|
+ if ots_name==new_name:
|
|
|
continue
|
|
|
- log("adding new brand %s"%(str(new_brand)))
|
|
|
- _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
|
|
|
- }
|
|
|
- dpdi = Document_product_dict_interface(_d_brand)
|
|
|
- dpdi.update_row(self.ots_client)
|
|
|
+ new_brand = ots_name
|
|
|
+
|
|
|
+ log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
+ # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
|
+
|
|
|
+ if name_ots_id is not None:
|
|
|
+ brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
|
|
|
+
|
|
|
+ _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_NAME:new_brand,
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(brand).lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
|
|
|
+ DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
+ DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
+ DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ }
|
|
|
+ _dpd_brand = Document_product_dict(_d_brand)
|
|
|
+ # _dpd_brand.updateAlias(str(new_brand).lower())
|
|
|
+ if not _dpd_brand.exists_row(self.ots_client):
|
|
|
+ _dpd_brand.update_row(self.ots_client)
|
|
|
+
|
|
|
+ else:
|
|
|
+ pass
|
|
|
+ # #update alias
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
+ # if _flag:
|
|
|
+ # if _dpd.updateAlias(brand):
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
+
|
|
|
+ _find = True
|
|
|
break
|
|
|
+ else:
|
|
|
+ # log("check brand similar failed:%s and %s"%(brand,ots_name))
|
|
|
+ # add new brand?
|
|
|
+ pass
|
|
|
+ if _find:
|
|
|
+ break
|
|
|
+ if not _find:
|
|
|
+ for brand in l_brand:
|
|
|
+ if self.check_new_brand(brand):
|
|
|
+ new_brand = clean_product_brand(brand)
|
|
|
+ if new_brand=="":
|
|
|
+ continue
|
|
|
+ log("adding new brand %s"%(str(new_brand)))
|
|
|
+ _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(brand).lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
|
|
|
+ }
|
|
|
+ dpdi = Document_product_dict_interface(_d_brand)
|
|
|
+ dpdi.update_row(self.ots_client)
|
|
|
+ break
|
|
|
|
|
|
if brand_ots_id is None:
|
|
|
_find = False
|
|
@@ -315,20 +308,17 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
start_time = time.time()
|
|
|
# brand_vector = request_embedding(brand)
|
|
|
brand_vector = get_embedding_request(brand)
|
|
|
- log("get embedding for brand %s takes %.4fs"%(brand,time.time()-start_time))
|
|
|
+ debug("get embedding for brand %s takes %.4fs"%(brand,time.time()-start_time))
|
|
|
if brand_vector is not None:
|
|
|
Coll,_ = self.get_collection(BRAND_GRADE)
|
|
|
start_time = time.time()
|
|
|
# search_list = search_embedding(Coll,embedding_index_name,[brand_vector],self.search_params,output_fields,limit=10)
|
|
|
- search_list = get_milvus_search(Coll,embedding_index_name,brand,[brand_vector],self.search_params,output_fields,limit=10)
|
|
|
- log("get search_list for brand %s takes %.4fs"%(brand,time.time()-start_time))
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,brand,BRAND_GRADE,[brand_vector],self.search_params,output_fields,limit=10)
|
|
|
+ debug("get search_list for brand %s takes %.4fs"%(brand,time.time()-start_time))
|
|
|
# log("search brand %s"%(brand))
|
|
|
for _search in search_list:
|
|
|
|
|
|
|
|
|
- # ots_id = _search.entity.get("standard_name_id")
|
|
|
- # ots_name = _search.entity.get("standard_name")
|
|
|
- # ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
ots_id = _search.get("standard_name_id")
|
|
|
ots_name = _search.get("standard_name")
|
|
|
ots_parent_id = _search.get("ots_parent_id")
|
|
@@ -336,7 +326,10 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
# log("check brand %s and %s"%(brand,ots_name))
|
|
|
if is_similar(brand,ots_name,_radio=95) or check_brand(brand,ots_name):
|
|
|
# log("check brand similar succeed:%s and %s"%(brand,ots_name))
|
|
|
+ if ots_name==new_name:
|
|
|
+ continue
|
|
|
new_brand = ots_name
|
|
|
+
|
|
|
log("checking brand %s succeed %s"%(brand,new_brand))
|
|
|
# judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
|
|
|
|
|
@@ -372,265 +365,172 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
if specs is not None and specs!="":
|
|
|
|
|
|
- #check ots
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
|
|
|
- ])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
- if total_count>0:
|
|
|
- new_specs = specs
|
|
|
-
|
|
|
- if brand_ots_id is not None:
|
|
|
- # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
- specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
-
|
|
|
- _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
- DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- }
|
|
|
- _dpd_specs = Document_product_dict(_d_specs)
|
|
|
- # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
- if not _dpd_specs.exists_row(self.ots_client):
|
|
|
- _dpd_specs.update_row(self.ots_client)
|
|
|
- # user interface to add
|
|
|
- else:
|
|
|
- debug("getting sepcs %s"%(specs))
|
|
|
- list_specs = []
|
|
|
- c_specs = clean_product_specs(specs)
|
|
|
- list_specs.append(c_specs)
|
|
|
-
|
|
|
- for s in re.split("[\u4e00-\u9fff]",specs):
|
|
|
- if s!="" and len(s)>4:
|
|
|
- list_specs.append(s)
|
|
|
- similar_flag = None
|
|
|
- _index = 0
|
|
|
- break_flag = False
|
|
|
- for c_specs in list_specs:
|
|
|
- if break_flag:
|
|
|
- break
|
|
|
- _index += 1
|
|
|
- specs_vector = request_embedding(c_specs)
|
|
|
|
|
|
- if specs_vector is not None:
|
|
|
- Coll,_ = self.get_collection(SPECS_GRADE)
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[specs_vector],self.search_params,output_fields,limit=60)
|
|
|
+ debug("getting sepcs %s"%(specs))
|
|
|
+ list_specs = []
|
|
|
+ c_specs = clean_product_specs(specs)
|
|
|
+ list_specs.append(c_specs)
|
|
|
|
|
|
- for _search in search_list:
|
|
|
-
|
|
|
- ots_id = _search.entity.get("standard_name_id")
|
|
|
- ots_name = _search.entity.get("standard_name")
|
|
|
- ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
-
|
|
|
- debug("checking specs %s and %s"%(specs,ots_name))
|
|
|
- if is_similar(specs,ots_name):
|
|
|
- # log("specs is_similar")
|
|
|
- if check_specs(c_specs,ots_name):
|
|
|
- break_flag = True
|
|
|
- new_specs = ots_name
|
|
|
- log("check_specs %s succeed %s"%(specs,new_specs))
|
|
|
-
|
|
|
- # to update the document_product_dict which is builded for search
|
|
|
- if brand_ots_id is not None:
|
|
|
- # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
- specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
-
|
|
|
- _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
- DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- }
|
|
|
- _dpd_specs = Document_product_dict(_d_specs)
|
|
|
- # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
- if not _dpd_specs.exists_row(self.ots_client):
|
|
|
- _dpd_specs.update_row(self.ots_client)
|
|
|
- # user interface to add
|
|
|
- else:
|
|
|
- pass
|
|
|
- # #update alias
|
|
|
- # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
|
|
|
- # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
- # if _flag:
|
|
|
- # if _dpd.updateAlias(specs):
|
|
|
- # _dpd.update_row(self.ots_client)
|
|
|
- break
|
|
|
- else:
|
|
|
- if _index == 1:
|
|
|
- similar_flag = True
|
|
|
-
|
|
|
- if not break_flag and similar_flag:
|
|
|
- debug("check_specs failed")
|
|
|
- new_specs = clean_product_specs(specs)
|
|
|
- # insert into document_product_dict a new record
|
|
|
- # to update the document_product_dict which is builded for search
|
|
|
- # add new specs
|
|
|
- if brand_ots_id is not None and name_ots_id is not None and len(specs)<MAX_NAME_LENGTH:
|
|
|
- # _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
- # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
|
|
|
- # DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- # DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(new_specs.lower()),
|
|
|
- # DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- # DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- # DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- # DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- # DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- # }
|
|
|
- # _dpd = Document_product_dict(_d)
|
|
|
- # # _dpd.updateAlias(new_specs)
|
|
|
- # _dpd.update_row(self.ots_client)
|
|
|
-
|
|
|
- # user interface to add
|
|
|
- log("adding new specs %s"%(new_specs))
|
|
|
- _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(specs),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
|
|
|
- }
|
|
|
- _dpdi = Document_product_dict_interface(_d)
|
|
|
- _dpdi.update_row(self.ots_client)
|
|
|
- else:
|
|
|
- # add new specs?
|
|
|
- debug("specs not similar")
|
|
|
- if is_legal_specs(specs) and len(specs)<MAX_NAME_LENGTH:
|
|
|
- debug("is_legal_specs")
|
|
|
- new_specs = clean_product_specs(specs)
|
|
|
- # insert into document_product_dict a new record
|
|
|
- # to update the document_product_dict which is builded for search
|
|
|
- # add new specs
|
|
|
- if brand_ots_id is not None and name_ots_id is not None:
|
|
|
- _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
-
|
|
|
- # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
|
|
|
- # DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- # DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
|
|
|
- # DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- # DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- # DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- # DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- # DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- # }
|
|
|
- # _dpd = Document_product_dict(_d)
|
|
|
- # _dpd.update_row(self.ots_client)
|
|
|
-
|
|
|
- log("adding new specs %s"%(new_specs))
|
|
|
- # user interface to add
|
|
|
- _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(new_specs.lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
|
|
|
- }
|
|
|
- _dpdi = Document_product_dict_interface(_d)
|
|
|
- _dpdi.update_row(self.ots_client)
|
|
|
+ for s in re.split("[\u4e00-\u9fff]",specs):
|
|
|
+ if s!="" and len(s)>4:
|
|
|
+ list_specs.append(s)
|
|
|
+ similar_flag = None
|
|
|
+ _index = 0
|
|
|
+ break_flag = False
|
|
|
+ for c_specs in list_specs:
|
|
|
+ if break_flag:
|
|
|
+ break
|
|
|
+ _index += 1
|
|
|
+ specs_vector = get_embedding_request(c_specs)
|
|
|
+
|
|
|
+ if specs_vector is not None:
|
|
|
+ Coll,_ = self.get_collection(SPECS_GRADE)
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=60)
|
|
|
+
|
|
|
+ for _search in search_list:
|
|
|
+
|
|
|
+ ots_id = _search.get("standard_name_id")
|
|
|
+ ots_name = _search.get("standard_name")
|
|
|
+ ots_parent_id = _search.get("ots_parent_id")
|
|
|
+
|
|
|
+ debug("checking specs %s and %s"%(specs,ots_name))
|
|
|
+ if is_similar(specs,ots_name):
|
|
|
+ # log("specs is_similar")
|
|
|
+ if check_specs(c_specs,ots_name):
|
|
|
+ break_flag = True
|
|
|
+ new_specs = ots_name
|
|
|
+ log("check_specs %s succeed %s"%(specs,new_specs))
|
|
|
+
|
|
|
+ # to update the document_product_dict which is builded for search
|
|
|
+ if brand_ots_id is not None:
|
|
|
+ # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
+ specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
+
|
|
|
+ _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
+ DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
+ DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
+ DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ }
|
|
|
+ _dpd_specs = Document_product_dict(_d_specs)
|
|
|
+ # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
+ if not _dpd_specs.exists_row(self.ots_client):
|
|
|
+ _dpd_specs.update_row(self.ots_client)
|
|
|
+ # user interface to add
|
|
|
+ else:
|
|
|
+ pass
|
|
|
+ # #update alias
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
+ # if _flag:
|
|
|
+ # if _dpd.updateAlias(specs):
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ if _index == 1:
|
|
|
+ similar_flag = True
|
|
|
+
|
|
|
+ # add new specs?
|
|
|
+ debug("specs not similar")
|
|
|
+ if is_legal_specs(specs) and len(specs)<MAX_NAME_LENGTH and len(specs)>=5:
|
|
|
+ debug("is_legal_specs")
|
|
|
+ new_specs = clean_product_specs(specs)
|
|
|
+ # insert into document_product_dict a new record
|
|
|
+ # to update the document_product_dict which is builded for search
|
|
|
+ # add new specs
|
|
|
+ if brand_ots_id is not None and name_ots_id is not None:
|
|
|
+ _md5 = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
+
|
|
|
+ # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
|
|
|
+ # DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
+ # DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
|
|
|
+ # DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
+ # DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
+ # DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
+ # DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ # DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ # }
|
|
|
+ # _dpd = Document_product_dict(_d)
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
+
|
|
|
+ log("adding new specs %s"%(new_specs))
|
|
|
+ # user interface to add
|
|
|
+ _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(new_specs.lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
|
|
|
+ }
|
|
|
+ _dpdi = Document_product_dict_interface(_d)
|
|
|
+ _dpdi.update_row(self.ots_client)
|
|
|
if specs_ots_id is None:
|
|
|
_find = False
|
|
|
for specs in list_candidates:
|
|
|
if _find:
|
|
|
break
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
|
|
|
- ])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
- if total_count>0:
|
|
|
- new_specs = specs
|
|
|
- _find = True
|
|
|
- if brand_ots_id is not None:
|
|
|
- # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
- specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
-
|
|
|
- _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
- DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- }
|
|
|
- _dpd_specs = Document_product_dict(_d_specs)
|
|
|
- # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
- if not _dpd_specs.exists_row(self.ots_client):
|
|
|
- _dpd_specs.update_row(self.ots_client)
|
|
|
- # user interface to add
|
|
|
- else:
|
|
|
- debug("getting sepcs %s"%(specs))
|
|
|
- list_specs = []
|
|
|
- c_specs = clean_product_specs(specs)
|
|
|
- list_specs.append(c_specs)
|
|
|
-
|
|
|
- for s in re.split("[\u4e00-\u9fff]",specs):
|
|
|
- if s!="" and len(s)>4:
|
|
|
- list_specs.append(s)
|
|
|
- similar_flag = None
|
|
|
- _index = 0
|
|
|
- for c_specs in list_specs:
|
|
|
- if _find:
|
|
|
- break
|
|
|
- _index += 1
|
|
|
- specs_vector = request_embedding(c_specs)
|
|
|
|
|
|
- if specs_vector is not None:
|
|
|
- Coll,_ = self.get_collection(SPECS_GRADE)
|
|
|
- search_list = search_embedding(Coll,embedding_index_name,[specs_vector],self.search_params,output_fields,limit=20)
|
|
|
+ debug("getting sepcs %s"%(specs))
|
|
|
+ list_specs = []
|
|
|
+ c_specs = clean_product_specs(specs)
|
|
|
+ list_specs.append(c_specs)
|
|
|
+
|
|
|
+ for s in re.split("[\u4e00-\u9fff]",specs):
|
|
|
+ if s!="" and len(s)>4:
|
|
|
+ list_specs.append(s)
|
|
|
+ similar_flag = None
|
|
|
+ _index = 0
|
|
|
+ for c_specs in list_specs:
|
|
|
+ if _find:
|
|
|
+ break
|
|
|
+ _index += 1
|
|
|
+ specs_vector = get_embedding_request(c_specs)
|
|
|
+
|
|
|
+ if specs_vector is not None:
|
|
|
+ Coll,_ = self.get_collection(SPECS_GRADE)
|
|
|
+ search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=20)
|
|
|
|
|
|
- for _search in search_list:
|
|
|
- if _find:
|
|
|
- break
|
|
|
+ for _search in search_list:
|
|
|
+ if _find:
|
|
|
+ break
|
|
|
|
|
|
- ots_id = _search.entity.get("standard_name_id")
|
|
|
- ots_name = _search.entity.get("standard_name")
|
|
|
- ots_parent_id = _search.entity.get("ots_parent_id")
|
|
|
-
|
|
|
- debug("checking specs %s and %s"%(specs,ots_name))
|
|
|
- if is_similar(specs,ots_name):
|
|
|
- # log("specs is_similar")
|
|
|
- if check_specs(c_specs,ots_name):
|
|
|
- break_flag = True
|
|
|
- new_specs = ots_name
|
|
|
- if brand_ots_id is not None:
|
|
|
- # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
- specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
-
|
|
|
- _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
- DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
- DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
- DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
- DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
- DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
- }
|
|
|
- _dpd_specs = Document_product_dict(_d_specs)
|
|
|
- # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
- if not _dpd_specs.exists_row(self.ots_client):
|
|
|
- _dpd_specs.update_row(self.ots_client)
|
|
|
- _find = True
|
|
|
- break
|
|
|
+ ots_id = _search.get("standard_name_id")
|
|
|
+ ots_name = _search.get("standard_name")
|
|
|
+ ots_parent_id = _search.get("ots_parent_id")
|
|
|
+
|
|
|
+ debug("checking specs %s and %s"%(specs,ots_name))
|
|
|
+ if is_similar(specs,ots_name):
|
|
|
+ # log("specs is_similar")
|
|
|
+ if check_specs(c_specs,ots_name):
|
|
|
+ break_flag = True
|
|
|
+ new_specs = ots_name
|
|
|
+ if brand_ots_id is not None:
|
|
|
+ # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
|
|
|
+ specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
+
|
|
|
+ _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(specs).lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
+ DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
+ DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
|
+ DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
|
|
|
+ DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
+ }
|
|
|
+ _dpd_specs = Document_product_dict(_d_specs)
|
|
|
+ # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
+ if not _dpd_specs.exists_row(self.ots_client):
|
|
|
+ _dpd_specs.update_row(self.ots_client)
|
|
|
+ _find = True
|
|
|
+ break
|
|
|
|
|
|
# judge if the product matches the standard product
|
|
|
if name_ots_id is not None:
|
|
@@ -681,7 +581,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
is_legal_data = False
|
|
|
|
|
|
if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE),(float,int)) and isinstance(win_bid_price,(float,int)):
|
|
|
- if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price*10:
|
|
|
+ if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price*10 and win_bid_price>0:
|
|
|
is_legal_data = False
|
|
|
|
|
|
if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE),(float,int)) and _product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE)>100000000:
|
|
@@ -737,65 +637,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
save_product_tmp.update_row(self.ots_client)
|
|
|
|
|
|
def check_new_brand(self,brand):
|
|
|
-
|
|
|
- _search = re.search("品牌[::;;](?P<brand>.{2,8}?)([.。、;::]|规格|型号|生产厂家|厂家)",brand)
|
|
|
- if _search is not None:
|
|
|
- brand = _search.groupdict().get("brand")
|
|
|
- if brand is None or len(brand)<2:
|
|
|
- return False
|
|
|
- # check whether this brand exists in interface and action is delete
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_NAME,brand),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE,BRAND_GRADE),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION,"delete")
|
|
|
- ])
|
|
|
-
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict_interface","document_product_dict_interface_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
- if total_count>0:
|
|
|
- return False
|
|
|
-
|
|
|
- # check whether this brand exists in dict and grade=name_grade or grade=specs_grade
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand),
|
|
|
- BoolQuery(should_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,NAME_GRADE),
|
|
|
- TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE)
|
|
|
- ])
|
|
|
-
|
|
|
- ])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_dict","document_product_dict_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
- if total_count>0:
|
|
|
- return False
|
|
|
-
|
|
|
- # check the area+brand type
|
|
|
- _f = is_area_brand(brand,self.area_set)
|
|
|
- if _f==1:
|
|
|
- return True
|
|
|
- elif _f==2:
|
|
|
- return False
|
|
|
-
|
|
|
- # check the company type
|
|
|
- _d = {ENTERPRISE_NAME:brand}
|
|
|
- _ent = Enterprise(_d)
|
|
|
- if _ent.exists_row(self.ots_client):
|
|
|
- return True
|
|
|
-
|
|
|
- # check the group count and char
|
|
|
- bool_query = BoolQuery(must_queries=[
|
|
|
- TermQuery(DOCUMENT_PRODUCT_TMP_BRAND,brand)
|
|
|
- ])
|
|
|
- rows,next_token,total_count,is_all_succeed = self.ots_client.search("document_product_temp","document_product_temp_index",
|
|
|
- SearchQuery(bool_query,get_total_count=True))
|
|
|
-
|
|
|
- if total_count>=5:
|
|
|
- new_brand = re.sub("[^\u4e00-\u9fff]",'',brand)
|
|
|
- if re.search("详见|无|国产|null|其他|详细|废标|[0-9/]|品牌|文件",brand) is None and len(brand)<=8:
|
|
|
- return True
|
|
|
-
|
|
|
- # extract the brand
|
|
|
- # "品牌[::]?(<brand>.{2,5}([.。、::]|型号|生产厂家|厂家))"
|
|
|
+ return is_legal_brand(self.ots_client,brand)
|
|
|
|
|
|
|
|
|
|
|
@@ -1004,21 +846,24 @@ def fix_product_data():
|
|
|
# delete document_product and change the record status to 1 in document_product_temp which id=original id
|
|
|
:return:
|
|
|
'''
|
|
|
+ table_name = "document_product_temp"
|
|
|
+ table_index = "document_product_temp_index"
|
|
|
+ columns = [DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE]
|
|
|
ots_client = getConnect_ots()
|
|
|
bool_query = BoolQuery(must_queries=[
|
|
|
- # RangeQuery("status",1)
|
|
|
- TermQuery("docid",246032980)
|
|
|
+ RangeQuery("status",501),
|
|
|
+ # TermQuery("docid",246032980)
|
|
|
])
|
|
|
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search("document_product","document_product_index",
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
|
|
|
|
|
|
list_rows = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
- rows,next_token,total_count,is_all_succeed = ots_client.search('document_product','document_product_index',
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_rows.extend(getRow_ots(rows))
|
|
|
print("%d/%d"%(len(list_rows),total_count))
|
|
|
# if len(list_rows)>10000:
|
|
@@ -1071,9 +916,14 @@ def fix_product_data():
|
|
|
dp.delete_row(ots_client)
|
|
|
|
|
|
def handle(item,result_queue):
|
|
|
- print("handle")
|
|
|
+ win_bid_price = item.get(DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE,1)
|
|
|
+ if win_bid_price==0:
|
|
|
+ dpt = Document_product_tmp(item)
|
|
|
+ dpt.setValue(DOCUMENT_PRODUCT_TMP_STATUS,1,True)
|
|
|
+ dpt.update_row(ots_client)
|
|
|
+
|
|
|
|
|
|
- mt = MultiThreadHandler(task_queue,deleteAndReprocess,None,30,1)
|
|
|
+ mt = MultiThreadHandler(task_queue,handle,None,30,1)
|
|
|
mt.run()
|
|
|
|
|
|
def test_check_brand():
|