|
@@ -9,6 +9,8 @@ from BaseDataMaintenance.maintenance.product.productUtils import *
|
|
|
from BaseDataMaintenance.model.ots.document_product_tmp import *
|
|
|
from BaseDataMaintenance.model.ots.document_product import *
|
|
|
from BaseDataMaintenance.model.ots.document_product_dict import *
|
|
|
+from BaseDataMaintenance.model.ots.document import *
|
|
|
+from BaseDataMaintenance.model.ots.attachment import *
|
|
|
|
|
|
from tablestore import *
|
|
|
|
|
@@ -162,11 +164,11 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
name_ots_id = ots_id
|
|
|
new_name = ots_name
|
|
|
|
|
|
- #update alias of name
|
|
|
- _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
|
|
|
- _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
- if _flag and _dpd.updateAlias(name):
|
|
|
- _dpd.update_row(self.ots_client)
|
|
|
+ # #update alias of name
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
+ # if _flag and _dpd.updateAlias(name):
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
break
|
|
|
if name_ots_id is not None:
|
|
|
if brand is not None and brand!="":
|
|
@@ -212,16 +214,17 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
}
|
|
|
_dpd_brand = Document_product_dict(_d_brand)
|
|
|
- _dpd_brand.updateAlias(str(new_brand).lower())
|
|
|
+ # _dpd_brand.updateAlias(str(new_brand).lower())
|
|
|
if not _dpd_brand.exists_row(self.ots_client):
|
|
|
_dpd_brand.update_row(self.ots_client)
|
|
|
else:
|
|
|
- #update alias
|
|
|
- _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
|
|
|
- _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
- if _flag:
|
|
|
- if _dpd.updateAlias(brand):
|
|
|
- _dpd.update_row(self.ots_client)
|
|
|
+ pass
|
|
|
+ # #update alias
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
+ # if _flag:
|
|
|
+ # if _dpd.updateAlias(brand):
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
|
|
|
_find = True
|
|
|
break
|
|
@@ -235,7 +238,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
|
|
|
if specs is not None and specs!="":
|
|
|
specs_vector = request_embedding(specs)
|
|
|
- log("getting sepcs %s"%(specs))
|
|
|
+ debug("getting sepcs %s"%(specs))
|
|
|
if specs_vector is not None:
|
|
|
Coll,_ = self.get_collection(SPECS_GRADE)
|
|
|
search_list = search_embedding(Coll,embedding_index_name,[specs_vector],self.search_params,output_fields,limit=60)
|
|
@@ -268,16 +271,17 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
}
|
|
|
_dpd_specs = Document_product_dict(_d_specs)
|
|
|
- _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
+ # _dpd_specs.updateAlias(str(new_specs).lower())
|
|
|
if not _dpd_specs.exists_row(self.ots_client):
|
|
|
_dpd_specs.update_row(self.ots_client)
|
|
|
else:
|
|
|
- #update alias
|
|
|
- _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
|
|
|
- _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
- if _flag:
|
|
|
- if _dpd.updateAlias(specs):
|
|
|
- _dpd.update_row(self.ots_client)
|
|
|
+ pass
|
|
|
+ # #update alias
|
|
|
+ # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
|
|
|
+ # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
|
|
|
+ # if _flag:
|
|
|
+ # if _dpd.updateAlias(specs):
|
|
|
+ # _dpd.update_row(self.ots_client)
|
|
|
else:
|
|
|
# log("check_specs failed")
|
|
|
new_specs = clean_product_specs(specs)
|
|
@@ -288,7 +292,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
_md5 = get_document_product_dict_id(brand_ots_id,new_specs)
|
|
|
_d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
|
|
|
DOCUMENT_PRODUCT_DICT_NAME:new_specs,
|
|
|
- DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(specs.lower()),
|
|
|
+ DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(new_specs.lower()),
|
|
|
DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
|
|
|
DOCUMENT_PRODUCT_DICT_STATUS:1,
|
|
|
DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
|
|
@@ -296,7 +300,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
|
|
|
}
|
|
|
_dpd = Document_product_dict(_d)
|
|
|
- _dpd.updateAlias(new_specs)
|
|
|
+ # _dpd.updateAlias(new_specs)
|
|
|
_dpd.update_row(self.ots_client)
|
|
|
break
|
|
|
|
|
@@ -340,7 +344,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
_product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
|
|
|
_product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
|
|
|
if isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)):
|
|
|
- total_price = "%.2f"%(unit_price*quantity)
|
|
|
+ total_price = float("%.2f"%(unit_price*quantity))
|
|
|
_product.setValue(DOCUMENT_PRODUCT_TOTAL_PRICE,total_price,True)
|
|
|
|
|
|
new_id = self.get_product_id(docid,new_name,new_brand,new_specs,unit_price,quantity)
|
|
@@ -363,6 +367,15 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
_product.setValue(DOCUMENT_PRODUCT_BRANDSPECS,"%s&&%s"%(new_brand,new_specs),True)
|
|
|
_product.setValue(DOCUMENT_PRODUCT_FULL_NAME,"%s&&%s&&%s"%(new_name,new_brand,new_specs),True)
|
|
|
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_CREATE_TIME,getCurrent_date(format="%Y-%m-%d %H:%M:%S"),True)
|
|
|
+
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_NAME,document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,""),True)
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_BRAND,document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,""),True)
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_SPECS,document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,""),True)
|
|
|
+
|
|
|
+ bid_filemd5s = self.get_bid_filemd5s(docid,self.ots_client)
|
|
|
+ if bid_filemd5s is not None:
|
|
|
+ _product.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
|
|
|
|
|
|
if self.dumplicate(_product):
|
|
|
_status = randint(201,300)
|
|
@@ -379,6 +392,54 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
save_product_tmp.setValue(DOCUMENT_PRODUCT_TMP_STATUS,_status,True)
|
|
|
save_product_tmp.update_row(self.ots_client)
|
|
|
|
|
|
+ @staticmethod
|
|
|
+ def get_bid_filemd5s(docid,ots_client):
|
|
|
+
|
|
|
+ bool_query = BoolQuery(must_queries=[
|
|
|
+ TermQuery("docids",docid)
|
|
|
+ ])
|
|
|
+ rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
|
|
|
+ SearchQuery(bool_query,limit=10),
|
|
|
+ columns_to_get=ColumnsToGet(["docids"],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ list_data = getRow_ots(rows)
|
|
|
+
|
|
|
+ list_bid_filemd5s = []
|
|
|
+ set_docids = set([docid])
|
|
|
+ set_md5s = set()
|
|
|
+
|
|
|
+ for _d in list_data:
|
|
|
+ try:
|
|
|
+ docids = _d.get("docids","")
|
|
|
+ for _id in docids.split(","):
|
|
|
+ set_docids.add(int(_id))
|
|
|
+ except Exception as e:
|
|
|
+ pass
|
|
|
+ list_docids = list(set_docids)
|
|
|
+ for _docid in list_docids:
|
|
|
+ _d = {document_partitionkey:_docid%500+1,
|
|
|
+ document_docid:_docid}
|
|
|
+ _doc = Document(_d)
|
|
|
+ _doc.fix_columns(ots_client,[document_attachment_path],True)
|
|
|
+ page_attachments = _doc.getProperties().get(document_attachment_path)
|
|
|
+ if page_attachments is not None and page_attachments!="":
|
|
|
+ attachments = json.loads(page_attachments)
|
|
|
+ for _a in attachments:
|
|
|
+ _filemd5 = _a.get(document_attachment_path_filemd5)
|
|
|
+ if _filemd5 in set_md5s:
|
|
|
+ continue
|
|
|
+ set_md5s.add(_filemd5)
|
|
|
+ _da = {attachment_filemd5:_filemd5}
|
|
|
+ _attach = attachment(_da)
|
|
|
+ _attach.fix_columns(ots_client,[attachment_classification],True)
|
|
|
+ if _attach.getProperties().get(attachment_classification,"")=="招标文件":
|
|
|
+ list_bid_filemd5s.append(_filemd5)
|
|
|
+ if len(list_bid_filemd5s)==0:
|
|
|
+ return None
|
|
|
+ return ",".join(list(set(list_bid_filemd5s)))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def get_value_count(self,name,brand,specs,unit_price,quantity):
|
|
|
|
|
|
value_count = 0
|
|
@@ -432,7 +493,7 @@ class Product_Manager(Product_Dict_Manager):
|
|
|
return list_data[0].get(DOCUMENT_PRODUCT_ID),1
|
|
|
|
|
|
if len(name)>0 and len(brand)>0 and len(supplier)>0 and len(tenderee)>0:
|
|
|
- log("docid %s name %s page_time_before %s page_time_after %s brand %s supplier %s tenderee %s"%(str(docid),name,page_time_before,page_time_after,brand,supplier,tenderee))
|
|
|
+ # log("docid %s name %s page_time_before %s page_time_after %s brand %s supplier %s tenderee %s"%(str(docid),name,page_time_before,page_time_after,brand,supplier,tenderee))
|
|
|
bool_query = BoolQuery(must_queries=[TermQuery("name",name),
|
|
|
RangeQuery("page_time",page_time_before,page_time_after,True,True),
|
|
|
TermQuery(DOCUMENT_PRODUCT_BRAND,brand),
|
|
@@ -531,22 +592,24 @@ def start_process_product():
|
|
|
pm.start_processing()
|
|
|
|
|
|
def fix_product_data():
|
|
|
+
|
|
|
'''
|
|
|
# delete document_product and change the record status to 1 in document_product_temp which id=original id
|
|
|
:return:
|
|
|
'''
|
|
|
ots_client = getConnect_ots()
|
|
|
- bool_query = BoolQuery(must_queries=[RangeQuery("status",1)])
|
|
|
+ bool_query = BoolQuery(must_queries=[TermQuery("docid",309258275)
|
|
|
+ ])
|
|
|
|
|
|
rows,next_token,total_count,is_all_succeed = ots_client.search("document_product","document_product_index",
|
|
|
SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID,DOCUMENT_PRODUCT_PROJECT_NAME],return_type=ColumnReturnType.SPECIFIED))
|
|
|
|
|
|
list_rows = getRow_ots(rows)
|
|
|
while next_token:
|
|
|
rows,next_token,total_count,is_all_succeed = ots_client.search('document_product','document_product_index',
|
|
|
SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
|
|
|
- columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID],return_type=ColumnReturnType.SPECIFIED))
|
|
|
+ columns_to_get=ColumnsToGet([DOCUMENT_PRODUCT_ORIGINAL_ID,DOCUMENT_PRODUCT_DOCID,DOCUMENT_PRODUCT_PROJECT_NAME],return_type=ColumnReturnType.SPECIFIED))
|
|
|
list_rows.extend(getRow_ots(rows))
|
|
|
|
|
|
task_queue = Queue()
|
|
@@ -555,13 +618,42 @@ def fix_product_data():
|
|
|
def handle(item,result_queue):
|
|
|
original_id = item.get(DOCUMENT_PRODUCT_ORIGINAL_ID)
|
|
|
|
|
|
+ # # delete data and rerun
|
|
|
+ # _d = {DOCUMENT_PRODUCT_TMP_ID:original_id,DOCUMENT_PRODUCT_TMP_STATUS:1}
|
|
|
+ # dpt = Document_product_tmp(_d)
|
|
|
+ # dpt.update_row(ots_client)
|
|
|
+ #
|
|
|
+ # _d = {DOCUMENT_PRODUCT_ID:item.get(DOCUMENT_PRODUCT_ID)}
|
|
|
+ # dp = Document_product(_d)
|
|
|
+ # dp.delete_row(ots_client)
|
|
|
+
|
|
|
+
|
|
|
_d = {DOCUMENT_PRODUCT_TMP_ID:original_id,DOCUMENT_PRODUCT_TMP_STATUS:1}
|
|
|
dpt = Document_product_tmp(_d)
|
|
|
- dpt.update_row(ots_client)
|
|
|
+ dpt.fix_columns(ots_client,["name","brand","specs"],True)
|
|
|
|
|
|
_d = {DOCUMENT_PRODUCT_ID:item.get(DOCUMENT_PRODUCT_ID)}
|
|
|
dp = Document_product(_d)
|
|
|
- dp.delete_row(ots_client)
|
|
|
+
|
|
|
+ #fix the project_code and original_name and bidi_filemd5s
|
|
|
+ docid = int(item.get(DOCUMENT_PRODUCT_DOCID))
|
|
|
+ partitionkey = docid%500+1
|
|
|
+ project_name = item.get(DOCUMENT_PRODUCT_PROJECT_NAME,"")
|
|
|
+ if project_name=="":
|
|
|
+ #fix project_name
|
|
|
+ _doc = Document({"partitionkey":partitionkey,
|
|
|
+ "docid":docid})
|
|
|
+ _doc.fix_columns(ots_client,["doctitle"],True)
|
|
|
+ dp.setValue(DOCUMENT_PRODUCT_DOCTITLE,_doc.getProperties().get("doctitle"),True)
|
|
|
+ bid_filemd5s = Product_Manager.get_bid_filemd5s(docid,ots_client)
|
|
|
+ if bid_filemd5s is not None:
|
|
|
+ dp.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
|
|
|
+
|
|
|
+ dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_NAME,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,""),True)
|
|
|
+ dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_BRAND,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,""),True)
|
|
|
+ dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_SPECS,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,""),True)
|
|
|
+ dp.update_row(ots_client)
|
|
|
+
|
|
|
|
|
|
mt = MultiThreadHandler(task_queue,handle,None,30,1)
|
|
|
mt.run()
|
|
@@ -575,4 +667,10 @@ def test():
|
|
|
if __name__ == '__main__':
|
|
|
|
|
|
# start_process_product()
|
|
|
- print(getMD5('11936c56f2dd1426764e317ca2e8e1a7'+'&&鱼跃'))
|
|
|
+ # print(getMD5('11936c56f2dd1426764e317ca2e8e1a7'+'&&鱼跃'))
|
|
|
+ test()
|
|
|
+ print(Product_Manager.get_bid_filemd5s(174802483,getConnect_ots()))
|
|
|
+ name = "一"
|
|
|
+ ots_name = "一氧化碳分析仪"
|
|
|
+ print(is_similar(name,ots_name),check_product(name,ots_name))
|
|
|
+ print(is_legal_specs('SCM-A/SB(0.18D)'))
|