document_product_dict.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. from BaseDataMaintenance.model.ots.BaseModel import BaseModel
# Property (column) keys of a product-dictionary row. The string values are
# the literal keys used at runtime, so they must not be altered.
DOCUMENT_PRODUCT_DICT_ID = "id"  # same name that getPrimary_keys() reports
DOCUMENT_PRODUCT_DICT_NAME = "name"
DOCUMENT_PRODUCT_DICT_ALIAS = "alias"  # "&&"-joined alias string, see updateAlias()
DOCUMENT_PRODUCT_DICT_GRADE = "grade"
DOCUMENT_PRODUCT_DICT_STATUS = "status"
DOCUMENT_PRODUCT_DICT_PARENT_ID = "parent_id"
DOCUMENT_PRODUCT_DICT_CREATE_TIME = "create_time"
DOCUMENT_PRODUCT_DICT_UPDATE_TIME = "update_time"
# NOTE(review): "synchonized" is misspelled, but both the constant name and
# its value are runtime identifiers -- presumably stored column data depends
# on this exact spelling; confirm before renaming.
DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED = "is_synchonized"
DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS = "standard_alias"
DOCUMENT_PRODUCT_DICT_REMOVE_WORDS = "remove_words"
DOCUMENT_PRODUCT_DICT_LEVEL = "level"

# Not referenced in this part of the file; presumably the delimiter between
# entries of the "standard_alias" value (distinct from the "&&" used for
# "alias") -- TODO confirm against callers.
DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS_SEPARATOR = "|"

# Names are cut to this many characters by get_milvus_standard_name().
MAX_NAME_LENGTH = 300

# Table name assigned to every Document_product_dict instance.
Document_product_dict_table_name = "document_product_dict2"
  17. class Document_product_dict(BaseModel):
  18. def __init__(self,_dict):
  19. BaseModel.__init__(self)
  20. for k,v in _dict.items():
  21. self.setValue(k,v,True)
  22. self.table_name = Document_product_dict_table_name
  23. def getPrimary_keys(self):
  24. return ["id"]
  25. def updateAlias(self,name):
  26. name = str(name).lower()
  27. alias = self.getProperties().get(DOCUMENT_PRODUCT_DICT_ALIAS,"")
  28. l_alias = alias.split("&&")
  29. if name not in set(l_alias):
  30. alias+="&&%s"%name
  31. self.setValue(DOCUMENT_PRODUCT_DICT_ALIAS,alias,True)
  32. return True
  33. return False
  34. from BaseDataMaintenance.common.documentFingerprint import getMD5
  35. def get_document_product_dict_id(parent_md5,name):
  36. return getMD5(parent_md5+"&&%s"%name)
  37. def get_document_product_dict_standard_alias_id(name):
  38. return get_milvus_product_dict_id(name)
  39. def get_milvus_standard_name(name):
  40. return "%s"%(str(name)[:MAX_NAME_LENGTH].lower())
  41. def get_milvus_product_dict_id(name):
  42. return getMD5(get_milvus_standard_name(name))
  43. from BaseDataMaintenance.model.ots.document_product import *
  44. from BaseDataMaintenance.dataSource.source import getConnect_ots
  45. from tablestore import *
  46. from BaseDataMaintenance.common.Utils import *
  47. from BaseDataMaintenance.common.multiThread import MultiThreadHandler
  48. from queue import Queue
  49. def move_document_product_dict():
  50. bool_query = BoolQuery(must_queries=[
  51. ExistsQuery(DOCUMENT_PRODUCT_NAME)
  52. ])
  53. ots_client = getConnect_ots()
  54. Document_product_table_name = "document_product_dict"
  55. rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
  56. SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("grade")]),limit=100,get_total_count=True),
  57. ColumnsToGet(return_type=ColumnReturnType.ALL))
  58. list_data = getRow_ots(rows)
  59. while next_token:
  60. rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
  61. SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
  62. ColumnsToGet(return_type=ColumnReturnType.ALL))
  63. list_data.extend(getRow_ots(rows))
  64. print("%d/%d"%(len(list_data),total_count))
  65. # if len(list_data)>=2000:
  66. # break
  67. task_queue = Queue()
  68. dict_id_dict = {}
  69. for _data in list_data:
  70. task_queue.put(_data)
  71. id = _data.get(DOCUMENT_PRODUCT_DICT_ID)
  72. dict_id_dict[id] = _data
  73. def _handle(item,result_queue):
  74. status = item.get(DOCUMENT_PRODUCT_DICT_STATUS)
  75. D1 = Document_product_dict(item)
  76. if status==1:
  77. D1.update_row(ots_client)
  78. D1.table_name = Document_product_table_name
  79. D1.delete_row(ots_client)
  80. mt = MultiThreadHandler(task_queue,_handle,None,30)
  81. mt.run()
# Script entry point: running this module directly performs the table move.
if __name__ == '__main__':
    # Debug example kept from the original:
    # print(get_milvus_product_dict_id("-sl-10xls"))
    move_document_product_dict()