products.py 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499
  1. from BaseDataMaintenance.common.documentFingerprint import getMD5
  2. from BaseDataMaintenance.common.Utils import *
  3. from BaseDataMaintenance.common.milvusUtil import *
  4. from BaseDataMaintenance.common.multiThread import MultiThreadHandler
  5. from BaseDataMaintenance.maintenance.product.productUtils import *
  6. from BaseDataMaintenance.model.ots.document_product_tmp import *
  7. from BaseDataMaintenance.model.ots.document_product import *
  8. from BaseDataMaintenance.model.ots.document_product_dict import *
  9. from BaseDataMaintenance.model.ots.document_product_dict_interface import *
  10. from BaseDataMaintenance.model.ots.document import *
  11. from BaseDataMaintenance.model.ots.attachment import *
  12. from BaseDataMaintenance.model.ots.enterprise import *
  13. from BaseDataMaintenance.model.ots.project import *
  14. from tablestore import *
  15. from BaseDataMaintenance.dataSource.source import getConnect_ots
  16. from multiprocessing import Process,Queue
  17. from random import randint
  18. from BaseDataMaintenance.maintenance.product.product_dict import Product_Dict_Manager
  19. from apscheduler.schedulers.blocking import BlockingScheduler
  20. from BaseDataMaintenance.maintenance.product.make_brand_pattern import *
  21. from BaseDataMaintenance.maintenance.product.product_dict import *
  22. import logging
  23. root = logging.getLogger()
  24. root.setLevel(logging.INFO)
  25. from uuid import uuid4
  26. from multiprocessing import Queue as PQueue
  27. class Product_Manager(Product_Dict_Manager):
  28. def __init__(self):
  29. super(Product_Manager, self).__init__()
  30. self.process_queue = PQueue()
  31. self.ots_client = getConnect_ots()
  32. self.set_id = set()
  33. def get_product_id(self,docid,name,brand,specs,unit_price,quantity):
  34. if name is None:
  35. name = ""
  36. if brand is None:
  37. brand = ""
  38. if specs is None:
  39. specs = ""
  40. if quantity is None:
  41. quantity = ""
  42. if unit_price is None or unit_price=="":
  43. unit_price = ""
  44. else:
  45. unit_price = "%.2f"%float(unit_price)
  46. product_id = getMD5(str(docid)+str(name)+str(brand)+str(specs)+str(unit_price)+str(quantity))
  47. return product_id
    def producer(self,process_count=3000):
        """Fill the in-memory process queue with unprocessed temp product rows.

        Scans document_product_tmp for rows whose status lies in [1,51)
        (not yet standardized) and pushes them onto self.process_queue,
        paging through the search index until roughly ``process_count``
        rows have been collected.  Rows queued in the previous round
        (self.set_id) are skipped so they are not processed twice while
        the queue drains.

        :param process_count: approximate number of rows to queue per round
        """
        q_size = self.process_queue.qsize()
        # queue is still reasonably full from the last round -> nothing to do yet
        if q_size>process_count/6:
            return
        bool_query = BoolQuery(must_queries=[RangeQuery(DOCUMENT_PRODUCT_TMP_STATUS,1,51)])
        rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_tmp_table_name,Document_product_tmp_table_name+"_index",
                                                                            SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
                                                                            columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
        list_data = getRow_ots(rows)
        _count = len(list_data)
        log("producer %d/%d"%(q_size,total_count))
        list_id = []
        for _d in list_data:
            _id = _d.get(DOCUMENT_PRODUCT_TMP_ID)
            # skip rows already queued in the previous round
            if _id in self.set_id:
                continue
            list_id.append(_id)
            self.process_queue.put(_d)
        # keep paging until the target batch size is reached or data runs out
        while next_token:
            rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_tmp_table_name,Document_product_tmp_table_name+"_index",
                                                                                SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
                                                                                columns_to_get=ColumnsToGet(return_type=ColumnReturnType.ALL))
            list_data = getRow_ots(rows)
            for _d in list_data:
                _id = _d.get(DOCUMENT_PRODUCT_TMP_ID)
                if _id in self.set_id:
                    continue
                list_id.append(_id)
                self.process_queue.put(_d)
            _count += len(list_data)
            if _count>=process_count:
                break
        # remember this round's ids so the next producer call skips them
        self.set_id = set(list_id)
  81. def comsumer(self):
  82. def start_thread(thread_count):
  83. mt = MultiThreadHandler(self.process_queue,self.comsumer_handle,None,thread_count,1,False,True)
  84. mt.run()
  85. process_count = 6
  86. thread_count = 6
  87. list_process = []
  88. for _i in range(process_count):
  89. p = Process(target=start_thread,args=(thread_count,))
  90. list_process.append(p)
  91. for p in list_process:
  92. p.start()
  93. for p in list_process:
  94. p.join()
  95. def comsumer_handle(self,item,result_queue):
  96. try:
  97. self.standardize(item)
  98. except Exception as e:
  99. traceback.print_exc()
    def standardize(self,tmp_dict,output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id","remove_words","level"]):
        '''
        Standardize one temp product row against the standard product dictionary.

        Matching is fuzzy (intellect/embedding search), not exact.
        Only a row whose *name* matches a standard name is saved as a
        standard product; under that condition:
          - a matched brand is replaced by its standard form; an unmatched
            but legal brand may be registered as a new one, otherwise the
            brand stays "";
          - specs are handled the same way;
        name-brand and brand-specs links are added automatically because of
        the 3-level tree structure (name -> brand -> specs).

        NOTE: original comments were in Chinese; translated.  Indentation of
        this method was reconstructed from a flattened dump — structure
        should be verified against the project repository.

        :param tmp_dict: dict of one document_product_tmp row
        :param output_fields: fields requested from the vector-search results
        :return: None; results are written back to OTS rows
        '''
        # todo: 1. auto-add new rows to the standard parameter table?
        #   1. add new connections between existing names. 2. add new specs
        # NOTE: when matching specs, the differing characters must not contain
        # digits/letters/roman numerals, and occurrence-count differences
        # cannot be ignored.
        save_product_tmp = Document_product_tmp({DOCUMENT_PRODUCT_TMP_ID:tmp_dict.get(DOCUMENT_PRODUCT_TMP_ID)})
        _status = 0
        document_product_tmp = Document_product_tmp(tmp_dict)
        tenderee = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_TENDEREE,"")
        name = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,"")
        brand = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,"")
        specs = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,"")
        parameters = document_product_tmp.getProperties().get(DOCUMENT_PRODUCT_TMP_PARAMETER,"")
        # strip the tenderee name so it is never mistaken for a product/brand token
        name = name.replace(tenderee,"")
        brand = brand.replace(tenderee,"")
        original_name = name
        original_brand = brand
        original_specs = specs
        # fallback candidates tried when the primary field fails to match
        list_candidates = [a for a in [name,brand,specs,parameters] if a!=""]
        list_candidate_brand_specs = [a for a in [brand,specs,parameters,name] if a!=""]
        # empty brand/specs fall back to the free-text parameters field
        if brand=="" and parameters!="":
            brand = parameters
        if specs=="" and parameters!="":
            specs = parameters
        new_name = ""
        new_brand = ""
        new_specs = ""
        name_ots_id = None
        brand_ots_id = None
        specs_ots_id = None
        # ---- step 1: match the product name against the standard dictionary ----
        if name is not None and name!="":
            Coll,_ = self.get_collection(NAME_GRADE)
            search_list = get_intellect_search(Coll,embedding_index_name,name,NAME_GRADE,self.search_params,output_fields,limit=10)
            for _search in search_list:
                ots_id = _search.get("standard_name_id")
                ots_name = _search.get("ots_name")
                standard_name = _search.get("standard_name")
                ots_parent_id = _search.get("ots_parent_id")
                remove_words = _search.get("remove_words")
                if check_product(name,ots_name,remove_words):
                    name_ots_id = get_document_product_dict_id(ots_parent_id,standard_name)
                    original_name = name
                    new_name = standard_name
                    log("checking name %s succeed %s %s"%(name,ots_name,str(remove_words)))
                    # #update alias of name
                    # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
                    # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                    # if _flag and _dpd.updateAlias(name):
                    #     _dpd.update_row(self.ots_client)
                    break
        # primary name did not match -> retry with every non-empty field as name
        # NOTE(review): reconstructed loop has no found-guard on the outer
        # candidate loop; a later candidate could in principle rematch —
        # confirm against the original file.
        if name_ots_id is None:
            for name in list_candidates:
                Coll,_ = self.get_collection(NAME_GRADE)
                search_list = get_intellect_search(Coll,embedding_index_name,name,NAME_GRADE,self.search_params,output_fields,limit=10)
                for _search in search_list:
                    ots_id = _search.get("standard_name_id")
                    ots_name = _search.get("ots_name")
                    standard_name = _search.get("standard_name")
                    ots_parent_id = _search.get("ots_parent_id")
                    remove_words = _search.get("remove_words")
                    if check_product(name,ots_name,remove_words):
                        log("checking name %s succeed %s %s"%(name,ots_name,str(remove_words)))
                        name_ots_id = get_document_product_dict_id(ots_parent_id,standard_name)
                        original_name = name
                        new_name = standard_name
                        # #update alias of name
                        # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:name_ots_id})
                        # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                        # if _flag and _dpd.updateAlias(name):
                        #     _dpd.update_row(self.ots_client)
                        break
        # ---- steps 2-3 run only when a standard name was found ----
        if name_ots_id is not None:
            # ---- step 2: match/standardize the brand ----
            if brand is not None and brand!="":
                s_brand = brand
                l_brand = [brand]
                Coll,_ = self.get_collection(BRAND_GRADE)
                _find = False
                for brand in l_brand:
                    if len(brand)>100:
                        continue
                    search_list = get_intellect_search(Coll,embedding_index_name,brand,BRAND_GRADE,self.search_params,output_fields,limit=10)
                    # log("search brand %s"%(brand))
                    for _search in search_list:
                        ots_id = _search.get("standard_name_id")
                        ots_name = _search.get("ots_name")
                        standard_name = _search.get("standard_name")
                        ots_parent_id = _search.get("ots_parent_id")
                        remove_words = _search.get("remove_words")
                        # log("check brand %s and %s"%(brand,ots_name))
                        if check_brand(brand,ots_name,remove_words):
                            # log("check brand similar succeed:%s and %s"%(brand,ots_name))
                            # a brand equal to the standardized name is rejected
                            if ots_name==new_name:
                                continue
                            original_brand = brand
                            # brand came from the same raw string as the name:
                            # apply stricter sanity checks before accepting it
                            if original_brand==original_name:
                                if len(new_name)+len(ots_name)>len(original_name):
                                    continue
                                if original_brand.find(ots_name)>=1:
                                    continue
                                if len(original_brand)<=3:
                                    continue
                            new_brand = standard_name
                            log("checking brand %s succeed %s"%(brand,new_brand))
                            # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
                            if name_ots_id is not None:
                                brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
                                _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
                                            DOCUMENT_PRODUCT_DICT_NAME:new_brand,
                                            DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(new_brand).lower()),
                                            DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
                                            DOCUMENT_PRODUCT_DICT_STATUS:1,
                                            DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
                                            DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
                                            DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                            DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                            }
                                _dpd_brand = Document_product_dict(_d_brand)
                                # _dpd_brand.updateAlias(str(new_brand).lower())
                                if not _dpd_brand.exists_row(self.ots_client):
                                    _dpd_brand.update_row(self.ots_client)
                                else:
                                    pass
                                    # #update alias
                                    # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
                                    # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                                    # if _flag:
                                    #     if _dpd.updateAlias(brand):
                                    #         _dpd.update_row(self.ots_client)
                            _find = True
                            break
                        else:
                            # log("check brand similar failed:%s and %s"%(brand,ots_name))
                            # add new brand?
                            pass
                    if _find:
                        break
                # no standard brand matched: try to register the cleaned brand
                # as a brand-new entry via the dict interface table
                if not _find:
                    for brand in l_brand:
                        if len(brand)>100:
                            continue
                        c_brand = clean_product_brand(brand)
                        if self.check_new_brand(c_brand):
                            if c_brand=="":
                                continue
                            original_brand = brand
                            # same stricter checks when brand duplicates the name
                            if original_brand==original_name:
                                if len(new_name)+len(c_brand)>len(original_name):
                                    continue
                                if new_name==original_brand:
                                    continue
                                if original_brand.find(c_brand)>=1:
                                    continue
                                if len(original_brand)<=3:
                                    continue
                            new_brand = c_brand
                            log("adding new brand %s"%(str(new_brand)))
                            _d_brand = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_brand,
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(str(new_brand).lower()),
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:BRAND_GRADE,
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:name_ots_id,
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                        DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert",
                                        DOCUMENT_PRODUCT_ORIGINAL_BRAND:brand
                                        }
                            dpdi = Document_product_dict_interface(_d_brand)
                            dpdi.update_row(self.ots_client)
                            break
            # brand still unresolved: retry the brand match over all candidates
            if brand_ots_id is None:
                _find = False
                Coll,_ = self.get_collection(BRAND_GRADE)
                for brand in list_candidates:
                    if _find:
                        break
                    l_brand = [brand]
                    for brand in l_brand:
                        if len(brand)>100:
                            continue
                        if _find:
                            break
                        search_list = get_intellect_search(Coll,embedding_index_name,brand,BRAND_GRADE,self.search_params,output_fields,limit=10)
                        # log("search brand %s"%(brand))
                        for _search in search_list:
                            ots_id = _search.get("standard_name_id")
                            ots_name = _search.get("ots_name")
                            standard_name = _search.get("standard_name")
                            ots_parent_id = _search.get("ots_parent_id")
                            remove_words = _search.get("remove_words")
                            # log("check brand %s and %s"%(brand,ots_name))
                            if check_brand(brand,ots_name,remove_words):
                                # log("check brand similar succeed:%s and %s"%(brand,ots_name))
                                if ots_name==new_name:
                                    continue
                                original_brand = brand
                                if original_brand==original_name:
                                    if len(new_name)+len(ots_name)>len(original_name):
                                        continue
                                    if original_brand.find(ots_name)>=1:
                                        continue
                                    if len(original_brand)<=3:
                                        continue
                                new_brand = standard_name
                                log("checking brand %s succeed %s"%(brand,new_brand))
                                # judge if the brand which parent_id is name_ots_id exists,if not insert one else update alias
                                if name_ots_id is not None:
                                    brand_ots_id = get_document_product_dict_id(name_ots_id,new_brand)
                                    _d_brand = {DOCUMENT_PRODUCT_DICT_ID:brand_ots_id,
                                                DOCUMENT_PRODUCT_DICT_NAME:new_brand,
                                                DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(new_brand).lower()),
                                                DOCUMENT_PRODUCT_DICT_GRADE:BRAND_GRADE,
                                                DOCUMENT_PRODUCT_DICT_STATUS:1,
                                                DOCUMENT_PRODUCT_DICT_PARENT_ID:name_ots_id,
                                                DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
                                                DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                }
                                    _dpd_brand = Document_product_dict(_d_brand)
                                    # _dpd_brand.updateAlias(str(new_brand).lower())
                                    if not _dpd_brand.exists_row(self.ots_client):
                                        _dpd_brand.update_row(self.ots_client)
                                    else:
                                        pass
                                        # #update alias
                                        # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:brand_ots_id})
                                        # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                                        # if _flag:
                                        #     if _dpd.updateAlias(brand):
                                        #         _dpd.update_row(self.ots_client)
                                _find = True
                                break
            # ---- step 3: match/standardize the specs ----
            if specs is not None and specs!="":
                debug("getting sepcs %s"%(specs))
                list_specs = []
                c_specs = clean_product_specs(specs)
                list_specs.append(c_specs)
                # also try every non-CJK fragment of length > 4 as a specs candidate
                for s in re.split("[\u4e00-\u9fff]",specs):
                    if s!="" and len(s)>4:
                        list_specs.append(s)
                _index = 0
                break_flag = False
                list_similar_specs = []
                for c_specs in list_specs:
                    if break_flag:
                        break
                    _index += 1
                    specs_vector = get_embedding_request(c_specs)
                    if specs_vector is not None:
                        Coll,_ = self.get_collection(SPECS_GRADE)
                        search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=20)
                        for _search in search_list:
                            ots_id = _search.get("standard_name_id")
                            ots_name = _search.get("ots_name")
                            standard_name = _search.get("standard_name")
                            ots_parent_id = _search.get("ots_parent_id")
                            debug("checking specs %s and %s"%(specs,ots_name))
                            if is_similar(specs,ots_name):
                                # log("specs is_similar")
                                if check_specs(c_specs,ots_name):
                                    break_flag = True
                                    original_specs = c_specs
                                    # specs identical to name/brand is rejected
                                    if standard_name==new_name or standard_name==new_brand:
                                        continue
                                    new_specs = standard_name
                                    log("check_specs %s succeed %s"%(specs,new_specs))
                                    # to update the document_product_dict which is builded for search
                                    if brand_ots_id is not None:
                                        # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
                                        specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
                                        _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
                                                    DOCUMENT_PRODUCT_DICT_NAME:new_specs,
                                                    DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(new_specs).lower()),
                                                    DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
                                                    DOCUMENT_PRODUCT_DICT_STATUS:1,
                                                    DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
                                                    DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
                                                    DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                    DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                    }
                                        _dpd_specs = Document_product_dict(_d_specs)
                                        # _dpd_specs.updateAlias(str(new_specs).lower())
                                        if not _dpd_specs.exists_row(self.ots_client):
                                            _dpd_specs.update_row(self.ots_client)
                                        # user interface to add
                                        else:
                                            pass
                                            # #update alias
                                            # _dpd = Document_product_dict({DOCUMENT_PRODUCT_DICT_ID:specs_ots_id})
                                            # _flag = _dpd.fix_columns(self.ots_client,[DOCUMENT_PRODUCT_DICT_ALIAS],True)
                                            # if _flag:
                                            #     if _dpd.updateAlias(specs):
                                            #         _dpd.update_row(self.ots_client)
                                    break_flag = True
                                    break
                            else:
                                list_similar_specs.append(specs)
                # add new specs?
                if new_specs is not None and new_specs!="":
                    pass
                else:
                    debug("specs not similar")
                    # no standard specs matched: register the first legal
                    # similar specs as a new entry via the dict interface table
                    for specs in list_similar_specs:
                        if is_legal_specs(specs) and len(specs)<MAX_NAME_LENGTH and len(specs)>=5:
                            debug("is_legal_specs")
                            original_specs = specs
                            new_specs = clean_product_specs(specs)
                            if new_specs==new_name or new_specs==new_brand:
                                new_specs = ""
                                continue
                            # insert into document_product_dict a new record
                            # to update the document_product_dict which is builded for search
                            # add new specs
                            if brand_ots_id is not None and name_ots_id is not None:
                                specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
                                # _d = {DOCUMENT_PRODUCT_DICT_ID:_md5,
                                #       DOCUMENT_PRODUCT_DICT_NAME:new_specs,
                                #       DOCUMENT_PRODUCT_DICT_ALIAS:"%s&&%s"%(specs,new_specs),
                                #       DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
                                #       DOCUMENT_PRODUCT_DICT_STATUS:1,
                                #       DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
                                #       DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                #       DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                #       }
                                # _dpd = Document_product_dict(_d)
                                # _dpd.update_row(self.ots_client)
                                log("adding new specs %s"%(new_specs))
                                # user interface to add
                                _d = {DOCUMENT_PRODUCT_DICT_INTERFACE_ID:uuid4().hex,
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_NAME:new_specs,
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_ALIAS:"%s"%(new_specs.lower()),
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_GRADE:SPECS_GRADE,
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_STATUS:1,
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_PARENT_ID:brand_ots_id,
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                      DOCUMENT_PRODUCT_DICT_INTERFACE_ACTION:"insert"
                                      }
                                _dpdi = Document_product_dict_interface(_d)
                                _dpdi.update_row(self.ots_client)
                            break
            # specs still unresolved: retry over the brand/specs candidate list
            if specs_ots_id is None:
                _find = False
                for specs in list_candidate_brand_specs:
                    if _find:
                        break
                    debug("getting sepcs %s"%(specs))
                    list_specs = []
                    c_specs = clean_product_specs(specs)
                    list_specs.append(c_specs)
                    for s in re.split("[\u4e00-\u9fff]",specs):
                        if s!="" and len(s)>4:
                            list_specs.append(s)
                    similar_flag = None
                    _index = 0
                    for c_specs in list_specs:
                        if _find:
                            break
                        _index += 1
                        specs_vector = get_embedding_request(c_specs)
                        if specs_vector is not None:
                            Coll,_ = self.get_collection(SPECS_GRADE)
                            search_list = get_embedding_search(Coll,embedding_index_name,c_specs,SPECS_GRADE,[specs_vector],self.search_params,output_fields,limit=10)
                            for _search in search_list:
                                if _find:
                                    break
                                ots_id = _search.get("standard_name_id")
                                ots_name = _search.get("ots_name")
                                standard_name = _search.get("standard_name")
                                ots_parent_id = _search.get("ots_parent_id")
                                debug("checking specs %s and %s"%(specs,ots_name))
                                if is_similar(c_specs,ots_name):
                                    # log("specs is_similar")
                                    if check_specs(c_specs,ots_name):
                                        break_flag = True
                                        original_specs = c_specs
                                        new_specs = standard_name
                                        if new_specs==new_name or new_specs==new_brand:
                                            new_specs = ""
                                            continue
                                        if brand_ots_id is not None:
                                            # judge if the specs which parent_id is brand_ots_id exists,insert one if not exists else update alias
                                            specs_ots_id = get_document_product_dict_id(brand_ots_id,new_specs)
                                            _d_specs = {DOCUMENT_PRODUCT_DICT_ID:specs_ots_id,
                                                        DOCUMENT_PRODUCT_DICT_NAME:new_specs,
                                                        DOCUMENT_PRODUCT_DICT_ALIAS:"%s"%(str(new_specs).lower()),
                                                        DOCUMENT_PRODUCT_DICT_GRADE:SPECS_GRADE,
                                                        DOCUMENT_PRODUCT_DICT_STATUS:1,
                                                        DOCUMENT_PRODUCT_DICT_PARENT_ID:brand_ots_id,
                                                        DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED:IS_SYNCHONIZED,
                                                        DOCUMENT_PRODUCT_DICT_CREATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                        DOCUMENT_PRODUCT_DICT_UPDATE_TIME:getCurrent_date(format="%Y-%m-%d %H:%M:%S"),
                                                        }
                                            _dpd_specs = Document_product_dict(_d_specs)
                                            # _dpd_specs.updateAlias(str(new_specs).lower())
                                            if not _dpd_specs.exists_row(self.ots_client):
                                                _dpd_specs.update_row(self.ots_client)
                                        _find = True
                                        break
        # ---- step 4: judge if the product matches the standard product ----
        if name_ots_id is not None:
            is_legal_data = True
            # standard the product and save to document_product table
            _product = Document_product(tmp_dict)
            docid = _product.getProperties().get(DOCUMENT_PRODUCT_DOCID)
            unit_price = _product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE)
            quantity = _product.getProperties().get(DOCUMENT_PRODUCT_QUANTITY)
            unit_price = clean_product_unit_price(unit_price)
            quantity = clean_product_quantity(quantity)
            total_price = _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)
            _product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
            _product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
            win_bid_price = _product.getProperties().get(DOCUMENT_PRODUCT_WIN_BID_PRICE)
            # cross-check unit_price * quantity == total_price; fill in whichever
            # of the three is missing, and flag inconsistent rows as illegal
            if isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)) and isinstance(total_price,(float,int)):
                if unit_price>0:
                    new_quantity = total_price/unit_price
                    if new_quantity!=quantity:
                        # if new_quantity==total_price//unit_price:
                        #     quantity = int(new_quantity)
                        #     _product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
                        # else:
                        #     is_legal_data = False
                        is_legal_data = False
                elif quantity>0:
                    unit_price = total_price/quantity
                    _product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
            elif isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)):
                total_price = float("%.2f"%(unit_price*quantity))
                _product.setValue(DOCUMENT_PRODUCT_TOTAL_PRICE,total_price,True)
            elif isinstance(unit_price,(float,int)) and isinstance(total_price,(float,int)):
                if unit_price>0:
                    quantity = int(total_price//unit_price)
                    _product.setValue(DOCUMENT_PRODUCT_QUANTITY,quantity,True)
            elif isinstance(quantity,(float,int)) and isinstance(total_price,(float,int)):
                if quantity>0:
                    unit_price = float("%.2f"%(total_price/quantity))
                    _product.setValue(DOCUMENT_PRODUCT_UNIT_PRICE,unit_price,True)
            elif isinstance(quantity,(float,int)) and quantity>10000:
                is_legal_data = False
            # sanity bounds: total far above the win bid price, or absurd unit price
            if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE),(float,int)) and isinstance(win_bid_price,(float,int)):
                if _product.getProperties().get(DOCUMENT_PRODUCT_TOTAL_PRICE)>win_bid_price*10 and win_bid_price>0:
                    is_legal_data = False
            if isinstance(_product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE),(float,int)) and _product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE)>100000000:
                is_legal_data = False
            new_id = self.get_product_id(docid,new_name,new_brand,new_specs,unit_price,quantity)
            _product.setValue(DOCUMENT_PRODUCT_ID,new_id,True)
            _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_ID,tmp_dict.get(DOCUMENT_PRODUCT_TMP_ID),True)
            if name_ots_id is not None:
                _product.setValue(DOCUMENT_PRODUCT_DICT_NAME_ID,name_ots_id,True)
            if brand_ots_id is not None:
                _product.setValue(DOCUMENT_PRODUCT_DICT_BRAND_ID,brand_ots_id,True)
            if specs_ots_id is not None:
                _product.setValue(DOCUMENT_PRODUCT_DICT_SPECS_ID,specs_ots_id,True)
            _product.setValue(DOCUMENT_PRODUCT_NAME,new_name,True)
            _product.setValue(DOCUMENT_PRODUCT_BRAND,new_brand,True)
            _product.setValue(DOCUMENT_PRODUCT_SPECS,new_specs,True)
            _product.setValue(DOCUMENT_PRODUCT_STATUS,randint(201,300),True)
            _product.setValue(DOCUMENT_PRODUCT_BRANDSPECS,"%s&&%s"%(new_brand,new_specs),True)
            _product.setValue(DOCUMENT_PRODUCT_FULL_NAME,"%s&&%s&&%s"%(new_name,new_brand,new_specs),True)
            _product.setValue(DOCUMENT_PRODUCT_CREATE_TIME,getCurrent_date(format="%Y-%m-%d %H:%M:%S"),True)
            _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_NAME,original_name,True)
            _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_BRAND,original_brand,True)
            _product.setValue(DOCUMENT_PRODUCT_ORIGINAL_SPECS,original_specs,True)
            bid_filemd5s = self.get_bid_filemd5s(docid,self.ots_client)
            if bid_filemd5s is not None:
                _product.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
            # status ranges encode the outcome: 201-300 saved, 401-450 no name
            # match, 451-500 duplicate, 501-550 illegal data
            if not is_legal_data:
                _status = randint(501,550)
            else:
                _flag,dump_id = self.dumplicate(_product)
                if _flag:
                    _status = randint(201,300)
                    save_product_tmp.setValue(DOCUMENT_PRODUCT_TMP_NEW_ID,new_id,True)
                    _product.update_row(self.ots_client)
                else:
                    _status = randint(451,500)
                    save_product_tmp.setValue(DOCUMENT_PRODUCT_DUMP_ID,str(dump_id),True)
        else:
            _status = randint(401,450)
        save_product_tmp.setValue(DOCUMENT_PRODUCT_TMP_STATUS,_status,True)
        save_product_tmp.update_row(self.ots_client)
  589. def check_new_brand(self,brand):
  590. return is_legal_brand(self.ots_client,brand)
@staticmethod
def get_bid_filemd5s(docid,ots_client):
    """Collect the filemd5s of every attachment classified as "招标文件"
    (bid invitation file) across all documents merged with *docid*.

    Steps: find project rows whose ``docids`` contain this docid and expand
    to the full set of related docids; load each document's attachment list;
    keep the attachments whose classification equals "招标文件".

    :param docid: document id to expand from
    :param ots_client: OTS client used for every lookup
    :return: comma-joined de-duplicated md5 string, or None when none found
    """
    bool_query = BoolQuery(must_queries=[
        TermQuery("docids",docid)
    ])
    rows,next_token,total_count,is_all_succeed = ots_client.search("project2","project2_index",
                                                                   SearchQuery(bool_query,limit=10),
                                                                   columns_to_get=ColumnsToGet(["docids"],return_type=ColumnReturnType.SPECIFIED))
    list_data = getRow_ots(rows)
    list_bid_filemd5s = []
    set_docids = set([docid])
    set_md5s = set()
    for _d in list_data:
        try:
            # "docids" is a comma-separated id string on the project row
            docids = _d.get("docids","")
            for _id in docids.split(","):
                set_docids.add(int(_id))
        except Exception as e:
            # ignore rows with malformed docids
            pass
    list_docids = list(set_docids)
    for _docid in list_docids:
        # partition key convention used throughout this file: docid%500+1
        _d = {document_partitionkey:_docid%500+1,
              document_docid:_docid}
        _doc = Document(_d)
        _doc.fix_columns(ots_client,[document_attachment_path],True)
        page_attachments = _doc.getProperties().get(document_attachment_path)
        if page_attachments is not None and page_attachments!="":
            # attachment list is stored as a JSON string on the document row
            attachments = json.loads(page_attachments)
            for _a in attachments:
                _filemd5 = _a.get(document_attachment_path_filemd5)
                # skip missing md5s and ones already checked
                if _filemd5 in set_md5s or _filemd5 is None:
                    continue
                set_md5s.add(_filemd5)
                _da = {attachment_filemd5:_filemd5}
                _attach = attachment(_da)
                _attach.fix_columns(ots_client,[attachment_classification],True)
                if _attach.getProperties().get(attachment_classification,"")=="招标文件":
                    list_bid_filemd5s.append(_filemd5)
    if len(list_bid_filemd5s)==0:
        return None
    # NOTE: joining over a set makes the output order non-deterministic
    return ",".join(list(set(list_bid_filemd5s)))
  632. def get_value_count(self,name,brand,specs,unit_price,quantity):
  633. value_count = 0
  634. if name is not None and len(name)>0:
  635. value_count += 1
  636. if brand is not None and len(brand)>0:
  637. value_count += 1
  638. if specs is not None and len(specs)>0:
  639. value_count += 1
  640. if isinstance(unit_price,(float,int)) and unit_price>0:
  641. value_count += 1
  642. if isinstance(quantity,(float,int)) and quantity>0:
  643. value_count += 1
  644. return value_count
def dumplicate_search_product(self,document_product):
    """Search existing document_product rows that duplicate *document_product*.

    Two passes:
      1. for fully-specified records, an exact match on
         name/brand/specs/unit_price/quantity within a +-30 day window
         (an exact hit returns immediately with to_save=0), then products of
         sibling documents merged into the same project with the same name;
      2. records with the same name/brand/tenderee/supplier in the window
         that have no conflicting specs/unit_price/quantity.

    :param document_product: Document_product candidate to check
    :return: (dump_id, to_save) -- dump_id is a str id for an exact hit,
             otherwise a list of duplicate ids; to_save is 0 when an existing
             record carries more information than this one, else 1
    """
    docid = document_product.getProperties().get(DOCUMENT_PRODUCT_DOCID)
    name = str(document_product.getProperties().get(DOCUMENT_PRODUCT_NAME,""))
    brand = str(document_product.getProperties().get(DOCUMENT_PRODUCT_BRAND,""))
    specs = str(document_product.getProperties().get(DOCUMENT_PRODUCT_SPECS,""))
    unit_price = document_product.getProperties().get(DOCUMENT_PRODUCT_UNIT_PRICE,"")
    quantity = document_product.getProperties().get(DOCUMENT_PRODUCT_QUANTITY,"")
    page_time = document_product.getProperties().get(DOCUMENT_PRODUCT_PAGE_TIME)
    tenderee = str(document_product.getProperties().get(DOCUMENT_PRODUCT_TENDEREE,""))
    supplier = str(document_product.getProperties().get(DOCUMENT_PRODUCT_SUPPLIER,""))
    # information richness of the candidate itself
    base_value_count = self.get_value_count(name,brand,specs,unit_price,quantity)
    list_dump_id = []
    page_time_before = page_time
    page_time_after = page_time
    try:
        page_time_before = timeAdd(page_time,-30,format="%Y-%m-%d",)
        page_time_after = timeAdd(page_time,30)
    except Exception as e:
        # keep the raw page_time bounds when date arithmetic fails
        pass
    to_save = 1
    # pass 1: only for fully-specified records
    if len(name)>0 and len(brand)>0 and len(specs)>0 and isinstance(unit_price,(float,int)) and isinstance(quantity,(float,int)):
        bool_query = BoolQuery(must_queries=[TermQuery("name",name),
                                             RangeQuery("page_time",page_time_before,page_time_after,True,True),
                                             TermQuery("brand",brand),
                                             TermQuery("specs",specs),
                                             TermQuery("unit_price",unit_price),
                                             TermQuery("quantity",quantity)
                                             ])
        rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
                                                                            SearchQuery(bool_query,limit=1),
                                                                            columns_to_get=ColumnsToGet(["name",'brand','specs'],return_type=ColumnReturnType.SPECIFIED))
        list_data = getRow_ots(rows)
        if len(list_data)>0:
            # exact duplicate found -- return its id, do not save
            return list_data[0].get(DOCUMENT_PRODUCT_ID),0
        # expand docid to every docid of the projects it was merged into
        bool_query = BoolQuery(must_queries=[
            TermQuery(project_docids,str(docid)),
        ])
        rows,next_token,total_count,is_all_succeed = self.ots_client.search("project2","project2_index",
                                                                            SearchQuery(bool_query,limit=10),
                                                                            ColumnsToGet([project_docids],return_type=ColumnReturnType.SPECIFIED))
        list_data = getRow_ots(rows)
        set_docid = set()
        for _data in list_data:
            _docids = _data.get(project_docids,"")
            for d_id in _docids.split(","):
                d_id = d_id.strip()
                if d_id!="":
                    set_docid.add(int(d_id))
        # never compare the document against itself
        if docid in set_docid:
            set_docid.remove(docid)
        should_q = [TermQuery(DOCUMENT_PRODUCT_DOCID,did) for did in set_docid]
        if len(should_q)>0:
            # same product name within the sibling documents
            bool_query = BoolQuery(must_queries=[TermQuery("name",name),
                                                 BoolQuery(should_queries=should_q),
                                                 ])
            rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
                                                                                SearchQuery(bool_query,limit=50),
                                                                                columns_to_get=ColumnsToGet(["docid",'name','brand','specs','unit_price','quantity'],return_type=ColumnReturnType.SPECIFIED))
            list_data = getRow_ots(rows)
            dict_docid_name = {}
            match_ids = []
            for _data in list_data:
                docid1 = _data.get(DOCUMENT_PRODUCT_DOCID)
                name1 = _data.get(DOCUMENT_PRODUCT_NAME)
                brand1 = _data.get(DOCUMENT_PRODUCT_BRAND)
                specs1 = _data.get(DOCUMENT_PRODUCT_SPECS)
                unit_price1 = _data.get(DOCUMENT_PRODUCT_UNIT_PRICE)
                quantity1 = _data.get(DOCUMENT_PRODUCT_QUANTITY)
                id = _data.get(DOCUMENT_PRODUCT_ID)
                value_count1 = self.get_value_count(name1,brand1,specs1,unit_price1,quantity1)
                if name1==name:
                    match_ids.append({DOCUMENT_PRODUCT_ID:id,"value_count":value_count1})
                if docid1 not in dict_docid_name:
                    dict_docid_name[docid1] = []
                # NOTE(review): appends the query `name`, not the row's `name1`
                # -- effectively counts matching rows per docid; confirm the
                # intent before changing
                dict_docid_name[docid1].append(name)
            # only treat as duplicates when every sibling document contributed
            # exactly one row
            is_all_one = True
            for k,v in dict_docid_name.items():
                if len(v)!=1:
                    is_all_one = False
            if is_all_one:
                match_ids.sort(key=lambda x:x.get("value_count",0),reverse=True)
                if len(match_ids)>0:
                    _id = match_ids[0].get(DOCUMENT_PRODUCT_ID)
                    value_count1 = match_ids[0]["value_count"]
                    if base_value_count<value_count1:
                        # an existing record is richer -> drop the new one
                        to_save = 0
                    for _match in match_ids:
                        list_dump_id.append(_match.get(DOCUMENT_PRODUCT_ID))
    # pass 2: same name/brand/tenderee/supplier in the time window
    if len(name)>0 and len(brand)>0 and len(supplier)>0 and len(tenderee)>0:
        # log("docid %s name %s page_time_before %s page_time_after %s brand %s supplier %s tenderee %s"%(str(docid),name,page_time_before,page_time_after,brand,supplier,tenderee))
        bool_query = BoolQuery(must_queries=[TermQuery("name",name),
                                             RangeQuery("page_time",page_time_before,page_time_after,True,True),
                                             TermQuery(DOCUMENT_PRODUCT_BRAND,brand),
                                             TermQuery(DOCUMENT_PRODUCT_TENDEREE,tenderee),
                                             TermQuery(DOCUMENT_PRODUCT_SUPPLIER,supplier),
                                             ])
        rows,next_token,total_count,is_all_succeed = self.ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
                                                                            SearchQuery(bool_query,limit=50),
                                                                            columns_to_get=ColumnsToGet(['name','brand','specs','unit_price','quantity'],return_type=ColumnReturnType.SPECIFIED))
        list_data = getRow_ots(rows)
        for _d in list_data:
            s_id = _d.get(DOCUMENT_PRODUCT_ID)
            s_name = _d.get(DOCUMENT_PRODUCT_NAME,"")
            s_brand = _d.get(DOCUMENT_PRODUCT_BRAND,"")
            s_specs = _d.get(DOCUMENT_PRODUCT_SPECS,"")
            s_unit_price = _d.get(DOCUMENT_PRODUCT_UNIT_PRICE,"")
            s_quantity = _d.get(DOCUMENT_PRODUCT_QUANTITY,"")
            check_flag = True
            value_count1 = self.get_value_count(s_name,s_brand,s_specs,s_unit_price,s_quantity)
            # a conflicting specs/price/quantity disqualifies the candidate
            if len(specs)>0 and len(s_specs)>0 and specs!=s_specs:
                check_flag = False
            elif isinstance(unit_price,(float,int)) and isinstance(s_unit_price,(float,int)) and unit_price!=s_unit_price:
                check_flag = False
            elif isinstance(quantity,(float,int)) and isinstance(s_quantity,(float,int)) and quantity!=s_quantity:
                check_flag = False
            if check_flag:
                if base_value_count<value_count1:
                    to_save = 0
                list_dump_id.append(s_id)
    return list_dump_id,to_save
  765. def dumplicate(self,document_product):
  766. '''
  767. Duplicates the product data
  768. 将同一个产品的采购结果公示进行去重,结合公告进行。
  769. :return:True if not repeated else False
  770. '''
  771. dump_id,to_save = self.dumplicate_search_product(document_product)
  772. if dump_id is not None:
  773. document_product.setValue(DOCUMENT_PRODUCT_DUMP_ID,str(dump_id),True)
  774. if to_save==1:
  775. if dump_id is not None:
  776. if isinstance(dump_id,str):
  777. _d = {DOCUMENT_PRODUCT_ID:dump_id,
  778. DOCUMENT_PRODUCT_STATUS:randint(401,450),
  779. DOCUMENT_PRODUCT_DUMP_ID:document_product.getProperties().get(DOCUMENT_PRODUCT_ID)}
  780. _dp = Document_product(_d)
  781. _dp.update_row(self.ots_client)
  782. elif isinstance(dump_id,list):
  783. for d_id in dump_id:
  784. _d = {DOCUMENT_PRODUCT_ID:d_id,
  785. DOCUMENT_PRODUCT_STATUS:randint(401,450),
  786. DOCUMENT_PRODUCT_DUMP_ID:document_product.getProperties().get(DOCUMENT_PRODUCT_ID)}
  787. _dp = Document_product(_d)
  788. _dp.update_row(self.ots_client)
  789. return True,dump_id
  790. else:
  791. return False,dump_id
  792. def start_processing(self):
  793. scheduler = BlockingScheduler()
  794. scheduler.add_job(self.producer,"cron",second="*/20")
  795. scheduler.add_job(self.comsumer,"cron",minute="*/1")
  796. scheduler.add_job(self.embedding_comsumer,"cron",minute="*/1")
  797. scheduler.add_job(self.embedding_interface_comsumer,"cron",second="*/20")
  798. scheduler.start()
def test(self):
    """Manual debug helper: embed two sample strings, print their cosine
    similarity, and dump the nearest neighbours from the SPECS collection."""
    from BaseDataMaintenance.common.sentencesUtil import cosine_similarity
    import torch
    output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id"]
    id = '56bdad168c71a1fc4d57cd10bcd987f0'
    collection,_ = self.get_collection(SPECS_GRADE)
    vector = request_embedding("西门子MAGNETOMLumina")
    vector1 = request_embedding("西门子")
    print("cosine similarity",cosine_similarity(torch.from_numpy(np.array([vector])) ,torch.from_numpy(np.array([vector1]))))
    Coll,_ = self.get_collection(SPECS_GRADE)
    search_list = search_embedding(Coll,embedding_index_name,[vector],self.search_params,output_fields,limit=60)
    for p in search_list:
        print(p)
    #
    # res = collection.query(
    #     expr = "ots_id in ['%s']"%(id),
    #     offset = 0,
    #     limit = 10,
    #     output_fields = output_fields,
    #     consistency_level="Strong"
    # )
    # print(res)
  821. def start_process_product():
  822. pm = Product_Manager()
  823. pm.start_processing()
def fix_product_data():
    '''
    Repair helper: delete generated document_product rows and reset the
    matching document_product_temp records to status=1 so they are reprocessed.
    Only deleteAndReprocess is wired into the thread pool; the other nested
    handlers are kept for one-off manual runs.
    :return: None
    '''
    table_name = "document_product_temp"
    table_index = "document_product_temp_index"
    columns = [DOCUMENT_PRODUCT_TMP_NEW_ID,DOCUMENT_PRODUCT_TMP_STATUS]
    # table_name = Document_product_table_name
    # table_index = Document_product_table_name+"_index"
    # columns = [DOCUMENT_PRODUCT_ORIGINAL_ID]
    ots_client = getConnect_ots()
    bool_query = BoolQuery(should_queries=[
        # RangeQuery("status",501),
        # TermQuery("docid",246032980)
        RangeQuery("status",201,501),
        # RangeQuery("status",401,451)
        # WildcardQuery(DOCUMENT_PRODUCT_ORIGINAL_SPECS,"MFUSOne")
        # TermQuery(DOCUMENT_PRODUCT_SPECS,"MFUSOne")
    ])
    # page through every matching temp row
    rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
                                                                   SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
                                                                   columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
    list_rows = getRow_ots(rows)
    print(total_count)
    while next_token:
        rows,next_token,total_count,is_all_succeed = ots_client.search(table_name,table_index,
                                                                       SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
                                                                       columns_to_get=ColumnsToGet(columns,return_type=ColumnReturnType.SPECIFIED))
        list_rows.extend(getRow_ots(rows))
        print("%d/%d"%(len(list_rows),total_count))
        # if len(list_rows)>10000:
        #     break
    task_queue = Queue()
    for d in list_rows:
        task_queue.put(d)
    def fix_missing_data(item,result_queue):
        # (unused handler) backfill original name/brand/specs and the bid
        # filemd5s onto an existing document_product row
        original_id = item.get(DOCUMENT_PRODUCT_ORIGINAL_ID)
        print("original_id",original_id)
        _d = {DOCUMENT_PRODUCT_TMP_ID:original_id,DOCUMENT_PRODUCT_TMP_STATUS:1}
        dpt = Document_product_tmp(_d)
        dpt.fix_columns(ots_client,["name","brand","specs"],True)
        _d = {DOCUMENT_PRODUCT_ID:item.get(DOCUMENT_PRODUCT_ID)}
        dp = Document_product(_d)
        #fix the project_code and original_name and bidi_filemd5s
        docid = int(item.get(DOCUMENT_PRODUCT_DOCID))
        partitionkey = docid%500+1
        # project_name = item.get(DOCUMENT_PRODUCT_PROJECT_NAME,"")
        # if project_name=="":
        #     #fix project_name
        #     _doc = Document({"partitionkey":partitionkey,
        #                      "docid":docid})
        #     _doc.fix_columns(ots_client,["doctitle"],True)
        #     dp.setValue(DOCUMENT_PRODUCT_DOCTITLE,_doc.getProperties().get("doctitle"),True)
        bid_filemd5s = Product_Manager.get_bid_filemd5s(docid,ots_client)
        if bid_filemd5s is not None:
            dp.setValue(DOCUMENT_PRODUCT_BID_FILEMD5S,bid_filemd5s,True)
        dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_NAME,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_NAME,""),True)
        dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_BRAND,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_BRAND,""),True)
        dp.setValue(DOCUMENT_PRODUCT_ORIGINAL_SPECS,dpt.getProperties().get(DOCUMENT_PRODUCT_TMP_SPECS,""),True)
        dp.update_row(ots_client)
    def deleteAndReprocess(item,result_queue):
        # reset the temp record to status=1 and delete the generated product
        original_id = item.get(DOCUMENT_PRODUCT_TMP_ID)
        new_id = item.get(DOCUMENT_PRODUCT_TMP_NEW_ID)
        # original_id = item.get(DOCUMENT_PRODUCT_ORIGINAL_ID)
        # new_id = item.get(DOCUMENT_PRODUCT_ID)
        print("original_id",original_id,"id",item.get(DOCUMENT_PRODUCT_ID))
        # delete data and rerun
        _d = {DOCUMENT_PRODUCT_TMP_ID:original_id,DOCUMENT_PRODUCT_TMP_STATUS:1}
        dpt = Document_product_tmp(_d)
        dpt.update_row(ots_client)
        if new_id is not None and new_id!="":
            _d = {DOCUMENT_PRODUCT_ID:new_id}
            dp = Document_product(_d)
            dp.delete_row(ots_client)
    def handle(item,result_queue):
        # (unused handler) re-enable temp records whose win_bid_price is 0
        win_bid_price = item.get(DOCUMENT_PRODUCT_TMP_WIN_BID_PRICE,1)
        if win_bid_price==0:
            dpt = Document_product_tmp(item)
            dpt.setValue(DOCUMENT_PRODUCT_TMP_STATUS,1,True)
            dpt.update_row(ots_client)
    mt = MultiThreadHandler(task_queue,deleteAndReprocess,None,30,1)
    mt.run()
  907. def test_check_brand():
  908. import logging
  909. root = logging.getLogger()
  910. root.setLevel(logging.DEBUG)
  911. from queue import Queue
  912. brand_path = "brand.txt"
  913. list_brand = []
  914. with open(brand_path,"r",encoding="utf8") as f:
  915. while 1:
  916. line = f.readline()
  917. if not line:
  918. break
  919. line = line.strip()
  920. if len(line)>0:
  921. brand = {"brand":line}
  922. list_brand.append(brand)
  923. # if len(list_brand)>100:
  924. # break
  925. task_queue = Queue()
  926. for _d in list_brand:
  927. task_queue.put(_d)
  928. pm = Product_Manager()
  929. def _handle(item,result_queue):
  930. brand = item.get("brand")
  931. new_brand = clean_product_brand(brand)
  932. _f = pm.check_new_brand(brand)
  933. item["f"] = _f
  934. item["new_brand"] = new_brand
  935. mt = MultiThreadHandler(task_queue,_handle,None,30,1)
  936. mt.run()
  937. list_legal_brand = []
  938. list_illegal_brand = []
  939. for _d in list_brand:
  940. f = _d.get("f")
  941. log("brand %s flag %s"%(brand,str(f)))
  942. if f:
  943. brand = _d.get("new_brand")
  944. list_legal_brand.append(brand)
  945. else:
  946. brand = _d.get("brand")
  947. list_illegal_brand.append(brand)
  948. with open("../../test/legal_brand.txt", "w", encoding="utf8") as f:
  949. for b in list_legal_brand:
  950. f.write(b+"\n")
  951. with open("../../test/illegal_brand.txt", "w", encoding="utf8") as f:
  952. for b in list_illegal_brand:
  953. f.write(b+"\n")
def test_match():
    """Manual debug helper: run the intelligent brand search for one sample
    string and print which candidates pass check_brand."""
    a = "迈瑞晟"
    # vector = request_embedding(get_milvus_standard_name(a))
    # vector = [get_embedding_request(b) for b in a]
    pm = Product_Manager()
    _GRADE = BRAND_GRADE
    Coll,_ = pm.get_collection(_GRADE)
    print(Coll.name)
    output_fields = ['ots_id','ots_name',"ots_parent_id","standard_name","standard_name_id","remove_words","level"]
    # start_time = time.time()
    _id = get_milvus_product_dict_id(a)
    # direct lookup of the sample's own milvus record
    print(Coll.query(expr=" ots_id in ['%s'] "%(_id),output_fields=output_fields))
    # print("cost",time.time()-start_time)
    # print(Coll.compact())
    # result = search_embedding(Coll,embedding_index_name,[vector],pm.search_params,output_fields,limit=20)
    #
    # final_list = []
    # for _search in result:
    #     _d = {}
    #     for k in output_fields:
    #         _d[k] = _search.entity.get(k)
    #     final_list.append(_d)
    # final_list = remove_repeat_item(final_list,k="ots_name")
    start_time = time.time()
    # final_list = get_embedding_search(Coll,embedding_index_name,a,_GRADE,vector,pm.search_params,output_fields,limit=5)
    final_list = get_intellect_search(Coll,embedding_index_name,a,_GRADE,pm.search_params,output_fields,limit=10)
    for _search in final_list:
        ots_id = _search.get("standard_name_id")
        ots_name = _search.get("ots_name")
        standard_name = _search.get("standard_name")
        ots_parent_id = _search.get("ots_parent_id")
        remove_words = _search.get("remove_words")
        if check_brand(a,ots_name,remove_words):
            print("similar",a,ots_name)
        else:
            print("not similar",a,ots_name)
    print("cost",time.time()-start_time)
    print(final_list)
def rebuild_milvus():
    """Re-insert every document_product_dict entry (grade >= 3) into milvus.

    Rows are de-duplicated on (name, grade), queued into a multiprocessing
    queue, and consumed by 5 processes x 5 threads calling
    insert_new_record_to_milvus.
    """
    pdm = Product_Dict_Manager()
    from multiprocessing import Queue as PQueue
    bool_query = BoolQuery(must_queries=[
        RangeQuery(DOCUMENT_PRODUCT_DICT_GRADE,3)
    ])
    ots_client = getConnect_ots()
    rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
                                                                   SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("name")]),limit=100,get_total_count=True),
                                                                   ColumnsToGet([DOCUMENT_PRODUCT_DICT_GRADE,DOCUMENT_PRODUCT_DICT_NAME,DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS],return_type=ColumnReturnType.SPECIFIED))
    list_data = getRow_ots(rows)
    while next_token:
        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
                                                                       SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
                                                                       ColumnsToGet([DOCUMENT_PRODUCT_DICT_GRADE,DOCUMENT_PRODUCT_DICT_NAME,DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS],return_type=ColumnReturnType.SPECIFIED))
        list_data.extend(getRow_ots(rows))
        print("%d/%d"%(len(list_data),total_count))
        # if len(list_data)>1000:
        #     break
    # de-duplicate on (name, grade) before queueing
    set_name_grade = set()
    task_queue = PQueue()
    for _data in list_data:
        name = _data.get(DOCUMENT_PRODUCT_DICT_NAME)
        grade = _data.get(DOCUMENT_PRODUCT_DICT_GRADE)
        _key = "%s--%d"%(name,grade)
        if _key not in set_name_grade:
            task_queue.put(_data)
            set_name_grade.add(_key)
    log("rebuild milvus %d counts"%(task_queue.qsize()))
    def insert_into_milvus(item,result_queue):
        name = item.get(DOCUMENT_PRODUCT_DICT_NAME,"")
        grade = item.get(DOCUMENT_PRODUCT_DICT_GRADE)
        if grade==SPECS_GRADE:
            name = clean_product_specs(name)
            if len(name)<2:
                return
        # names shorter than 2 chars are not indexed
        # NOTE(review): this repeats the guard above for the SPECS branch;
        # appears redundant but harmless -- confirm before simplifying
        if len(name)<2:
            return
        parent_id = item.get(DOCUMENT_PRODUCT_DICT_PARENT_ID,"")
        Coll,_ = pdm.get_collection(grade)
        standard_alias = item.get(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS,"")
        log("insert name %s grade %d"%(name,grade))
        remove_words = item.get(DOCUMENT_PRODUCT_DICT_REMOVE_WORDS,"")
        level = item.get(DOCUMENT_PRODUCT_DICT_LEVEL)
        if level is None:
            # default level from the name: 装置/设备 (device/equipment) -> 2
            if re.search("装置|设备",name) is not None:
                level = 2
            else:
                level = 1
        insert_new_record_to_milvus(Coll,name,grade,parent_id,standard_alias,remove_words,level)
    def start_thread():
        mt = MultiThreadHandler(task_queue,insert_into_milvus,None,5)
        mt.run()
    p_count = 5
    list_p = []
    for i in range(p_count):
        p = Process(target=start_thread)
        list_p.append(p)
    for p in list_p:
        p.start()
    for p in list_p:
        p.join()
def move_document_product():
    """One-off migration: copy every row of the old "document_product" table
    into the table the Document_product model currently targets, then delete
    the row from the old table."""
    bool_query = BoolQuery(must_queries=[
        ExistsQuery(DOCUMENT_PRODUCT_NAME)
    ])
    ots_client = getConnect_ots()
    # NOTE(review): shadows the module-level constant; this local value is the
    # migration *source* table -- confirm before renaming
    Document_product_table_name = "document_product"
    rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
                                                                   SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("name")]),limit=100,get_total_count=True),
                                                                   ColumnsToGet(return_type=ColumnReturnType.ALL))
    list_data = getRow_ots(rows)
    while next_token:
        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_table_name,Document_product_table_name+"_index",
                                                                       SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
                                                                       ColumnsToGet(return_type=ColumnReturnType.ALL))
        list_data.extend(getRow_ots(rows))
        print("%d/%d"%(len(list_data),total_count))
        # if len(list_data)>=1000:
        #     break
    task_queue = Queue()
    for _data in list_data:
        task_queue.put(_data)
    def _handle(item,result_queue):
        D1 = Document_product(item)
        # write into the model's current table first, then delete from the old
        D1.update_row(ots_client)
        D1.table_name = Document_product_table_name
        D1.delete_row(ots_client)
    mt = MultiThreadHandler(task_queue,_handle,None,30)
    mt.run()
# directory containing this module; used to locate the illegal_*.txt files
current_path = os.path.dirname(__file__)
def delete_brands():
    """Delete every brand listed in illegal_brand.txt from both the OTS
    document_product_dict table (BRAND grade) and the milvus BRAND
    collection."""
    filename = os.path.join(current_path,"illegal_brand.txt")
    ots_client = getConnect_ots()
    list_brand = []
    with open(filename,"r",encoding="utf8") as f:
        while 1:
            brand = f.readline()
            if not brand:
                # EOF
                break
            brand = brand.strip()
            list_brand.append(brand)
    pm = Product_Manager()
    Coll,_ = pm.get_collection(BRAND_GRADE)
    print(Coll.name)
    Coll.compact()
    _count = 0
    task_queue = Queue()
    for brand in list_brand:
        _count += 1
        task_queue.put(brand)
        # if _count>=2:
        #     break
    def _handle(brand,result_queue):
        # every dict row at BRAND grade whose name equals the brand
        bool_query = BoolQuery(must_queries=[
            TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,BRAND_GRADE),
            TermQuery(DOCUMENT_PRODUCT_DICT_NAME,brand)
        ])
        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
                                                                       SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
                                                                       ColumnsToGet(return_type=ColumnReturnType.NONE))
        list_data = getRow_ots(rows)
        _id = get_milvus_product_dict_id(brand)
        while next_token:
            rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
                                                                           SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
                                                                           ColumnsToGet(return_type=ColumnReturnType.NONE))
            list_data.extend(getRow_ots(rows))
        for _d in list_data:
            dpd = Document_product_dict(_d)
            dpd.delete_row(ots_client)
        # print(Coll.query(expr=" ots_id in ['%s']"%(_id),output_fields=["ots_id","ots_name"]))
        delete_counts = Coll.delete(expr=" ots_id in ['%s']"%(_id)).delete_count
        log("brand %s total_count %d md5:%s delete_counts:%d"%(brand,total_count,_id,delete_counts))
    mt = MultiThreadHandler(task_queue,_handle,None,30)
    mt.run()
  1128. def delete_specs():
  1129. filename = os.path.join(current_path,"illegal_specs.txt")
  1130. ots_client = getConnect_ots()
  1131. list_brand = []
  1132. with open(filename,"r",encoding="utf8") as f:
  1133. while 1:
  1134. brand = f.readline()
  1135. if not brand:
  1136. break
  1137. brand = brand.strip()
  1138. list_brand.append(brand)
  1139. pm = Product_Manager()
  1140. Coll,_ = pm.get_collection(SPECS_GRADE)
  1141. print(Coll.name)
  1142. Coll.compact()
  1143. _count = 0
  1144. task_queue = Queue()
  1145. for specs in list_brand:
  1146. task_queue.put(specs)
  1147. _count += 1
  1148. # if _count>=2:
  1149. # break
  1150. def _handle(specs,result_queue):
  1151. bool_query = BoolQuery(must_queries=[
  1152. TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,SPECS_GRADE),
  1153. TermQuery(DOCUMENT_PRODUCT_DICT_NAME,specs)
  1154. ])
  1155. rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
  1156. SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),limit=100,get_total_count=True),
  1157. ColumnsToGet(return_type=ColumnReturnType.NONE))
  1158. list_data = getRow_ots(rows)
  1159. _id = get_milvus_product_dict_id(specs)
  1160. while next_token:
  1161. rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
  1162. SearchQuery(bool_query,next_token=next_token,limit=100,get_total_count=True),
  1163. ColumnsToGet(return_type=ColumnReturnType.NONE))
  1164. list_data.extend(getRow_ots(rows))
  1165. for _d in list_data:
  1166. dpd = Document_product_dict(_d)
  1167. dpd.delete_row(ots_client)
  1168. # print(Coll.query(expr=" ots_id in ['%s']"%(_id),output_fields=["ots_id","ots_name"]))
  1169. delete_counts = Coll.delete(expr=" ots_id in ['%s']"%(_id)).delete_count
  1170. log("brand %s total_count %d md5:%s delete_counts:%d"%(specs,total_count,_id,delete_counts))
  1171. mt = MultiThreadHandler(task_queue,_handle,None,30)
  1172. mt.run()
  1173. Coll.compact()
  1174. def remove_redis_keys():
  1175. db = redis.Redis(connection_pool=pool_product)
  1176. db.flushdb()
  1177. def update_document_product_dict():
  1178. import pandas as pd
  1179. filename = "update_product.csv"
  1180. df = pd.read_csv(filename,encoding="gbk")
  1181. ots_client = getConnect_ots()
  1182. for name,grade,standard_alias,remove_words,level in zip(df["name"],df["grade"],df["standard_alias"],df["remove_words"],df["level"]):
  1183. name = name.strip()
  1184. bool_query = BoolQuery(must_queries=[
  1185. TermQuery(DOCUMENT_PRODUCT_DICT_NAME,name),
  1186. TermQuery(DOCUMENT_PRODUCT_DICT_GRADE,grade)
  1187. ])
  1188. rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_table_name,Document_product_dict_table_name+"_index",
  1189. SearchQuery(bool_query,get_total_count=True),
  1190. ColumnsToGet(return_type=ColumnReturnType.NONE))
  1191. if total_count==1:
  1192. list_data = getRow_ots(rows)
  1193. _data = list_data[0]
  1194. dpd = Document_product_dict(_data)
  1195. level = 1
  1196. if re.search("器械|设备|其他",name) is not None and level==1:
  1197. level = 2
  1198. if str(remove_words)=="nan":
  1199. remove_words = ""
  1200. dpd.setValue(DOCUMENT_PRODUCT_DICT_STANDARD_ALIAS,standard_alias,True)
  1201. dpd.setValue(DOCUMENT_PRODUCT_DICT_REMOVE_WORDS,remove_words,True)
  1202. dpd.setValue(DOCUMENT_PRODUCT_DICT_LEVEL,level,True)
  1203. dpd.setValue(DOCUMENT_PRODUCT_DICT_IS_SYNCHONIZED,IS_SYNCHONIZED+1,True)
  1204. dpd.update_row(ots_client)
  1205. print(dpd.getProperties())
def test():
    """Ad-hoc dispatcher: uncomment the maintenance routine to run."""
    # pm = Product_Manager()
    # pm.test()
    # fix_product_data()
    # test_check_brand()
    test_match()
    # rebuild_milvus()
    # move_document_product()
    # delete_brands()
    # delete_specs()
    # remove_redis_keys()
    # update_document_product_dict()
def clean_product_dict_interface():
    """Delete every document_product_dict_interface row whose action is
    "insert" or "base"."""
    ots_client = getConnect_ots()
    bool_query = BoolQuery(must_queries=[
        BoolQuery(should_queries=[
            TermQuery("action","insert"),
            TermQuery("action","base")
        ])
    ])
    task_queue = Queue()
    # first page plus token-based pagination over the remaining rows
    rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_interface_table_name,Document_product_dict_interface_table_name+"_index",
                                                                   SearchQuery(bool_query,sort=Sort(sorters=[FieldSort("status")]),get_total_count=True,limit=100),
                                                                   columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
    list_data = getRow_ots(rows)
    for _data in list_data:
        task_queue.put(_data)
    print("%d/%d"%(task_queue.qsize(),total_count))
    while next_token:
        rows,next_token,total_count,is_all_succeed = ots_client.search(Document_product_dict_interface_table_name,Document_product_dict_interface_table_name+"_index",
                                                                       SearchQuery(bool_query,next_token=next_token,get_total_count=True,limit=100),
                                                                       columns_to_get=ColumnsToGet(return_type=ColumnReturnType.NONE))
        list_data = getRow_ots(rows)
        for _data in list_data:
            task_queue.put(_data)
        print("%d/%d"%(task_queue.qsize(),total_count))
    def _handle(item,result_queue):
        _dpd = Document_product_dict_interface(item)
        _dpd.delete_row(ots_client)
    mt = MultiThreadHandler(task_queue,_handle,None,30)
    mt.run()
if __name__ == '__main__':
    # manual entry point -- swap the call below for one of the commented
    # routines to run a different maintenance task
    test()
    # start_process_product()
    # print(getMD5('11936c56f2dd1426764e317ca2e8e1a7'+'&&鱼跃'))
    # print(Product_Manager.get_bid_filemd5s(155415770,getConnect_ots()))
    # name = "一"
    # ots_name = "一氧化碳分析仪"
    # print(is_similar(name,ots_name),check_product(name,ots_name))
    # print(is_legal_specs('SCM-A/SB(0.18D)'))
    # clean_product_dict_interface()