productUtils.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. from BaseDataMaintenance.maintenance.product.product_setting import *
  2. import re
  3. # 判断是不是入参字符串为全中文
  4. def judge_pur_chinese(keyword):
  5. """
  6. 中文字符的编码范围为: u'\u4e00' -- u'\u9fff:只要在此范围内就可以判断为中文字符串
  7. @param keyword:
  8. @return:
  9. """
  10. # 定义一个需要删除的标点符号字符串列表
  11. remove_chars = '[·’!"\#$%&\'()#!()*+,-./:;<=>?\@,:?¥★、….>【】[]《》?“”‘’\[\\]^_`{|}~]+'
  12. # 利用re.sub来删除中文字符串中的标点符号
  13. strings = re.sub(remove_chars, "", keyword) # 将keyword中文字符串中remove_chars中包含的标点符号替换为空字符串
  14. for ch in strings:
  15. if u'\u4e00' <= ch <= u'\u9fff':
  16. pass
  17. else:
  18. return False
  19. return True
  20. from fuzzywuzzy import fuzz
  21. def is_similar(source,target):
  22. source = str(source).lower()
  23. target = str(target).lower()
  24. max_len = max(len(source),len(target))
  25. min_len = min(len(source),len(target))
  26. # dis_len = abs(len(source)-len(target))
  27. # min_dis = min(max_len*0.2,4)
  28. if min_len==0 and max_len>0:
  29. return False
  30. if max_len<=4:
  31. if source==target:
  32. return True
  33. else:
  34. #判断相似度
  35. similar = fuzz.ratio(source,target)
  36. if similar>90:
  37. return True
  38. # 全中文判断是否包含
  39. if judge_pur_chinese(source) and judge_pur_chinese(target):
  40. if len(source)==max_len:
  41. if str(source).find(target)>=0:
  42. return True
  43. else:
  44. if target.find(source)>=0:
  45. return True
  46. return False
  47. SPECS_CHECK_SET = set([i for i in 'abcdefghijklmnopqrstuvwxyz']) | set([i for i in '0123456789']) | set([i for i in 'IⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ'])
  48. def check_specs(source,target):
  49. '''
  50. check if the source specs is the same as the target
  51. same only if the chars in SPECS_CHECK_SET have the same counts
  52. :param source:
  53. :param target:
  54. :return:
  55. '''
  56. source = str(source).lower()
  57. target = str(target).lower()
  58. dict_source = {}
  59. dict_target = {}
  60. for s in source:
  61. if s in SPECS_CHECK_SET:
  62. if s not in dict_source:
  63. dict_source[s] = 0
  64. dict_source[s] += 1
  65. for s in target:
  66. if s in SPECS_CHECK_SET:
  67. if s not in dict_target:
  68. dict_target[s] = 0
  69. dict_target[s] += 1
  70. union_keys = set(list(dict_source.keys())) & set(list(dict_target.keys()))
  71. if len(dict_source.keys())!= len(union_keys):
  72. return False
  73. for k,v in dict_source.items():
  74. if v!=dict_target.get(k):
  75. return False
  76. return True
  77. import json
  78. import requests
  79. session = requests.Session()
  80. def request_embedding(sentence,retry_times=3):
  81. for _ in range(retry_times):
  82. resp = session.post(embedding_url,json={"sentence":sentence})
  83. if resp.status_code==200:
  84. content = resp.content.decode("utf-8")
  85. _d = json.loads(content)
  86. if _d.get("success"):
  87. return _d.get("vector")
  88. return None
  89. def clean_product_name(product_name):
  90. '''
  91. clean before insert
  92. :param product_name:
  93. :return:
  94. '''
  95. return product_name
  96. def clean_product_brand(product_brand):
  97. '''
  98. clean before insert
  99. :param product_brand:
  100. :return:
  101. '''
  102. return product_brand
  103. SPECS_PATTERN = re.compile("[^A-Za-z0-9-\\/()()]")
  104. def clean_product_specs(product_specs):
  105. '''
  106. clean before insert
  107. :param product_specs:
  108. :return:
  109. '''
  110. _specs = re.sub(SPECS_PATTERN,'',product_specs)
  111. if len(_specs)>0:
  112. return _specs
  113. return product_specs
  114. def clean_product_unit_price(product_unit_price):
  115. '''
  116. clean before insert
  117. :param product_unit_price:
  118. :return:
  119. '''
  120. try:
  121. if product_unit_price is not None and product_unit_price!="":
  122. _price = float(product_unit_price)
  123. return _price
  124. except Exception as e:
  125. return ""
  126. return ""
  127. def clean_product_quantity(product_quantity):
  128. '''
  129. :param product_quantity:
  130. :return:
  131. '''
  132. try:
  133. if product_quantity is not None and product_quantity!="":
  134. _quantity = int(product_quantity)
  135. return _quantity
  136. except Exception as e:
  137. return ""
  138. return ""
  139. if __name__ == '__main__':
  140. print(clean_product_specs("XY-K-JLJ-3A"))
  141. print(check_specs("佳士比F6",'佳士比”F6'))