|
@@ -52,19 +52,36 @@ def is_similar(source,target):
|
|
|
return False
|
|
|
|
|
|
|
|
|
-SPECS_CHECK_SET = set([i for i in 'abcdefghijklmnopqrstuvwxyz']) | set([i for i in '0123456789']) | set([i for i in 'IⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ'])
|
|
|
|
|
|
-def check_specs(source,target):
|
|
|
- '''
|
|
|
- check if the source specs is the same as the target
|
|
|
- same only if the chars in SPECS_CHECK_SET have the same counts
|
|
|
- :param source:
|
|
|
- :param target:
|
|
|
- :return:
|
|
|
- '''
|
|
|
def is_contain(source, target, min_len=2):
    '''
    Tell whether the shorter of the two strings occurs inside the longer one.

    The direction does not matter: ``source`` may contain ``target`` or the
    other way round. The contained (shorter) string must be at least
    ``min_len`` characters long, otherwise the result is False.

    :param source: first string
    :param target: second string
    :param min_len: minimum length required of the contained string
    :return: True if one string contains the other, False otherwise
    '''
    # Pick the roles once instead of repeating the test for each direction;
    # on equal lengths ``target`` is treated as the contained string.
    if len(source) >= len(target):
        longer, shorter = source, target
    else:
        longer, shorter = target, source
    return shorter in longer and len(shorter) >= min_len
|
|
|
+
|
|
|
def check_product(source, target):
    '''
    Compare two product names by containment.

    Delegates to is_contain with a minimum contained length of 3.

    :param source: first product name
    :param target: second product name
    :return: True if is_contain accepts the pair, False otherwise
    '''
    return is_contain(source, target, min_len=3)
|
|
|
+
|
|
|
+
|
|
|
def check_brand(source, target):
    '''
    Compare two brand names by case-insensitive containment.

    Both values are converted with str() and lowercased, then passed to
    is_contain with its default minimum length.

    :param source: first brand name (any value; it is stringified)
    :param target: second brand name (any value; it is stringified)
    :return: True if one lowercased name contains the other, False otherwise
    '''
    source = str(source).lower()
    target = str(target).lower()

    # Return the boolean explicitly: the previous version fell off the end of
    # the function on a non-match and so returned None instead of False.
    return is_contain(source, target)
|
|
|
+
|
|
|
# Characters that are significant when comparing specs: ASCII lowercase
# letters, digits, the decimal point and Roman numerals.
_SPECS_ROMAN_NUMERALS = 'IⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ'
SPECS_CHECK_SET = (
    set('abcdefghijklmnopqrstuvwxyz')
    | set('0123456789.')
    | set(_SPECS_ROMAN_NUMERALS)
    # Specs are lowercased before they are filtered, and str.lower() maps the
    # Roman numeral characters to their small forms (e.g. 'Ⅱ' -> 'ⅱ'), so the
    # small forms must be in the set too or the numerals are dropped.
    | set(_SPECS_ROMAN_NUMERALS.lower())
)
# Matches every character that is NOT in SPECS_CHECK_SET. None of the members
# is special inside a character class, so no escaping is needed; sorting only
# makes the pattern text deterministic.
SPECS_PATTERN = re.compile("[^%s]" % "".join(sorted(SPECS_CHECK_SET)))
|
|
|
+
|
|
|
+def has_same_specs_count(source, target):
|
|
|
+
|
|
|
source = str(source).lower()
|
|
|
target = str(target).lower()
|
|
|
|
|
|
+ # just take care of type and count,lack of order
|
|
|
dict_source = {}
|
|
|
dict_target = {}
|
|
|
for s in source:
|
|
@@ -83,8 +100,39 @@ def check_specs(source,target):
|
|
|
for k,v in dict_source.items():
|
|
|
if v!=dict_target.get(k):
|
|
|
return False
|
|
|
+
|
|
|
return True
|
|
|
|
|
|
def check_specs(source,target):
    '''
    Check whether the source specs match the target specs.

    Both values are stringified, lowercased and reduced to the characters
    found in SPECS_CHECK_SET. They match when the reduced strings are equal
    and non-empty, or when has_same_specs_count accepts them and they agree
    on a sufficiently long run of trailing characters.

    :param source: source specs (any value; it is stringified)
    :param target: target specs (any value; it is stringified)
    :return: True if the specs are considered the same, False otherwise
    '''
    # Filter with SPECS_CHECK_SET directly instead of SPECS_PATTERN: that
    # module-level name is assigned a second, different pattern further down
    # the file (just above clean_product_specs), so by the time this function
    # is called it no longer holds the pattern built from SPECS_CHECK_SET.
    source = ''.join(ch for ch in str(source).lower() if ch in SPECS_CHECK_SET)
    target = ''.join(ch for ch in str(target).lower() if ch in SPECS_CHECK_SET)

    if source and source == target:
        return True

    if not has_same_specs_count(source, target):
        return False

    # Same characters but in a different order: walk backwards from the end
    # of both strings until the first position where they differ.
    shortest = min(len(source), len(target))
    _index = 0
    for _i in range(shortest):
        _index = -(_i + 1)
        if source[_index] != target[_index]:
            break

    # NOTE(review): _index stops ON the first differing position, so after k
    # matching trailing characters followed by a mismatch the value compared
    # here is k + 1. Kept exactly as before - confirm whether the differing
    # character is meant to count towards the threshold.
    return abs(_index) > shortest // 2
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
import json
|
|
|
|
|
|
import requests
|
|
@@ -115,7 +163,7 @@ def clean_product_brand(product_brand):
|
|
|
'''
|
|
|
return product_brand
|
|
|
|
|
|
-SPECS_PATTERN = re.compile("[^A-Za-z0-9-\\/()()]")
|
|
|
+SPECS_PATTERN = re.compile("[^A-Za-z0-9-\\/()().]")
|
|
|
def clean_product_specs(product_specs):
|
|
|
'''
|
|
|
clean before insert
|
|
@@ -160,4 +208,4 @@ def clean_product_quantity(product_quantity):
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
print(clean_product_specs("XY-K-JLJ-3A"))
|
|
|
- print(check_specs("佳士比F6",'佳士比”F6'))
|
|
|
+ print(check_specs("3.6",'3.6'))
|