Browse Source

无表格产品属性提取连接错误优化 例子273303637

lsm 1 year ago
parent
commit
1aecb22001
1 changed file with 11 additions and 2 deletions
  1. 11 2
      BiddingKG/dl/interface/predictor.py

+ 11 - 2
BiddingKG/dl/interface/predictor.py

@@ -3386,9 +3386,18 @@ class ProductAttributesPredictor():
                 for key_value in key_value_list:
                 for key_value in key_value_list:
                     key_value = re.sub("^[一二三四五六七八九十]{1,3}[、.]|^[\d]{1,2}[、.]\d{,2}|^[\((]?[一二三四五六七八九十]{1,3}[\))][、]?","",key_value)
                     key_value = re.sub("^[一二三四五六七八九十]{1,3}[、.]|^[\d]{1,2}[、.]\d{,2}|^[\((]?[一二三四五六七八九十]{1,3}[\))][、]?","",key_value)
                     temp = re.split("[::]",key_value)
                     temp = re.split("[::]",key_value)
-                    key = temp[-2]
+                    if len(temp)>2:
+                        if temp[0] in head_list:
+                            key = temp[0]
+                            value = "".join(temp[1:])
+                        else:
+                            key = temp[-2]
+                            value = temp[-1]
+                    else:
+                        key = temp[0]
+                        value = temp[1]
                     key = re.sub("^[一二三四五六七八九十]{1,3}[、.]|^[\d]{1,2}[、.]\d{,2}|^[\((]?[一二三四五六七八九十]{1,3}[\))][、]?","",key)
                     key = re.sub("^[一二三四五六七八九十]{1,3}[、.]|^[\d]{1,2}[、.]\d{,2}|^[\((]?[一二三四五六七八九十]{1,3}[\))][、]?","",key)
-                    value = temp[-1]
+
                     head_list.append(key)
                     head_list.append(key)
                     head_value_list.append(value)
                     head_value_list.append(value)
                 head_set = set(head_list)
                 head_set = set(head_list)