Kaynağa Gözat

公告处理超时修复

znj 3 yıl önce
ebeveyn
işleme
ca6d79de22

+ 9 - 4
BiddingKG/dl/interface/Preprocessing.py

@@ -1775,7 +1775,12 @@ def article_limit(soup,limit_words=30000):
         while len(_soup.find_all(recursive=False)) == 1 and \
                 _soup.get_text(strip=True) == _soup.find_all(recursive=False)[0].get_text(strip=True):
             _soup = _soup.find_all(recursive=False)[0]
-        try:
+        if len(_soup.find_all(recursive=False)) == 0:
+            _soup.string = str(_soup.get_text())[:max_count-_count]
+            _count += len(re.sub(sub_space, "", _soup.string))
+            _gap = _count - max_count
+            next_soup = None
+        else:
             for _soup_part in _soup.find_all(recursive=False):
                 if not _is_skip:
                     _count += len(re.sub(sub_space, "", _soup_part.get_text()))
@@ -1784,13 +1789,12 @@ def article_limit(soup,limit_words=30000):
                         if _gap <= max_gap:
                             _is_skip = True
                         else:
+                            _is_skip = True
                             next_soup = _soup_part
                             _count -= len(re.sub(sub_space, "", _soup_part.get_text()))
-                            break
+                            continue
                 else:
                     _soup_part.decompose()
-        except:
-            return _count,_gap,None
         return _count,_gap,next_soup
 
     text_count = 0
@@ -1809,6 +1813,7 @@ def article_limit(soup,limit_words=30000):
             text_count,gap,n_soup = soup_limit(soup,text_count,max_count=limit_words,max_gap=500)
             while n_soup:
                 text_count, gap, n_soup = soup_limit(n_soup, text_count, max_count=limit_words, max_gap=500)
+
     else:
         # 有附件
         _text = re.sub(sub_space, "", soup.get_text())

+ 0 - 1
BiddingKG/dl/interface/getAttributes.py

@@ -1139,7 +1139,6 @@ def findAttributeAfterEntity(PackDict,roleSet,PackageList,PackageSet,list_senten
                 else:
                     packageName_entity = packagePointer.entity_text
                 addRatioByEntity(PackDict, packageName_entity, _entity.entity_text, _attribute)
-
     ''''''
     # 通过模型分类的招标/代理联系人
     list_sentence = sorted(list_sentence, key=lambda x: x.sentence_index)