Browse Source

修复乃君句子和文章添加##attachment_begin##不一致导致报错问题

lsm 2 years ago
parent
commit
3cf2212cdc
1 changed file with 2 additions and 1 deletion
  1. 2 1
      BiddingKG/dl/interface/Preprocessing.py

+ 2 - 1
BiddingKG/dl/interface/Preprocessing.py

@@ -2160,7 +2160,7 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
                 if re.search("##attachment_begin##",sentence_text):
                     in_attachment = True
                     sentence_text = re.sub("##attachment_begin##","",sentence_text)
-                elif re.search("##attachment_end##",sentence_text):
+                if re.search("##attachment_end##",sentence_text):
                     in_attachment = False
                     sentence_text = re.sub("##attachment_end##", "", sentence_text)
                 if sentence_index >= attachment_begin_index and attachment_begin_index!=-1:
@@ -2177,6 +2177,7 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
             list_sentences_temp.append(Sentences(doc_id=doc_id,sentence_index=0,sentence_text="sentence_text",tokens=[],pos_tags=[],ner_tags=""))
         list_sentences.append(list_sentences_temp)
         list_outlines.append(outline_list)
+        article.content = re.sub("##attachment_begin##|##attachment_end##", "", article.content)
     return list_sentences,list_outlines
 
 def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):