|
@@ -2160,7 +2160,7 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
|
|
|
if re.search("##attachment_begin##",sentence_text):
|
|
|
in_attachment = True
|
|
|
sentence_text = re.sub("##attachment_begin##","",sentence_text)
|
|
|
- elif re.search("##attachment_end##",sentence_text):
|
|
|
+ if re.search("##attachment_end##",sentence_text):
|
|
|
in_attachment = False
|
|
|
sentence_text = re.sub("##attachment_end##", "", sentence_text)
|
|
|
if sentence_index >= attachment_begin_index and attachment_begin_index!=-1:
|
|
@@ -2177,6 +2177,7 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
|
|
|
list_sentences_temp.append(Sentences(doc_id=doc_id,sentence_index=0,sentence_text="sentence_text",tokens=[],pos_tags=[],ner_tags=""))
|
|
|
list_sentences.append(list_sentences_temp)
|
|
|
list_outlines.append(outline_list)
|
|
|
+ article.content = re.sub("##attachment_begin##|##attachment_end##", "", article.content)
|
|
|
return list_sentences,list_outlines
|
|
|
|
|
|
def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
|