2 years ago · 75a7301df8
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1418,7 +1418,7 @@ def segment(soup,final=True):
 
				 
			
 
				     if len(text)<10000000:
			
 
				         while(LOOP_BEGIN<len(text)):
			
 
				-            _text += re.sub("）",")",re.sub("（","(",re.sub("\s{2,}","",text[LOOP_BEGIN:LOOP_BEGIN+LOOP_LEN])))
			
 
				+            _text += re.sub("）",")",re.sub("（","(",re.sub("\s(?!\d{2}:\d{2})","",text[LOOP_BEGIN:LOOP_BEGIN+LOOP_LEN])))
			
 
				             LOOP_BEGIN += LOOP_LEN
			
 
				         text = _text
			
 
				     # 附件标识前修改为句号，避免正文和附件内容混合在一起
			
@@ -2737,7 +2737,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				 
			
 
				                 #去掉标点符号
			
 
				                 if entity_type!='time':
			
 
				-                    entity_text = re.sub("[,，。：!&@$\*]","",entity_text)
			
 
				+                    entity_text = re.sub("[,，。：!&@$\*\s]","",entity_text)
			
 
				                 entity_text = entity_text.replace("(","（").replace(")","）") if isinstance(entity_text,str) else entity_text
			
 
				                 # 组织机构实体名称补充
			
 
				                 if entity_type in ["org", "company"]: