|
@@ -35,7 +35,7 @@ def test(name,content):
|
|
|
if __name__=="__main__":
|
|
|
# filename = "比地_52_79929693.html"
|
|
|
#text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
|
|
|
- text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
|
|
|
+ text = codecs.open("C:\\Users\\admin\\Desktop\\新建文本文档 (3).txt","r",encoding="utf8").read()
|
|
|
content = str(BeautifulSoup(text).find("div",id="pcontent"))
|
|
|
# df_a = {"html":[]}
|
|
|
# df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
|