|
@@ -37,7 +37,7 @@ tf.nn.ctc_loss
|
|
|
if __name__=="__main__":
|
|
|
# filename = "比地_52_79929693.html"
|
|
|
#text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\"+filename,"r",encoding="utf8").read()
|
|
|
- text = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
|
|
|
+ text = codecs.open("C:\\Users\\Administrator\\Desktop\\2.html","r",encoding="utf8").read()
|
|
|
content = str(BeautifulSoup(text).find("div",id="pcontent"))
|
|
|
# df_a = {"html":[]}
|
|
|
# df_a["html"].append(re.sub('\r|\n|\r\n',"",content))
|