12345678910111213141516171819202122 |
- import time
- import gunicorn
- import requests
- from bs4 import BeautifulSoup
- from BaseDataMaintenance.common.Utils import article_limit
- import codecs
if __name__ == '__main__':
    # Ad-hoc manual check: load a saved HTML page, keep only its first <div>,
    # run it through article_limit, and print the serialized length before
    # and after so the effect of the limit can be eyeballed.

    # Machine-specific sample file. The previous literal contained an invalid
    # "\A" escape (three backslashes before "Administrator"), which raises a
    # warning on current Python and left a doubled separator in the path.
    html_path = "C:\\Users\\Administrator\\Desktop\\2.html"

    # Use a context manager so the handle is closed even if read() raises.
    with codecs.open(html_path, "r", encoding="utf8") as html_file:
        text = html_file.read()

    # Name the parser explicitly (the same one used below) instead of letting
    # BeautifulSoup guess, which emits GuessedAtParserWarning and can differ
    # between machines.
    # NOTE(review): find() returns None when the page has no <div>, in which
    # case the literal string "None" is what gets parsed next.
    content = str(BeautifulSoup(text, "lxml").find("div"))
    _soup = BeautifulSoup(content, "lxml")
    print(len(str(_soup)))

    # article_limit is a project helper; 100 is presumably its size limit --
    # confirm the unit against BaseDataMaintenance.common.Utils.
    _soup = article_limit(_soup, 100)
    print(len(str(_soup)))
    print(str(_soup))
|