|
@@ -6,6 +6,7 @@ import re
|
|
def html2text_with_tablehtml(_html):
|
|
def html2text_with_tablehtml(_html):
|
|
# 如果输入是字符串,使用 BeautifulSoup 解析
|
|
# 如果输入是字符串,使用 BeautifulSoup 解析
|
|
if isinstance(_html, str):
|
|
if isinstance(_html, str):
|
|
|
|
+ _html = re.sub("<html>|<body>|</body>|</html>","",_html)
|
|
_soup = BeautifulSoup(_html, "lxml")
|
|
_soup = BeautifulSoup(_html, "lxml")
|
|
else:
|
|
else:
|
|
_soup = _html
|
|
_soup = _html
|