|
@@ -13,9 +13,9 @@ def getHtmlText(sourceHtml):
|
|
|
_href = _a.attrs.get("href","")
|
|
|
if _href.find("www.bidizhaobiao.com")>0:
|
|
|
_a.decompose()
|
|
|
- richText = _soup.find("div",attrs={"class":"richTextFetch"})
|
|
|
- if richText is not None:
|
|
|
- richText.decompose()
|
|
|
+ # richText = _soup.find("div",attrs={"class":"richTextFetch"})
|
|
|
+ # if richText is not None:
|
|
|
+ # richText.decompose()
|
|
|
_text = _soup.get_text()
|
|
|
|
|
|
_text = re.sub("\s*",'',_text)
|