Browse Source

修复fingerprint没有计算附件的问题

luojiehua 1 year ago
parent
commit
e0259ad9aa
1 changed file with 3 additions and 3 deletions
  1. 3 3
      BiddingKG/dl/fingerprint/documentFingerprint.py

+ 3 - 3
BiddingKG/dl/fingerprint/documentFingerprint.py

@@ -13,9 +13,9 @@ def getHtmlText(sourceHtml):
         _href = _a.attrs.get("href","")
         if _href.find("www.bidizhaobiao.com")>0:
             _a.decompose()
-    richText = _soup.find("div",attrs={"class":"richTextFetch"})
-    if richText is not None:
-        richText.decompose()
+    # richText = _soup.find("div",attrs={"class":"richTextFetch"})
+    # if richText is not None:
+    #     richText.decompose()
     _text = _soup.get_text()
 
     _text = re.sub("\s*",'',_text)