import hashlib
import codecs


def getMD5(sourceHtml):
    """Return the hexadecimal MD5 digest of ``sourceHtml``.

    Args:
        sourceHtml: The content to hash. A ``str`` is encoded with
            ``str.encode()`` (UTF-8 by default) before hashing; ``bytes``
            are hashed as-is.

    Returns:
        The 32-character lowercase hex digest, or ``""`` when the input is
        ``None``, empty, or of any type other than ``str``/``bytes``.
    """
    # Check the type before touching len(): unsupported values that have no
    # length (e.g. int) must yield "" rather than raise TypeError.
    if isinstance(sourceHtml, str):
        bs = sourceHtml.encode()
    elif isinstance(sourceHtml, bytes):
        bs = sourceHtml
    else:
        return ""

    # Empty content deliberately has no digest.
    if len(bs) == 0:
        return ""

    md5 = hashlib.md5()
    md5.update(bs)
    return md5.hexdigest()


def getFingerprint(sourceHtml):
    """Build a fingerprint string of the form ``"md5=<hexdigest>"``.

    Args:
        sourceHtml: The content to fingerprint (``str`` or ``bytes``).

    Returns:
        ``"md5=<hexdigest>"`` when ``getMD5`` produces a digest, otherwise
        ``""``.
    """
    md5 = getMD5(sourceHtml)
    if md5 == "":
        return ""
    return "md5=%s" % (md5)


if __name__ == "__main__":
    # codecs.open does no newline translation, so the decoded text re-encodes
    # to the same bytes as the file; the with-block guarantees the handle is
    # closed even if reading fails.
    with codecs.open("C:\\Users\\User\\Desktop\\2.html", "rb", encoding="utf8") as htmlFile:
        sourceHtml = htmlFile.read()
    print(getFingerprint(sourceHtml))