# documentFingerprint.py (727 B)
  1. import hashlib
  2. import codecs
  3. def getMD5(sourceHtml):
  4. if sourceHtml is not None and len(sourceHtml)>0:
  5. if isinstance(sourceHtml,str):
  6. bs = sourceHtml.encode()
  7. elif isinstance(sourceHtml,bytes):
  8. bs = sourceHtml
  9. else:
  10. return ""
  11. md5 = hashlib.md5()
  12. md5.update(bs)
  13. return md5.hexdigest()
  14. return ""
  15. def getFingerprint(sourceHtml):
  16. md5 = getMD5(sourceHtml)
  17. if md5!="":
  18. _fingerprint = "md5=%s"%(md5)
  19. else:
  20. _fingerprint = ""
  21. return _fingerprint
  22. if __name__=="__main__":
  23. sourceHtml = text = codecs.open("C:\\Users\\User\\Desktop\\2.html","rb",encoding="utf8").read()
  24. print(getFingerprint(sourceHtml))