luojiehua 3 anni fa
parent
commit
87d642b5c8
2 file modificati con 2 aggiunte e 1 eliminazione
  1. 1 1
      .idea/encodings.xml
  2. 1 0
      BiddingKG/dl/fingerprint/documentFingerprint.py

+ 1 - 1
.idea/encodings.xml

@@ -1,10 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="Encoding">
-    <file url="file://$PROJECT_DIR$/BiddingKG/dl/LEGAL_ENTERPRISE.txt" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BiddingKG/dl/LEGAL_ENTERPRISE.txt" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BiddingKG/dl/entityLink/LEGAL_ENTERPRISE.pk" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BiddingKG/dl/form/websource_67000_table.csv" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BiddingKG/dl/product/test/2021-01-29-2021-01-29公告信息.xlsx" charset="GBK" />
+    <file url="file://$PROJECT_DIR$/BiddingKG/maxcompute/documentMerge/data/2021-06-25-mergeTrain.pk" charset="GBK" />
   </component>
 </project>

+ 1 - 0
BiddingKG/dl/fingerprint/documentFingerprint.py

@@ -5,6 +5,7 @@ import codecs
 from bs4 import BeautifulSoup
 import re
 
+
 def getHtmlText(sourceHtml):
     _soup = BeautifulSoup(sourceHtml,"lxml")
     list_a = _soup.find_all("a")