Эх сурвалжийг харах

Merge branch 'master' of http://192.168.2.103:3000/luojiehua/BaseDataMaintenance

znj 1 жил өмнө
parent
commit
79d9c7ac1a

+ 2 - 2
BaseDataMaintenance/dataSource/interface.py

@@ -39,9 +39,9 @@ def getAttachDealInterface(_data,_type,path="",restry=1,kwargs={},url=interface_
                 _json.update(kwargs)
 
 
-            _json["timeout"] = 10000
+            _json["timeout"] = timeout
             with requests.Session() as sess:
-                _resp = sess.post(url,data=_json,timeout=timeout)
+                _resp = sess.post(url,data=_json,timeout=timeout+100)
 
             if _resp.status_code==200:
                 _result = json.loads(_resp.content.decode())

+ 1 - 1
BaseDataMaintenance/maintenance/dataflow.py

@@ -4412,7 +4412,7 @@ if __name__ == '__main__':
     # test_attachment_interface()
     df_dump = Dataflow_dumplicate(start_delete_listener=False)
     # df_dump.start_flow_dumplicate()
-    df_dump.test_dumplicate(332439629
+    df_dump.test_dumplicate(463253000
                             )
     # compare_dumplicate_check()
     # df_dump.test_merge([391898061

+ 13 - 2
BaseDataMaintenance/maxcompute/documentDumplicate.py

@@ -1064,8 +1064,18 @@ def check_dumplicate_rule(docid_less,docid_greater,fingerprint_less,fingerprint_
     #一篇要素都在附件,且两篇附件md5有重叠
     set_md5_less = set()
     set_md5_greater = set()
-    list_md5_less = json.loads(page_attachments_less)
-    list_md5_greater = json.loads(page_attachments_greater)
+    list_md5_less = []
+    if page_attachments_less:
+        try:
+            list_md5_less = json.loads(page_attachments_less)
+        except Exception as e:
+            pass
+    list_md5_greater = []
+    if page_attachments_greater:
+        try:
+            list_md5_greater = json.loads(page_attachments_greater)
+        except Exception as e:
+            pass
     for _l in list_md5_less:
         _md5 = _l.get("fileMd5")
         if _md5 is not None:
@@ -1704,6 +1714,7 @@ class f_redump_probability_final_check(BaseUDAF):
                     extract_json_less = document_less["extract_json"]
                     page_attachments_less = document_less["page_attachments"]
 
+                    _extract_less = {}
                     if extract_json_less is not None:
                         _extract_less = json.loads(extract_json_less)
                     _extract_greater = {}