convert_test.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import base64
  2. import json
  3. import os
  4. import random
  5. import sys
  6. import time
  7. from glob import glob
  8. from multiprocessing import Process
  9. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  10. from format_convert.utils import get_platform, request_post, get_md5_from_bytes
  11. from format_convert.convert import to_html
  12. def test_one(p, from_remote=False):
  13. start_time = time.time()
  14. with open(p, "rb") as f:
  15. file_bytes = f.read()
  16. file_base64 = base64.b64encode(file_bytes)
  17. _md5 = get_md5_from_bytes(file_bytes)
  18. data = {"file": file_base64, "type": p.split(".")[-1], "filemd5": 100}
  19. if from_remote:
  20. # _url = 'http://121.46.18.113:15010/convert'
  21. # _url = 'http://192.168.2.103:15010/convert'
  22. # _url = 'http://172.16.160.65:15010/convert'
  23. _url = 'http://127.0.0.1:15010/convert'
  24. result = json.loads(request_post(_url, data, time_out=10000))
  25. text_str = ""
  26. for t in result.get("result_html"):
  27. text_str += t
  28. to_html(os.path.dirname(os.path.abspath(__file__)) + "/../result.html",
  29. text_str)
  30. else:
  31. print("only support remote!")
  32. print(_md5)
  33. print("result_text", result.get("result_text")[0][:20])
  34. print("is_success", result.get("is_success"))
  35. print(time.time()-start_time)
  36. def test_duplicate(path_list, process_no=None):
  37. start_time = time.time()
  38. # random.shuffle(path_list)
  39. for i in range(10):
  40. if i % 10 == 0:
  41. if process_no is not None:
  42. print("Process", process_no, i*len(path_list), time.time()-start_time)
  43. else:
  44. print("Loop", i*len(path_list), time.time()-start_time)
  45. for p in path_list:
  46. test_one(p, from_remote=True)
  47. if __name__ == '__main__':
  48. if get_platform() == "Windows":
  49. # file_path = "C:/Users/Administrator/Desktop/test_xls/merge_cell.xlsx"
  50. # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/20210609202634853485.xlsx"
  51. # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_ODPS/1624325845476.pdf"
  52. # file_path = "C:/Users/Administrator/Downloads/神仙居旅游汽车租赁竞争性磋商文件(1).doc"
  53. # file_path = "C:/Users/Administrator/Desktop/test_xls/error2.xlsx"
  54. file_path = "C:/Users/Administrator/Desktop/test_doc/error5.docx"
  55. else:
  56. file_path = "1660296734009.pdf"
  57. test_one(file_path, from_remote=True)
  58. # paths = glob("C:/Users/Administrator/Desktop/test_image/*")
  59. # for file_path in paths:
  60. # test_one(file_path, from_remote=True)
  61. # if get_platform() == "Windows":
  62. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623328459080.doc",
  63. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/94961e1987d1090e.xls",
  64. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/11111111.rar"]
  65. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623328459080.doc",
  66. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/94961e1987d1090e.xls"]
  67. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623423836610.pdf"]
  68. # file_path_list = ["C:/Users/Administrator/Desktop/error16.jpg"]
  69. # else:
  70. # file_path_list = ["1623423836610.pdf"]
  71. # start_time = time.time()
  72. # p_list = []
  73. # for j in range(3):
  74. # p = Process(target=test_duplicate, args=(file_path_list, j, ))
  75. # p.start()
  76. # p_list.append(p)
  77. # for p in p_list:
  78. # p.join()
  79. # print("finish", time.time() - start_time)