# convert_test.py
  1. import base64
  2. import json
  3. import os
  4. import random
  5. import sys
  6. import time
  7. from glob import glob
  8. from multiprocessing import Process
  9. from bs4 import BeautifulSoup
  10. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  11. from format_convert.utils import get_platform, request_post, get_md5_from_bytes
  12. from format_convert.convert import to_html
  13. def test_one(p, page_no_range=None, from_remote=False):
  14. start_time = time.time()
  15. with open(p, "rb") as f:
  16. file_bytes = f.read()
  17. file_base64 = base64.b64encode(file_bytes)
  18. _md5 = get_md5_from_bytes(file_bytes)
  19. data = {"file": file_base64, "type": p.split(".")[-1], "filemd5": 100, 'page_no': page_no_range}
  20. if from_remote:
  21. # _url = 'http://121.46.18.113:15010/convert'
  22. # _url = 'http://192.168.2.103:15010/convert'
  23. # _url = 'http://192.168.2.102:15011/convert'
  24. # _url = 'http://172.16.160.65:15010/convert'
  25. _url = 'http://127.0.0.1:15010/convert'
  26. result = json.loads(request_post(_url, data, time_out=10000))
  27. text_str = ""
  28. for t in result.get("result_html"):
  29. text_str += t
  30. to_html(os.path.dirname(os.path.abspath(__file__)) + "/../result.html",
  31. text_str)
  32. else:
  33. print("only support remote!")
  34. print(_md5)
  35. print("result_text", result.get("result_text")[0][:20])
  36. print("is_success", result.get("is_success"))
  37. print(time.time()-start_time)
  38. def test_duplicate(path_list, process_no=None):
  39. start_time = time.time()
  40. # random.shuffle(path_list)
  41. for i in range(10):
  42. if i % 10 == 0:
  43. if process_no is not None:
  44. print("Process", process_no, i*len(path_list), time.time()-start_time)
  45. else:
  46. print("Loop", i*len(path_list), time.time()-start_time)
  47. for p in path_list:
  48. test_one(p, from_remote=True)
  49. if __name__ == '__main__':
  50. if get_platform() == "Windows":
  51. # file_path = "C:/Users/Administrator/Desktop/2.png"
  52. # file_path = "C:/Users/Administrator/Desktop/test_xls/merge_cell.xlsx"
  53. # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/20210609202634853485.xlsx"
  54. # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_ODPS/1624325845476.pdf"
  55. # file_path = "C:/Users/Administrator/Downloads/20210508190133924ba.pdf"
  56. # file_path = "C:/Users/Administrator/Desktop/test_doc/error8.doc"
  57. # file_path = "C:/Users/Administrator/Desktop/test_image/error10.png"
  58. # file_path = "C:/Users/Administrator/Desktop/test_b_table/error1.png"
  59. file_path = "C:/Users/Administrator/Desktop/test_pdf/error1.pdf"
  60. # file_path = "C:/save_b_table/0-0895e32470613dd7be1139eefd1342c4.png"
  61. else:
  62. file_path = "1660296734009.pdf"
  63. test_one(file_path, page_no_range='13,14', from_remote=True)
  64. # paths = glob("C:/Users/Administrator/Desktop/test_image/*")
  65. # for file_path in paths:
  66. # test_one(file_path, from_remote=True)
  67. # if get_platform() == "Windows":
  68. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623328459080.doc",
  69. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/94961e1987d1090e.xls",
  70. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/11111111.rar"]
  71. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623328459080.doc",
  72. # # "D:/BIDI_DOC/比地_文档/2022/Test_Interface/94961e1987d1090e.xls"]
  73. # # file_path_list = ["D:/BIDI_DOC/比地_文档/2022/Test_Interface/1623423836610.pdf"]
  74. # file_path_list = ["C:/Users/Administrator/Desktop/error16.jpg"]
  75. # else:
  76. # file_path_list = ["1623423836610.pdf"]
  77. # start_time = time.time()
  78. # p_list = []
  79. # for j in range(3):
  80. # p = Process(target=test_duplicate, args=(file_path_list, j, ))
  81. # p.start()
  82. # p_list.append(p)
  83. # for p in p_list:
  84. # p.join()
  85. # print("finish", time.time() - start_time)
  86. # with open(file_path, 'r') as f:
  87. # t = f.read()
  88. # soup = BeautifulSoup(t, 'lxml')
  89. # print(soup.text)