convert_need_interface.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106
  1. # encoding=utf8
  2. import base64
  3. import json
  4. import multiprocessing
  5. import os
  6. import pickle
  7. import random
  8. import sys
  9. import time
  10. import uuid
  11. import cv2
  12. import torch
  13. from werkzeug.exceptions import NotFound
  14. sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
  15. from botr.yolov8.yolo_interface import yolo
  16. from botr.yolov8.model import Predictor
  17. from atc.atc_interface import AtcModels, atc
  18. from idc.idc_interface import IdcModels, idc
  19. from isr.isr_interface import IsrModels, isr
  20. import traceback
  21. import requests
  22. from format_convert import _global
  23. from format_convert.utils import get_platform, get_sequential_data, judge_error_code, request_post, get_ip_port, \
  24. get_intranet_ip, get_logger, log, get_args_from_config, get_using_ip, np2bytes, set_flask_global
  25. from ocr.ocr_interface import ocr, OcrModels
  26. from otr.otr_interface import otr, OtrModels
  27. from format_convert.libreoffice_interface import office_convert
  28. import numpy as np
  29. from format_convert.max_compute_config import max_compute
  30. MAX_COMPUTE = max_compute
  31. if get_platform() == "Windows":
  32. FROM_REMOTE = False
  33. only_test_ocr = False
  34. if only_test_ocr:
  35. ip_port_flag = {}
  36. ip_port_dict = get_ip_port()
  37. for _k in ip_port_dict.keys():
  38. ip_port_flag.update({_k: {"ocr": 0,
  39. "otr": 0,
  40. "convert": 0,
  41. "office": 0
  42. }})
  43. _global.update({"ip_port_flag": ip_port_flag})
  44. ip_port_dict["http://127.0.0.1"]["ocr"] = ["17000"]
  45. ip_port_dict["http://127.0.0.1"]["otr"] = ["18000"]
  46. _global.update({"ip_port": ip_port_dict})
  47. else:
  48. FROM_REMOTE = True
  49. if MAX_COMPUTE:
  50. FROM_REMOTE = False
  51. # ip_port_dict = get_ip_port()
  52. # ip = 'http://127.0.0.1'
  53. # ocr_port_list = ip_port_dict.get(ip).get("ocr")
  54. # otr_port_list = ip_port_dict.get(ip).get("otr")
  55. lock = multiprocessing.RLock()
  56. # 连接redis数据库
  57. # redis_db = redis.StrictRedis(host='192.168.2.103', port='6379',
  58. # db=1, password='bidi123456', health_check_interval=300)
  59. redis_db = None
  60. def _interface(_dict, time_out=60, retry_times=3):
  61. try:
  62. # 重试
  63. model_type = _dict.get("model_type")
  64. while retry_times:
  65. ip_port = interface_pool(model_type)
  66. if judge_error_code(ip_port):
  67. return ip_port
  68. _url = ip_port + "/" + model_type
  69. # base64_stream = base64.b64encode(pickle.dumps(_dict))
  70. r = json.loads(request_post(_url, {"data": json.dumps(_dict),
  71. "model_type": model_type}, time_out=time_out))
  72. log("get _interface return")
  73. if type(r) == list:
  74. # 接口连不上换个端口重试
  75. if retry_times <= 1:
  76. return r
  77. else:
  78. retry_times -= 1
  79. log("retry post _interface... left times " + str(retry_times) + " " + model_type)
  80. continue
  81. if judge_error_code(r):
  82. return r
  83. return r
  84. break
  85. except TimeoutError:
  86. return [-5]
  87. except requests.exceptions.ConnectionError as e:
  88. return [-2]
  89. def from_office_interface(src_path, dest_path, target_format, retry_times=1, from_remote=FROM_REMOTE):
  90. try:
  91. # Win10跳出超时装饰器
  92. # if get_platform() == "Windows":
  93. # # origin_office_convert = office_convert.__wrapped__
  94. # # file_path = origin_office_convert(src_path, dest_path, target_format, retry_times)
  95. # file_path = office_convert(src_path, dest_path, target_format, retry_times)
  96. # else:
  97. # # 将装饰器包装为一个类,否则多进程Pickle会报错 it's not the same object as xxx 问题,
  98. # # timeout_decorator_obj = my_timeout_decorator.TimeoutClass(office_convert, 180, TimeoutError)
  99. # # file_path = timeout_decorator_obj.run(src_path, dest_path, target_format, retry_times)
  100. #
  101. # file_path = office_convert(src_path, dest_path, target_format, retry_times)
  102. if from_remote:
  103. # 重试
  104. retry_times_1 = 1
  105. retry_times_2 = 2
  106. while retry_times_1 and retry_times_2:
  107. # _ip = ip_pool("soffice", _random=True)
  108. # _port = port_pool("soffice", _random=True)
  109. # _ip = interface_ip_list[0]
  110. # _port = "16002"
  111. # _ip, _port = interface_pool("soffice")
  112. # ip_port = from_schedule_interface("office")
  113. ip_port = interface_pool_gunicorn("office")
  114. if judge_error_code(ip_port):
  115. return ip_port
  116. _url = ip_port + "/soffice"
  117. with open(src_path, "rb") as f:
  118. file_bytes = f.read()
  119. base64_stream = base64.b64encode(file_bytes)
  120. start_time = time.time()
  121. r = json.loads(request_post(_url, {"src_path": src_path,
  122. "dest_path": dest_path,
  123. "file": base64_stream,
  124. "target_format": target_format,
  125. "retry_times": retry_times}, time_out=25))
  126. log("get interface return")
  127. log("office use time " + str(time.time()-start_time))
  128. if type(r) == list:
  129. # 接口连不上换个端口重试
  130. if retry_times_1 <= 1:
  131. return r
  132. else:
  133. retry_times_1 -= 1
  134. log("retry post office_interface... left times " + str(retry_times_1))
  135. continue
  136. file_str = r.get("data")
  137. if judge_error_code(file_str):
  138. if retry_times_2 <= 1:
  139. return file_str
  140. else:
  141. retry_times_2 -= 1
  142. continue
  143. file_bytes = eval(file_str)
  144. uid1 = src_path.split(os.sep)[-1].split(".")[0]
  145. file_path = dest_path + uid1 + "." + target_format
  146. if not os.path.exists(os.path.dirname(file_path)):
  147. os.makedirs(os.path.dirname(file_path), mode=0o777)
  148. with open(file_path, "wb") as f:
  149. f.write(file_bytes)
  150. break
  151. else:
  152. file_path = office_convert(src_path, dest_path, target_format, retry_times)
  153. if judge_error_code(file_path):
  154. return file_path
  155. return file_path
  156. except TimeoutError:
  157. log("from_office_interface timeout error!")
  158. return [-5]
  159. except:
  160. log("from_office_interface error!")
  161. print("from_office_interface", traceback.print_exc())
  162. return [-1]
  163. def from_ocr_interface(image_stream, is_table=False, only_rec=False, from_remote=FROM_REMOTE):
  164. log("into from_ocr_interface")
  165. try:
  166. base64_stream = base64.b64encode(image_stream)
  167. # 调用接口
  168. try:
  169. if from_remote:
  170. retry_times_1 = 3
  171. # 重试
  172. while retry_times_1:
  173. # _ip = ip_pool("ocr", _random=True)
  174. # _port = port_pool("ocr", _random=True)
  175. # if _ip == interface_ip_list[1]:
  176. # _port = ocr_port_list[0]
  177. # _ip, _port = interface_pool("ocr")
  178. # ip_port = _ip + ":" + _port
  179. # ip_port = from_schedule_interface("ocr")
  180. ip_port = interface_pool_gunicorn("ocr")
  181. if judge_error_code(ip_port):
  182. return ip_port
  183. _url = ip_port + "/ocr"
  184. r = json.loads(request_post(_url, {"data": base64_stream,
  185. "md5": _global.get("md5"),
  186. "only_rec": only_rec
  187. },
  188. time_out=60))
  189. log("get interface return")
  190. if type(r) == list:
  191. # 接口连不上换个端口重试
  192. if retry_times_1 <= 1:
  193. # if is_table:
  194. return r, r
  195. # else:
  196. # return r
  197. else:
  198. retry_times_1 -= 1
  199. log("retry post ocr_interface... left times " + str(retry_times_1))
  200. continue
  201. if judge_error_code(r):
  202. return r
  203. break
  204. else:
  205. if globals().get("global_ocr_model") is None:
  206. print("=========== init ocr model ===========")
  207. globals().update({"global_ocr_model": OcrModels().get_model()})
  208. r = ocr(data=base64_stream, ocr_model=globals().get("global_ocr_model"), only_rec=only_rec)
  209. except TimeoutError:
  210. # if is_table:
  211. return [-5], [-5]
  212. # else:
  213. # return [-5]
  214. except requests.exceptions.ConnectionError as e:
  215. # if is_table:
  216. return [-2], [-2]
  217. # else:
  218. # return [-2]
  219. _dict = r
  220. text_list = eval(_dict.get("text"))
  221. bbox_list = eval(_dict.get("bbox"))
  222. if text_list is None:
  223. text_list = []
  224. if bbox_list is None:
  225. bbox_list = []
  226. if is_table:
  227. return text_list, bbox_list
  228. else:
  229. if text_list and bbox_list:
  230. text = get_sequential_data(text_list, bbox_list, html=True)
  231. if judge_error_code(text):
  232. return text
  233. else:
  234. text = ""
  235. return text
  236. except Exception as e:
  237. log("from_ocr_interface error!")
  238. # print("from_ocr_interface", e, global_type)
  239. if is_table:
  240. return [-1], [-1]
  241. else:
  242. return [-1]
  243. def from_gpu_interface_flask(_dict, model_type, predictor_type):
  244. log("into from_gpu_interface")
  245. start_time = time.time()
  246. try:
  247. # 调用接口
  248. _dict.update({"predictor_type": predictor_type, "model_type": model_type})
  249. if model_type == "ocr":
  250. use_zlib = True
  251. else:
  252. use_zlib = False
  253. result = _interface(_dict, time_out=30, retry_times=2, use_zlib=use_zlib)
  254. log("from_gpu_interface finish size " + str(sys.getsizeof(_dict)) + " time " + str(time.time()-start_time))
  255. return result
  256. except Exception as e:
  257. log("from_gpu_interface error!")
  258. log("from_gpu_interface failed " + str(time.time()-start_time))
  259. traceback.print_exc()
  260. return [-2]
  261. def from_gpu_interface_redis(_dict, model_type, predictor_type):
  262. log("into from_gpu_interface")
  263. start_time = time.time()
  264. try:
  265. # 调用接口
  266. _uuid = uuid.uuid1().hex
  267. _dict.update({"predictor_type": predictor_type, "model_type": model_type,
  268. "uuid": _uuid})
  269. _time = time.time()
  270. log("pickle.dumps(_dict)" + str(_dict))
  271. redis_db.rpush("producer_"+model_type, pickle.dumps(_dict))
  272. log("producer_" + model_type + " len " + str(redis_db.llen("producer_" + model_type)))
  273. log("to producer_" + model_type + " time " + str(time.time()-_time))
  274. _time = time.time()
  275. time_out = 300
  276. while True:
  277. time.sleep(0.2)
  278. if time.time() - _time > time_out:
  279. raise Exception
  280. if redis_db.hexists("consumer_"+model_type, _uuid):
  281. time1 = time.time()
  282. result = redis_db.hget("consumer_"+model_type, _uuid)
  283. log("from consumer_"+model_type + " time " + str(time.time()-time1))
  284. break
  285. result = pickle.loads(result)
  286. log("from_gpu_interface finish - size " + str(sys.getsizeof(_dict)) + " - time " + str(time.time()-start_time))
  287. return result
  288. except Exception as e:
  289. log("from_gpu_interface error!")
  290. log("from_gpu_interface failed " + str(time.time()-start_time))
  291. traceback.print_exc()
  292. return [-2]
  293. # def from_gpu_flask_sm(_dict, model_type, predictor_type):
  294. # log("into from_gpu_share_memory")
  295. # start_time = time.time()
  296. # shm = None
  297. # try:
  298. # # 放入共享内存
  299. # _time = time.time()
  300. # np_data = _dict.get("inputs")
  301. # shm = to_share_memory(np_data)
  302. # log("data into share memory " + str(shm.name) + " " + str(time.time()-_time))
  303. #
  304. # # 调用接口
  305. # _time = time.time()
  306. # _dict.pop("inputs")
  307. # _dict.update({"predictor_type": predictor_type, "model_type": model_type,
  308. # "sm_name": shm.name, "sm_shape": np_data.shape,
  309. # "sm_dtype": str(np_data.dtype)})
  310. # result = _interface(_dict, time_out=30, retry_times=2)
  311. # log("_interface cost " + str(time.time()-_time))
  312. #
  313. # # 读取共享内存
  314. # _time = time.time()
  315. # sm_name = result.get("sm_name")
  316. # sm_shape = result.get("sm_shape")
  317. # sm_dtype = result.get("sm_dtype")
  318. # sm_dtype = get_np_type(sm_dtype)
  319. # if sm_name:
  320. # outputs = from_share_memory(sm_name, sm_shape, sm_dtype)
  321. # else:
  322. # log("from_share_memory failed!")
  323. # raise Exception
  324. # log("data from share memory " + sm_name + " " + str(time.time()-_time))
  325. #
  326. # log("from_gpu_interface finish - size " + str(sys.getsizeof(_dict)) + " - time " + str(time.time()-start_time))
  327. # return {"preds": outputs, "gpu_time": result.get("gpu_time")}
  328. # except Exception as e:
  329. # log("from_gpu_interface failed " + str(time.time()-start_time))
  330. # traceback.print_exc()
  331. # return [-2]
  332. # finally:
  333. # # del b # Unnecessary; merely emphasizing the array is no longer used
  334. # if shm:
  335. # try:
  336. # shm.close()
  337. # shm.unlink()
  338. # except FileNotFoundError:
  339. # log("share memory " + shm.name + " not exists!")
  340. # except Exception:
  341. # traceback.print_exc()
  342. #
  343. #
  344. # def from_gpu_share_memory(_dict, model_type, predictor_type):
  345. # log("into from_gpu_share_memory")
  346. # start_time = time.time()
  347. # try:
  348. # _dict.update({"model_type": model_type, "predictor_type": predictor_type})
  349. # outputs, gpu_time = share_memory_pool(_dict)
  350. # log("from_gpu_share_memory finish - size " + str(sys.getsizeof(_dict)) + " - time " + str(time.time()-start_time))
  351. # return {"preds": outputs, "gpu_time": float(gpu_time)}
  352. # except Exception as e:
  353. # log("from_gpu_interface failed " + str(time.time()-start_time))
  354. # traceback.print_exc()
  355. # return [-2]
  356. def from_otr_interface2(image_stream):
  357. log("into from_otr_interface")
  358. try:
  359. base64_stream = base64.b64encode(image_stream)
  360. # 调用接口
  361. try:
  362. if globals().get("global_otr_model") is None:
  363. globals().update({"global_otr_model": OtrModels().get_model()})
  364. print("=========== init otr model ===========")
  365. r = otr(data=base64_stream, otr_model=globals().get("global_otr_model"))
  366. except TimeoutError:
  367. return [-5], [-5], [-5], [-5], [-5]
  368. except requests.exceptions.ConnectionError as e:
  369. log("from_otr_interface")
  370. print("from_otr_interface", traceback.print_exc())
  371. return [-2], [-2], [-2], [-2], [-2]
  372. # 处理结果
  373. _dict = r
  374. points = eval(_dict.get("points"))
  375. split_lines = eval(_dict.get("split_lines"))
  376. bboxes = eval(_dict.get("bboxes"))
  377. outline_points = eval(_dict.get("outline_points"))
  378. lines = eval(_dict.get("lines"))
  379. # print("from_otr_interface len(bboxes)", len(bboxes))
  380. if points is None:
  381. points = []
  382. if split_lines is None:
  383. split_lines = []
  384. if bboxes is None:
  385. bboxes = []
  386. if outline_points is None:
  387. outline_points = []
  388. if lines is None:
  389. lines = []
  390. return points, split_lines, bboxes, outline_points, lines
  391. except Exception as e:
  392. log("from_otr_interface error!")
  393. print("from_otr_interface", traceback.print_exc())
  394. return [-1], [-1], [-1], [-1], [-1]
  395. def from_otr_interface(image_stream, is_from_pdf=False, from_remote=FROM_REMOTE):
  396. log("into from_otr_interface")
  397. try:
  398. base64_stream = base64.b64encode(image_stream)
  399. # 调用接口
  400. try:
  401. if from_remote:
  402. log("from remote")
  403. retry_times_1 = 3
  404. # 重试
  405. while retry_times_1:
  406. # _ip = ip_pool("otr", _random=True)
  407. # _port = port_pool("otr", _random=True)
  408. # if _ip == interface_ip_list[1]:
  409. # _port = otr_port_list[0]
  410. ip_port = interface_pool_gunicorn("otr")
  411. # ip_port = from_schedule_interface("otr")
  412. if judge_error_code(ip_port):
  413. return ip_port
  414. _url = ip_port + "/otr"
  415. r = json.loads(request_post(_url, {"data": base64_stream,
  416. "is_from_pdf": is_from_pdf,
  417. "md5": _global.get("md5")}, time_out=60))
  418. log("get interface return")
  419. if type(r) == list:
  420. # 接口连不上换个端口重试
  421. if retry_times_1 <= 1:
  422. return r
  423. else:
  424. retry_times_1 -= 1
  425. log("retry post otr_interface... left times " + str(retry_times_1))
  426. continue
  427. if judge_error_code(r):
  428. return r
  429. break
  430. else:
  431. log("from local")
  432. log("otr_model " + str(globals().get("global_otr_model")))
  433. if globals().get("global_otr_model") is None:
  434. print("=========== init otr model ===========")
  435. globals().update({"global_otr_model": OtrModels().get_model()})
  436. log("init finish")
  437. r = otr(data=base64_stream, otr_model=globals().get("global_otr_model"), is_from_pdf=is_from_pdf)
  438. # r = otr(data=base64_stream, otr_model=None, is_from_pdf=is_from_pdf)
  439. except TimeoutError:
  440. return [-5]
  441. except requests.exceptions.ConnectionError as e:
  442. log("from_otr_interface")
  443. print("from_otr_interface", traceback.print_exc())
  444. return [-2]
  445. # 处理结果
  446. _dict = r
  447. list_line = eval(_dict.get("list_line"))
  448. return list_line
  449. except Exception as e:
  450. log("from_otr_interface error!")
  451. print("from_otr_interface", traceback.print_exc())
  452. return [-1]
  453. def from_isr_interface(image_stream, from_remote=FROM_REMOTE):
  454. log("into from_isr_interface")
  455. # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
  456. start_time = time.time()
  457. try:
  458. base64_stream = base64.b64encode(image_stream)
  459. # 调用接口
  460. try:
  461. if from_remote:
  462. retry_times_1 = 3
  463. # 重试
  464. while retry_times_1:
  465. ip_port = interface_pool_gunicorn("isr")
  466. if judge_error_code(ip_port):
  467. return ip_port
  468. _url = ip_port + "/isr"
  469. r = json.loads(request_post(_url, {"data": base64_stream,
  470. "md5": _global.get("md5")},
  471. time_out=60))
  472. log("get interface return")
  473. if type(r) == list:
  474. # 接口连不上换个端口重试
  475. if retry_times_1 <= 1:
  476. return r
  477. else:
  478. retry_times_1 -= 1
  479. log("retry post isr_interface... left times " + str(retry_times_1))
  480. continue
  481. if judge_error_code(r):
  482. return r
  483. break
  484. else:
  485. if globals().get("global_isr_model") is None:
  486. print("=========== init isr model ===========")
  487. isr_yolo_model, isr_model = IsrModels().get_model()
  488. globals().update({"global_isr_yolo_model": isr_yolo_model})
  489. globals().update({"global_isr_model": isr_model})
  490. r = isr(data=base64_stream,
  491. isr_yolo_model=globals().get("global_isr_yolo_model"),
  492. isr_model=globals().get("global_isr_model"))
  493. except TimeoutError:
  494. return [-5]
  495. except requests.exceptions.ConnectionError as e:
  496. return [-2]
  497. _dict = r
  498. if from_remote:
  499. image_string = _dict.get("image")
  500. if judge_error_code(image_string):
  501. return image_string
  502. # [1]代表检测不到印章,直接返回
  503. if isinstance(image_string, list) and image_string == [1]:
  504. return image_string
  505. image_base64 = image_string.encode("utf-8")
  506. image_bytes = base64.b64decode(image_base64)
  507. buffer = np.frombuffer(image_bytes, dtype=np.uint8)
  508. image_np = cv2.imdecode(buffer, 1)
  509. else:
  510. image_np = _dict.get("image")
  511. log("from_isr_interface cost time " + str(time.time()-start_time))
  512. return image_np
  513. except Exception as e:
  514. log("from_isr_interface error!")
  515. traceback.print_exc()
  516. return [-11]
  517. finally:
  518. # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
  519. pass
  520. def from_idc_interface(image_stream, from_remote=FROM_REMOTE):
  521. log("into from_idc_interface")
  522. start_time = time.time()
  523. try:
  524. base64_stream = base64.b64encode(image_stream)
  525. # 调用接口
  526. try:
  527. if from_remote:
  528. retry_times_1 = 3
  529. # 重试
  530. while retry_times_1:
  531. ip_port = interface_pool_gunicorn("idc")
  532. if judge_error_code(ip_port):
  533. return ip_port
  534. _url = ip_port + "/idc"
  535. r = json.loads(request_post(_url, {"data": base64_stream,
  536. "md5": _global.get("md5")},
  537. time_out=60))
  538. log("get interface return")
  539. if type(r) == list:
  540. # 接口连不上换个端口重试
  541. if retry_times_1 <= 1:
  542. return r
  543. else:
  544. retry_times_1 -= 1
  545. log("retry post idc_interface... left times " + str(retry_times_1))
  546. continue
  547. if judge_error_code(r):
  548. return r
  549. break
  550. else:
  551. if globals().get("global_idc_model") is None:
  552. print("=========== init idc model ===========")
  553. idc_model = IdcModels().get_model()
  554. globals().update({"global_idc_model": idc_model})
  555. r = idc(data=base64_stream,
  556. model=globals().get("global_idc_model"))
  557. except TimeoutError:
  558. return [-5]
  559. except requests.exceptions.ConnectionError as e:
  560. return [-2]
  561. _dict = r
  562. angle = _dict.get("angle")
  563. log("from_idc_interface cost time " + str(time.time()-start_time))
  564. return angle
  565. except Exception as e:
  566. log("from_idc_interface error!")
  567. traceback.print_exc()
  568. return [-11]
  569. def from_atc_interface(text, from_remote=FROM_REMOTE):
  570. log("into from_atc_interface")
  571. start_time = time.time()
  572. try:
  573. # 调用接口
  574. try:
  575. if from_remote:
  576. retry_times_1 = 3
  577. # 重试
  578. while retry_times_1:
  579. ip_port = interface_pool_gunicorn("atc")
  580. if judge_error_code(ip_port):
  581. return ip_port
  582. _url = ip_port + "/atc"
  583. r = json.loads(request_post(_url, {"data": text,
  584. "md5": _global.get("md5")},
  585. time_out=60))
  586. log("get interface return")
  587. if type(r) == list:
  588. # 接口连不上换个端口重试
  589. if retry_times_1 <= 1:
  590. return r
  591. else:
  592. retry_times_1 -= 1
  593. log("retry post atc_interface... left times " + str(retry_times_1))
  594. continue
  595. if judge_error_code(r):
  596. return r
  597. break
  598. else:
  599. if globals().get("global_atc_model") is None:
  600. print("=========== init atc model ===========")
  601. atc_model = AtcModels().get_model()
  602. globals().update({"global_atc_model": atc_model})
  603. r = atc(data=text,
  604. model=globals().get("global_atc_model"))
  605. except TimeoutError:
  606. return [-5]
  607. except requests.exceptions.ConnectionError as e:
  608. return [-2]
  609. _dict = r
  610. classification = _dict.get("classification")
  611. log("from_atc_interface cost time " + str(time.time()-start_time))
  612. return classification
  613. except Exception as e:
  614. log("from_atc_interface error!")
  615. traceback.print_exc()
  616. return [-11]
  617. def from_yolo_interface(image_stream, from_remote=FROM_REMOTE):
  618. log("into from_yolo_interface")
  619. start_time = time.time()
  620. try:
  621. base64_stream = base64.b64encode(image_stream)
  622. # 调用接口
  623. try:
  624. if from_remote:
  625. retry_times_1 = 3
  626. # 重试
  627. while retry_times_1:
  628. ip_port = interface_pool_gunicorn("yolo")
  629. if judge_error_code(ip_port):
  630. return ip_port
  631. _url = ip_port + "/yolo"
  632. log('yolo _url ' + _url)
  633. r = json.loads(request_post(_url, {"data": base64_stream,
  634. "md5": _global.get("md5")},
  635. time_out=60))
  636. log("get interface return")
  637. if type(r) == list:
  638. # 接口连不上换个端口重试
  639. if retry_times_1 <= 1:
  640. return r
  641. else:
  642. retry_times_1 -= 1
  643. log("retry post yolo_interface... left times " + str(retry_times_1))
  644. continue
  645. if judge_error_code(r):
  646. return r
  647. break
  648. else:
  649. if globals().get("global_yolo_predictor") is None:
  650. print("=========== init yolo model ===========")
  651. ROOT = os.path.abspath(os.path.dirname(__file__)) + '/../'
  652. model_path = ROOT + 'botr/yolov8/weights.pt'
  653. image_size = 640
  654. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  655. yolo_predictor = Predictor(image_size, device, model_path)
  656. globals().update({"global_yolo_predictor": yolo_predictor})
  657. r = yolo(data=base64_stream,
  658. predictor=globals().get("global_yolo_predictor"))
  659. except TimeoutError:
  660. return [-5]
  661. except requests.exceptions.ConnectionError as e:
  662. return [-2]
  663. _dict = r
  664. b_table_list = _dict.get("b_table_list")
  665. log("from_yolo_interface cost time " + str(time.time()-start_time))
  666. return b_table_list
  667. except Exception as e:
  668. log("from_yolo_interface error!")
  669. traceback.print_exc()
  670. return [-11]
  671. # def from_schedule_interface(interface_type):
  672. # try:
  673. # _ip = "http://" + get_intranet_ip()
  674. # _port = ip_port_dict.get(_ip).get("schedule")[0]
  675. # _url = _ip + ":" + _port + "/schedule"
  676. # data = {"interface_type": interface_type}
  677. # result = json.loads(request_post(_url, data, time_out=10)).get("data")
  678. # if judge_error_code(result):
  679. # return result
  680. # _ip, _port = result
  681. # log("from_schedule_interface " + _ip + " " + _port)
  682. # return _ip + ":" + _port
  683. # except requests.exceptions.ConnectionError as e:
  684. # log("from_schedule_interface ConnectionError")
  685. # return [-2]
  686. # except:
  687. # log("from_schedule_interface error!")
  688. # traceback.print_exc()
  689. # return [-1]
  690. def interface_pool(interface_type, use_gunicorn=True):
  691. ip_port_flag = _global.get("ip_port_flag")
  692. ip_port_dict = _global.get("ip_port")
  693. try:
  694. if use_gunicorn:
  695. _ip = "http://127.0.0.1"
  696. _port = ip_port_dict.get(_ip).get(interface_type)[0]
  697. ip_port = _ip + ":" + str(_port)
  698. log(ip_port)
  699. return ip_port
  700. # 负载均衡, 选取ip
  701. interface_load_list = []
  702. for _ip in ip_port_flag.keys():
  703. if ip_port_dict.get(_ip).get(interface_type):
  704. load_scale = ip_port_flag.get(_ip).get(interface_type) / len(ip_port_dict.get(_ip).get(interface_type))
  705. interface_load_list.append([_ip, load_scale])
  706. if not interface_load_list:
  707. raise NotFound
  708. interface_load_list.sort(key=lambda x: x[-1])
  709. _ip = interface_load_list[0][0]
  710. # 负载均衡, 选取port
  711. ip_type_cnt = ip_port_flag.get(_ip).get(interface_type)
  712. ip_type_total = len(ip_port_dict.get(_ip).get(interface_type))
  713. if ip_type_cnt == 0:
  714. ip_type_cnt = random.randint(0, ip_type_total-1)
  715. port_index = ip_type_cnt % ip_type_total
  716. _port = ip_port_dict.get(_ip).get(interface_type)[port_index]
  717. # 更新flag
  718. current_flag = ip_type_cnt
  719. if current_flag >= 10000:
  720. ip_port_flag[_ip][interface_type] = 0
  721. else:
  722. ip_port_flag[_ip][interface_type] = current_flag + 1
  723. _global.update({"ip_port_flag": ip_port_flag})
  724. # log(str(_global.get("ip_port_flag")))
  725. ip_port = _ip + ":" + str(_port)
  726. log(ip_port)
  727. return ip_port
  728. except NotFound:
  729. log("cannot read ip from config! checkout config")
  730. return [-2]
  731. except:
  732. traceback.print_exc()
  733. return [-1]
  734. def interface_pool_gunicorn(interface_type):
  735. ip_port_flag_dict = _global.get("ip_port_flag")
  736. ip_port_dict = _global.get("ip_port")
  737. try:
  738. if ip_port_dict is None or ip_port_flag_dict is None:
  739. print('_global', _global.get_dict())
  740. raise NotFound
  741. # 负载均衡, 选取有该接口的ip
  742. min_cnt = 10000.
  743. interface_cnt = 0
  744. _ip = None
  745. port_list = []
  746. for key in ip_port_flag_dict.keys():
  747. temp_port_list = get_args_from_config(ip_port_dict, key, interface_type)
  748. if not temp_port_list:
  749. continue
  750. interface_cnt = ip_port_flag_dict.get(key).get(interface_type)
  751. if interface_cnt is not None and interface_cnt / len(temp_port_list[0]) < min_cnt:
  752. _ip = key
  753. min_cnt = interface_cnt / len(temp_port_list[0])
  754. port_list = temp_port_list[0]
  755. # 选取端口
  756. if interface_type == "office":
  757. if len(port_list) == 0:
  758. raise ConnectionError
  759. # 刚开始随机,后续求余
  760. if min_cnt == 0:
  761. _port = port_list[random.randint(0, len(port_list)-1)]
  762. ip_port_flag_dict[_ip][interface_type] = int(_port[-2:])
  763. else:
  764. _port = port_list[interface_cnt % len(port_list)]
  765. else:
  766. # 使用gunicorn则直接选第一个
  767. _port = port_list[0]
  768. # 更新flag
  769. if ip_port_flag_dict.get(_ip).get(interface_type) >= 10000:
  770. ip_port_flag_dict[_ip][interface_type] = 0
  771. else:
  772. ip_port_flag_dict[_ip][interface_type] += 1
  773. _global.update({"ip_port_flag": ip_port_flag_dict})
  774. ip_port = _ip + ":" + str(_port)
  775. log(interface_type)
  776. log(ip_port)
  777. return ip_port
  778. except NotFound:
  779. log("ip_port or ip_port_dict is None! checkout config")
  780. return [-2]
  781. except ConnectionError:
  782. log('no office interface running!')
  783. return [-15]
  784. except:
  785. traceback.print_exc()
  786. return [-1]
  787. def interface_pool_gunicorn_old(interface_type):
  788. ip_flag_list = _global.get("ip_flag")
  789. ip_port_flag_dict = _global.get("ip_port_flag")
  790. ip_port_dict = _global.get("ip_port")
  791. try:
  792. if ip_flag_list is None or ip_port_dict is None or ip_port_flag_dict is None:
  793. raise NotFound
  794. if interface_type == "office":
  795. # _ip = "http://127.0.0.1"
  796. _ip = get_using_ip()
  797. # 选取端口
  798. port_list = ip_port_dict.get(_ip).get("MASTER").get(interface_type)
  799. ip_type_cnt = ip_port_flag_dict.get(_ip).get(interface_type)
  800. if ip_type_cnt == 0:
  801. _port = port_list[random.randint(0, len(port_list)-1)]
  802. else:
  803. _port = port_list[ip_type_cnt % len(port_list)]
  804. # 更新flag
  805. if ip_port_flag_dict.get(_ip).get(interface_type) >= 10000:
  806. ip_port_flag_dict[_ip][interface_type] = 0
  807. else:
  808. ip_port_flag_dict[_ip][interface_type] += 1
  809. _global.update({"ip_port_flag": ip_port_flag_dict})
  810. else:
  811. # 负载均衡, 选取ip
  812. ip_flag_list.sort(key=lambda x: x[1])
  813. if ip_flag_list[-1][1] == 0:
  814. ip_index = random.randint(0, len(ip_flag_list)-1)
  815. else:
  816. ip_index = 0
  817. _ip = ip_flag_list[ip_index][0]
  818. if "master" in _ip:
  819. port_index = 1
  820. else:
  821. port_index = 0
  822. _ip = _ip.split("_")[0]
  823. # 选取端口, 使用gunicorn则直接选第一个
  824. # _port = ip_port_dict.get(_ip).get("MASTER").get(interface_type)[0]
  825. log("_ip " + _ip)
  826. log("interface_type " + interface_type)
  827. port_list = get_args_from_config(ip_port_dict, _ip, interface_type)
  828. log("port_list" + str(port_list))
  829. if port_index >= len(port_list):
  830. port_index = 0
  831. _port = port_list[port_index][0]
  832. # # 选取端口, 使用gunicorn则直接选第一个
  833. # _ip = _ip.split("_")[0]
  834. # port_list = get_args_from_config(ip_port_dict, _ip, interface_type)
  835. # if
  836. # print(port_list)
  837. # _port = port_list[0][0]
  838. # 更新flag
  839. if ip_flag_list[ip_index][1] >= 10000:
  840. ip_flag_list[ip_index][1] = 0
  841. else:
  842. ip_flag_list[ip_index][1] += + 1
  843. _global.update({"ip_flag": ip_flag_list})
  844. ip_port = _ip + ":" + str(_port)
  845. log(ip_port)
  846. return ip_port
  847. except NotFound:
  848. log("ip_flag or ip_port_dict is None! checkout config")
  849. return [-2]
  850. except:
  851. traceback.print_exc()
  852. return [-1]
  853. # def share_memory_pool(args_dict):
  854. # np_data = args_dict.get("inputs")
  855. # _type = args_dict.get("model_type")
  856. # args_dict.update({"sm_shape": np_data.shape, "sm_dtype": str(np_data.dtype)})
  857. #
  858. # if _type == 'ocr':
  859. # port_list = ocr_port_list
  860. # elif _type == 'otr':
  861. # port_list = otr_port_list
  862. # else:
  863. # log("type error! only support ocr otr")
  864. # raise Exception
  865. #
  866. # # 循环判断是否有空的share memory
  867. # empty_sm_list = None
  868. # sm_list_name = ""
  869. # while empty_sm_list is None:
  870. # for p in port_list:
  871. # sm_list_name = "sml_"+_type+"_"+str(p)
  872. # sm_list = get_share_memory_list(sm_list_name)
  873. # if sm_list[0] == "0":
  874. # lock.acquire(timeout=0.1)
  875. # if sm_list[0] == "0":
  876. # sm_list[0] = "1"
  877. # sm_list[-1] = "0"
  878. # empty_sm_list = sm_list
  879. # break
  880. # else:
  881. # continue
  882. # lock.release()
  883. #
  884. # log(str(os.getppid()) + " empty_sm_list " + sm_list_name)
  885. #
  886. # # numpy放入共享内存
  887. # _time = time.time()
  888. # release_share_memory(get_share_memory("psm_" + str(os.getpid())))
  889. # shm = to_share_memory(np_data)
  890. # log("data into share memory " + str(shm.name) + " " + str(time.time()-_time))
  891. #
  892. # # 参数放入共享内存列表
  893. # empty_sm_list[1] = args_dict.get("md5")
  894. # empty_sm_list[2] = args_dict.get("model_type")
  895. # empty_sm_list[3] = args_dict.get("predictor_type")
  896. # empty_sm_list[4] = args_dict.get("args")
  897. # empty_sm_list[5] = str(shm.name)
  898. # empty_sm_list[6] = str(args_dict.get("sm_shape"))
  899. # empty_sm_list[7] = args_dict.get("sm_dtype")
  900. # empty_sm_list[-1] = "1"
  901. # # log("empty_sm_list[7] " + empty_sm_list[7])
  902. # close_share_memory_list(empty_sm_list)
  903. #
  904. # # 循环判断是否完成
  905. # finish_sm_list = get_share_memory_list(sm_list_name)
  906. # while True:
  907. # if finish_sm_list[-1] == "0":
  908. # break
  909. #
  910. # # 读取共享内存
  911. # _time = time.time()
  912. # sm_name = finish_sm_list[5]
  913. # sm_shape = finish_sm_list[6]
  914. # sm_shape = eval(sm_shape)
  915. # sm_dtype = finish_sm_list[7]
  916. # gpu_time = finish_sm_list[8]
  917. # sm_dtype = get_np_type(sm_dtype)
  918. # outputs = from_share_memory(sm_name, sm_shape, sm_dtype)
  919. # log(args_dict.get("model_type") + " " + args_dict.get("predictor_type") + " outputs " + str(outputs.shape))
  920. # log("data from share memory " + sm_name + " " + str(time.time()-_time))
  921. #
  922. # # 释放
  923. # release_share_memory(get_share_memory(sm_name))
  924. #
  925. # # 重置share memory list
  926. # finish_sm_list[-1] = "0"
  927. # finish_sm_list[0] = "0"
  928. #
  929. # close_share_memory_list(finish_sm_list)
  930. # return outputs, gpu_time
  931. # def interface_pool(interface_type):
  932. # try:
  933. # ip_port_dict = _global.get("ip_port")
  934. # ip_list = list(ip_port_dict.keys())
  935. # _ip = random.choice(ip_list)
  936. # if interface_type != 'office':
  937. # _port = ip_port_dict.get(_ip).get(interface_type)[0]
  938. # else:
  939. # _port = random.choice(ip_port_dict.get(_ip).get(interface_type))
  940. # log(_ip + ":" + _port)
  941. # return _ip + ":" + _port
  942. # except Exception as e:
  943. # traceback.print_exc()
  944. # return [-1]
  945. # def ip_pool(interface_type, _random=False):
  946. # ip_flag_name = interface_type + '_ip_flag'
  947. # ip_flag = globals().get(ip_flag_name)
  948. # if ip_flag is None:
  949. # if _random:
  950. # _r = random.randint(0, len(interface_ip_list)-1)
  951. # ip_flag = _r
  952. # globals().update({ip_flag_name: ip_flag})
  953. # ip_index = _r
  954. # else:
  955. # ip_flag = 0
  956. # globals().update({ip_flag_name: ip_flag})
  957. # ip_index = 0
  958. # else:
  959. # ip_index = ip_flag % len(interface_ip_list)
  960. # ip_flag += 1
  961. #
  962. # if ip_flag >= 10000:
  963. # ip_flag = 0
  964. # globals().update({ip_flag_name: ip_flag})
  965. #
  966. # log("ip_pool " + interface_type + " " + str(ip_flag) + " " + str(interface_ip_list[ip_index]))
  967. # return interface_ip_list[ip_index]
  968. #
  969. #
  970. # def port_pool(interface_type, _random=False):
  971. # port_flag_name = interface_type + '_port_flag'
  972. #
  973. # port_flag = globals().get(port_flag_name)
  974. # if port_flag is None:
  975. # if _random:
  976. # if interface_type == "ocr":
  977. # _r = random.randint(0, len(ocr_port_list)-1)
  978. # elif interface_type == "otr":
  979. # _r = random.randint(0, len(otr_port_list)-1)
  980. # else:
  981. # _r = random.randint(0, len(soffice_port_list)-1)
  982. # port_flag = _r
  983. # globals().update({port_flag_name: port_flag})
  984. # port_index = _r
  985. # else:
  986. # port_flag = 0
  987. # globals().update({port_flag_name: port_flag})
  988. # port_index = 0
  989. # else:
  990. # if interface_type == "ocr":
  991. # port_index = port_flag % len(ocr_port_list)
  992. # elif interface_type == "otr":
  993. # port_index = port_flag % len(otr_port_list)
  994. # else:
  995. # port_index = port_flag % len(soffice_port_list)
  996. # port_flag += 1
  997. #
  998. # if port_flag >= 10000:
  999. # port_flag = 0
  1000. # globals().update({port_flag_name: port_flag})
  1001. #
  1002. # if interface_type == "ocr":
  1003. # log("port_pool " + interface_type + " " + str(port_flag) + " " + ocr_port_list[port_index])
  1004. # return ocr_port_list[port_index]
  1005. # elif interface_type == "otr":
  1006. # log("port_pool " + interface_type + " " + str(port_flag) + " " + otr_port_list[port_index])
  1007. # return otr_port_list[port_index]
  1008. # else:
  1009. # log("port_pool " + interface_type + " " + str(port_flag) + " " + soffice_port_list[port_index])
  1010. # return soffice_port_list[port_index]
  1011. if __name__ == "__main__":
  1012. _global._init()
  1013. set_flask_global()
  1014. _img = cv2.imread(r"C:/Users/Administrator/Desktop/test_b_table/error11.png")
  1015. _img_bytes = np2bytes(_img)
  1016. b_list = from_yolo_interface(_img_bytes, from_remote=True)
  1017. for l in b_list:
  1018. for b in l:
  1019. cv2.rectangle(_img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (0, 0, 255), 2)
  1020. cv2.namedWindow('img', cv2.WINDOW_NORMAL)
  1021. cv2.imshow('img', _img)
  1022. cv2.waitKey(0)