# encoding=utf8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import logging
import os
import sys
# __dir__ = os.path.dirname(os.path.abspath(__file__))
import zlib

import requests
# sys.path.append(__dir__)
# sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../../")
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2
import numpy as np
import math
import time
import traceback
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
import paddle

import ocr.tools.infer.utility as utility
from ocr.ppocr.postprocess import build_post_process
from ocr.ppocr.utils.logging import get_logger
from ocr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
from config.max_compute_config import MAX_COMPUTE
from format_convert.utils import judge_error_code, log, namespace_to_dict, get_platform, file_lock, \
    get_gpu_memory_usage, get_current_process_gpu_id
from format_convert import _global

import torch
from torch import nn
from ocr.tools.infer.torch_rec_model import Rec_ResNet_34
import gc

logger = get_logger()


class TextRecognizer(object):
    shrink_memory_count = 0

    def __init__(self, args):
        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.character_type = args.rec_char_type
        self.rec_batch_num = args.rec_batch_num
        self.rec_batch_num = 16
        print('self.rec_batch_num', self.rec_batch_num)
        self.rec_algorithm = args.rec_algorithm
        postprocess_params = {
            'name': 'CTCLabelDecode',
            "character_type": args.rec_char_type,
            "character_dict_path": args.rec_char_dict_path,
            # "use_space_char": args.use_space_char
            "use_space_char": False
        }
        self.postprocess_op = build_post_process(postprocess_params)

        rec_model_path = args.rec_model_dir
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = Rec_ResNet_34()
        mode_state_dict = torch.load(rec_model_path, self.device)['state_dict']
        if str(self.device)=='cpu': # cpu处理时精度调整，加速推理
            for name, value in mode_state_dict.items():
                if get_platform() != "Windows":
                    value = value.double()
                value = torch.where((value < 1.0e-23) & (value > 0.0), 1.0e-23, value)
                value = torch.where((value > -1.0e-23) & (value < 0.0), -1.0e-23, value)
                mode_state_dict[name] = value

        model.load_state_dict(mode_state_dict)

        self.predictor = model
        self.predictor.to(self.device)
        self.predictor.eval()

        if str(self.device) != 'cpu':
            self.gpu_id = get_current_process_gpu_id()
        else:
            self.gpu_id = None

    def resize_norm_img(self, img, max_wh_ratio):
        h, w = img.shape[:2]
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        # print('max_wh_ratio', max_wh_ratio)
        # max_wh_ratio h是w的10倍，直接返回
        if max_wh_ratio < 0.1:
            # log('max_wh_ratio < 0.1', )
            resized_image = img.astype('float32')
            resized_image = resized_image.transpose((2, 0, 1)) / 255
            return resized_image
        else:
            if self.character_type == "ch":
                imgW = int((32 * max_wh_ratio))

            ratio = w / float(h)
            if math.ceil(imgH * ratio) > imgW:
                resized_w = imgW
            else:
                resized_w = int(math.ceil(imgH * ratio))

            try:
                resized_image = cv2.resize(img, (resized_w, imgH))
            except:
                log("predict_rec.py resize_norm_img resize shape " + str((resized_w, imgH, imgW, h, w, ratio, max_wh_ratio)) + ' ' + str(self.rec_image_shape))
                raise

        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        if resized_w is not None:
            padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def predict(self, norm_img_batch):
        tensor = torch.from_numpy(norm_img_batch).float()
        # if norm_img.shape[3] >= 100 and get_platform() != "Windows" and not MAX_COMPUTE:
        if get_platform() != "Windows" and not MAX_COMPUTE:
            # 加锁
            time2 = time.time()
            lock_file_sub = 'ocr'
            lock_file = os.path.abspath(os.path.dirname(__file__)) + "/" + lock_file_sub + ".lock"
            f = file_lock(lock_file)
            log("rec get file_lock " + lock_file + " time " + str(time.time()-time2))
            try:
                time2 = time.time()
                if str(self.device) != 'cpu':
                    torch.cuda.empty_cache()
                tensor = tensor.to(self.device)
                with torch.no_grad():
                    out = self.predictor(tensor)
                log("get file_lock run rec" + " time " + str(time.time()-time2))
            except RuntimeError:
                log("ocr/tools/infer/predict_rec.py predict.run error! maybe no gpu memory!")
                log("rec predictor shrink memory! ori_im.shape " + str(norm_img_batch.shape))
                get_gpu_memory_usage()
                raise RuntimeError
            finally:
                f.close()
                if str(self.device) != 'cpu':
                    torch.cuda.empty_cache()
                gc.collect()
        else:
            tensor = tensor.to(self.device)
            with torch.no_grad():
                out = self.predictor(tensor)
        # logging.info("ocr model predict time - rec" + str(time.time()-start_time))
        out = out.cpu().numpy()
        preds = out
        return preds

    def predict_batch(self, batch_list):
        batch_out_list = []
        if get_platform() != "Windows" and not MAX_COMPUTE and self.gpu_id is not None:
            # 加锁
            time2 = time.time()
            lock_file_sub = f'ocr_{self.gpu_id}'
            lock_file = os.path.abspath(os.path.dirname(__file__)) + "/" + lock_file_sub + ".lock"
            f = file_lock(lock_file)
            log("rec get file_lock " + lock_file + " time " + str(time.time()-time2))
            try:
                time2 = time.time()
                if str(self.device) != 'cpu':
                    torch.cuda.empty_cache()
                for sub_batch_list in batch_list:
                    sub_batch_out = []
                    for tensor in sub_batch_list:
                        with torch.no_grad():
                            out = self.predictor(tensor)
                            out = out.cpu().numpy()
                        sub_batch_out.append(out)
                    # sub_batch_out = np.concatenate(sub_batch_out, axis=0)
                    batch_out_list.append(sub_batch_out)
                log("get file_lock run rec" + " time " + str(time.time()-time2))

            except RuntimeError:
                log("ocr/tools/infer/predict_rec.py predict.run error! maybe no gpu memory!")
                log("rec predictor shrink memory! ori_im.shape " + str(tensor.shape))
                get_gpu_memory_usage()
                raise RuntimeError
            finally:
                f.close()
                if str(self.device) != 'cpu':
                    torch.cuda.empty_cache()
        else:
            for sub_batch_list in batch_list:
                sub_batch_out = []
                for tensor in sub_batch_list:
                    # print('tensor.shape', tensor.shape)
                    with torch.no_grad():
                        out = self.predictor(tensor)
                        out = out.cpu().numpy()
                    # print('out.shape', out.shape)
                    sub_batch_out.append(out)
                # sub_batch_out = np.concatenate(sub_batch_out, axis=0)
                batch_out_list.append(sub_batch_out)

        # 转为numpy
        for bi, sub_batch_out in enumerate(batch_out_list):
            batch_out_list[bi] = np.concatenate(sub_batch_out, axis=0)
        return batch_out_list

    def __call__(self, img_list):
        start_time = time.time()
        # print('into TextRecognizer __call__')
        img_num = len(img_list)

        # 过滤图片比例异常的
        # print('rec len(img_list)', len(img_list))
        temp_list = []
        for img in img_list:
            if img.shape[0] == 0 or img.shape[1] == 0 \
                    or img.shape[0] >= 10000 or img.shape[1] >= 10000 \
                    or img.shape[1] / img.shape[0] <= 0.5 \
                    or img.shape[1] / img.shape[0] >= 100:
                # print('rec img.shape[1] / img.shape[0] <= 0.5', img.shape)
                continue
            temp_list.append(img)
        if not temp_list:
            return None, 0
        img_list = temp_list

        # 按比例排序
        width_list = []
        i = 0
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))
        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        # 分批预测
        # rec_res = []
        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
        elapse = 0
        batch_list = []
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            max_wh_ratio = 0
            # 取这个batch中比例最大的
            for ino in range(beg_img_no, end_img_no):
                # h, w = img_list[ino].shape[0:2]
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            # print('max_wh_ratio', max_wh_ratio)

            # resize image
            for ino in range(beg_img_no, end_img_no):
                # print('img_list[indices[ino]].shape', img_list[indices[ino]].shape)
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                # print('norm_img.shape', norm_img.shape)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)

            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()

            # 预测
            # starttime = time.time()
            # # 当图片很长时，降低batch，防止爆内存
            # # print('norm_img_batch.shape', norm_img_batch.shape)
            # preds = []
            # if norm_img_batch.shape[-1] >= 400:
            #     if norm_img_batch.shape[-1] <= 1000:
            #         mini_batch_size = 4
            #     elif norm_img_batch.shape[-1] <= 3000:
            #         mini_batch_size = 2
            #     else:
            #         mini_batch_size = 1
            #     for bi in range(0, norm_img_batch.shape[0], mini_batch_size):
            #         sub_batch = norm_img_batch[bi:bi+mini_batch_size]
            #         sub_preds = self.predict(sub_batch)
            #         preds.append(sub_preds)
            #         # print('type(sub_preds), sub_preds.shape', type(sub_preds), sub_preds.shape)
            #     preds = np.concatenate(preds, axis=0)
            # else:
            #     preds = self.predict(norm_img_batch)
            # # print('type(preds), preds.shape', type(preds), preds.shape)
            #
            # # 后处理
            # rec_result = self.postprocess_op(preds)
            # for rno in range(len(rec_result)):
            #     rec_res[indices[beg_img_no + rno]] = rec_result[rno]
            # elapse += time.time() - starttime

            # 根据长度，动态batch
            if norm_img_batch.shape[-1] >= 400:
                if norm_img_batch.shape[-1] <= 1000:
                    mini_batch_size = 4
                elif norm_img_batch.shape[-1] <= 3000:
                    mini_batch_size = 2
                else:
                    mini_batch_size = 1
                sub_batch_list = []
                for bi in range(0, norm_img_batch.shape[0], mini_batch_size):
                    sub_batch = norm_img_batch[bi:bi+mini_batch_size]
                    tensor = torch.from_numpy(sub_batch).float()
                    tensor = tensor.to(self.device)
                    sub_batch_list.append(tensor)
            else:
                tensor = torch.from_numpy(norm_img_batch).float()
                tensor = tensor.to(self.device)
                sub_batch_list = [tensor]

            batch_list.append(sub_batch_list)

        # 预测
        batch_out_list = self.predict_batch(batch_list)

        # 后处理
        for bi, out in enumerate(batch_out_list):
            begin_img_no = bi * batch_num
            rec_result = self.postprocess_op(out)
            for ri in range(len(rec_result)):
                rec_res[indices[begin_img_no + ri]] = rec_result[ri]
        elapse += time.time() - start_time
        return rec_res, elapse


class TextRecognizer2(object):
    shrink_memory_count = 0

    def __init__(self, args):
        self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")]
        self.character_type = args.rec_char_type
        self.rec_batch_num = args.rec_batch_num
        self.rec_algorithm = args.rec_algorithm
        postprocess_params = {
            'name': 'CTCLabelDecode',
            "character_type": args.rec_char_type,
            "character_dict_path": args.rec_char_dict_path,
            "use_space_char": args.use_space_char
        }
        self.postprocess_op = build_post_process(postprocess_params)
        self.args = args
        # self.predictor, self.input_tensor, self.output_tensors = \
        #     utility.create_predictor(args, 'rec', logger)

    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
        if self.character_type == "ch":
            imgW = int((32 * max_wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
            resized_w = imgW
        else:
            resized_w = int(math.ceil(imgH * ratio))
        # print("predict_rec.py resize_norm_img resize shape", (resized_w, imgH))
        resized_image = cv2.resize(img, (resized_w, imgH))
        resized_image = resized_image.astype('float32')
        resized_image = resized_image.transpose((2, 0, 1)) / 255
        resized_image -= 0.5
        resized_image /= 0.5
        padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = resized_image
        return padding_im

    def __call__(self, img_list):
        from format_convert.convert_need_interface import from_gpu_interface_redis
        img_num = len(img_list)
        # Calculate the aspect ratio of all text bars
        width_list = []
        for img in img_list:
            width_list.append(img.shape[1] / float(img.shape[0]))

        # Sorting can speed up the recognition process
        indices = np.argsort(np.array(width_list))

        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
        elapse = 0
        all_gpu_time = 0
        for beg_img_no in range(0, img_num, batch_num):
            # 预处理
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
            max_wh_ratio = 0
            for ino in range(beg_img_no, end_img_no):
                h, w = img_list[indices[ino]].shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            for ino in range(beg_img_no, end_img_no):
                norm_img = self.resize_norm_img(img_list[indices[ino]],
                                                max_wh_ratio)
                norm_img = norm_img[np.newaxis, :]
                norm_img_batch.append(norm_img)
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
            starttime = time.time()

            # # 压缩numpy
            # compressed_array = io.BytesIO()
            # np.savez_compressed(compressed_array, norm_img_batch)
            # compressed_array.seek(0)
            # norm_img_batch = compressed_array.read()

            # 调用GPU接口
            _dict = {"inputs": norm_img_batch, "args": str(namespace_to_dict(self.args)), "md5": _global.get("md5")}
            result = from_gpu_interface_redis(_dict, model_type="ocr", predictor_type="rec")
            if judge_error_code(result):
                logging.error("from_gpu_interface failed! " + str(result))
                raise requests.exceptions.RequestException

            preds = result.get("preds")
            gpu_time = result.get("gpu_time")
            all_gpu_time += round(gpu_time, 2)

            # # 解压numpy
            # decompressed_array = io.BytesIO()
            # decompressed_array.write(preds)
            # decompressed_array.seek(0)
            # preds = np.load(decompressed_array, allow_pickle=True)['arr_0']
            # log("inputs.shape" + str(preds.shape))

            # 后处理
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
            elapse += time.time() - starttime

        log("ocr model predict time - rec - time " + str(all_gpu_time) + " - num " + str(img_num))
        return rec_res, elapse


def main(args):
    image_file_list = get_image_file_list(args.image_dir)
    text_recognizer = TextRecognizer(args)
    valid_image_file_list = []
    img_list = []
    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.info("error in loading image:{}".format(image_file))
            continue
        valid_image_file_list.append(image_file)
        img_list.append(img)
    try:
        rec_res, predict_time = text_recognizer(img_list)
    except:
        logger.info(traceback.format_exc())
        logger.info(
            "ERROR!!!! \n"
            "Please read the FAQ：https://github.com/PaddlePaddle/PaddleOCR#faq \n"
            "If your model has tps module:  "
            "TPS does not support variable shape.\n"
            "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
        exit()
    for ino in range(len(img_list)):
        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
                                               rec_res[ino]))
    logger.info("Total predict time for {} images, cost: {:.3f}".format(
        len(img_list), predict_time))


if __name__ == "__main__":
    main(utility.parse_args())