
Add an image direction classification model and an image seal removal model

fangjiasheng, 2 years ago
commit e9a12e8ce9

+ 38 - 10
format_convert/convert_image.py

@@ -17,7 +17,8 @@ from format_convert import get_memory_info, _global
 from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html, get_logger, log, \
     memory_decorator, pil_resize
 from format_convert.table_correct import get_rotated_image
-from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface, from_gpu_interface_redis
+from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface, from_gpu_interface_redis, \
+    from_idc_interface, from_isr_interface
 from otr.table_line import table_preprocess, table_postprocess


@@ -86,20 +87,47 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, u

     log("into image_preprocess")
     try:
-        # Deskew the image and write it back to the original path
-        # print("image_process", image_path)
-        g_r_i = get_rotated_image(image_np, image_path)
-        if judge_error_code(g_r_i):
+        # # Deskew the image and write it back to the original path
+        # # print("image_process", image_path)
+        # g_r_i = get_rotated_image(image_np, image_path)
+        # if judge_error_code(g_r_i):
+        #     if is_from_docx:
+        #         return []
+        #     else:
+        #         return g_r_i
+        # image_np = cv2.imread(image_path)
+        # if image_np is None:
+        #     return []
+
+        if image_np is None:
+            return []
+
+        with open(image_path, "rb") as f:
+            image_bytes = f.read()
+
+        # Correct the image orientation with the idc model
+        image_np = from_idc_interface(image_bytes)
+        if judge_error_code(image_np):
             if is_from_docx:
                 return []
             else:
-                return g_r_i
+                return image_np
+        idc_path = image_path.split(".")[0] + "_idc." + image_path.split(".")[-1]
+        cv2.imwrite(idc_path, image_np)
+        with open(idc_path, "rb") as f:
+            image_bytes = f.read()

-        image_np = cv2.imread(image_path)
-        if image_np is None:
-            return []
+        # Remove seals with the isr model
+        image_np = from_isr_interface(image_bytes)
+        if judge_error_code(image_np):
+            if is_from_docx:
+                return []
+            else:
+                return image_np
+        isr_path = image_path.split(".")[0] + "_isr." + image_path.split(".")[-1]
+        cv2.imwrite(isr_path, image_np)

-        # otr needs the image resized to the model input size, written to another path
+        # The otr model detects tables; resize the image to the model input size and write it to another path
         best_h, best_w = get_best_predict_size(image_np)
         # image_resize = cv2.resize(image_np, (best_w, best_h), interpolation=cv2.INTER_AREA)
         image_resize = pil_resize(image_np, best_h, best_w)
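
Note on the intermediate-file naming above: image_path.split(".")[0] truncates at the first dot, so a directory name containing a dot would corrupt the _idc/_isr paths. A safer variant built on os.path.splitext; this is a sketch, not part of the commit, and stage_path is a hypothetical helper:

    import os

    def stage_path(image_path, suffix):
        # Insert a stage tag before the extension, e.g. "a/b.png" -> "a/b_idc.png".
        root, ext = os.path.splitext(image_path)
        return root + "_" + suffix + ext

    idc_path = stage_path(image_path, "idc")
    isr_path = stage_path(image_path, "isr")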

+ 113 - 1
format_convert/convert_need_interface.py

@@ -12,10 +12,14 @@ import time
 import uuid
 import zlib
 from queue import Queue
-
 import redis
 from werkzeug.exceptions import NotFound
+
+from idc.idc_interface import IdcModels, idc
+
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+
+from isr.isr_interface import IsrModels, isr
 import traceback
 import requests
 from format_convert import _global
@@ -485,6 +489,114 @@ def from_otr_interface(image_stream, is_from_pdf=False, from_remote=FROM_REMOTE)
         return [-1]


+def from_isr_interface(image_stream, from_remote=False):
+    log("into from_isr_interface")
+    start_time = time.time()
+    try:
+        base64_stream = base64.b64encode(image_stream)
+
+        # Call the interface
+        try:
+            if from_remote:
+                retry_times_1 = 3
+                # Retry
+                while retry_times_1:
+                    ip_port = interface_pool_gunicorn("isr")
+                    if judge_error_code(ip_port):
+                        return ip_port
+                    _url = ip_port + "/isr"
+                    r = json.loads(request_post(_url, {"data": base64_stream,
+                                                       "md5": _global.get("md5")},
+                                                time_out=60))
+                    log("get interface return")
+                    if type(r) == list:
+                        # Interface unreachable, retry with another port
+                        if retry_times_1 <= 1:
+                            return r
+                        else:
+                            retry_times_1 -= 1
+                            log("retry post isr_interface... left times " + str(retry_times_1))
+                            continue
+                    if judge_error_code(r):
+                        return r
+                    break
+            else:
+                if globals().get("global_isr_model") is None:
+                    print("=========== init isr model ===========")
+                    isr_yolo_model, isr_model = IsrModels().get_model()
+                    globals().update({"global_isr_yolo_model": isr_yolo_model})
+                    globals().update({"global_isr_model": isr_model})
+                r = isr(data=base64_stream,
+                        isr_yolo_model=globals().get("global_isr_yolo_model"),
+                        isr_model=globals().get("global_isr_model"))
+        except TimeoutError:
+            return [-5]
+        except requests.exceptions.ConnectionError as e:
+            return [-2]
+
+        _dict = r
+        image_np = _dict.get("image")
+        log("from_isr_interface cost time " + str(time.time()-start_time))
+        return image_np
+    except Exception as e:
+        log("from_isr_interface error!")
+        traceback.print_exc()
+        return [-11]
+
+
+def from_idc_interface(image_stream, from_remote=False):
+    log("into from_idc_interface")
+    start_time = time.time()
+    try:
+        base64_stream = base64.b64encode(image_stream)
+
+        # Call the interface
+        try:
+            if from_remote:
+                retry_times_1 = 3
+                # Retry
+                while retry_times_1:
+                    ip_port = interface_pool_gunicorn("idc")
+                    if judge_error_code(ip_port):
+                        return ip_port
+                    _url = ip_port + "/idc"
+                    r = json.loads(request_post(_url, {"data": base64_stream,
+                                                       "md5": _global.get("md5")},
+                                                time_out=60))
+                    log("get interface return")
+                    if type(r) == list:
+                        # Interface unreachable, retry with another port
+                        if retry_times_1 <= 1:
+                            return r
+                        else:
+                            retry_times_1 -= 1
+                            log("retry post idc_interface... left times " + str(retry_times_1))
+                            continue
+                    if judge_error_code(r):
+                        return r
+                    break
+            else:
+                if globals().get("global_idc_model") is None:
+                    print("=========== init idc model ===========")
+                    idc_model = IdcModels().get_model()
+                    globals().update({"global_idc_model": idc_model})
+                r = idc(data=base64_stream,
+                        model=globals().get("global_idc_model"))
+        except TimeoutError:
+            return [-5]
+        except requests.exceptions.ConnectionError as e:
+            return [-2]
+
+        _dict = r
+        image_np = _dict.get("image")
+        log("from_idc_interface cost time " + str(time.time()-start_time))
+        return image_np
+    except Exception as e:
+        log("from_idc_interface error!")
+        traceback.print_exc()
+        return [-10]
+
+
 # def from_schedule_interface(interface_type):
 #     try:
 #         _ip = "http://" + get_intranet_ip()
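
A minimal local-mode usage sketch of the two new wrappers (from_remote=False, model weights assumed present under idc/models and isr/models). Unlike image_process above, it re-encodes in memory with cv2.imencode instead of the cv2.imwrite round-trip; "page.png" is a hypothetical input:

    import cv2
    from format_convert.convert_need_interface import from_idc_interface, from_isr_interface
    from format_convert.utils import judge_error_code

    with open("page.png", "rb") as f:  # hypothetical input image
        image_bytes = f.read()

    image_np = from_idc_interface(image_bytes)  # orientation correction
    if not judge_error_code(image_np):
        ok, buf = cv2.imencode(".png", image_np)  # re-encode without touching disk
        if ok:
            image_np = from_isr_interface(buf.tobytes())  # seal removal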

+ 2 - 1
format_convert/convert_test.py

@@ -57,7 +57,8 @@ if __name__ == '__main__':
         # file_path = "C:/Users/Administrator/Desktop/test_xls/merge_cell.xlsx"
         # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/20210609202634853485.xlsx"
         # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_ODPS/1624325845476.pdf"
-        file_path = "C:/Users/Administrator/Downloads/22222.pdf"
+        # file_path = "C:/Users/Administrator/Downloads/1660296734009.pdf"
+        file_path = "C:/Users/Administrator/Desktop/test_image/error10.png"
     else:
         file_path = "test1.doc"
     test_one(file_path, from_remote=True)

+ 28 - 2
format_convert/utils.py

@@ -37,7 +37,7 @@ if get_platform() == "Linux":
 import math


-def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9]):
+def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11]):
     """
     [0] : continue
     [-1]: logic processing error
@@ -51,7 +51,7 @@ def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9]):
     [-9]: interface received empty data
     """
     for c in code:
-        if _list == [c]:
+        if len(_list) == 1 and _list == [c]:
             return True
     return False

@@ -2077,6 +2077,32 @@ def pil_resize(image_np, height, width):
     return image_np


+def np2pil(image_np):
+    image_pil = Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
+    return image_pil
+
+
+def pil2np(image_pil):
+    image_np = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
+    return image_np
+
+
+def bytes2np(_b):
+    try:
+        # Decode the binary stream into an np.ndarray [np.uint8: 8-bit pixels]
+        image_np = cv2.imdecode(np.frombuffer(_b, np.uint8), cv2.IMREAD_COLOR)
+        # Convert RGB to BGR
+        # image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+        return image_np
+    except cv2.error as e:
+        if "src.empty()" in str(e):
+            log("bytes2np image is empty!")
+        return None
+    except:
+        traceback.print_exc()
+        return None
+
+
 if __name__ == "__main__":
     # strs = r"D:\Project\temp\04384fcc9e8911ecbd2844f971944973\043876ca9e8911eca5e144f971944973_rar\1624114035529.jpeg"
     # print(slash_replace(strs))
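
A quick round-trip sketch for the new conversion helpers, assuming a readable image file: bytes2np decodes bytes into a BGR ndarray, and np2pil/pil2np swap losslessly between BGR ndarrays and RGB PIL images ("page.png" is a hypothetical file):

    with open("page.png", "rb") as f:   # hypothetical file
        image_np = bytes2np(f.read())
    if image_np is not None:
        image_pil = np2pil(image_np)
        assert (pil2np(image_pil) == image_np).all()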

+ 113 - 0
idc/idc_interface.py

@@ -0,0 +1,113 @@
+import base64
+import copy
+import json
+import os
+import time
+import sys
+import traceback
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert import _global
+import cv2
+import numpy as np
+from PIL import Image
+from idc.model import direction_model
+from format_convert.utils import log, get_md5_from_bytes, request_post, np2pil, bytes2np, pil2np, pil_resize
+import tensorflow as tf
+sess = tf.compat.v1.Session(graph=tf.Graph())
+
+image_shape = (640, 640)
+
+
+def adjust_direction(image_np, model):
+    # 4 orientation classes
+    cls_num = 4
+
+    # Build the model input
+    origin_image = copy.deepcopy(image_np)
+    image_np = pil_resize(image_np, image_shape[0], image_shape[1])
+    X = np.expand_dims(np.array(image_np), 0)
+
+    # Predict
+    with sess.as_default():
+        with sess.graph.as_default():
+            pred = model.predict(X)
+            pred = pred.astype(np.float64)
+            pred = np.argmax(pred[0])
+
+    # Compute the rotation angle from the predicted class
+    angle = 360 - pred*int((360/cls_num))
+
+    # Rotate by that angle
+    image_pil = Image.fromarray(origin_image)
+    image_rotate = np.array(image_pil.rotate(angle, expand=1))
+    return image_rotate
+
+
+def idc(data, model):
+    log("into idc_interface idc")
+    try:
+        # start_time = time.time()
+        img_data = base64.b64decode(data)
+        img_np = bytes2np(img_data)
+        image_rotate = adjust_direction(img_np, model)
+        # print(time.time()-start_time)
+        return {"image": image_rotate}
+    except TimeoutError:
+        return {"image": [-5]}
+    except:
+        traceback.print_exc()
+        return {"image": [-1]}
+
+
+class IdcModels:
+    def __init__(self):
+        # Directory of this Python file
+        _dir = os.path.abspath(os.path.dirname(__file__))
+
+        # detect
+        model_path = _dir + "/models/model.h5"
+        with sess.as_default():
+            with sess.graph.as_default():
+                self.model = direction_model(input_shape=(image_shape[0], image_shape[1], 3),
+                                             output_shape=4)
+                self.model.load_weights(model_path)
+
+    def get_model(self):
+        return self.model
+
+
+def test_idc_model(from_remote=False):
+    file_path = "C:/Users/Administrator/Desktop/test_image/error10.jpg"
+    with open(file_path, "rb") as f:
+        file_bytes = f.read()
+    img_np = cv2.imread(file_path)
+    print(img_np.shape)
+    file_base64 = base64.b64encode(file_bytes)
+    _md5 = get_md5_from_bytes(file_bytes)[0]
+
+    _global._init()
+    _global.update({"port": 15010, "md5": _md5})
+
+    if from_remote:
+        file_json = {"data": file_base64, "md5": _md5}
+        # _url = "http://192.168.2.102:17000/ocr"
+        _url = "http://127.0.0.1:17000/ocr"
+        print(json.loads(request_post(_url, file_json)))
+    else:
+
+        idc_model = IdcModels().get_model()
+        result = idc(file_base64, idc_model)
+        # print(result)
+        if type(result.get("image")) == list:
+            print(result)
+        else:
+            img = result.get("image")
+            print(img.shape)
+            cv2.namedWindow('img', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
+            cv2.imshow("img", img)
+            cv2.waitKey(0)
+        # print(result)
+
+
+if __name__ == "__main__":
+    test_idc_model()
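
Worked example of the class-to-angle mapping in adjust_direction: with cls_num=4, angle = 360 - pred * int(360 / cls_num), so predicted classes 0/1/2/3 map to PIL counter-clockwise rotations of 360/270/180/90 degrees (class 0 is effectively a no-op). This undoes the labels generated by get_img_label in idc/pre_process.py, where class i is the image rotated by i*90 degrees:

    cls_num = 4
    for pred in range(cls_num):
        angle = 360 - pred * int(360 / cls_num)
        print(pred, angle % 360)   # 0->0, 1->270, 2->180, 3->90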

+ 38 - 0
idc/model.py

@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 21 10:53:51 2022
+model
+@author: fangjiasheng
+"""
+from keras.layers import Lambda, Dense, Reshape, Conv2D, BatchNormalization, LeakyReLU, Masking, MaxPool2D
+from keras import layers, models, Sequential
+import keras.backend as K
+
+
+def direction_model(input_shape, output_shape):
+    conv_num = 6
+
+    # Input
+    _input = layers.Input(shape=input_shape, dtype="float32", name="input")
+
+    conv = Conv2D(16, (3, 3), padding='same')(_input)
+    bn = BatchNormalization()(conv)
+    relu = LeakyReLU(alpha=0.)(bn)
+    max_pool = MaxPool2D()(relu)
+    for i in range(conv_num):
+        conv = Conv2D(16, (3, 3), padding='same')(max_pool)
+        bn = BatchNormalization()(conv)
+        relu = LeakyReLU(alpha=0.)(bn)
+        max_pool = MaxPool2D()(relu)
+    conv = Conv2D(16, (3, 3), padding='same')(max_pool)
+    bn = BatchNormalization()(conv)
+    relu = LeakyReLU(alpha=0.)(bn)
+    max_pool = MaxPool2D((5, 5))(relu)
+
+    dense = layers.Dense(output_shape, activation='softmax')(max_pool)
+    squeeze = Lambda(lambda x: K.squeeze(x, axis=1))(dense)
+    squeeze = Lambda(lambda x: K.squeeze(x, axis=1))(squeeze)
+
+    model = models.Model(inputs=_input, outputs=squeeze)
+    # model.summary(line_length=100)
+    return model
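
Shape check for direction_model at the 640x640 input used by idc_interface.py: seven 2x2 max-pools (one before the loop, conv_num=6 inside it) reduce 640 to 5, the final MaxPool2D((5, 5)) leaves a 1x1x16 tensor, and Dense(4) plus the two squeeze Lambdas yield a (batch, 4) softmax. A minimal sanity check, assuming keras is importable:

    from idc.model import direction_model

    model = direction_model(input_shape=(640, 640, 3), output_shape=4)
    print(model.output_shape)   # expected (None, 4)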

BIN
idc/models/model.h5


+ 5 - 0
idc/post_process.py

@@ -0,0 +1,5 @@
+
+
+def rotate():
+
+    return

+ 148 - 0
idc/pre_process.py

@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 21 10:53:51 2022
+pre_process.py
+@author: fangjiasheng
+"""
+import json
+import base64
+import random
+import traceback
+from glob import glob
+
+import numpy as np
+import six
+import cv2
+from PIL import Image
+import fitz
+Image.MAX_IMAGE_PIXELS = 2300000000
+
+
+def get_img_label(img_np, size, cls_num=4):
+    height, width = size
+    img_pil = Image.fromarray(cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB))
+
+    # Resize the image
+    img_pil = img_pil.resize((int(width), int(height)), Image.BICUBIC)
+
+    # Generate rotated copies of the image with their angle class labels
+    img_label_list = [[np.array(img_pil), 0]]
+    # Rotate the image
+    angle_first = int(360/cls_num)
+    i = 1
+    for angle in range(angle_first, 360, angle_first):
+        img_label_list.append([np.array(img_pil.rotate(angle, expand=1)), i])
+        i += 1
+
+    # for _img, _label in img_label_list:
+    #     cv2.imshow("img", _img)
+    #     cv2.waitKey(0)
+    return img_label_list
+
+
+def gen(paths, batch_size=2, shape=(640, 640), cls_num=4, is_test=False):
+    num = len(paths)
+
+    i = 0
+    while True:
+        height, width = shape
+        if is_test:
+            X = np.zeros((batch_size, height, width, 3))
+            Y = np.zeros((batch_size, cls_num))
+        else:
+            X = np.zeros((batch_size * cls_num, height, width, 3))
+            Y = np.zeros((batch_size * cls_num, cls_num))
+        img_np_list = []
+
+        for j in range(batch_size):
+            if i >= num:
+                i = 0
+                np.random.shuffle(paths)
+            p = paths[i]
+            i += 1
+
+            # limit pixels 89478485
+            img_np = cv2.imread(p)
+            if img_np.shape[0] * img_np.shape[1] * img_np.shape[2] >= 89478485:
+                # print("image too large, limit 89478485 pixels", img_np.shape)
+                new_i = random.randint(0, num-1)
+                if i != new_i:
+                    p = paths[new_i]
+                    img_np = cv2.imread(p)  # re-read, otherwise the oversized image is still used below
+
+            img_label_list = get_img_label(img_np, size=(height, width), cls_num=cls_num)
+            random.shuffle(img_label_list)
+            if is_test:
+                img_label_list = random.sample(img_label_list, 1)
+
+            for c in range(cls_num):
+                if c >= len(img_label_list):
+                    break
+
+                img = img_label_list[c][0]
+                img_np_list.append(img)
+
+                # Blur
+                if_blur = random.choice([0, 1])
+                # print(if_blur, img_label_list[c][1])
+                if if_blur:
+                    # Gaussian blur
+                    sigmaX = random.randint(1, 2)
+                    sigmaY = random.randint(1, 2)
+                    img = cv2.GaussianBlur(img, (5, 5), sigmaX, sigmaY)
+
+                # cv2.imshow("gen", img)
+                # cv2.waitKey(0)
+                # print("gen image size", img.shape)
+
+                # label
+                label_list = [0]*cls_num
+                label_list[img_label_list[c][1]] = 1
+                label = np.array(label_list)
+
+                # print(p, img_label_list[c][1])
+
+                X[j+c] = img
+                Y[j+c] = label
+        # print("X.shape", X.shape)
+        if is_test:
+            yield X, Y, img_np_list
+        else:
+            yield X, Y
+
+
+def get_image_from_pdf():
+    paths = glob("C:/Users/Administrator/Desktop/test_pdf/*")
+    save_dir = "D:/Project/image_direction_classification/data/1/"
+
+    i = 0
+    for path in paths:
+        try:
+            doc = fitz.open(path)
+            output_image_dict = {}
+            page_count = doc.page_count
+            for page_no in range(page_count):
+                try:
+                    page = doc.loadPage(page_no)
+                    output = save_dir + "pdf_" + str(i) + ".png"
+                    i += 1
+                    rotate = int(0)
+                    # A zoom factor of 1.3 per dimension, which generates images at roughly 2.6x resolution.
+                    # Without this setting, the default image size is 792x612, dpi=96
+                    # (1.33333333 --> 1056x816)   (2 --> 1584x1224)
+                    # (1.183, 2.28 --> 1920x1080)
+                    zoom_x = 1.3
+                    zoom_y = 1.3
+                    mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)
+                    pix = page.getPixmap(matrix=mat, alpha=False)
+                    pix.writePNG(output)
+                except:
+                    continue
+        except Exception as e:
+            print("pdf2Image", traceback.print_exc())
+            continue
+
+
+if __name__ == '__main__':
+    get_img_label(cv2.imread("data/0/7248_fe52d616989e19e6967e0461ef19b149.jpg"), (640, 640))
+    # get_image_from_pdf()
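
A hedged training sketch for gen; the dataset layout, batch size, and epoch count are placeholders, and it assumes a Keras version whose model.fit accepts Python generators (older versions use fit_generator):

    from glob import glob
    from idc.model import direction_model
    from idc.pre_process import gen

    paths = glob("data/*/*.jpg")   # hypothetical dataset layout
    model = direction_model(input_shape=(640, 640, 3), output_shape=4)
    model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(gen(paths, batch_size=2, shape=(640, 640), cls_num=4),
              steps_per_epoch=max(1, len(paths) // 2), epochs=10)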

+ 430 - 0
idc/utils.py

@@ -0,0 +1,430 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 21 10:53:51 2022
+utils
+@author: fangjiasheng
+"""
+import platform
+
+import cv2
+import numpy as np
+from skimage import measure
+from scipy.spatial import distance as dist
+from PIL import Image
+from scipy.ndimage import filters, interpolation
+from numpy import amin, amax
+
+
+def get_platform():
+    _sys = platform.system()
+    return _sys
+
+
+def nms_box(boxes, scores, score_threshold=0.5, nms_threshold=0.3):
+    # nms box
+    boxes = np.array(boxes)
+    scores = np.array(scores)
+    ind = scores > score_threshold
+    boxes = boxes[ind]
+    scores = scores[ind]
+
+    def box_to_center(box):
+        xmin, ymin, xmax, ymax = [round(float(x), 4) for x in box]
+        w = xmax - xmin
+        h = ymax - ymin
+        return [round(xmin, 4), round(ymin, 4), round(w, 4), round(h, 4)]
+
+    newBoxes = [box_to_center(box) for box in boxes]
+    newscores = [round(float(x), 6) for x in scores]
+
+    index = cv2.dnn.NMSBoxes(newBoxes, newscores, score_threshold=score_threshold, nms_threshold=nms_threshold)
+    if len(index) > 0:
+        index = index.reshape((-1,))
+        return boxes[index], scores[index]
+    else:
+        return np.array([]), np.array([])
+
+
+def resize_im(im, scale, max_scale=None):
+    f = float(scale) / min(im.shape[0], im.shape[1])
+    if max_scale is not None and f * max(im.shape[0], im.shape[1]) > max_scale:
+        f = float(max_scale) / max(im.shape[0], im.shape[1])
+    return cv2.resize(im, (0, 0), fx=f, fy=f)
+
+
+def estimate_skew_angle(raw, angleRange=[-15, 15]):
+    """
+    Estimate the skew angle of the text in the image.
+    angleRange: interval over which to estimate the angle
+    """
+    raw = resize_im(raw, scale=600, max_scale=900)
+    image = raw - amin(raw)
+    image = image / amax(image)
+    m = interpolation.zoom(image, 0.5)
+    m = filters.percentile_filter(m, 80, size=(20, 2))
+    m = filters.percentile_filter(m, 80, size=(2, 20))
+    m = interpolation.zoom(m, 1.0 / 0.5)
+    # w,h = image.shape[1],image.shape[0]
+    w, h = min(image.shape[1], m.shape[1]), min(image.shape[0], m.shape[0])
+    flat = np.clip(image[:h, :w] - m[:h, :w] + 1, 0, 1)
+    d0, d1 = flat.shape
+    o0, o1 = int(0.1 * d0), int(0.1 * d1)
+    flat = amax(flat) - flat
+    flat -= amin(flat)
+    est = flat[o0:d0 - o0, o1:d1 - o1]
+    angles = range(angleRange[0], angleRange[1])
+    estimates = []
+    for a in angles:
+        roest = interpolation.rotate(est, a, order=0, mode='constant')
+        v = np.mean(roest, axis=1)
+        v = np.var(v)
+        estimates.append((v, a))
+
+    _, a = max(estimates)
+    return a
+
+
+def eval_angle(img, angleRange=[-5, 5]):
+    """
+    Estimate the skew angle of the text in the image
+    """
+    im = Image.fromarray(img)
+    degree = estimate_skew_angle(np.array(im.convert('L')), angleRange=angleRange)
+    im = im.rotate(degree, center=(im.size[0] / 2, im.size[1] / 2), expand=1, fillcolor=(255, 255, 255))
+    img = np.array(im)
+    return img, degree
+
+
+def letterbox_image(image, size, fillValue=[128, 128, 128]):
+    """
+    resize image with unchanged aspect ratio using padding
+    """
+    image_h, image_w = image.shape[:2]
+    w, h = size
+    new_w = int(image_w * min(w * 1.0 / image_w, h * 1.0 / image_h))
+    new_h = int(image_h * min(w * 1.0 / image_w, h * 1.0 / image_h))
+
+    resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    # cv2.imwrite('tmp/test.png', resized_image[...,::-1])
+    if fillValue is None:
+        # cv2.split splits the image into channels
+        fillValue = [int(x.mean()) for x in cv2.split(np.array(image))]
+    boxed_image = np.zeros((size[1], size[0], 3), dtype=np.uint8)
+    boxed_image[:] = fillValue
+    boxed_image[:new_h, :new_w, :] = resized_image
+
+    return boxed_image, new_w / image_w, new_h / image_h
+
+
+def get_table_line(binimg, axis=0, lineW=10):
+    # Extract table lines
+    # axis=0: vertical lines
+    # axis=1: horizontal lines
+    labels = measure.label(binimg > 0, connectivity=2)  # label 8-connected regions
+    regions = measure.regionprops(labels)
+    if axis == 1:
+        # For horizontal lines, check the length as bbox bottom-right x minus top-left x
+        lineboxes = [minAreaRect(line.coords) for line in regions if line.bbox[2] - line.bbox[0] > lineW]
+    else:
+        # For vertical lines, check the length as bbox bottom-right y minus top-left y
+        lineboxes = [minAreaRect(line.coords) for line in regions if line.bbox[3] - line.bbox[1] > lineW]
+    return lineboxes
+
+
+def sqrt(p1, p2):
+    return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)
+
+
+def adjust_lines(RowsLines, ColsLines, alph=50):
+    # Adjust lines
+    nrow = len(RowsLines)
+    ncol = len(ColsLines)
+    newRowsLines = []
+    newColsLines = []
+    for i in range(nrow):
+
+        x1, y1, x2, y2 = RowsLines[i]
+        cx1, cy1 = (x1 + x2) / 2, (y1 + y2) / 2
+        for j in range(nrow):
+            if i != j:
+                x3, y3, x4, y4 = RowsLines[j]
+                cx2, cy2 = (x3 + x4) / 2, (y3 + y4) / 2
+                if (x3 < cx1 < x4 or y3 < cy1 < y4) or (x1 < cx2 < x2 or y1 < cy2 < y2):
+                    continue
+                else:
+                    r = sqrt((x1, y1), (x3, y3))
+                    if r < alph:
+                        newRowsLines.append([x1, y1, x3, y3])
+                    r = sqrt((x1, y1), (x4, y4))
+                    if r < alph:
+                        newRowsLines.append([x1, y1, x4, y4])
+
+                    r = sqrt((x2, y2), (x3, y3))
+                    if r < alph:
+                        newRowsLines.append([x2, y2, x3, y3])
+                    r = sqrt((x2, y2), (x4, y4))
+                    if r < alph:
+                        newRowsLines.append([x2, y2, x4, y4])
+
+    for i in range(ncol):
+        x1, y1, x2, y2 = ColsLines[i]
+        cx1, cy1 = (x1 + x2) / 2, (y1 + y2) / 2
+        for j in range(ncol):
+            if i != j:
+                x3, y3, x4, y4 = ColsLines[j]
+                cx2, cy2 = (x3 + x4) / 2, (y3 + y4) / 2
+                if (x3 < cx1 < x4 or y3 < cy1 < y4) or (x1 < cx2 < x2 or y1 < cy2 < y2):
+                    continue
+                else:
+                    r = sqrt((x1, y1), (x3, y3))
+                    if r < alph:
+                        newColsLines.append([x1, y1, x3, y3])
+                    r = sqrt((x1, y1), (x4, y4))
+                    if r < alph:
+                        newColsLines.append([x1, y1, x4, y4])
+
+                    r = sqrt((x2, y2), (x3, y3))
+                    if r < alph:
+                        newColsLines.append([x2, y2, x3, y3])
+                    r = sqrt((x2, y2), (x4, y4))
+                    if r < alph:
+                        newColsLines.append([x2, y2, x4, y4])
+
+    return newRowsLines, newColsLines
+
+
+def minAreaRect(coords):
+    """
+    Minimum-area bounding rectangle of a polygon
+    """
+    rect = cv2.minAreaRect(coords[:, ::-1])
+    # print("minAreaRect rect", rect)
+    box = cv2.boxPoints(rect)
+    # print("minAreaRect box", box)
+    box = box.reshape((8,)).tolist()
+    # print("minAreaRect box2", box)
+
+    box = image_location_sort_box(box)
+
+    x1, y1, x2, y2, x3, y3, x4, y4 = box
+    degree, w, h, cx, cy = solve(box)
+    if w < h:
+        xmin = (x1 + x2) / 2
+        xmax = (x3 + x4) / 2
+        ymin = (y1 + y2) / 2
+        ymax = (y3 + y4) / 2
+
+    else:
+        xmin = (x1 + x4) / 2
+        xmax = (x2 + x3) / 2
+        ymin = (y1 + y4) / 2
+        ymax = (y2 + y3) / 2
+    # degree,w,h,cx,cy = solve(box)
+    # x1,y1,x2,y2,x3,y3,x4,y4 = box
+    # return {'degree':degree,'w':w,'h':h,'cx':cx,'cy':cy}
+    return [xmin, ymin, xmax, ymax]
+
+
+def fit_line(p1, p2):
+    """A = Y2 - Y1
+       B = X1 - X2
+       C = X2*Y1 - X1*Y2
+       AX+BY+C=0
+    General-form line equation
+    """
+    x1, y1 = p1
+    x2, y2 = p2
+    A = y2 - y1
+    B = x1 - x2
+    C = x2 * y1 - x1 * y2
+    return A, B, C
+
+
+def point_line_cor(p, A, B, C):
+    # Determine on which side of the line the point lies
+    # General-form line equation Ax+By+C=0
+    x, y = p
+    r = A * x + B * y + C
+    return r
+
+
+def line_to_line(points1, points2, alpha=10):
+    """
+    Distance between two line segments
+    """
+    x1, y1, x2, y2 = points1
+    ox1, oy1, ox2, oy2 = points2
+    A1, B1, C1 = fit_line((x1, y1), (x2, y2))
+    A2, B2, C2 = fit_line((ox1, oy1), (ox2, oy2))
+    flag1 = point_line_cor([x1, y1], A2, B2, C2)
+    flag2 = point_line_cor([x2, y2], A2, B2, C2)
+
+    if (flag1 > 0 and flag2 > 0) or (flag1 < 0 and flag2 < 0):
+
+        x = (B1 * C2 - B2 * C1) / (A1 * B2 - A2 * B1)
+        y = (A2 * C1 - A1 * C2) / (A1 * B2 - A2 * B1)
+        p = (x, y)
+        r0 = sqrt(p, (x1, y1))
+        r1 = sqrt(p, (x2, y2))
+
+        if min(r0, r1) < alpha:
+
+            if r0 < r1:
+                points1 = [p[0], p[1], x2, y2]
+            else:
+                points1 = [x1, y1, p[0], p[1]]
+
+    return points1
+
+
+def _order_points(pts):
+    # Sort the points by x coordinate
+    x_sorted = pts[np.argsort(pts[:, 0]), :]
+
+    left_most = x_sorted[:2, :]
+    right_most = x_sorted[2:, :]
+    left_most = left_most[np.argsort(left_most[:, 1]), :]
+    (tl, bl) = left_most
+
+    distance = dist.cdist(tl[np.newaxis], right_most, "euclidean")[0]
+    (br, tr) = right_most[np.argsort(distance)[::-1], :]
+
+    return np.array([tl, tr, br, bl], dtype="float32")
+
+
+def image_location_sort_box(box):
+    x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
+    pts = (x1, y1), (x2, y2), (x3, y3), (x4, y4)
+    pts = np.array(pts, dtype="float32")
+    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = _order_points(pts)
+    return [x1, y1, x2, y2, x3, y3, x4, y4]
+
+
+def solve(box):
+    """
+     Coordinates of a w x h box rotated by angle around the point (cx, cy)
+     x = cx-w/2
+     y = cy-h/2
+     x1-cx = -w/2*cos(angle) +h/2*sin(angle)
+     y1 -cy= -w/2*sin(angle) -h/2*cos(angle)
+     
+     h(x1-cx) = -wh/2*cos(angle) +hh/2*sin(angle)
+     w(y1 -cy)= -ww/2*sin(angle) -hw/2*cos(angle)
+     (hh+ww)/2sin(angle) = h(x1-cx)-w(y1 -cy)
+
+     """
+    x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
+    cx = (x1 + x3 + x2 + x4) / 4.0
+    cy = (y1 + y3 + y4 + y2) / 4.0
+    w = (np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) + np.sqrt((x3 - x4) ** 2 + (y3 - y4) ** 2)) / 2
+    h = (np.sqrt((x2 - x3) ** 2 + (y2 - y3) ** 2) + np.sqrt((x1 - x4) ** 2 + (y1 - y4) ** 2)) / 2
+    # x = cx-w/2
+    # y = cy-h/2
+    sinA = (h * (x1 - cx) - w * (y1 - cy)) * 1.0 / (h * h + w * w) * 2
+    angle = np.arcsin(sinA)
+    return angle, w, h, cx, cy
+
+
+def xy_rotate_box(cx, cy, w, h, angle=0, degree=None, **args):
+    """
+    Coordinates of a w x h box rotated by angle around the point (cx, cy)
+    x_new = (x-cx)*cos(angle) - (y-cy)*sin(angle)+cx
+    y_new = (x-cx)*sin(angle) + (y-cy)*cos(angle)+cy
+    """
+    if degree is not None:
+        angle = degree
+    cx = float(cx)
+    cy = float(cy)
+    w = float(w)
+    h = float(h)
+    angle = float(angle)
+    x1, y1 = rotate(cx - w / 2, cy - h / 2, angle, cx, cy)
+    x2, y2 = rotate(cx + w / 2, cy - h / 2, angle, cx, cy)
+    x3, y3 = rotate(cx + w / 2, cy + h / 2, angle, cx, cy)
+    x4, y4 = rotate(cx - w / 2, cy + h / 2, angle, cx, cy)
+    return x1, y1, x2, y2, x3, y3, x4, y4
+
+
+from numpy import cos, sin
+
+
+def rotate(x, y, angle, cx, cy):
+    angle = angle  # *pi/180
+    x_new = (x - cx) * cos(angle) - (y - cy) * sin(angle) + cx
+    y_new = (x - cx) * sin(angle) + (y - cy) * cos(angle) + cy
+    return x_new, y_new
+
+
+def minAreaRectbox(regions, flag=True, W=0, H=0, filtersmall=False, adjustBox=False):
+    """
+    Minimum-area bounding rectangles of polygons
+    """
+    boxes = []
+    for region in regions:
+        rect = cv2.minAreaRect(region.coords[:, ::-1])
+
+        box = cv2.boxPoints(rect)
+        box = box.reshape((8,)).tolist()
+        box = image_location_sort_box(box)
+        x1, y1, x2, y2, x3, y3, x4, y4 = box
+        angle, w, h, cx, cy = solve(box)
+        if adjustBox:
+            x1, y1, x2, y2, x3, y3, x4, y4 = xy_rotate_box(cx, cy, w + 5, h + 5, angle=0, degree=None)
+
+        if w > 32 and h > 32 and flag:
+            if abs(angle / np.pi * 180) < 20:
+                if filtersmall and (w < 10 or h < 10):
+                    continue
+                boxes.append([x1, y1, x2, y2, x3, y3, x4, y4])
+        else:
+            if w * h < 0.5 * W * H:
+                if filtersmall and (w < 8 or h < 8):
+                    continue
+                boxes.append([x1, y1, x2, y2, x3, y3, x4, y4])
+    return boxes
+
+
+def rectangle(img, boxes):
+    tmp = np.copy(img)
+    for box in boxes:
+        xmin, ymin, xmax, ymax = box[:4]
+        cv2.rectangle(tmp, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 0), 1, lineType=cv2.LINE_AA)
+    return Image.fromarray(tmp)
+
+
+def draw_lines(im, bboxes, color=(0, 0, 0), lineW=3):
+    """
+        boxes: bounding boxes
+    """
+    tmp = np.copy(im)
+    c = color
+    h, w = im.shape[:2]
+
+    for box in bboxes:
+        x1, y1, x2, y2 = box[:4]
+        cv2.line(tmp, (int(x1), int(y1)), (int(x2), int(y2)), c, lineW, lineType=cv2.LINE_AA)
+
+    return tmp
+
+
+def draw_boxes(im, bboxes, color=(0, 0, 255)):
+    """
+        boxes: bounding boxes
+    """
+    tmp = np.copy(im)
+    c = color
+    h, w, _ = im.shape
+
+    for box in bboxes:
+        if type(box) is dict:
+            x1, y1, x2, y2, x3, y3, x4, y4 = xy_rotate_box(**box)
+        else:
+            x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
+
+        cv2.line(tmp, (int(x1), int(y1)), (int(x2), int(y2)), c, 1, lineType=cv2.LINE_AA)
+        cv2.line(tmp, (int(x2), int(y2)), (int(x3), int(y3)), c, 1, lineType=cv2.LINE_AA)
+        cv2.line(tmp, (int(x3), int(y3)), (int(x4), int(y4)), c, 1, lineType=cv2.LINE_AA)
+        cv2.line(tmp, (int(x4), int(y4)), (int(x1), int(y1)), c, 1, lineType=cv2.LINE_AA)
+
+    return tmp
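
Most of this file is imported line/geometry helper code; a minimal usage sketch of the small-angle deskew path, assuming an image on disk ("page.png" is a hypothetical file):

    import cv2
    from idc.utils import eval_angle

    img = cv2.imread("page.png")   # hypothetical file
    deskewed, degree = eval_angle(img, angleRange=[-5, 5])
    print("estimated skew:", degree)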

BIN
isr/font/FiraMono-Medium.otf


+ 45 - 0
isr/font/SIL Open Font License.txt

@@ -0,0 +1,45 @@
+Copyright (c) 2014, Mozilla Foundation https://mozilla.org/ with Reserved Font Name Fira Mono.
+
+Copyright (c) 2014, Telefonica S.A.
+
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at: http://scripts.sil.org/OFL
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide development of collaborative font projects, to support the font creation efforts of academic and linguistic communities, and to provide a free and open framework in which fonts may be shared and improved in partnership with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and redistributed freely as long as they are not sold by themselves. The fonts, including any derivative works, can be bundled, embedded, redistributed and/or sold with any software provided that any reserved names are not used by derivative works. The fonts and derivatives, however, cannot be released under any other type of license. The requirement for fonts to remain under this license does not apply to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright Holder(s) under this license and clearly marked as such. This may include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting, or substituting -- in part or in whole -- any of the components of the Original Version, by changing formats or by porting the Font Software to a new environment.
+
+"Author" refers to any designer, engineer, programmer, technical writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining a copy of the Font Software, to use, study, copy, merge, embed, modify, redistribute, and sell modified and unmodified copies of the Font Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components, in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled, redistributed and/or sold with any software, provided that each copy contains the above copyright notice and this license. These can be included either as stand-alone text files, human-readable headers or in the appropriate machine-readable metadata fields within text or binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font Name(s) unless explicit written permission is granted by the corresponding Copyright Holder. This restriction only applies to the primary font name as presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font Software shall not be used to promote, endorse or advertise any Modified Version, except to acknowledge the contribution(s) of the Copyright Holder(s) and the Author(s) or with their explicit written permission.
+
+5) The Font Software, modified or unmodified, in part or in whole, must be distributed entirely under this license, and must not be distributed under any other license. The requirement for fonts to remain under this license does not apply to any document created using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM OTHER DEALINGS IN THE FONT SOFTWARE.

+ 175 - 0
isr/isr_interface.py

@@ -0,0 +1,175 @@
+import base64
+import json
+import os
+import time
+import sys
+import traceback
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert import _global
+import cv2
+import numpy as np
+from PIL import Image
+from format_convert.utils import log, get_md5_from_bytes, request_post, np2pil, bytes2np, pil2np
+from isr.post_process import get_seal_part, replace_seal_part
+from isr.model import get_tiny_inference_model, seal_model, seal_model_se
+from isr.pre_process import count_red_pixel, get_anchors, get_classes, get_colors
+from isr.utils import get_best_predict_size, pil_resize, letterbox_image, draw_boxes, adjust_boxes
+import tensorflow as tf
+sess1 = tf.compat.v1.Session(graph=tf.Graph())
+sess2 = tf.compat.v1.Session(graph=tf.Graph())
+
+
+def remove_seal(image_np, model):
+    # inference data
+    image_seal = image_np
+    h, w = image_seal.shape[:2]
+    best_h, best_w = get_best_predict_size(image_seal)
+    X = np.zeros((1, best_h, best_w, 3))
+
+    # resize
+    image_seal = pil_resize(image_seal, best_h, best_w)
+    # cv2.imshow("resize", image_seal)
+    X[0] = image_seal / 255
+
+    # predict
+    with sess2.as_default():
+        with sess2.graph.as_default():
+            pred = model.predict(X)
+            pred = pred[0]*255.
+            pred = pred.astype(np.uint8)
+            pred = pil_resize(pred, h, w)
+    # cv2.imshow("pred", pred)
+    # cv2.waitKey(0)
+    return pred
+
+
+def detect_seal(image_np, model):
+    image_pil = np2pil(image_np)
+
+    # First check for red pixels
+    if not count_red_pixel(image_np):
+        return image_np, [], []
+
+    # create image input
+    h, w = image_np.shape[:2]
+    # best_h, best_w = get_best_predict_size(image_np, times=32, max_size=1280)
+    best_h, best_w = 1024, 1024
+    image_resize = letterbox_image(image_pil, tuple(reversed([best_h, best_w])))
+    # cv2.imshow("letterbox_image", pil2np(image_resize))
+    # cv2.waitKey(0)
+    # image_resize = pil_resize(image_np, best_h, best_w)
+
+    # image_resize = image_pil.resize((int(416), int(416)), Image.BICUBIC)
+    image_resize = np.array(image_resize, dtype='float32')
+    image_resize = image_resize.astype('float32') / 255.
+    image_resize = np.expand_dims(image_resize, 0)
+
+    # create image shape input
+    image_shape = np.array([image_pil.size[1], image_pil.size[0]])
+    image_shape = np.expand_dims(image_shape, 0)
+
+    # inference data
+    with sess1.as_default():
+        with sess1.graph.as_default():
+            out_boxes, out_scores, out_classes = model.predict([image_resize, image_shape])
+    # print("image_size", image_shape)
+    # print("out_boxes", out_boxes)
+    # print("out_scores", out_scores)
+    # print("out_classes", out_classes)
+    out_boxes = out_boxes.astype(np.int32)
+    out_classes = out_classes.astype(np.int32)
+
+    boxes = adjust_boxes(image_pil, out_boxes)
+
+    # # draw
+    # class_names = get_classes(os.path.abspath(os.path.dirname(__file__))+"/yolo_data/my_classes.txt")
+    # colors = get_colors(len(class_names))
+    # image_draw = draw_boxes(image_pil, out_boxes, out_classes, out_scores, class_names, colors)
+    # image_draw = cv2.cvtColor(np.array(image_draw), cv2.COLOR_RGB2BGR)
+    # cv2.namedWindow('detect', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
+    # cv2.imshow("detect", image_draw)
+    # cv2.waitKey(0)
+    return image_np, boxes, out_classes
+
+
+def isr(data, isr_yolo_model, isr_model):
+    log("into isr_interface isr")
+    try:
+        img_data = base64.b64decode(data)
+        img_np = bytes2np(img_data)
+        _img, boxes, classes = detect_seal(img_np, isr_yolo_model)
+        if not boxes and not classes:
+            return {"image": img_np}
+        part_list = get_seal_part(_img, boxes, classes)
+        new_part_list = []
+        for part in part_list:
+            part_remove = remove_seal(part, isr_model)
+            new_part_list.append(part_remove)
+        img_replace = replace_seal_part(img_np, new_part_list, boxes)
+        return {"image": img_replace}
+    except TimeoutError:
+        return {"image": [-5]}
+    except:
+        traceback.print_exc()
+        return {"image": [-1]}
+
+
+class IsrModels:
+    def __init__(self):
+        # Directory of this Python file
+        _dir = os.path.abspath(os.path.dirname(__file__))
+
+        # detect
+        model_path = _dir + "/models/seal_detect_yolo.h5"
+        anchors = get_anchors(_dir + "/yolo_data/my_anchors.txt")
+        class_names = get_classes(_dir + "/yolo_data/my_classes.txt")
+        colors = get_colors(len(class_names))
+        with sess1.as_default():
+            with sess1.graph.as_default():
+                self.isr_yolo_model = get_tiny_inference_model(anchors, len(class_names), weights_path=model_path)
+                self.isr_yolo_model.load_weights(model_path)
+
+        # remove
+        model_path = _dir + "/models/seal_remove_unet.h5"
+        with sess2.as_default():
+            with sess2.graph.as_default():
+                self.isr_model = seal_model_se(input_shape=(None, None, 3),
+                                               output_shape=(None, None, 3))
+                self.isr_model.load_weights(model_path)
+
+    def get_model(self):
+        return [self.isr_yolo_model, self.isr_model]
+
+
+def test_isr_model(from_remote=False):
+    file_path = "C:/Users/Administrator/Desktop/test_image/error10.jpg"
+    with open(file_path, "rb") as f:
+        file_bytes = f.read()
+    file_base64 = base64.b64encode(file_bytes)
+    _md5 = get_md5_from_bytes(file_bytes)[0]
+
+    _global._init()
+    _global.update({"port": 15010, "md5": _md5})
+
+    if from_remote:
+        file_json = {"data": file_base64, "md5": _md5}
+        # _url = "http://192.168.2.102:17000/ocr"
+        _url = "http://127.0.0.1:17000/ocr"
+        print(json.loads(request_post(_url, file_json)))
+    else:
+        isr_yolo_model, isr_model = IsrModels().get_model()
+        result = isr(file_base64, isr_yolo_model, isr_model)
+        # print(result)
+        if type(result.get("image")) == list:
+            print(result)
+        else:
+            img = result.get("image")
+            print(img.shape)
+            cv2.namedWindow('img', cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO)
+            cv2.imshow("img", img)
+            cv2.waitKey(0)
+        # print(result)
+
+
+if __name__ == "__main__":
+    test_isr_model()
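
Pipeline summary for isr(): detect_seal returns early when count_red_pixel finds no red pixels; otherwise get_seal_part crops the YOLO boxes, remove_seal runs the U-Net on each crop, and replace_seal_part pastes the cleaned crops back. A minimal local sketch, with weights assumed present under isr/models/ and "stamped.png" a hypothetical input:

    import base64
    from isr.isr_interface import IsrModels, isr

    isr_yolo_model, isr_model = IsrModels().get_model()
    with open("stamped.png", "rb") as f:   # hypothetical input
        data = base64.b64encode(f.read())
    result = isr(data, isr_yolo_model, isr_model)
    image_np = result["image"]   # ndarray on success, [-1]/[-5] list on error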

+ 725 - 0
isr/model.py

@@ -0,0 +1,725 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 21 10:53:51 2022
+model
+@author: fangjiasheng
+"""
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from isr.post_process import yolo_eval
+import time
+from functools import wraps
+from keras.layers import Lambda, Dense, Reshape, Conv2D, BatchNormalization, LeakyReLU, Masking, MaxPool2D, \
+    MaxPooling2D, UpSampling2D, concatenate, Concatenate, Layer, GlobalAveragePooling2D, Multiply
+from keras import layers, models, Sequential, Input, Model
+import keras.backend as K
+import tensorflow as tf
+import numpy as np
+from keras.regularizers import l2
+from isr.utils import compose
+from tensorflow.python.framework import ops
+
+
+def seal_model_se(input_shape, output_shape, cls_num=3):
+    inputs = Input(shape=input_shape)
+    use_bias = False
+
+    # # 256
+    # down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
+    # down0 = BatchNormalization()(down0)
+    # down0 = LeakyReLU(alpha=0.1)(down0)
+    # down0 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down0)
+    # down0 = BatchNormalization()(down0)
+    # down0 = LeakyReLU(alpha=0.1)(down0)
+    # down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
+
+    # 128
+    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down1)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = SeBlock()(down1)
+    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
+
+    # 64
+    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down2)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = SeBlock()(down2)
+    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
+
+    # 32
+    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = SeBlock()(down3)
+    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
+
+    # 16
+    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(center)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = SeBlock()(center)
+
+    # 32
+    up3 = UpSampling2D((2, 2))(center)
+    up3 = concatenate([down3, up3], axis=3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = SeBlock()(up3)
+
+    # 64
+    up2 = UpSampling2D((2, 2))(up3)
+    up2 = concatenate([down2, up2], axis=3)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = SeBlock()(up2)
+
+    # 128
+    up1 = UpSampling2D((2, 2))(up2)
+    up1 = K.concatenate([down1, up1], axis=3)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = SeBlock()(up1)
+
+    # # 256
+    # up0 = UpSampling2D((2, 2))(up1)
+    # up0 = K.concatenate([down0, up0], axis=3)
+    # up0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+    # up0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+    # up0 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+
+    classify = Conv2D(cls_num, (1, 1), activation='sigmoid')(up1)
+    # classify = Dense(cls_num, activation="softmax")(up1)
+    model = Model(inputs=inputs, outputs=classify)
+
+    # model.summary(line_length=100)
+    return model
+
+
+def seal_model(input_shape, output_shape, cls_num=3):
+    inputs = Input(shape=input_shape)
+    use_bias = False
+
+    # # 256
+    # down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
+    # down0 = BatchNormalization()(down0)
+    # down0 = LeakyReLU(alpha=0.1)(down0)
+    # down0 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down0)
+    # down0 = BatchNormalization()(down0)
+    # down0 = LeakyReLU(alpha=0.1)(down0)
+    # down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
+
+    # 128
+    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down1)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
+
+    # 64
+    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down2)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
+
+    # 32
+    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
+
+    # 16
+    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(center)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+
+    # 32
+    up3 = UpSampling2D((2, 2))(center)
+    up3 = concatenate([down3, up3], axis=3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+
+    # 64
+    up2 = UpSampling2D((2, 2))(up3)
+    up2 = concatenate([down2, up2], axis=3)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+
+    # 128
+    up1 = UpSampling2D((2, 2))(up2)
+    up1 = K.concatenate([down1, up1], axis=3)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+
+    # # 256
+    # up0 = UpSampling2D((2, 2))(up1)
+    # up0 = K.concatenate([down0, up0], axis=3)
+    # up0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+    # up0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+    # up0 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up0)
+    # up0 = BatchNormalization()(up0)
+    # up0 = LeakyReLU(alpha=0.1)(up0)
+
+    classify = Conv2D(cls_num, (1, 1), activation='sigmoid')(up1)
+    # classify = Dense(cls_num, activation="softmax")(up1)
+    model = Model(inputs=inputs, outputs=classify)
+
+    model.summary(line_length=100)
+    return model
+
+
+def seal_model_small(input_shape, output_shape, cls_num=3):
+    inputs = Input(shape=input_shape)
+    use_bias = False
+
+    # 128
+    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
+    down1 = BatchNormalization()(down1)
+    down1 = LeakyReLU(alpha=0.1)(down1)
+    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
+
+    # 64
+    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
+    down2 = BatchNormalization()(down2)
+    down2 = LeakyReLU(alpha=0.1)(down2)
+    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
+
+    # 32
+    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
+    down3 = BatchNormalization()(down3)
+    down3 = LeakyReLU(alpha=0.1)(down3)
+    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
+
+    # 16
+    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
+    center = BatchNormalization()(center)
+    center = LeakyReLU(alpha=0.1)(center)
+
+    # 32
+    up3 = UpSampling2D((2, 2))(center)
+    up3 = concatenate([down3, up3], axis=3)
+    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
+    up3 = BatchNormalization()(up3)
+    up3 = LeakyReLU(alpha=0.1)(up3)
+
+    # 64
+    up2 = UpSampling2D((2, 2))(up3)
+    up2 = concatenate([down2, up2], axis=3)
+    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
+    up2 = BatchNormalization()(up2)
+    up2 = LeakyReLU(alpha=0.1)(up2)
+
+    # 128
+    up1 = UpSampling2D((2, 2))(up2)
+    up1 = concatenate([down1, up1], axis=3)
+    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
+    up1 = BatchNormalization()(up1)
+    up1 = LeakyReLU(alpha=0.1)(up1)
+
+    classify = Conv2D(cls_num, (1, 1), activation='sigmoid')(up1)
+    # classify = Dense(cls_num, activation="softmax")(up1)
+    model = Model(inputs=inputs, outputs=classify)
+
+    model.summary(line_length=100)
+    return model
+
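As a quick sanity check, the small variant can be instantiated directly; a minimal sketch, assuming a 128x128 BGR crop and a plain reconstruction loss (note that the output_shape argument is accepted but never used inside the function):

    # illustrative only: shapes and loss are assumptions, not part of this commit
    model = seal_model_small(input_shape=(128, 128, 3), output_shape=(128, 128, 3), cls_num=3)
    model.compile(optimizer='adam', loss='mse')  # sigmoid output -> pixel values scaled to [0, 1]
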
+
+class SeBlock(Layer):
+    def __init__(self, reduction=4, **kwargs):
+        super(SeBlock, self).__init__(**kwargs)
+        self.reduction = reduction
+
+    def build(self, input_shape):
+        # called when the layer is first built
+        # manually collect the sub-layers' weights, otherwise this layer reports 0 trainable params
+        self.pool = GlobalAveragePooling2D(keepdims=True, name="my_pool")
+        self.dense_1 = Dense(int(input_shape[-1]) // self.reduction, use_bias=False, activation="relu", name='my_dense_1')
+        self.dense_2 = Dense(int(input_shape[-1]), use_bias=False, activation="hard_sigmoid", name='my_dense_2')
+        # self.dense_1.build(input_shape)
+        # self.dense_2.build((input_shape[0], input_shape[1], input_shape[2], int(input_shape[-1]) // self.reduction))
+        self._trainable_weights += self.dense_1._trainable_weights
+        self._trainable_weights += self.dense_2._trainable_weights
+        super(SeBlock, self).build(input_shape)
+
+    def call(self, inputs):
+        x = self.pool(inputs)
+        x = self.dense_1(x)
+        x = self.dense_2(x)
+        # re-weight the input channels
+        return Multiply()([inputs, x])
+
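A hedged sketch of how SeBlock slots into a functional model (shapes are illustrative; assumes a Keras version where GlobalAveragePooling2D accepts keepdims=True, as used above):

    from keras.layers import Input, Conv2D
    from keras.models import Model

    inp = Input(shape=(64, 64, 32))
    x = Conv2D(32, (3, 3), padding='same')(inp)
    x = SeBlock(reduction=4)(x)  # squeeze-and-excitation: output shape is unchanged
    se_demo = Model(inp, x)
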
+
+VGG_MEAN = [103.939, 116.779, 123.68]
+
+
+class Vgg16:
+    def __init__(self, vgg16_npy_path=None):
+        if vgg16_npy_path is None:
+            # path = inspect.getfile(Vgg16)
+            # path = os.path.abspath(os.path.join(path, os.pardir))
+            # path = os.path.join(path, "vgg16.npy")
+            # vgg16_npy_path = path
+            # print(path)
+            print("there is no vgg_16_npy!")
+            raise
+
+        self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
+        print("npy file loaded")
+
+    def build(self, bgr):
+        """
+        load variable from npy to build the VGG
+
+        :param bgr: bgr image [batch, height, width, 3] values scaled [0, 1]
+        """
+
+        start_time = time.time()
+        print("build model started")
+        bgr_scaled = bgr * 255.0
+
+        # Convert RGB to BGR
+        # red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
+        # print("red", red)
+        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
+        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
+        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
+        # bgr = tf.concat(axis=3, values=[
+        #     blue - VGG_MEAN[0],
+        #     green - VGG_MEAN[1],
+        #     red - VGG_MEAN[2],
+        #     ])
+        # assert bgr.get_shape().as_list()[1:] == [224, 224, 3]
+
+        self.conv1_1 = self.conv_layer(bgr_scaled, "conv1_1")
+        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
+        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
+
+        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
+        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
+        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
+
+        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
+        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
+        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
+        self.pool3 = self.max_pool(self.conv3_3, 'pool3')
+
+        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
+        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
+        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
+        self.pool4 = self.max_pool(self.conv4_3, 'pool4')
+
+        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
+        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
+        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
+        self.pool5 = self.max_pool(self.conv5_3, 'pool5')
+
+        # self.fc6 = self.fc_layer(self.pool5, "fc6")
+        # # assert self.fc6.get_shape().as_list()[1:] == [4096]
+        # self.relu6 = tf.nn.relu(self.fc6)
+        #
+        # self.fc7 = self.fc_layer(self.relu6, "fc7")
+        # self.relu7 = tf.nn.relu(self.fc7)
+        #
+        # self.fc8 = self.fc_layer(self.relu7, "fc8")
+        #
+        # self.prob = tf.nn.softmax(self.fc8, name="prob")
+
+        # self.data_dict = None
+        print(("build model finished: %ds" % (time.time() - start_time)))
+
+    def avg_pool(self, bottom, name):
+        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def max_pool(self, bottom, name):
+        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def conv_layer(self, bottom, name):
+        with tf.compat.v1.variable_scope(name):
+            filt = self.get_conv_filter(name)
+
+            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
+
+            conv_biases = self.get_bias(name)
+            bias = tf.nn.bias_add(conv, conv_biases)
+
+            relu = tf.nn.relu(bias)
+            return relu
+
+    def fc_layer(self, bottom, name):
+        with tf.compat.v1.variable_scope(name):
+            shape = bottom.get_shape().as_list()
+            dim = 1
+            for d in shape[1:]:
+                dim *= d
+            x = tf.reshape(bottom, [-1, dim])
+
+            weights = self.get_fc_weight(name)
+            biases = self.get_bias(name)
+
+            # Fully connected layer. Note that the '+' operation automatically
+            # broadcasts the biases.
+            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
+
+            return fc
+
+    def get_conv_filter(self, name):
+        return tf.constant(self.data_dict[name][0], name="filter")
+
+    def get_bias(self, name):
+        return tf.constant(self.data_dict[name][1], name="biases")
+
+    def get_fc_weight(self, name):
+        return tf.constant(self.data_dict[name][0], name="weights")
+
+
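A sketch of the intended TF1-style usage, e.g. to pull conv features for a perceptual loss (the npy path and the placeholder shape are assumptions):

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()  # the class builds a TF1 graph
    images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = Vgg16("vgg16.npy")  # hypothetical path to the pretrained weights
    vgg.build(images)
    features = vgg.pool5  # deepest pooled feature map
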
+class Vgg19:
+    def __init__(self, vgg19_npy_path=None):
+        if vgg19_npy_path is None:
+            print("there is no vgg_16_npy!")
+            raise
+
+        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()
+
+    def build(self, bgr):
+        """
+        load variable from npy to build the VGG
+        :param bgr: bgr image [batch, height, width, 3] values scaled [0, 1]
+        """
+        bgr = bgr * 255.0
+        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))
+
+        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
+        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
+        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
+
+        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
+        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
+        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
+
+        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
+        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
+        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
+        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
+        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
+
+        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
+        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
+        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
+        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
+        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
+
+        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
+        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
+        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
+        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
+        self.pool5 = self.max_pool(self.conv5_4, 'pool5')
+
+    def avg_pool(self, bottom, name):
+        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def max_pool(self, bottom, name):
+        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def conv_layer(self, bottom, name):
+        with tf.compat.v1.variable_scope(name):
+            filt = self.get_conv_filter(name)
+
+            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
+
+            conv_biases = self.get_bias(name)
+            bias = tf.nn.bias_add(conv, conv_biases)
+
+            relu = tf.nn.relu(bias)
+            return relu
+
+    def fc_layer(self, bottom, name):
+        with tf.compat.v1.variable_scope(name):
+            shape = bottom.get_shape().as_list()
+            dim = 1
+            for d in shape[1:]:
+                dim *= d
+            x = tf.reshape(bottom, [-1, dim])
+
+            weights = self.get_fc_weight(name)
+            biases = self.get_bias(name)
+
+            # Fully connected layer. Note that the '+' operation automatically
+            # broadcasts the biases.
+            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
+
+            return fc
+
+    def get_conv_filter(self, name):
+        return tf.constant(self.data_dict[name][0], name="filter")
+
+    def get_bias(self, name):
+        return tf.constant(self.data_dict[name][1], name="biases")
+
+    def get_fc_weight(self, name):
+        return tf.constant(self.data_dict[name][0], name="weights")
+
+
+def tiny_yolo_body(inputs, num_anchors, num_classes):
+    """Create Tiny YOLO_v3 model CNN body in keras."""
+    x1 = compose(
+        DarknetConv2D_BN_Leaky(16, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(32, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(64, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(128, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
+
+    x2 = compose(
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(512, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
+        DarknetConv2D_BN_Leaky(1024, (3, 3)),
+        DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
+
+    y1 = compose(
+        DarknetConv2D_BN_Leaky(512, (3, 3)),
+        DarknetConv2D(num_anchors*(num_classes+5), (1, 1)))(x2)
+
+    x2 = compose(
+        DarknetConv2D_BN_Leaky(128, (1, 1)),
+        UpSampling2D(2))(x2)
+
+    y2 = compose(
+        Concatenate(),
+        DarknetConv2D_BN_Leaky(256, (3, 3)),
+        DarknetConv2D(num_anchors*(num_classes+5), (1, 1)))([x2, x1])
+
+    return Model(inputs, [y1, y2])
+
+
+def tinier_yolo_se_body(inputs, num_anchors, num_classes):
+    """Create Tiny YOLO_v3 model CNN body in keras."""
+    x1 = compose(
+        DarknetConv2D_BN_Leaky(8, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(16, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(32, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(64, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(128, (3, 3)),
+    )(inputs)
+    x1 = SeBlock()(x1)
+
+    x2 = compose(
+        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
+        DarknetConv2D_BN_Leaky(256, (3, 3)),
+        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
+        DarknetConv2D_BN_Leaky(512, (3, 3)),
+        DarknetConv2D_BN_Leaky(128, (1, 1)),
+    )(x1)
+    x2 = SeBlock()(x2)
+
+    y1 = compose(
+        DarknetConv2D_BN_Leaky(256, (3, 3)),
+        DarknetConv2D(num_anchors*(num_classes+5), (1, 1))
+    )(x2)
+    y1 = SeBlock()(y1)
+
+    x2 = compose(
+        DarknetConv2D_BN_Leaky(64, (1, 1)),
+        UpSampling2D(2)
+    )(x2)
+    x2 = SeBlock()(x2)
+
+    y2 = compose(
+        Concatenate(),
+        DarknetConv2D_BN_Leaky(128, (3, 3)),
+        DarknetConv2D(num_anchors*(num_classes+5), (1, 1))
+    )([x2, x1])
+    y2 = SeBlock()(y2)
+
+    model = Model(inputs, [y1, y2])
+    model.summary(120)
+    return model
+
+
+@wraps(Conv2D)
+def DarknetConv2D(*args, **kwargs):
+    """Wrapper to set Darknet parameters for Convolution2D."""
+    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4),
+                           'padding': 'valid' if kwargs.get('strides') == (2, 2) else 'same'}
+    darknet_conv_kwargs.update(kwargs)
+    return Conv2D(*args, **darknet_conv_kwargs)
+
+
+def DarknetConv2D_BN_Leaky(*args, **kwargs):
+    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
+    no_bias_kwargs = {'use_bias': False}
+    no_bias_kwargs.update(kwargs)
+    return compose(
+        DarknetConv2D(*args, **no_bias_kwargs),
+        BatchNormalization(),
+        LeakyReLU(alpha=0.1))
+
+
+def get_tiny_inference_model(anchors, num_classes, weights_path='models/tiny_yolo_weights.h5'):
+    """create the inference model, for Tiny YOLOv3"""
+    image_input = Input(shape=(None, None, 3))
+    image_shape = Input(shape=(2,), dtype='int64', name='image_shape')
+    num_anchors = len(anchors)
+
+    model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)
+    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
+
+    model_body.load_weights(weights_path)
+    print('Load weights {}.'.format(weights_path))
+
+    boxes, scores, classes = Lambda(yolo_eval,
+                                    name='yolo_eval',
+                                    arguments={'anchors': anchors,
+                                               'num_classes': num_classes}
+                                    )([model_body.output, image_shape])
+    # boxes, scores, classes = yolo_eval([model_body.output, image_shape], anchors, num_classes)
+    model = Model([model_body.input, image_shape], [boxes, scores, classes])
+    # model.summary(120)
+    return model
+
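A hedged inference sketch wiring this up with the anchor and class files added elsewhere in this commit (graph-mode Keras assumed; the weights path is the function's own default):

    import numpy as np
    from isr.pre_process import get_anchors, get_classes

    anchors = get_anchors("isr/yolo_data/my_anchors.txt")
    class_names = get_classes("isr/yolo_data/my_classes.txt")
    model = get_tiny_inference_model(anchors, len(class_names))
    # image_batch: (1, h, w, 3) float in [0, 1]; the second input is the original (height, width)
    # boxes, scores, classes = model.predict([image_batch, np.array([[h, w]])])
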

BIN
isr/models/seal_detect_yolo.h5


BIN
isr/models/seal_remove_unet.h5


+ 146 - 0
isr/post_process.py

@@ -0,0 +1,146 @@
+import cv2
+from keras import backend as K
+import tensorflow as tf
+
+
+def yolo_eval(outputs,
+              anchors,
+              num_classes,
+              max_boxes=20,
+              score_threshold=.1,
+              iou_threshold=.1):
+    """Evaluate YOLO model on given input and return filtered boxes."""
+    # num_layers = len(anchors) // 3
+    # yolo_outputs = outputs[:num_layers]
+    # image_shape = outputs[num_layers]
+
+    yolo_outputs = outputs[0]
+    print("yolo_outputs", yolo_outputs[0])
+    num_layers = len(yolo_outputs)
+    image_shape = outputs[1]
+
+    print("num_layers", num_layers)
+    print("image_shape", image_shape)
+
+    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] # default setting
+    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
+    boxes = []
+    box_scores = []
+    for l in range(num_layers):
+        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
+                                                    anchors[anchor_mask[l]],
+                                                    num_classes, input_shape, image_shape)
+        boxes.append(_boxes)
+        box_scores.append(_box_scores)
+    boxes = K.concatenate(boxes, axis=0)
+    box_scores = K.concatenate(box_scores, axis=0)
+
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+    boxes_ = []
+    scores_ = []
+    classes_ = []
+    for c in range(num_classes):
+        # TODO: use keras backend instead of tf.
+        class_boxes = tf.boolean_mask(boxes, mask[:, c])
+        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+        nms_index = tf.image.non_max_suppression(
+            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
+        class_boxes = K.gather(class_boxes, nms_index)
+        class_box_scores = K.gather(class_box_scores, nms_index)
+        classes = K.ones_like(class_box_scores, 'int32') * c
+        boxes_.append(class_boxes)
+        scores_.append(class_box_scores)
+        classes_.append(classes)
+    boxes_ = K.concatenate(boxes_, axis=0)
+    scores_ = K.concatenate(scores_, axis=0)
+    classes_ = K.concatenate(classes_, axis=0)
+
+    return boxes_, scores_, classes_
+
+
+def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
+    """Process Conv layer output"""
+    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
+                                                                anchors, num_classes, input_shape)
+    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    boxes = K.reshape(boxes, [-1, 4])
+    box_scores = box_confidence * box_class_probs
+    box_scores = K.reshape(box_scores, [-1, num_classes])
+    return boxes, box_scores
+
+
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+    """Convert final layer features to bounding box parameters."""
+    num_anchors = len(anchors)
+    # feats = K.constant(feats)
+    # Reshape to batch, height, width, num_anchors, box_params.
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    grid_shape = K.shape(feats)[1:3]  # height, width
+    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+                    [1, grid_shape[1], 1, 1])
+    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+                    [grid_shape[0], 1, 1, 1])
+    grid = K.concatenate([grid_x, grid_y])
+    grid = K.cast(grid, K.dtype(feats))
+
+    feats = K.reshape(
+        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+    # Adjust predictions to each spatial grid point and anchor size.
+    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
+    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    if calc_loss:
+        return grid, feats, box_xy, box_wh
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+
+def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    """Get corrected boxes"""
+    box_yx = box_xy[..., ::-1]
+    box_hw = box_wh[..., ::-1]
+    input_shape = K.cast(input_shape, K.dtype(box_yx))
+    image_shape = K.cast(image_shape, K.dtype(box_yx))
+    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
+    offset = (input_shape-new_shape)/2./input_shape
+    scale = input_shape/new_shape
+    box_yx = (box_yx - offset) * scale
+    box_hw *= scale
+
+    box_mins = box_yx - (box_hw / 2.)
+    box_maxes = box_yx + (box_hw / 2.)
+    boxes = K.concatenate([
+        box_mins[..., 0:1],  # y_min
+        box_mins[..., 1:2],  # x_min
+        box_maxes[..., 0:1],  # y_max
+        box_maxes[..., 1:2]  # x_max
+    ])
+
+    # Scale boxes back to original image shape.
+    boxes *= K.concatenate([image_shape, image_shape])
+    return boxes
+
+
+def replace_seal_part(image_np, part_list, boxes):
+    # paste each processed part back into its original box region
+    for part, box in zip(part_list, boxes):
+        x_min, y_min = box[0]
+        x_max, y_max = box[1]
+        image_np[y_min:y_max, x_min:x_max, :] = part
+    return image_np
+
+
+def get_seal_part(image_np, boxes, classes):
+    # crop each detected box region out of the image; `classes` is currently unused
+    part_list = []
+    for box in boxes:
+        x_min, y_min = box[0]
+        x_max, y_max = box[1]
+        part_list.append(image_np[y_min:y_max, x_min:x_max, :])
+    return part_list

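These two helpers suggest a crop, clean, paste-back flow around the seal-removal U-Net; a glue-code sketch under the assumption that `unet` is one of the models above and `boxes` come from the YOLO detector (resizing each crop to the network's input size, and back, is omitted for brevity):

    import numpy as np

    parts = get_seal_part(image_np, boxes, classes)
    cleaned = [(unet.predict(p[np.newaxis] / 255.)[0] * 255.).astype('uint8')
               for p in parts]
    image_np = replace_seal_part(image_np, cleaned, boxes)
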
+ 60 - 0
isr/pre_process.py

@@ -0,0 +1,60 @@
+import colorsys
+import time
+import numpy as np
+import cv2
+
+
+def count_red_pixel(image_np, cnt=1000):
+    # count red pixels in HSV space; returns True if at least `cnt` are found
+    start_time = time.time()
+    image_hsv = cv2.cvtColor(image_np, cv2.COLOR_BGR2HSV)
+    # minus_1 = image_np[:, :, 2].astype('int32') - image_np[:, :, 0].astype('int32')
+    # minus_2 = image_np[:, :, 2].astype('int32') - image_np[:, :, 1].astype('int32')
+    # red_mask = (image_np[:, :, 2] >= 180) & (minus_1 >= 60) & (minus_2 >= 60)
+    # red hue wraps around in OpenCV's 0-180 range: roughly H in [0, 10] or [156, 180]
+    red_mask = ((image_hsv[:, :, 0] <= 10) | (image_hsv[:, :, 0] >= 156)) \
+               & (image_hsv[:, :, 1] >= 43) & (image_hsv[:, :, 1] <= 255) \
+               & (image_hsv[:, :, 2] >= 100) & (image_hsv[:, :, 2] <= 255)
+    red_cnt = np.sum(red_mask != 0)
+    print("red_cnt", red_cnt, time.time() - start_time)
+    return red_cnt >= cnt
+
+
+def get_classes(classes_path):
+    """loads the classes"""
+    with open(classes_path) as f:
+        class_names = f.readlines()
+    class_names = [c.strip() for c in class_names]
+    return class_names
+
+
+def get_anchors(anchors_path):
+    """loads the anchors from a file"""
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = [float(x) for x in anchors.split(',')]
+    return np.array(anchors).reshape(-1, 2)
+
+
+def get_colors(number, bright=True):
+    """
+    Generate random colors for drawing bounding boxes.
+    To get visually distinct colors, generate them in HSV space then
+    convert to RGB.
+    """
+    if number <= 0:
+        return []
+
+    brightness = 1.0 if bright else 0.7
+    hsv_tuples = [(x / number, 1., brightness)
+                  for x in range(number)]
+    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+    colors = list(
+        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
+            colors))
+    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
+    np.random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
+    np.random.seed(None)  # Reset seed to default.
+    return colors

+ 129 - 0
isr/utils.py

@@ -0,0 +1,129 @@
+import os
+from functools import reduce
+import cv2
+import numpy as np
+from PIL import Image, ImageFont, ImageDraw
+import sys
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+
+
+def pil_resize(image_np, height, width):
+    image_pil = Image.fromarray(cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB))
+    image_pil = image_pil.resize((int(width), int(height)), Image.BICUBIC)
+    image_np = cv2.cvtColor(np.asarray(image_pil), cv2.COLOR_RGB2BGR)
+    return image_np
+
+
+def get_best_predict_size(image_np, times=8, min_size=128, max_size=400):
+    sizes = []
+    for i in range(int(min_size/times), 100):
+        if i*times <= max_size:
+            sizes.append(i*times)
+    sizes.sort(reverse=True)
+
+    min_len = 10000
+    best_height = sizes[0]
+    for height in sizes:
+        if abs(image_np.shape[0] - height) < min_len:
+            min_len = abs(image_np.shape[0] - height)
+            best_height = height
+
+    min_len = 10000
+    best_width = sizes[0]
+    for width in sizes:
+        if abs(image_np.shape[1] - width) < min_len:
+            min_len = abs(image_np.shape[1] - width)
+            best_width = width
+
+    if best_height > best_width:
+        best_width = best_height
+    else:
+        best_height = best_width
+
+    return best_height, best_width
+
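For example, candidate sizes are multiples of 8 in [128, 400] and the final size is squared off to the larger side; a quick check, assuming the function as written:

    import numpy as np

    img = np.zeros((300, 210, 3), dtype='uint8')
    print(get_best_predict_size(img))  # -> (304, 304): height snaps to 304, width (208) is raised to match
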
+
+def letterbox_image(image, size):
+    """resize image with unchanged aspect ratio using padding"""
+    iw, ih = image.size
+    w, h = size
+    scale = min(w/iw, h/ih)
+    nw = int(iw*scale)
+    nh = int(ih*scale)
+
+    image = image.resize((nw, nh), Image.BICUBIC)
+    new_image = Image.new('RGB', size, (128, 128, 128))
+    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
+    return new_image
+
+
+def compose(*funcs):
+    """Compose arbitrarily many functions, evaluated left to right.
+    Reference: https://mathieularose.com/function-composition-in-python/
+    """
+    if funcs:
+        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
+    else:
+        raise ValueError('Composition of empty sequence not supported.')
+
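compose chains left to right, i.e. compose(f, g)(x) == g(f(x)); a tiny illustrative check:

    add_one = lambda x: x + 1
    double = lambda x: x * 2
    assert compose(add_one, double)(3) == 8  # double(add_one(3))
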
+
+def draw_boxes(image, out_boxes, out_classes, out_scores, class_names, colors):
+    font = ImageFont.truetype(font=os.path.abspath(os.path.dirname(__file__))+'/font/FiraMono-Medium.otf',
+                              size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
+    thickness = (image.size[0] + image.size[1]) // 300
+
+    box_list = []
+    for i, c in reversed(list(enumerate(out_classes))):
+        predicted_class = class_names[c]
+        box = out_boxes[i]
+        score = out_scores[i]
+
+        label = '{} {:.2f}'.format(predicted_class, score)
+        draw = ImageDraw.Draw(image)
+        label_size = draw.textsize(label, font)
+
+        top, left, bottom, right = box
+        top = max(0, np.floor(top + 0.5).astype('int32'))
+        left = max(0, np.floor(left + 0.5).astype('int32'))
+        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
+        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
+        # print(label, (left, top), (right, bottom))
+        box_list.append([(left, top), (right, bottom)])
+
+        if top - label_size[1] >= 0:
+            text_origin = np.array([left, top - label_size[1]])
+        else:
+            text_origin = np.array([left, top + 1])
+
+        # My kingdom for a good redistributable image drawing library.
+        for t in range(thickness):  # avoid shadowing the enumerate index `i`
+            draw.rectangle(
+                [left + t, top + t, right - t, bottom - t],
+                outline=colors[c])
+        draw.rectangle(
+            [tuple(text_origin), tuple(text_origin + label_size)],
+            fill=colors[c])
+        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
+        del draw
+
+    return image
+
+
+def adjust_boxes(image, boxes, threshold=10):
+    new_boxes = []
+    for box in boxes:
+        w, h = image.size
+        top, left, bottom, right = box
+        top = max(0, np.floor(top + 0.5).astype('int32'))
+        left = max(0, np.floor(left + 0.5).astype('int32'))
+        bottom = min(h, np.floor(bottom + 0.5).astype('int32'))
+        right = min(w, np.floor(right + 0.5).astype('int32'))
+
+        # enlarge the box a little; the detection sometimes fails to cover the whole seal
+        top = max(0, top - threshold)
+        bottom = min(h, bottom + threshold)
+        left = max(0, left - threshold)
+        right = min(w, right + threshold)
+
+        new_boxes.append([(left, top), (right, bottom)])
+    return new_boxes

+ 262 - 0
isr/yolo_data/convert.py

@@ -0,0 +1,262 @@
+#! /usr/bin/env python
+"""
+Reads Darknet config and weights and creates Keras model with TF backend.
+
+"""
+
+import argparse
+import configparser
+import io
+import os
+from collections import defaultdict
+
+import numpy as np
+from keras import backend as K
+from keras.layers import (Conv2D, Input, ZeroPadding2D, Add,
+                          UpSampling2D, MaxPooling2D, Concatenate)
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers import BatchNormalization
+from keras.models import Model
+from keras.regularizers import l2
+from keras.utils.vis_utils import plot_model as plot
+
+
+parser = argparse.ArgumentParser(description='Darknet To Keras Converter.')
+parser.add_argument('config_path', help='Path to Darknet cfg file.')
+parser.add_argument('weights_path', help='Path to Darknet weights file.')
+parser.add_argument('output_path', help='Path to output Keras model file.')
+parser.add_argument(
+    '-p',
+    '--plot_model',
+    help='Plot generated Keras model and save as image.',
+    action='store_true')
+parser.add_argument(
+    '-w',
+    '--weights_only',
+    help='Save as Keras weights file instead of model file.',
+    action='store_true')
+
+def unique_config_sections(config_file):
+    """Convert all config sections to have unique names.
+
+    Adds unique suffixes to config sections for compatibility with configparser.
+    """
+    section_counters = defaultdict(int)
+    output_stream = io.StringIO()
+    with open(config_file) as fin:
+        for line in fin:
+            if line.startswith('['):
+                section = line.strip().strip('[]')
+                _section = section + '_' + str(section_counters[section])
+                section_counters[section] += 1
+                line = line.replace(section, _section)
+            output_stream.write(line)
+    output_stream.seek(0)
+    return output_stream
+
+# %%
+def _main(args):
+    config_path = os.path.expanduser(args.config_path)
+    weights_path = os.path.expanduser(args.weights_path)
+    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
+        config_path)
+    assert weights_path.endswith(
+        '.weights'), '{} is not a .weights file'.format(weights_path)
+
+    output_path = os.path.expanduser(args.output_path)
+    assert output_path.endswith(
+        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
+    output_root = os.path.splitext(output_path)[0]
+
+    # Load weights and config.
+    print('Loading weights.')
+    weights_file = open(weights_path, 'rb')
+    major, minor, revision = np.ndarray(
+        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
+    if (major*10+minor)>=2 and major<1000 and minor<1000:
+        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
+    else:
+        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
+    print('Weights Header: ', major, minor, revision, seen)
+
+    print('Parsing Darknet config.')
+    unique_config_file = unique_config_sections(config_path)
+    cfg_parser = configparser.ConfigParser()
+    cfg_parser.read_file(unique_config_file)
+
+    print('Creating Keras model.')
+    input_layer = Input(shape=(None, None, 3))
+    prev_layer = input_layer
+    all_layers = []
+
+    weight_decay = float(cfg_parser['net_0']['decay']
+                         ) if 'net_0' in cfg_parser.sections() else 5e-4
+    count = 0
+    out_index = []
+    for section in cfg_parser.sections():
+        print('Parsing section {}'.format(section))
+        if section.startswith('convolutional'):
+            filters = int(cfg_parser[section]['filters'])
+            size = int(cfg_parser[section]['size'])
+            stride = int(cfg_parser[section]['stride'])
+            pad = int(cfg_parser[section]['pad'])
+            activation = cfg_parser[section]['activation']
+            batch_normalize = 'batch_normalize' in cfg_parser[section]
+
+            padding = 'same' if pad == 1 and stride == 1 else 'valid'
+
+            # Setting weights.
+            # Darknet serializes convolutional weights as:
+            # [bias/beta, [gamma, mean, variance], conv_weights]
+            prev_layer_shape = K.int_shape(prev_layer)
+
+            weights_shape = (size, size, prev_layer_shape[-1], filters)
+            darknet_w_shape = (filters, weights_shape[2], size, size)
+            weights_size = np.product(weights_shape)
+
+            print('conv2d', 'bn' if batch_normalize else '  ', activation, weights_shape)
+
+            conv_bias = np.ndarray(
+                shape=(filters, ),
+                dtype='float32',
+                buffer=weights_file.read(filters * 4))
+            count += filters
+
+            if batch_normalize:
+                bn_weights = np.ndarray(
+                    shape=(3, filters),
+                    dtype='float32',
+                    buffer=weights_file.read(filters * 12))
+                count += 3 * filters
+
+                bn_weight_list = [
+                    bn_weights[0],  # scale gamma
+                    conv_bias,  # shift beta
+                    bn_weights[1],  # running mean
+                    bn_weights[2]  # running var
+                ]
+
+            conv_weights = np.ndarray(
+                shape=darknet_w_shape,
+                dtype='float32',
+                buffer=weights_file.read(weights_size * 4))
+            count += weights_size
+
+            # DarkNet conv_weights are serialized Caffe-style:
+            # (out_dim, in_dim, height, width)
+            # We would like to set these to Tensorflow order:
+            # (height, width, in_dim, out_dim)
+            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
+            conv_weights = [conv_weights] if batch_normalize else [
+                conv_weights, conv_bias
+            ]
+
+            # Handle activation.
+            act_fn = None
+            if activation == 'leaky':
+                pass  # Add advanced activation later.
+            elif activation != 'linear':
+                raise ValueError(
+                    'Unknown activation function `{}` in section {}'.format(
+                        activation, section))
+
+            # Create Conv2D layer
+            if stride>1:
+                # Darknet uses left and top padding instead of 'same' mode
+                prev_layer = ZeroPadding2D(((1,0),(1,0)))(prev_layer)
+            conv_layer = (Conv2D(
+                filters, (size, size),
+                strides=(stride, stride),
+                kernel_regularizer=l2(weight_decay),
+                use_bias=not batch_normalize,
+                weights=conv_weights,
+                activation=act_fn,
+                padding=padding))(prev_layer)
+
+            if batch_normalize:
+                conv_layer = (BatchNormalization(
+                    weights=bn_weight_list))(conv_layer)
+            prev_layer = conv_layer
+
+            if activation == 'linear':
+                all_layers.append(prev_layer)
+            elif activation == 'leaky':
+                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
+                prev_layer = act_layer
+                all_layers.append(act_layer)
+
+        elif section.startswith('route'):
+            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
+            layers = [all_layers[i] for i in ids]
+            if len(layers) > 1:
+                print('Concatenating route layers:', layers)
+                concatenate_layer = Concatenate()(layers)
+                all_layers.append(concatenate_layer)
+                prev_layer = concatenate_layer
+            else:
+                skip_layer = layers[0]  # only one layer to route
+                all_layers.append(skip_layer)
+                prev_layer = skip_layer
+
+        elif section.startswith('maxpool'):
+            size = int(cfg_parser[section]['size'])
+            stride = int(cfg_parser[section]['stride'])
+            all_layers.append(
+                MaxPooling2D(
+                    pool_size=(size, size),
+                    strides=(stride, stride),
+                    padding='same')(prev_layer))
+            prev_layer = all_layers[-1]
+
+        elif section.startswith('shortcut'):
+            index = int(cfg_parser[section]['from'])
+            activation = cfg_parser[section]['activation']
+            assert activation == 'linear', 'Only linear activation supported.'
+            all_layers.append(Add()([all_layers[index], prev_layer]))
+            prev_layer = all_layers[-1]
+
+        elif section.startswith('upsample'):
+            stride = int(cfg_parser[section]['stride'])
+            assert stride == 2, 'Only stride=2 supported.'
+            all_layers.append(UpSampling2D(stride)(prev_layer))
+            prev_layer = all_layers[-1]
+
+        elif section.startswith('yolo'):
+            out_index.append(len(all_layers)-1)
+            all_layers.append(None)
+            prev_layer = all_layers[-1]
+
+        elif section.startswith('net'):
+            pass
+
+        else:
+            raise ValueError(
+                'Unsupported section header type: {}'.format(section))
+
+    # Create and save model.
+    if len(out_index)==0: out_index.append(len(all_layers)-1)
+    model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
+    print(model.summary())
+    if args.weights_only:
+        model.save_weights('{}'.format(output_path))
+        print('Saved Keras weights to {}'.format(output_path))
+    else:
+        model.save('{}'.format(output_path))
+        print('Saved Keras model to {}'.format(output_path))
+
+    # Check to see if all weights have been read.
+    remaining_weights = len(weights_file.read()) / 4
+    weights_file.close()
+    print('Read {} of {} from Darknet weights.'.format(count, count +
+                                                       remaining_weights))
+    if remaining_weights > 0:
+        print('Warning: {} unused weights'.format(remaining_weights))
+
+    if args.plot_model:
+        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
+        print('Saved model plot to {}.png'.format(output_root))
+
+
+if __name__ == '__main__':
+    _main(parser.parse_args())

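The converter would presumably be invoked along these lines (the cfg/weights filenames are placeholders; the output path matching the .h5 this commit ships is an assumption):

    python isr/yolo_data/convert.py yolov3-tiny.cfg yolov3-tiny.weights isr/models/seal_detect_yolo.h5
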
+ 1 - 0
isr/yolo_data/my_anchors.txt

@@ -0,0 +1 @@
+24,29,  34,196,  154,192,  158,47,  319,196,  642,196

+ 1 - 0
isr/yolo_data/my_classes.txt

@@ -0,0 +1 @@
+seal

File diff suppressed because it is too large
+ 0 - 0
result.html

