# Source repository: lishimin / VerificationCode
import copy
import json
import os
import random
import re
import sys
import time
import traceback
from glob import glob
from itertools import combinations, product

import chardet
import cv2
import jieba
import numpy as np
from PIL import ImageFont, ImageDraw, Image
from captcha.image import ImageCaptcha
from keras_preprocessing.sequence import pad_sequences
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb

sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from click_captcha.utils import np2pil, pil2np, pil_resize, pil_rotate, pil2np_a, np2pil_a, pil_resize_a, pil_rotate_a


def gen_siamese(paths, batch_size=32, shape=(40, 40), cls_num=1):
    """Yield endless batches of grayscale image pairs for the siamese model.

    Args:
        paths: list of tab-separated lines ``img1<TAB>img2<TAB>label`` whose
            image names are relative to ``../data/click/``. The list is
            shuffled in place each time it has been fully consumed.
        batch_size: number of pairs per batch.
        shape: ``(height, width)`` of the network input.
        cls_num: unused; kept for interface compatibility.

    Yields:
        ``({"input_1": X1, "input_2": X2}, {"output": Y})`` where ``X1``/``X2``
        have shape ``(batch_size, height, width, 1)`` and ``Y`` is a two-column
        one-hot array (``[0, 1]`` for label "1", ``[1, 0]`` otherwise).
    """
    num = len(paths)
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/click/"
    height, width = shape[:2]

    def load_gray(name):
        # Read, resize and convert one image to a (h, w, 1) grayscale array.
        img = cv2.imread(data_path + name)
        img = pil_resize(img, shape[0], shape[1])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return np.expand_dims(img, axis=-1)

    i = 0
    while True:
        X1 = np.zeros((batch_size, height, width, 1))
        X2 = np.zeros((batch_size, height, width, 1))
        Y = np.zeros((batch_size, 2))

        for j in range(batch_size):
            # Wrap around and reshuffle once every sample has been used.
            if i >= num:
                i = 0
                random.shuffle(paths)

            # rstrip instead of [:-1] so a final line without a newline
            # does not lose the last character of its label.
            img1, img2, label = paths[i].rstrip("\n").split("\t")
            X1[j] = load_gray(img1)
            X2[j] = load_gray(img2)
            Y[j] = np.array([0, 1]) if label == "1" else np.array([1, 0])

            # The original never advanced the index, so every slot of every
            # batch was filled with paths[0].
            i += 1

        yield {"input_1": X1, "input_2": X2}, {"output": Y}


def gen_mobile(paths, batch_size=32, shape=(40, 40), cls_num=5710, data_path="click"):
    """Yield endless ``(X, Y)`` batches for the single-character classifier.

    Args:
        paths: list of image paths; only the file name (after the last
            ``os.sep``) is used, and the integer before its first ``_`` is the
            class index. The list is shuffled in place.
        batch_size: number of samples per batch.
        shape: ``(height, width)`` for 1-channel input or a 3-element shape for
            3-channel input.
        cls_num: number of classes (width of the one-hot label).
        data_path: sub-directory of ``../data/`` that holds the images.

    Yields:
        ``X`` of shape ``(batch_size, height, width, channel)`` scaled to
        ``[0, 1]`` and one-hot ``Y`` of shape ``(batch_size, cls_num)``.
    """
    num = len(paths)
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/" + data_path + "/"
    height, width = shape[:2]
    channel = 3 if len(shape) > 2 else 1

    i = 0
    random.shuffle(paths)
    while True:
        X = np.zeros((batch_size, height, width, channel))
        Y = np.zeros((batch_size, cls_num))

        for j in range(batch_size):
            # Wrap around and reshuffle once every sample has been used.
            if i >= num:
                random.shuffle(paths)
                i = 0
            path = paths[i].split(os.sep)[-1]
            char_index = int(path.split("_")[0])

            img1 = cv2.imread(data_path + path)
            img1 = pil_resize(img1, shape[0], shape[1])
            if channel == 1:
                # A 2-element ``shape`` allocates a 1-channel batch; the BGR
                # image must be reduced to one channel or the assignment
                # below cannot broadcast (h, w, 3) into (h, w, 1).
                img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
                img1 = np.expand_dims(img1, axis=-1)

            X[j] = img1 / 255.
            Y[j, char_index] = 1
            i += 1
        yield X, Y


def gen_yolo_char(paths, batch_size, input_shape, anchors, num_classes, box_num=6):
    """Data generator for ``fit_generator`` (character detection).

    Args:
        paths: annotation lines ``image_name x1,y1,x2,y2,cls ...`` (space
            separated, newline terminated); image names are relative to
            ``../data/detect/``. Shuffled in place whenever the index is 0.
        batch_size: number of images per batch.
        input_shape: ``(height, width)`` the images are resized to.
        anchors: passed through to ``preprocess_true_boxes``.
        num_classes: passed through to ``preprocess_true_boxes``.
        box_num: expected number of boxes per image; samples with fewer boxes
            get their first two boxes appended.

    Yields:
        ``([image_data, *y_true], np.zeros(batch_size))``.

    Raises:
        RuntimeError: if every sample in ``paths`` fails to load in a row
            (previously this spun forever without any output).
    """
    n = len(paths)
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/detect/"
    i = 0
    failures = 0  # consecutive samples that could not be loaded
    while True:
        image_data = []
        box_data = []

        batch_cnt = 0
        while batch_cnt < batch_size:
            try:
                if i == 0:
                    np.random.shuffle(paths)

                ss = paths[i][:-1].split(" ")
                image = cv2.imread(data_path + ss[0])
                image = pil_resize(image, input_shape[0], input_shape[1])
                image = image / 255.
                box = np.array([np.array(list(map(int, b.split(',')))) for b in ss[1:]])

                # Samples with fewer boxes are padded by repeating the first two.
                # NOTE(review): this only reaches box_num when exactly two
                # boxes are missing - confirm against the data writer.
                if box.shape[0] < box_num:
                    box = np.concatenate([box, box[:2, :]], axis=0)
            except Exception as err:
                # Skip unreadable / malformed samples. ``Exception`` rather
                # than a bare except so Ctrl-C and SystemExit still propagate.
                i = (i+1) % n
                failures += 1
                if failures >= n:
                    raise RuntimeError(
                        "gen_yolo_char: no sample could be loaded from " + data_path
                    ) from err
                continue

            image_data.append(image)
            box_data.append(box)
            i = (i+1) % n
            batch_cnt += 1
            failures = 0

        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)


def gen_yolo_puzzle(paths, batch_size, input_shape, anchors, num_classes, box_num=1):
    """Data generator for ``fit_generator`` (puzzle-gap detection).

    Images are read from ``../data/detect2/``, resized, converted to
    grayscale, inverted (``255 - pixel``) and scaled to ``[0, 1]`` with a
    trailing channel axis.

    Args:
        paths: annotation lines ``image_name x1,y1,x2,y2,cls ...`` (space
            separated, newline terminated). Shuffled in place whenever the
            index is 0.
        batch_size: number of images per batch.
        input_shape: ``(height, width)`` the images are resized to.
        anchors: passed through to ``preprocess_true_boxes``.
        num_classes: passed through to ``preprocess_true_boxes``.
        box_num: expected number of boxes per image; samples with fewer boxes
            get their first two boxes appended.

    Yields:
        ``([image_data, *y_true], np.zeros(batch_size))``.

    Raises:
        RuntimeError: if every sample in ``paths`` fails to load in a row
            (previously this spun forever without any output).
    """
    n = len(paths)
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/detect2/"
    i = 0
    failures = 0  # consecutive samples that could not be loaded
    while True:
        image_data = []
        box_data = []

        batch_cnt = 0
        while batch_cnt < batch_size:
            try:
                if i == 0:
                    np.random.shuffle(paths)

                ss = paths[i][:-1].split(" ")
                image = cv2.imread(data_path + ss[0])
                image = pil_resize(image, input_shape[0], input_shape[1])
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                # Invert intensities, then restore uint8 before adding the
                # channel axis and normalising.
                image = np.uint8(255. - image)
                image = np.expand_dims(image, -1)
                image = image / 255.
                box = np.array([np.array(list(map(int, b.split(',')))) for b in ss[1:]])

                # Samples with fewer boxes are padded by repeating the first two.
                if box.shape[0] < box_num:
                    box = np.concatenate([box, box[:2, :]], axis=0)
            except Exception as err:
                # Skip unreadable / malformed samples. ``Exception`` rather
                # than a bare except so Ctrl-C and SystemExit still propagate.
                i = (i+1) % n
                failures += 1
                if failures >= n:
                    raise RuntimeError(
                        "gen_yolo_puzzle: no sample could be loaded from " + data_path
                    ) from err
                continue

            image_data.append(image)
            box_data.append(box)
            i = (i+1) % n
            batch_cnt += 1
            failures = 0

        image_data = np.array(image_data)
        box_data = np.array(box_data)
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        yield [image_data, *y_true], np.zeros(batch_size)


def gen_drag(paths, batch_size=32, shape=(128, 256), cls_num=2):
    """Yield endless ``(X, Y)`` batches for the drag-captcha column model.

    ``../data/drag/map.txt`` maps each image file name to a column index
    (``name index`` per line). The label is a ``(height, width, 1)`` mask that
    is 1 along that column and 0 elsewhere.

    Args:
        paths: list of image paths; only the file name (after the last
            ``os.sep``) is used. Shuffled in place.
        batch_size: number of samples per batch.
        shape: ``(height, width)`` for 1-channel input or a 3-element shape for
            3-channel input.
        cls_num: unused; kept for interface compatibility.

    Yields:
        ``X`` of shape ``(batch_size, height, width, channel)`` scaled to
        ``[0, 1]`` and ``Y`` of shape ``(batch_size, height, width, 1)``.
    """
    num = len(paths)
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/drag/"

    map_path = data_path+"map.txt"
    with open(map_path, "r") as f:
        _list = f.readlines()
    map_dict = {}
    for s in _list:
        ss = s.rstrip("\n").split(" ")
        # Ignore blank / incomplete lines instead of failing on ss[1].
        if len(ss) < 2:
            continue
        map_dict[ss[0]] = ss[1]

    height, width = shape[:2]
    channel = 3 if len(shape) > 2 else 1

    i = 0
    random.shuffle(paths)
    while True:
        X = np.zeros((batch_size, height, width, channel))
        Y = np.zeros((batch_size, height, width, 1))

        for j in range(batch_size):
            # Wrap around and reshuffle once every sample has been used.
            if i >= num:
                random.shuffle(paths)
                i = 0
            path = paths[i].split(os.sep)[-1]
            w_index = int(map_dict.get(path))

            img1 = cv2.imread(data_path + path)
            img1 = pil_resize(img1, shape[0], shape[1])
            if channel == 1:
                # A 2-element ``shape`` allocates a 1-channel batch; reduce the
                # BGR image to one channel so it fits (h, w, 1).
                img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
                img1 = np.expand_dims(img1, axis=-1)
            i += 1

            X[j] = img1 / 255.
            # NOTE(review): w_index is used unscaled although the image is
            # resized to ``shape`` - confirm it refers to the resized width.
            Y[j, :, w_index, 0] = 1

        yield X, Y


def gen_phrase(map_list, batch_size=32, shape=(5707, 3)):
    """Yield endless ``(X, Y)`` batches of character-index sequences.

    Args:
        map_list: lines of the form ``"[i0, i1, ...] L\\n"`` where the prefix
            is a Python list literal of character indices and ``L`` is a
            single-digit label.
        batch_size: number of sequences per batch.
        shape: ``(voc_dim, timesteps)``; sequences shorter than ``timesteps``
            are right-padded with ``voc_dim``.

    Yields:
        ``X`` of shape ``(batch_size, timesteps)`` and ``Y`` of shape
        ``(batch_size, 1)``.

    Raises:
        ValueError, SyntaxError: if a line's prefix is not a plain literal.
    """
    # Local import: the module does not import ast at file level.
    import ast

    voc_dim, timesteps = shape[:2]

    # literal_eval instead of eval: the lines come from a data file and must
    # never be able to execute arbitrary expressions.
    data_list = [
        [ast.literal_eval(line[:-3]), int(line[-2:-1])]
        for line in map_list
    ]
    num = len(data_list)

    i = 0
    random.shuffle(data_list)
    while True:
        X = np.zeros((batch_size, timesteps))
        Y = np.zeros((batch_size, 1))

        for j in range(batch_size):
            # Wrap around and reshuffle once every sample has been used.
            if i >= num:
                random.shuffle(data_list)
                i = 0
            indices, label = data_list[i]

            padded = list(indices) + [voc_dim] * (timesteps - len(indices))
            X[j] = np.array(padded)
            Y[j] = label
            i += 1
        yield X, Y


def generate_data_siamese(char_num=6, char_shape=(40, 40), image_shape=(160, 260)):
    """Generate tip/character crops and the pair map for siamese training.

    For each of 1000 rounds a random background from ``../data/base/`` gets
    ``char_num`` random characters from ``../data/chinese_2500.txt`` drawn on
    it. The tip images and the character crops are saved under
    ``../data/click/`` and every (tip, crop) pair is appended to
    ``../data/click/map.txt`` as ``tip<TAB>crop<TAB>label`` with label 1 when
    both have the same index and 0 otherwise.

    Args:
        char_num: number of characters drawn per background.
        char_shape: ``(height, width)`` of one character crop.
        image_shape: ``(height, width)`` the background is resized to.
            ``char_on_image`` requires this argument; the original call
            omitted it and raised TypeError.
    """
    bg_paths = glob("../data/base/*")
    char_path = "../data/chinese_2500.txt"
    with open(char_path, "r") as f:
        char_str = f.read()

    data_dir = "../data/click/"
    for i in range(1000):
        bg = cv2.imread(random.sample(bg_paths, 1)[0])
        char_list = [char_str[x] for x in random.sample(range(len(char_str)), char_num)]
        image_np, position_list, tips_image_list = char_on_image(
            bg, char_list, char_shape, image_shape)

        # Save the tip images.
        tips_path_list = []
        for k, tips_image in enumerate(tips_image_list):
            tips_path = str(i) + "_" + str(k) + ".jpg"
            tips_path_list.append(tips_path)
            cv2.imwrite(data_dir+tips_path, tips_image)

        # Save the character regions cut out of the composed image; their
        # numbering continues after the tip images.
        char_path_list = []
        for j in range(len(char_list)):
            p = position_list[j]
            char_path = str(i) + "_" + str(len(tips_image_list)+j) + ".jpg"
            char_path_list.append(char_path)
            cv2.imwrite(data_dir+char_path,
                        image_np[p[0]:p[0]+char_shape[0], p[1]:p[1]+char_shape[1], :])

        # Append the pair map: matching indices are positive samples.
        with open("../data/click/map.txt", "a") as f:
            for j, tips_path in enumerate(tips_path_list):
                for k, char_path in enumerate(char_path_list):
                    label = str(1) if j == k else str(0)
                    f.write(tips_path + "\t" + char_path + "\t" + label + "\n")


def generate_data_mobile(char_num=6, char_shape=(40, 40), image_shape=(160, 260)):
    """Generate per-character training crops for the classifier.

    For every character of ``../data/chinese_5710.txt`` the character is drawn
    on 50 randomly chosen (and randomly distorted / flipped) backgrounds; each
    crop is saved as ``<index>_<n>_2.jpg`` and the first four tip images as
    ``<index>_<n>_1.jpg`` under ``../data/click/``.

    Args:
        char_num: unused; kept for interface compatibility.
        char_shape: ``(height, width)`` of one character crop.
        image_shape: ``(height, width)`` the background is resized to.
    """
    # Size pairs noted by the original author:
    # (40,40) (160, 260)
    # (80,80) (360, 590)
    backgrounds = glob("../data/base/*")
    with open("../data/chinese_5710.txt", "r") as f:
        charset = f.read()

    out_dir = "../data/click/"
    crop_h, crop_w = char_shape[0], char_shape[1]

    # Several images are produced for every character.
    for i, char in enumerate(charset):
        if i % 100 == 0:
            print("Loop", i)

        # One character per composed background image.
        single = [char]

        crop_idx = 0
        tips_pool = []
        for _ in range(50):
            # Pick a background and randomly perturb it.
            bg = cv2.imread(random.sample(backgrounds, 1)[0])
            if random.choice([0, 0, 1]):
                bg = distort_image(bg)
            if random.choice([0, 0, 1]):
                bg = flip_image(bg)

            composed, spots, tips = char_on_image(bg, single, char_shape, image_shape)
            tips_pool += tips
            for spot in spots:
                name = str(i) + "_" + str(crop_idx) + "_2" + ".jpg"
                top, left = spot[0], spot[1]
                cv2.imwrite(out_dir + name, composed[top:top+crop_h, left:left+crop_w, :])
                crop_idx += 1

        # Only the first four tip images are kept.
        for tip_idx, tip in enumerate(tips_pool[:4]):
            name = str(i) + "_" + str(tip_idx) + "_1" + ".jpg"
            cv2.imwrite(out_dir + name, tip)


def generate_data_yolo_char(char_num=6, char_shape=(40, 40), image_shape=(160, 256)):
    """Generate character-detection images and append their box annotations.

    For indices 40000..59999 a random background gets ``char_num`` random
    characters drawn on it and is saved as ``<i>.jpg`` under
    ``../data/detect/``; one line ``name x1,y1,x2,y2,0 ...`` per image is
    appended to ``../data/detect/map.txt``. For indices below 5000 a tip strip
    ``<i>_0.jpg`` and its annotation line are written as well.

    Args:
        char_num: number of characters drawn per image.
        char_shape: ``(height, width)`` of one character.
        image_shape: ``(height, width)`` of the generated image.
    """
    def annotation(name, corners):
        # One map line: file name followed by space-separated boxes, each
        # given as left,top,right,bottom,class for a [top, left] corner.
        boxes = [
            str(c[1]) + "," + str(c[0]) + "," +
            str(c[1]+char_shape[1]) + "," + str(c[0]+char_shape[0]) +
            "," + str(0)
            for c in corners
        ]
        return name + " " + " ".join(boxes) + "\n"

    backgrounds = glob("../data/base/*")
    with open("../data/chinese_5710.txt", "r") as f:
        charset = f.read()

    out_dir = "../data/detect/"

    for i in range(40000, 60000):
        if i % 1000 == 0:
            print("Loop", i)
        with_tips = i < 5000

        bg = cv2.imread(random.sample(backgrounds, 1)[0])
        if random.choice([0, 0, 1]):
            bg = distort_image(bg)
        if random.choice([0, 0, 1]):
            bg = flip_image(bg)

        picked = random.sample(range(len(charset)), char_num)
        chars = [charset[x] for x in picked]
        image_np, position_list, tips_image_list = char_on_image(bg, chars, char_shape, image_shape, 4)

        if with_tips:
            tips_image_np, tips_position_list = get_tips_image(tips_image_list, char_shape, image_shape)

        image_name = str(i) + ".jpg"
        cv2.imwrite(out_dir + image_name, image_np)

        if with_tips:
            tips_name = str(i) + "_0.jpg"
            cv2.imwrite(out_dir + tips_name, tips_image_np)

        # Append the annotation line(s) for this round.
        with open(out_dir + "map.txt", "a") as f:
            f.write(annotation(image_name, position_list))
            if with_tips:
                f.write(annotation(tips_name, tips_position_list))


def generate_data_yolo_puzzle(image_shape=(160, 256)):
    """Generate puzzle-gap detection images and their box annotations.

    ``../data/detect2/map.txt`` is truncated first. Then 10000 images are
    built from random ``.jpeg`` backgrounds with a square puzzle piece of
    random side length (35..60), saved as ``<i>.jpg``, and one line
    ``name x1,y1,x2,y2,0 ...`` per image is appended to the map file.

    Args:
        image_shape: ``(height, width)`` of the generated image.
    """
    backgrounds = glob("../data/base/*.jpeg")
    out_dir = "../data/detect2/"
    map_file = out_dir + "map.txt"

    # Start from an empty map file.
    with open(map_file, "w") as f:
        f.write("")

    for i in range(0, 10000):
        if i % 1000 == 0:
            print("Loop", i)

        bg = cv2.imread(random.sample(backgrounds, 1)[0])
        if random.choice([0, 0, 1]):
            bg = distort_image(bg)
        if random.choice([0, 0, 1]):
            bg = flip_image(bg)

        side = random.randint(35, 60)
        puzzle_shape = (side, side)
        image_np, position_list = puzzle_on_image(bg, puzzle_shape, image_shape)

        image_name = str(i) + ".jpg"
        cv2.imwrite(out_dir + image_name, image_np)

        # Append the annotation line: left,top,right,bottom,class per box.
        with open(map_file, "a") as f:
            boxes = [
                str(p[1]) + "," + str(p[0]) + "," +
                str(p[1]+puzzle_shape[1]) + "," + str(p[0]+puzzle_shape[0]) +
                "," + str(0)
                for p in position_list
            ]
            f.write(image_name + " " + " ".join(boxes) + "\n")


def generate_data_drag_image(image_shape=(160, 260)):
    """Generate drag-captcha images and the name-to-column map.

    ``../data/drag/map.txt`` is truncated first. Then 10000 images are built
    from random backgrounds via ``get_drag_image``, saved as ``<i>.jpg``, and
    a line ``name clip_line[0][0]`` per image is appended to the map file.

    Args:
        image_shape: ``(height, width)`` the background is resized to.
    """
    backgrounds = glob("../data/base/*")
    out_dir = "../data/drag/"
    map_file = out_dir + "map.txt"

    # Start from an empty map file.
    with open(map_file, "w") as f:
        f.write("")

    for i in range(10000):
        if i % 1000 == 0:
            print("Loop", i)

        source = random.sample(backgrounds, 1)[0]
        bg = pil_resize(cv2.imread(source), image_shape[0], image_shape[1])
        if random.choice([0, 0, 1]):
            bg = distort_image(bg)
        if random.choice([0, 0, 1]):
            bg = flip_image(bg)

        image_np, clip_line = get_drag_image(bg)

        image_name = str(i) + ".jpg"
        cv2.imwrite(out_dir + image_name, image_np)

        # Append the mapping entry for this image.
        with open(map_file, "a") as f:
            f.write(image_name + " " + str(clip_line[0][0]) + "\n")


def generate_data_phrase():
    """Build the phrase training map ``../data/phrase/map3.txt``.

    Reads ``char.txt`` (one character per line; the line number is the
    character's index) and the phrase lists ``phrase3.txt``, ``phrase4.txt``
    and ``phrase5.txt``. For every phrase it writes one positive sample
    ``"[i0, i1, ...] 1\\n"`` and, with ``negative_way_flag`` False as set
    below, up to two negative samples ``"[...] 0\\n"``: reorderings of the
    same indices (no repeated index) that differ from the original order and
    whose character string is not itself a known phrase.
    Output is buffered and appended to the map file every 500000 phrases.
    """
    data_path = os.path.dirname(os.path.abspath(__file__)) + "/../data/phrase/"

    char_path = data_path+"char.txt"
    with open(char_path, "r") as f:
        char_list = f.readlines()
    # Keys keep their trailing newline, exactly as read from the file.
    char_dict = {}
    for i in range(len(char_list)):
        char_dict[char_list[i]] = i

    phrase_list = []
    phrase_path = data_path+"phrase3.txt"
    with open(phrase_path, "r") as f:
        phrase_list += f.readlines()
    phrase_path = data_path+"phrase4.txt"
    with open(phrase_path, "r") as f:
        phrase_list += f.readlines()
    phrase_path = data_path+"phrase5.txt"
    with open(phrase_path, "r") as f:
        phrase_list += f.readlines()
    # Set of all phrases (with newline) for fast "is this a real phrase" checks.
    phrase_set = set(phrase_list)

    map_path = data_path+"map3.txt"
    # Truncate the output file; everything below appends to it.
    with open(map_path, "w") as f:
        f.write("")
    data_list = []
    start_time = time.time()
    i = 0
    # False selects the product-based negative sampling in the else branch.
    negative_way_flag = False
    for phrase in phrase_list:
        # Flush the buffer periodically and report progress.
        if i % 500000 == 0:
            with open(map_path, "a") as f:
                f.writelines(data_list)
            data_list = []
            print("Loop", i, len(phrase_list), time.time()-start_time)
            start_time = time.time()
        i += 1

        # Positive sample
        index_list = []
        # phrase[:-1] drops the trailing newline; characters missing from
        # char_dict yield None here.
        for char in phrase[:-1]:
            index_list.append(char_dict.get(char+"\n"))
        data_list.append(str(index_list) + " 1\n")

        # Negative samples
        if negative_way_flag:
            # Swap two distinct random positions; keep the result only if it
            # is not a known phrase.
            index1 = random.randint(0, len(index_list)-1)
            find_flag = False
            while not find_flag:
                index2 = random.randint(0, len(index_list)-1)
                if index1 != index2:
                    find_flag = True
            temp = index_list[index1]
            index_list[index1] = index_list[index2]
            index_list[index2] = temp
            if "".join([char_list[x][:-1] for x in index_list]) + "\n" not in phrase_set:
                data_list.append(str(index_list) + " 0\n")
        else:
            # Enumerate every index tuple of the same length, shuffle them,
            # and take the first two valid reorderings.
            products = list(product(index_list, repeat=len(index_list)))
            random.shuffle(products)
            negative_cnt = 0
            for p in products:
                if negative_cnt >= 2:
                    break
                p = list(p)
                # Skip tuples that repeat an index.
                if len(set(p)) != len(p):
                    continue
                if p != index_list and "".join([char_list[x][:-1] for x in p]) + "\n" not in phrase_set:
                    data_list.append(str(p) + " 0\n")
                    negative_cnt += 1

    # Write whatever is still buffered.
    with open(map_path, "a") as f:
        f.writelines(data_list)


def generate_data_phrase_raw(word_len=5):
    """Extract Chinese phrases of ``word_len`` characters from a text corpus.

    Every ``*.txt`` under ``D:/Chinese_corpus/answer/*/`` is decoded (gbk,
    gb2312, gb18030 tried in that order), segmented with jieba, and each token
    made only of CJK characters that has ``word_len`` characters - or that
    reaches ``word_len`` when joined with the preceding CJK-only token - is
    written, one per line, to ``../data/phrase/phrase<word_len>_new.txt``.

    The buffer is de-duplicated after each file and flushed every 1000 files.
    Files that cannot be decoded are reported and skipped.

    Args:
        word_len: phrase length in characters.
    """
    paths = glob("D:/Chinese_corpus/answer/*/*.txt")

    phrase_path = "../data/phrase/phrase" + str(word_len) + "_new.txt"
    reg = "[^\u4e00-\u9fa5]"
    filter_word = ["的"]

    # Truncate once; every later write appends. The original reopened the
    # file with "w" on each flush and then cleared the buffer, so only the
    # last batch survived.
    with open(phrase_path, "w") as f:
        f.write("")

    triple_list = []
    start_time = time.time()
    for i, path in enumerate(paths):
        if i % 1000 == 0:
            with open(phrase_path, "a") as f:
                f.writelines(triple_list)
            print("Loop", i, len(paths), time.time()-start_time)
            start_time = time.time()
            triple_list = []

        with open(path, "rb") as f:
            _b = f.read()

        text = None
        for encoding in ("gbk", "gb2312", "gb18030"):
            try:
                text = _b.decode(encoding)
                break
            except UnicodeDecodeError:
                continue
        if text is None:
            # Previously execution continued with an undefined (or stale)
            # ``text``; skip the undecodable file instead.
            print(chardet.detect(_b), "is None")
            continue

        # Mask filtered words so they cannot become part of a phrase.
        for word in filter_word:
            text = text.replace(word, "#"*len(word))

        word_list = jieba.lcut(text, cut_all=False, HMM=True)

        for j in range(1, len(word_list)):
            current = word_list[j]
            # Tokens containing any non-CJK character are ignored.
            if re.search(reg, current):
                continue
            last = word_list[j-1]

            if len(current) == word_len:
                triple_list.append(current + "\n")
            elif len(current) + len(last) == word_len and not re.search(reg, last):
                triple_list.append(last+current + "\n")

        triple_list = list(set(triple_list))

    print("len(triple_list)", len(triple_list))
    with open(phrase_path, "a") as f:
        f.writelines(triple_list)


def char_on_image(image_np, char_list, char_shape, image_shape, tip_char_num=1):
    """Paste characters at random non-overlapping spots and build tip images.

    Args:
        image_np: background image array.
        char_list: characters to draw.
        char_shape: size passed to ``get_char_image``; ``char_shape[1]`` is
            also the slice width of each tip image.
        image_shape: ``(height, width)`` the background is resized to.
        tip_char_num: number of leading characters that get a tip image.

    Returns:
        ``(image_np, position_list, image_list)``: the composed image, the
        ``[top, left]`` position of every character, and the noisy tip images.
    """
    position_list = []
    for char in char_list:
        # Render the single character first, then bring the background to size.
        glyph = get_char_image(char, char_shape)
        image_np = pil_resize(image_np, image_shape[0], image_shape[1])

        # PIL reports (width, height); numpy reports (height, width).
        fg_w, fg_h = glyph.size[:2]
        bg_h, bg_w = image_np.shape[:2]

        # Draw candidate positions until one overlaps no earlier character.
        while True:
            top = random.randint(0, bg_h-fg_h)
            left = random.randint(0, bg_w-fg_w)
            collides = any(
                get_iou(left, top, left+fg_w, top+fg_h,
                        p[1], p[0], p[1]+fg_w, p[0]+fg_h) > 0
                for p in position_list
            )
            if not collides:
                break
        position_list.append([top, left])

        # Paste the character onto the background.
        image_np = get_image_paste(image_np, glyph, top, left)

    # Build the tip strip: unrotated characters on a white background.
    tip_arrays = [
        pil2np_a(get_char_image(char, char_shape, rotate=False, bg_color=(255, 255, 255, 255)))
        for char in char_list[:tip_char_num]
    ]
    # Add interference to the whole strip at once.
    strip = create_noise(np.concatenate(tip_arrays, axis=1))

    # Cut the strip back into one image per tip character.
    step = char_shape[1]
    image_list = [strip[:, k*step:(k+1)*step, :] for k in range(tip_char_num)]
    return image_np, position_list, image_list


def get_char_image(char, char_shape, rotate=True, bg_color=(0, 0, 0, 0)):
    """Render one character as an RGBA PIL image.

    The character is drawn on an 80x80 canvas with a random font from
    ``../font/`` and a random colour, optionally rotated by a random angle in
    0..80 or 280..360 degrees, and finally resized to ``char_shape``.

    Args:
        char: the character to draw.
        char_shape: size passed to ``Image.resize``.
        rotate: whether to apply the random rotation.
        bg_color: RGBA fill for the canvas and for rotation gaps.

    Returns:
        The rendered PIL image.
    """
    # Blank canvas; the font size of 75 is tuned for the 80x80 canvas.
    canvas = Image.new('RGBA', (80, 80), bg_color)

    font_file = random.sample(glob("../font/*"), 1)[0]
    font = ImageFont.truetype(font_file, int(75))
    painter = ImageDraw.Draw(canvas)
    rgb = random_color()
    painter.text((3, -6), char, font=font, fill=(rgb[0], rgb[1], rgb[2]))

    if rotate:
        angle = random.randint(0, 80) if random.choice([0, 1]) else random.randint(280, 360)
        canvas = canvas.rotate(angle, expand=False, fillcolor=bg_color)

    return canvas.resize(char_shape)


def get_tips_image(tips_image_list, char_shape, image_shape, roatate=True):
    """Lay tip images side by side in the top-left corner of a black canvas.

    Each tip has a one-in-three chance of being rotated by a random angle
    (white fill) before placement.

    Args:
        tips_image_list: tip image arrays to concatenate horizontally.
        char_shape: ``char_shape[1]`` is the horizontal step between tips.
        image_shape: ``(height, width)`` of the output canvas.
        roatate: unused; kept (with its spelling) for interface compatibility.

    Returns:
        ``(canvas, position_list)`` where every position is ``[0, left]``.
    """
    prepared = []
    for tip in tips_image_list:
        if random.choice([0, 0, 1]):
            tip = pil_rotate(tip, random.randint(0, 360), (255, 255, 255))
        prepared.append(tip)

    strip = np.concatenate(prepared, axis=1)

    canvas = np.full((image_shape[0], image_shape[1], 3), 0, np.uint8)
    strip_h, strip_w = strip.shape[0], strip.shape[1]
    canvas[:strip_h, :strip_w, :] = strip

    positions = [[0, idx*char_shape[1]] for idx in range(len(prepared))]
    return canvas, positions


def get_image_roi(image_bg, image_fg, roi_h, roi_w):
    """Blend ``image_fg`` into ``image_bg`` at ``(roi_h, roi_w)`` via an Otsu mask.

    The foreground is thresholded with Otsu's method; pixels selected by the
    mask keep the background, pixels selected by the inverted mask take the
    foreground. ``image_bg`` is modified in place and returned.

    Args:
        image_bg: background BGR array.
        image_fg: foreground BGR array.
        roi_h: top row of the target region.
        roi_w: left column of the target region.

    Returns:
        ``image_bg`` with the region replaced.
    """
    fg_h, fg_w = image_fg.shape[:2]
    rows = slice(roi_h, roi_h+fg_h)
    cols = slice(roi_w, roi_w+fg_w)

    # Region of the background that will be replaced.
    region = image_bg[rows, cols]

    # Mask from the grayscale foreground; its inverse selects the glyph.
    fg_gray = cv2.cvtColor(image_fg, cv2.COLOR_BGR2GRAY)
    _, keep_bg = cv2.threshold(fg_gray, 0, 255, cv2.THRESH_OTSU)
    keep_fg = cv2.bitwise_not(keep_bg)

    bg_part = cv2.bitwise_and(region, region, mask=keep_bg)
    fg_part = cv2.bitwise_and(image_fg, image_fg, mask=keep_fg)

    # Background outside the glyph plus the glyph itself, written back.
    image_bg[rows, cols, :] = cv2.add(bg_part, fg_part)
    return image_bg


def get_image_paste(image_bg, image_fg, roi_h, roi_w):
    """Paste a PIL foreground onto a BGR background array using its own mask.

    Args:
        image_bg: background BGR array.
        image_fg: PIL image, also used as the paste mask.
        roi_h: top coordinate of the paste position.
        roi_w: left coordinate of the paste position.

    Returns:
        The composed image as returned by ``pil2np``.
    """
    # Add an alpha channel and switch to PIL so paste can use the mask.
    canvas = np2pil_a(cv2.cvtColor(image_bg, cv2.COLOR_BGR2BGRA))
    # PIL expects (x, y), i.e. (left, top).
    canvas.paste(image_fg, (roi_w, roi_h), image_fg)
    return pil2np(canvas)


def random_color(dims=3):
    """Return a random colour tuple with at least one channel <= 125.

    All ``dims`` channels are redrawn from [0, 255] until one of them is
    not above 125.

    :param dims: number of channels to generate.
    :return: tuple of ``dims`` ints in [0, 255].
    """
    while True:
        channels = [random.randint(0, 255) for _ in range(dims)]
        if any(value <= 125 for value in channels):
            break
    return tuple(channels)


def create_noise(image_np):
    """Overlay two noise curves and a layer of noise dots on an image.

    The image is converted with ``np2pil``, decorated through
    ``ImageCaptcha`` using a fresh ``random_color()`` for each layer, and
    converted back with ``pil2np``.

    :param image_np: input image accepted by ``np2pil``.
    :return: the decorated image as returned by ``pil2np``.
    """
    captcha = ImageCaptcha()
    canvas = np2pil(image_np)
    for _ in range(2):
        canvas = captcha.create_noise_curve(canvas, random_color())
    canvas = captcha.create_noise_dots(canvas, random_color())
    return pil2np(canvas)


def get_iou(x1, y1, x2, y2, a1, b1, a2, b2):
    """Return the intersection-over-union of two axis-aligned boxes.

    :param x1, y1, x2, y2: top-left and bottom-right corner of the first box.
    :param a1, b1, a2, b2: top-left and bottom-right corner of the second box.
    :return: intersection area divided by union area; 0.0 when the union
        is empty (both boxes have zero area), instead of dividing by zero.
    """
    # Overlap extent on each axis, clamped to zero when the boxes are disjoint.
    inter_w = max(0, min(x2, a2) - max(x1, a1))
    inter_h = max(0, min(y2, b2) - max(y1, b1))
    area_x = inter_w * inter_h

    area_n = (x2 - x1) * (y2 - y1)
    area_m = (a2 - a1) * (b2 - b1)
    union = area_n + area_m - area_x

    if union == 0:
        return 0.0
    return area_x / union


def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    """Convert ground-truth boxes into per-scale YOLO training targets.

    Parameters
    ----------
    true_boxes: array, shape=(m, T, 5)
        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
    input_shape: array-like, hw, multiples of 32
    anchors: array, shape=(N, 2), wh
    num_classes: integer

    Returns
    -------
    y_true: list of array, one per output layer, each of shape
        (m, grid_h, grid_w, anchors_per_layer, 5 + num_classes); the stored
        xywh values are relative to input_shape.
    """
    assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'

    # Three output layers when at least nine anchors are given, otherwise
    # the two-layer masks are used.
    layer_count = len(anchors) // 3
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    input_wh = input_shape[::-1]

    # Corner format -> (centre, size), then normalise by the input size.
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    true_boxes[..., 0:2] = boxes_xy / input_wh
    true_boxes[..., 2:4] = boxes_wh / input_wh

    batch_size = true_boxes.shape[0]
    stride_of_layer = {0: 32, 1: 16, 2: 8}
    grid_shapes = [input_shape // stride_of_layer[layer]
                   for layer in range(layer_count)]
    y_true = []
    for layer in range(layer_count):
        target_shape = (batch_size,
                        grid_shapes[layer][0],
                        grid_shapes[layer][1],
                        len(anchor_mask[layer]),
                        5 + num_classes)
        y_true.append(np.zeros(target_shape, dtype='float32'))

    # Anchors get a leading axis so they broadcast against every box; both
    # anchors and boxes are compared as if centred on the origin.
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    anchor_area = anchors[..., 0] * anchors[..., 1]
    valid_mask = boxes_wh[..., 0] > 0

    for b in range(batch_size):
        # Rows with non-positive width are padding and are ignored.
        wh = boxes_wh[b, valid_mask[b]]
        if len(wh) == 0:
            continue
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes

        overlap_mins = np.maximum(box_mins, anchor_mins)
        overlap_maxes = np.minimum(box_maxes, anchor_maxes)
        overlap_wh = np.maximum(overlap_maxes - overlap_mins, 0.)
        overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        iou = overlap_area / (box_area + anchor_area - overlap_area)

        # Index of the best-matching anchor for every valid box.
        best_anchor = np.argmax(iou, axis=-1)

        # NOTE: t counts valid rows but is used to index true_boxes[b]
        # directly, as in the original implementation.
        for t, anchor_idx in enumerate(best_anchor):
            for layer in range(layer_count):
                mask = anchor_mask[layer]
                if anchor_idx not in mask:
                    continue
                grid_h, grid_w = grid_shapes[layer][0], grid_shapes[layer][1]
                col = np.floor(true_boxes[b, t, 0] * grid_w).astype('int32')
                row = np.floor(true_boxes[b, t, 1] * grid_h).astype('int32')
                slot = mask.index(anchor_idx)
                class_id = true_boxes[b, t, 4].astype('int32')
                cell = y_true[layer]
                cell[b, row, col, slot, 0:4] = true_boxes[b, t, 0:4]
                cell[b, row, col, slot, 4] = 1
                cell[b, row, col, slot, 5 + class_id] = 1

    return y_true


def get_puzzle(shape=(80, 80)):
    """Draw a random jigsaw-piece shaped RGBA image.

    A 40x40 gray rectangle is centred on a transparent canvas. One to four
    circles are then drawn at the midpoints of randomly chosen rectangle
    edges, each shifted either into or out of the rectangle. A circle whose
    centre ends up within the rectangle's extent is drawn fully transparent
    (a notch); otherwise it is drawn in the fill colour (a tab). The canvas
    is finally cropped to the rectangle plus any tabs.

    :param shape: (height, width) of the working canvas.
    :return: cropped PIL RGBA image of the puzzle piece.
    """
    # Create an empty (fully transparent) canvas; PIL expects (width, height)
    image_pil = Image.new('RGBA', (shape[1], shape[0]), (255, 255, 255, 0))
    draw = ImageDraw.Draw(image_pil)

    # Centre the rectangle; points are stored as [row, column]
    rec_shape = (40, 40)
    left_up_point = [int((shape[0]-rec_shape[0])/2), int((shape[1]-rec_shape[1])/2)]
    right_down_point = [left_up_point[0]+rec_shape[0], left_up_point[1]+rec_shape[1]]

    # Opacity: flag picks between a more transparent and a more opaque piece
    flag = random.choice([0, 1])
    if flag:
        alpha = random.randint(100, 150)
    else:
        alpha = random.randint(160, 255)

    # Fill colour: a gray level, darker when flag is set
    if flag:
        r = random.randint(0, 30)
    else:
        r = random.randint(100, 180)
    # r = random.randint(0, 255)
    fill_color = (r, r, r, alpha)

    # Outline colour: two times out of three a contrasting gray,
    # otherwise the same gray as the fill
    if random.choice([0, 1, 1]):
        if flag:
            r = random.randint(140, 170)
        else:
            r = random.randint(70, 100)
        outline_color = (r, r, r)
    else:
        outline_color = (fill_color[0], fill_color[1], fill_color[2])

    draw.rectangle((left_up_point[1],
                    left_up_point[0],
                    left_up_point[1]+rec_shape[1],
                    left_up_point[0]+rec_shape[0]),
                   fill=fill_color,
                   outline=outline_color)

    # Circles (or half circles) of the puzzle piece.
    # Each candidate is [x, y, is_width]: the midpoint of the left, right,
    # top and bottom edge; is_width is 1 for the left/right edges.
    radius = random.randint(int(rec_shape[0] / 3 / 2), int(rec_shape[0] / 3 / 1.2))
    center_list = [[left_up_point[1], int((right_down_point[0]+left_up_point[0])/2), 1],
                   [right_down_point[1], int((right_down_point[0]+left_up_point[0])/2), 1],
                   [int((right_down_point[1]+left_up_point[1])/2), left_up_point[0], 0],
                   [int((right_down_point[1]+left_up_point[1])/2), right_down_point[0], 0]
                   ]
    circle_num = random.randint(1, 4)
    # print("circle_num", circle_num)
    center_list = random.sample(center_list, circle_num)

    # Crop bounds start as the rectangle and grow when a tab sticks out
    min_w, min_h = left_up_point[1], left_up_point[0]
    max_w, max_h = right_down_point[1], right_down_point[0]
    for center in center_list:
        # (w, h) keeps the original edge midpoint; center is shifted below
        w, h = center[:2]
        is_width = center[2]

        # Shift the centre along x (left/right edge) or y (top/bottom edge)
        into_ratio = random.randint(int(1/2*radius), int(3/4*radius))
        if is_width:
            # Randomly pick the shift direction, which decides whether the
            # circle bulges out of or cuts into the rectangle
            if random.choice([0, 1]):
                center = (center[0]+into_ratio, center[1])
            else:
                center = (center[0]-into_ratio, center[1])
        else:
            if random.choice([0, 1]):
                center = (center[0], center[1]+into_ratio)
            else:
                center = (center[0], center[1]-into_ratio)

        # Decide the circle colour: transparent when the shifted centre lies
        # within the rectangle's extent (notch), fill colour otherwise (tab)
        color = fill_color
        if is_width:
            if left_up_point[1] <= center[0] <= right_down_point[1]:
                color = (0, 0, 0, 0)
        else:
            if left_up_point[0] <= center[1] <= right_down_point[0]:
                color = (0, 0, 0, 0)

        # print("center, color, alpha", center, color, alpha)
        draw.ellipse([(center[0]-radius, center[1]-radius),
                      (center[0]+radius, center[1]+radius)],
                     fill=color,
                     outline=outline_color)

        # Tab case (circle drawn in the fill colour): repaint the part of the
        # circle between the rectangle edge and the inner side of the circle
        # with the fill colour, covering the outline drawn there
        if color[3] == alpha:
            if is_width:
                if center[0] < w:
                    draw.rectangle((w,
                                    h-radius,
                                    center[0]+radius,
                                    center[1]+radius),
                                   fill=fill_color)
                else:
                    draw.rectangle((center[0]-radius,
                                    center[1]-radius,
                                    w,
                                    h+radius),
                                   fill=fill_color)
            else:
                if center[1] < h:
                    draw.rectangle((w-radius,
                                    h,
                                    center[0]+radius,
                                    center[1]+radius),
                                   fill=fill_color)
                else:
                    draw.rectangle((center[0]-radius,
                                    center[1]-radius,
                                    w+radius,
                                    h),
                                   fill=fill_color)
        # Notch case (transparent circle): clear the part of the circle that
        # lies between the rectangle edge and the outer side of the circle
        else:
            if is_width:
                if center[0] > w:
                    draw.rectangle((center[0]-radius,
                                    center[1]-radius,
                                    w,
                                    h+radius),
                                   fill=(0, 0, 0, 0))
                else:
                    draw.rectangle((w,
                                    h-radius,
                                    center[0]+radius,
                                    center[1]+radius),
                                   fill=(0, 0, 0, 0))
            else:
                if center[1] > h:
                    draw.rectangle((center[0]-radius,
                                    center[1]-radius,
                                    w+radius,
                                    h),
                                   fill=(0, 0, 0, 0))
                else:
                    draw.rectangle((w-radius,
                                    h,
                                    center[0]+radius,
                                    center[1]+radius),
                                   fill=(0, 0, 0, 0))

        # Added area: a tab extends the bounding box used for the final crop
        if color[3] == alpha:
            if center[0]-radius <= min_w:
                min_w = center[0]-radius
            if center[0]+radius >= max_w:
                max_w = center[0]+radius
            if center[1]-radius <= min_h:
                min_h = center[1]-radius
            if center[1]+radius >= max_h:
                max_h = center[1]+radius

    # +1 because the crop box's right/bottom bounds are exclusive
    image_pil = image_pil.crop([min_w, min_h, max_w+1, max_h+1])
    # image_pil.show("2")
    return image_pil


def puzzle_on_image(image_np, puzzle_shape, image_shape):
    """Paste a freshly generated puzzle piece at a random spot of an image.

    :param image_np: background image array.
    :param puzzle_shape: size passed straight to PIL ``resize`` for the piece.
    :param image_shape: the background is resized with
        ``pil_resize(image_np, image_shape[0], image_shape[1])``.
    :return: (composited image, [[row, column]]) where the single entry is
        the top-left corner at which the piece was pasted.
    """
    # Generate the piece first, then normalise the background size.
    piece = get_puzzle().resize(puzzle_shape)
    image_np = pil_resize(image_np, image_shape[0], image_shape[1])

    # PIL reports (width, height); numpy reports (height, width).
    piece_w, piece_h = piece.size[:2]
    canvas_h, canvas_w = image_np.shape[:2]

    # Random top-left corner that keeps the whole piece inside the image.
    top = random.randint(0, canvas_h - piece_h)
    left = random.randint(0, canvas_w - piece_w)

    image_np = get_image_paste(image_np, piece, top, left)
    return image_np, [[top, left]]


def distort_image(image_np, hue=.1, sat=1.5, val=1.5):
    """
    Randomly jitter the hue, saturation and value of a BGR image.

    :param image_np: input image array, converted with COLOR_BGR2RGB.
    :param hue: the hue shift is drawn uniformly from [-hue, hue).
    :param sat: the saturation factor is drawn from [1, sat) and is inverted
        half of the time.
    :param val: same as ``sat`` but for the value channel.
    :return: distorted uint8 image, converted back with COLOR_RGB2BGR.
    """
    def uniform(low=0, high=1):
        return np.random.rand()*(high-low) + low

    hue_shift = uniform(-hue, hue)
    if uniform() < .5:
        sat_scale = uniform(1, sat)
    else:
        sat_scale = 1/uniform(1, sat)
    if uniform() < .5:
        val_scale = uniform(1, val)
    else:
        val_scale = 1/uniform(1, val)

    rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
    hsv = rgb_to_hsv(rgb/255.)

    # Hue is cyclic: wrap values that left [0, 1] back into range.
    hue_channel = hsv[..., 0]
    hue_channel += hue_shift
    hue_channel[hue_channel > 1] -= 1
    hue_channel[hue_channel < 0] += 1

    hsv[..., 1] *= sat_scale
    hsv[..., 2] *= val_scale

    # Saturation/value are not cyclic: clamp everything to [0, 1].
    hsv[hsv > 1] = 1
    hsv[hsv < 0] = 0

    rgb_out = hsv_to_rgb(hsv)
    return cv2.cvtColor(np.uint8(rgb_out*255), cv2.COLOR_RGB2BGR)


def flip_image(image_np):
    """Flip an image horizontally or vertically with equal probability.

    :param image_np: image array accepted by ``cv2.flip``.
    :return: the flipped image.
    """
    # cv2.flip code: 1 = horizontal flip, 0 = vertical flip.
    flip_code = 1 if random.choice([0, 1]) else 0
    return cv2.flip(image_np, flip_code)


def get_drag_image(image_np):
    """Cut a top strip from an image and swap its left and right parts.

    The strip height is drawn between 1/4 and 3/4 of the image height and
    the cut column between 1/6 and 5/6 of the image width.

    :param image_np: image array with at least two dimensions (h, w, ...).
    :return: (new image, seam) where seam is
        ``[(x, 0), (x, strip_height)]`` and x is the column at which the
        former right part ends in the new image.
    """
    height, width = image_np.shape[:2]

    # Keep only the top rows.
    cut_row = random.randint(int(1/4*height), int(3/4*height))
    top_strip = image_np[:cut_row, ...]

    # Split the strip at a random column and glue the halves back swapped.
    cut_col = random.randint(int(1/6*width), int(5/6*width))
    left_part = top_strip[:, :cut_col, ...]
    right_part = top_strip[:, cut_col:, ...]
    swapped = np.concatenate([right_part, left_part], axis=1)

    # The seam sits where the moved right part ends.
    seam_x = right_part.shape[1]
    return swapped, [(seam_x, 0), (seam_x, cut_row)]


def get_real_data_puzzle(shape=(160, 256)):
    """Augment every image in ``../data/detect2_real/`` and save the results.

    For each readable image three files are written back into the same
    directory, numbered consecutively from 10000: the resized image, a
    colour-distorted version of it, and a flipped version of the distorted
    image.

    :param shape: (height, width) passed to ``pil_resize``.
    """
    paths = glob("../data/detect2_real/*")
    i = 10000
    for p in paths:
        image = cv2.imread(p)
        if image is None:
            # cv2.imread returns None for files it cannot decode. The glob
            # matches everything in the directory, including the .json
            # labels and map.txt that read_label_puzzle keeps there, so
            # skip those instead of failing on the next call.
            continue

        image = pil_resize(image, shape[0], shape[1])
        cv2.imwrite("../data/detect2_real/"+str(i)+".jpg", image)
        i += 1

        image = distort_image(image)
        cv2.imwrite("../data/detect2_real/"+str(i)+".jpg", image)
        i += 1

        image = flip_image(image)
        cv2.imwrite("../data/detect2_real/"+str(i)+".jpg", image)
        i += 1


def read_label_puzzle():
    """Append one annotation line per JSON label file to ``map.txt``.

    For every ``*.json`` in ``../data/detect2_real/`` the first two points
    of the first shape are truncated to ints and written as
    ``<imagePath> x0,y0,x1,y1,0``.
    """
    label_paths = glob("../data/detect2_real/*.json")
    map_path = "../data/detect2_real/map.txt"
    with open(map_path, "a") as map_file:
        for label_path in label_paths:
            with open(label_path, "r") as label_file:
                label = json.load(label_file)

            points = label.get("shapes")[0].get("points")
            image_path = label.get("imagePath")

            # points[0] then points[1], each as x followed by y.
            coords = [str(int(points[corner][axis]))
                      for corner in (0, 1)
                      for axis in (0, 1)]
            map_file.write(image_path + " " + ",".join(coords) + ",0" + "\n")


def fix_map_txt():
    """Randomly enlarge the boxes of ``../data/map.txt`` into ``map_new.txt``.

    Each input line is ``<path> x0,y0,x1,y1,<class>``. Three times out of
    four the box is grown by 1-4 pixels on every side (the first two
    coordinates are decreased, the remaining ones increased). The class id
    is always written back as 0. Both the old and new line are printed.
    """
    with open("../data/map.txt", "r") as src:
        lines = src.readlines()

    with open("../data/map_new.txt", "w") as dst:
        rewritten = []
        for line in lines:
            fields = line.split(" ")
            # Drop the trailing character of the last field, then the
            # final comma-separated item (the class id).
            coords = fields[-1][:-1].split(",")[:-1]

            if random.choice([0, 1, 1, 1]):
                pix = random.choice([1, 2, 2, 3, 3, 4, 4])
                coords = [str(int(value) - pix) if idx < 2 else str(int(value) + pix)
                          for idx, value in enumerate(coords)]

            new_line = fields[0] + " " + ",".join(coords) + ",0\n"
            rewritten.append(new_line)
            print("line", line)
            print("new_line", new_line)
        dst.writelines(rewritten)


def get_char_map():
    """Write the sorted set of characters used by the phrase files.

    Reads phrase3/4/5.txt, removes newlines, and writes every distinct
    remaining character on its own line to ``../data/phrase/char.txt``.
    """
    phrase_paths = (
        "../data/phrase/phrase3.txt",
        "../data/phrase/phrase4.txt",
        "../data/phrase/phrase5.txt",
    )
    lines = []
    for phrase_path in phrase_paths:
        with open(phrase_path, "r") as f:
            lines += f.readlines()

    text = "".join(lines).replace("\n", "")
    unique_chars = sorted({ch + "\n" for ch in text})
    with open("../data/phrase/char.txt", "w") as f:
        f.writelines(unique_chars)


if __name__ == "__main__":
    # Script entry point. Only generate_data_phrase() is active; the
    # commented-out snippets below are kept as manual experiments.

    # Experiment: run the YOLO batch generator once over the annotation file.
    # from click_captcha.utils import get_classes, get_anchors
    # annotation_path = '../data/detect/map.txt'
    # log_dir = 'yolo_data/logs/000/'
    # classes_path = 'yolo_data/my_classes.txt'
    # anchors_path = 'yolo_data/tiny_yolo_anchors.txt'
    # class_names = get_classes(classes_path)
    # num_classes = len(class_names)
    # anchors = get_anchors(anchors_path)
    #
    # with open(annotation_path) as f:
    #     lines = f.readlines()
    # random.shuffle(lines)
    # input_shape = (160, 256)
    # g = gen_yolo(lines, 10, input_shape, anchors, num_classes)
    # list(g)

    generate_data_phrase()

    # Experiment: puzzle / drag-image data generation.
    # generate_data_yolo_puzzle()
    # gen_yolo_puzzle()
    # _path = "../data/base/0b16d1f1a4e017d4a7ab5779263887f1.jpeg"
    # get_drag_image(cv2.imread(_path))

    # Experiment: generate a few puzzle pieces.
    # for ii in range(10):
    #     im = get_puzzle()

    # Experiment: de-duplicate the characters in chinese.txt.
    # with open("../data/chinese.txt", "r") as f:
    #     _str = f.read()
    #
    # _list = [c for c in _str]
    # _list = list(set(_list))
    # _str = "".join(_list)

    # with open("../data/chinese.txt", "w") as f:
    #     f.write(_str)