#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Repository: lishimin/VerificationCode
# @Author  : bidikeji
# @Time    : 2019/11/25 0025 9:54
import os
# These variables are set before tensorflow is imported below so that they are
# already in the environment when the CUDA runtime initialises.
# Number GPUs by PCI bus id.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# "-1" is not a valid device index, so no GPU is exposed; presumably intended
# to force inference onto the CPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from tensorflow.keras import models
import tensorflow.keras.backend as K
import tensorflow as tf
from PIL import Image
import numpy as np
import string
import re

# NOTE: the former `global graph` statement was dropped; at module scope every
# assignment is already global, so the statement had no effect.
# Counters initialised to zero; not referenced again in the visible part of
# this file (kept because other modules may import them).
total_num = 0
neg_num = 0
# Default TF graph captured at import time. The predict_* functions enter it
# with `graph.as_default()` before running their decoders.
graph = tf.get_default_graph()

# Character tables and their recognition models.
# Each predict_* function maps a decoded class index i to <name>_characters[i].
# All .h5 / .txt paths are relative to the current working directory.

# Digit captchas.
digit_characters = string.digits
digit_base_model = models.load_model('gru_digit_base_model.h5')
# Arithmetic captchas. Earlier character sets are kept below for reference.
# arith_characters = '0123456789+*-%'
# arith_characters = '0123456789+?-×=' # 2021/11/17: added several kinds of arithmetic captcha
arith_characters = '0123456789+?-×/='  # 2022/6/21: added division, two more captcha types, and captchas with two operators
arith_base_model = models.load_model('gru_arith_base_model.h5')  # 2021/11/17: added several kinds of arithmetic captcha
# Chinese captchas. The earlier inline character set (below) was replaced by a file.
# chinese_characters = '四生乐句付仗斥令仔乎白仙甩他瓜们用丘仪失丛代印册匆禾'
with open('chinese_characters.txt', encoding='utf-8') as f:
    chinese_characters = f.read().strip()  # 2020-07-28: extended to 524 Chinese characters
chinese_base_model = models.load_model('gru_chinese_base_model.h5') # added 2019-12-19; 2020-07-28: extended to 524 Chinese characters
# English captchas. The earlier mixed-case character set is kept below for reference.
# english_characters = string.digits + string.ascii_uppercase + string.ascii_lowercase
english_characters = string.ascii_lowercase + string.digits  # 2020-07-28: switched to all-lowercase, covering several captcha types
english_base_model = models.load_model('gru_english_base_model.h5') # added 2020-05-18; 2020-07-28: switched to all-lowercase, covering several captcha types

# Case-sensitive English captchas: uppercase, then lowercase, then digits.
up_low_case_characters = string.ascii_uppercase + string.ascii_lowercase + string.digits
up_low_case_model = models.load_model('gru_up_low_case_base_model.h5') # 2025-01-10: case-sensitive captchas

def _build_ctc_decoder(base_model):
    """Wrap *base_model* in a backend function that returns CTC-decoded indices.

    Returns a tuple ``(probs, length_scale, decode_fn)``:

    * ``probs`` is the model's output tensor.
    * ``length_scale`` is an int32 input with one value per sample. It is
      multiplied by the size of axis 1 of ``probs`` to form the
      ``input_length`` given to ``K.ctc_decode``. The predict_* functions
      feed ones, so every sample is decoded over that full axis.
    * ``decode_fn`` is called as ``decode_fn([images, scales])`` and returns a
      one-element list holding the first decoded sequence tensor.
    """
    probs = base_model.output
    length_scale = tf.keras.Input(batch_shape=[None], dtype='int32')
    steps = K.shape(probs)[1]
    decoded = K.ctc_decode(y_pred=probs, input_length=length_scale * steps)
    decode_fn = K.function([base_model.input, length_scale], [decoded[0][0]])
    return probs, length_scale, decode_fn


# One decoder per model, built in the same order as the models were loaded.
digit_input, digit_input_length, digit_decode = _build_ctc_decoder(digit_base_model)

arith_input, arith_input_length, arith_decode = _build_ctc_decoder(arith_base_model)

chinese_input, chinese_input_length, chinese_decode = _build_ctc_decoder(chinese_base_model)

english_input, english_input_length, english_decode = _build_ctc_decoder(english_base_model)

up_low_case_input, up_low_case_input_length, up_low_case_decode = _build_ctc_decoder(up_low_case_model)

# def decode_arith(arith = '2×?=12'):
#     arith = arith.replace('×', '*')
#     items = re.split('=', arith)
#     if len(items)==2:
#         if items[-1] in ['?', '']:
#             return eval(items[0])
#         l = re.split('-|\+|\*', items[0])
#         signs = re.findall('-|\+|\*', items[0])
#         if len(l)==2 and len(signs)==1:
#             if l[1] == '?':
#                 if signs[0] == '+':
#                     return eval('%s-%s'%(items[-1], l[0]))
#                 elif signs[0] == '-':
#                     return eval('%s-%s'%(l[0],items[-1]))
#                 elif signs[0] == '*':
#                     return int(eval('%s/%s'%(items[-1], l[0])))
#             elif l[0] == '?':
#                 if signs[0] == '+':
#                     return eval('%s-%s'%(items[-1], l[1]))
#                 elif signs[0] == '-':
#                     return eval('%s+%s'%(l[1],items[-1]))
#                 elif signs[0] == '*':
#                     return int(eval('%s/%s'%(items[-1], l[1])))
#     return ''


def decode_arith(arith='2×?=12'):
    """Solve a recognised arithmetic-captcha expression.

    Accepted shapes (the multiplication sign may be written as the
    character '×' or as '*'):

    * ``A op B [op C ...]=?`` or ``A op B [op C ...]=``: evaluate the left side.
    * ``? op B=R`` or ``A op ?=R``: solve for the single unknown.
    * ``A op B``: a bare two-operand expression, evaluated directly.

    Args:
        arith: The expression text, built from digits, ``+ - × * / = ?``.

    Returns:
        The integer answer as a string (non-integer results are truncated by
        ``int``), or ``''`` when the text is not a supported expression or
        evaluating it fails (for example division by zero).
    """
    # Raw strings: the regex escapes below are not valid *string* escapes and
    # trigger warnings on current Python when written in plain literals.
    full_pattern = r'^(\d+|\?)([\+\-\*/](\d+|\?))+=(\d+|\?)?$'
    single_op_pattern = r'^(\d+|\?)[\+\-\*/](\d+|\?)=\d+$'
    bare_pattern = r'^\d+[\+\-\*/]\d+$'
    inverse = {'+': '-', '-': '+', '*': '/', '/': '*'}

    # NOTE(security): eval() is only reached after the text matched one of the
    # patterns above, i.e. it consists solely of digits and single + - * /
    # operators. Do not loosen the patterns without replacing eval().
    try:
        arith = arith.replace('×', '*')
        if re.search(full_pattern, arith) and arith.count('?') <= 1:
            if arith[-1] == '?':
                # "...=?": strip the trailing "=?" and evaluate the left side.
                return str(int(eval(arith[:-2])))
            if arith[-1] == '=':
                # "...=": strip the trailing "=" and evaluate the left side.
                return str(int(eval(arith[:-1])))
            if not re.search(single_op_pattern, arith):
                # e.g. an unknown inside a multi-operator expression.
                print('公式出错:', arith)
                return ''

            a, sign, b, _, quest = re.split(r'([+\-*/=])', arith)
            if a == '?':
                # ? op b = r  ->  r inverse(op) b
                expr = quest + inverse[sign] + b
            elif b == '?':
                if sign in ('+', '*'):
                    # a op ? = r  ->  r inverse(op) a   (commutative operators)
                    expr = quest + inverse[sign] + a
                else:
                    # a - ? = r -> a - r ;  a / ? = r -> a / r
                    expr = a + sign + quest
            else:
                # No unknown at all: report it, then fall back to evaluating
                # the left-hand side.
                print('公式出错:', arith)
                expr = a + sign + b
            return str(int(eval(expr)))
        if re.search(bare_pattern, arith):
            return str(int(eval(arith)))
        return ''
    except Exception:
        # Best-effort contract: any evaluation problem yields an empty answer.
        return ''

def predict_digit(img):
    """Recognise a digit captcha and return the decoded digit string.

    Args:
        img: Image object exposing PIL's ``resize`` API (presumably RGB, as
            the script section of this file converts to RGB before
            predicting; confirm against the model's input shape).

    Returns:
        The characters of ``digit_characters`` selected by the decoded indices.
    """
    resized = img.resize((100, 50), Image.BILINEAR)
    pixels = np.array(resized) / 255.0  # scale 0..255 values to 0..1
    batch = np.array([pixels])  # batch of one sample
    length_scale = np.ones(batch.shape[0])
    with graph.as_default():
        decoded = digit_decode([batch, length_scale])[0]
    return ''.join(digit_characters[idx] for idx in decoded[0])

def predict_arith(img):
    """Recognise an arithmetic captcha and return its computed answer.

    The image is resized to 200x64, decoded to an expression over
    ``arith_characters`` and then solved with ``decode_arith``.

    Args:
        img: Image object exposing PIL's ``resize`` API (presumably RGB;
            confirm against the model's input shape).

    Returns:
        The answer as a string, or ``""`` when the expression cannot be solved.
    """
    # Size history: changed on 2021-11-17; on 2022-06-21 from (100, 32) to (200, 64).
    img_arr = np.array(img.resize((200, 64), Image.BILINEAR)) / 255.0
    X_test = np.array([img_arr])  # batch of one sample
    with graph.as_default():
        out_pre = arith_decode([X_test, np.ones(X_test.shape[0])])[0]
    expression = ''.join(arith_characters[x] for x in out_pre[0])
    try:
        return decode_arith(expression)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # no longer swallowed here.
        return ""

def predict_chinese(img):
    """Recognise a Chinese-character captcha and return the decoded text.

    Args:
        img: Image object exposing PIL's ``resize`` API (presumably RGB;
            confirm against the model's input shape).

    Returns:
        The characters of ``chinese_characters`` selected by the decoded indices.
    """
    # Input size is 120x40 (previously 100x50), changed when two more Chinese
    # captcha styles were added.
    resized = img.resize((120, 40), Image.BILINEAR)
    pixels = np.array(resized) / 255.0  # scale 0..255 values to 0..1
    batch = np.array([pixels])  # batch of one sample
    length_scale = np.ones(batch.shape[0])
    with graph.as_default():
        decoded = chinese_decode([batch, length_scale])[0]
    return ''.join(chinese_characters[idx] for idx in decoded[0])

def predict_english(img):
    """Recognise a lowercase-English/digit captcha and return the decoded text.

    Args:
        img: Image object exposing PIL's ``resize`` API (presumably RGB;
            confirm against the model's input shape).

    Returns:
        The characters of ``english_characters`` selected by the decoded indices.
    """
    # Bilinear resampling is used here; NEAREST was the noted alternative.
    resized = img.resize((200, 70), Image.BILINEAR)
    pixels = np.array(resized) / 255.0  # scale 0..255 values to 0..1
    batch = np.array([pixels])  # batch of one sample
    length_scale = np.ones(batch.shape[0])
    with graph.as_default():
        decoded = english_decode([batch, length_scale])[0]
    return ''.join(english_characters[idx] for idx in decoded[0])

def predict_up_low_english(img):
    """Recognise a case-sensitive English/digit captcha and return the text.

    Args:
        img: Image object exposing PIL's ``resize`` API (presumably RGB;
            confirm against the model's input shape).

    Returns:
        The characters of ``up_low_case_characters`` selected by the decoded
        indices.
    """
    # Bilinear resampling is used here; NEAREST was the noted alternative.
    resized = img.resize((200, 70), Image.BILINEAR)
    pixels = np.array(resized) / 255.0  # scale 0..255 values to 0..1
    batch = np.array([pixels])  # batch of one sample
    length_scale = np.ones(batch.shape[0])
    with graph.as_default():
        decoded = up_low_case_decode([batch, length_scale])[0]
    return ''.join(up_low_case_characters[idx] for idx in decoded[0])

if __name__ == "__main__":
    # Ad-hoc accuracy check: run the case-sensitive model over a folder of
    # labelled samples whose file name (up to the first '.') is the label.
    import glob
    import time
    import sys     # only needed by the disabled sample-copy snippet below
    import shutil  # only needed by the disabled sample-copy snippet below
    neg = []
    # files = glob.glob(r'E:\linuxPro\captcha_pro\FileInfo0526\标注样本\shensexiansandian\*.jpg')[-3000:]

    files = glob.glob('E:/captcha_pic/up_low_case/*.jpg')

    t1 = time.time()
    pos = 0
    for path in files:
        # basename copes with the separators glob returns on the current OS,
        # unlike splitting on a hard-coded '/' or '\\'.
        file = os.path.basename(path)
        label = file.split('.')[0]
        # Close the file handle once the prediction is done.
        with Image.open(path) as raw:
            img = raw if raw.mode == "RGB" else raw.convert("RGB")
            pre = predict_up_low_english(img)
        if label != pre:
            print(file, label, pre)
            neg.append(file)
        else:
            pos += 1
        # Disabled: copy 4-character samples into english_imgs/.
        # elif len(label) == 4:
        #     if os.path.exists(path):
        #         try:
        #             shutil.copy(path, 'english_imgs/' + file)
        #         except IOError as e:
        #             print('Unable to copy file %s' % e)
        #         except:
        #             print('Unexcepted error', sys.exc_info)
    total = len(neg) + pos
    print(len(neg), pos, time.time()-t1)
    if total:
        print('准确率：%.4f'%(pos/total))
    else:
        # Avoid ZeroDivisionError when the glob matched nothing.
        print('No sample images found; accuracy is undefined.')