#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Author : bidikeji
# @Time : 2019/11/25 0025 9:54
import os

# These variables are set before the TensorFlow import below.
# NOTE(review): "-1" presumably hides every CUDA device so inference runs on
# the CPU -- confirm against the deployment environment.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from tensorflow.keras import models
import tensorflow.keras.backend as K
import tensorflow as tf
from PIL import Image
import numpy as np
import string
import re

# NOTE: a `global` statement at module scope has no effect; it is kept only
# so the module's statements stay unchanged.
global graph
total_num = 0
neg_num = 0
# Graph captured at import time; the predict_* functions run their decoders
# inside `graph.as_default()`.
graph = tf.get_default_graph()

# --- Digit captchas ---------------------------------------------------------
digit_characters = string.digits
digit_base_model = models.load_model('gru_digit_base_model.h5')

# --- Arithmetic captchas ----------------------------------------------------
# arith_characters = '0123456789+*-%'
# arith_characters = '0123456789+?-×='  # 2021/11/17: added several arithmetic captcha types
# 2022/6/21: added division, two new captcha types, and captchas with two operators.
arith_characters = '0123456789+?-×/='
# 2021/11/17: added several arithmetic captcha types.
arith_base_model = models.load_model('gru_arith_base_model.h5')

# --- Chinese captchas -------------------------------------------------------
# chinese_characters = '四生乐句付仗斥令仔乎白仙甩他瓜们用丘仪失丛代印册匆禾'
with open('chinese_characters.txt', encoding='utf-8') as f:
    # 20200728: updated to 524 Chinese characters.
    chinese_characters = f.read().strip()
# Added 20191219; 20200728: updated to 524 Chinese characters.
chinese_base_model = models.load_model('gru_chinese_base_model.h5')

# --- English captchas -------------------------------------------------------
# english_characters = string.digits + string.ascii_uppercase + string.ascii_lowercase
# 20200728: updated to all-lowercase, multiple captcha types.
english_characters = string.ascii_lowercase + string.digits
# Added 20200518; 20200728: updated to all-lowercase, multiple captcha types.
english_base_model = models.load_model('gru_english_base_model.h5')

# --- CTC decoders -----------------------------------------------------------
# Each decoder is a backend function taking [model input, per-sample length
# factor] and returning the first decoded tensor of K.ctc_decode. The length
# factor is multiplied by the second dimension of the model output; the
# predict_* functions feed it as ones.
digit_input = digit_base_model.output
digit_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
digit_decode = K.ctc_decode(
    y_pred=digit_input,
    input_length=digit_input_length * K.shape(digit_input)[1],
)
digit_decode = K.function(
    [digit_base_model.input, digit_input_length],
    [digit_decode[0][0]],
)

arith_input = arith_base_model.output
arith_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
arith_decode = K.ctc_decode(
    y_pred=arith_input,
    input_length=arith_input_length * K.shape(arith_input)[1],
)
arith_decode = K.function(
    [arith_base_model.input, arith_input_length],
    [arith_decode[0][0]],
)

# The remaining Chinese decoder wiring follows immediately after this line.
chinese_input = chinese_base_model.output
# Chinese decoder: same wiring as the other CTC decoders. The length input is
# multiplied by the second dimension of the model output before being handed
# to K.ctc_decode, and only the first decoded tensor is exposed.
chinese_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
chinese_decode = K.ctc_decode(
    y_pred=chinese_input,
    input_length=chinese_input_length * K.shape(chinese_input)[1],
)
chinese_decode = K.function(
    [chinese_base_model.input, chinese_input_length],
    [chinese_decode[0][0]],
)

english_input = english_base_model.output
english_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
english_decode = K.ctc_decode(
    y_pred=english_input,
    input_length=english_input_length * K.shape(english_input)[1],
)
english_decode = K.function(
    [english_base_model.input, english_input_length],
    [english_decode[0][0]],
)

# NOTE: an earlier, commented-out decode_arith that only handled a single
# '+', '-' or '*' operator used to live here; it was superseded by the
# regex-based implementation below.

# Any formula the solver accepts: operands are digit runs or '?', joined by
# one or more operators, followed by '=' and an optional right-hand side.
_ARITH_FULL_RE = re.compile(r'^(\d+|\?)([\+\-\*/](\d+|\?))+=(\d+|\?)?$')
# Single-operator formula with a numeric right-hand side, e.g. '2*?=12'.
_ARITH_SIMPLE_RE = re.compile(r'^(\d+|\?)[\+\-\*/](\d+|\?)=\d+$')
# Splits a simple formula into [a, sign, b, '=', result].
_ARITH_SPLIT_RE = re.compile(r'(\+|\-|\*|×|/|=)')
# Operator that undoes each operator when solving for an unknown operand.
_INVERSE_SIGN = {'+': '-', '-': '+', '*': '/', '/': '*'}


def _eval_arith(expr):
    """Evaluate an arithmetic expression string and return its int value as text."""
    # NOTE(review): eval() runs on text recognised from an image. Every caller
    # reaches this only after the formula matched _ARITH_FULL_RE (digits, '?'
    # and + - * / only), and builtins are emptied as an extra guard.
    return str(int(eval(expr, {'__builtins__': {}}, {})))


def decode_arith(arith='2×?=12'):
    """Solve a recognised arithmetic captcha and return the answer as a string.

    Supported shapes (after '×' is normalised to '*'):
      * 'expr=?' or 'expr='   -> value of expr, e.g. '3+4=?' -> '7'
      * '?<op>b=c' / 'a<op>?=c' with one operator -> the unknown operand,
        e.g. '2×?=12' -> '6', '?-3=5' -> '8'
      * 'a<op>b=c' with no '?' -> value of 'a<op>b'

    Results are truncated with int(). Returns '' when the text is not a
    supported formula, contains more than one '?', or cannot be evaluated
    (for example division by zero).
    """
    try:
        arith = arith.replace('×', '*')
        if not _ARITH_FULL_RE.search(arith) or arith.count('?') > 1:
            return ''

        # Unknown (or nothing) on the right-hand side: evaluate the left side.
        if arith[-1] == '?':
            return _eval_arith(arith[:-2])
        if arith[-1] == '=':
            return _eval_arith(arith[:-1])

        if not _ARITH_SIMPLE_RE.search(arith):
            # e.g. several operators with the unknown on the left-hand side.
            print('公式出错:', arith)
            return ''

        a, sign, b, _, result = _ARITH_SPLIT_RE.split(arith)
        if a == '?':
            # ? op b = r  ->  ? = r inverse-op b
            expr = result + _INVERSE_SIGN[sign] + b
        elif b == '?':
            if sign in ('+', '*'):
                # a + ? = r -> r - a ;  a * ? = r -> r / a
                expr = result + _INVERSE_SIGN[sign] + a
            else:
                # a - ? = r -> a - r ;  a / ? = r -> a / r
                expr = a + sign + result
        else:
            # No unknown at all: report the value of the left-hand side.
            expr = a + sign + b
        return _eval_arith(expr)
    except Exception:
        # Best effort: any malformed or non-evaluable formula yields ''.
        return ''


def _ctc_predict(img, size, decode_fn, characters):
    """Resize *img*, run one CTC decoder on it and map indices to characters.

    img: PIL image; size: (width, height) passed to Image.resize;
    decode_fn: one of the module-level *_decode backend functions;
    characters: alphabet indexed by the decoded class ids.
    """
    img_arr = np.array(img.resize(size, Image.BILINEAR)) / 255.0
    X_test = np.array([img_arr])  # batch containing the single image
    with graph.as_default():
        out_pre = decode_fn([X_test, np.ones(X_test.shape[0])])[0]
    return ''.join([characters[x] for x in out_pre[0]])


def predict_digit(img):
    """Recognise a digit captcha; the image is resized to 100x50."""
    return _ctc_predict(img, (100, 50), digit_decode, digit_characters)


def predict_arith(img):
    """Recognise an arithmetic captcha and return its solved answer ('' on failure)."""
    # 20211117: changed image size; 20220621: changed from 100x32 to 200x64.
    formula = _ctc_predict(img, (200, 64), arith_decode, arith_characters)
    # decode_arith already returns '' for anything it cannot solve.
    return decode_arith(formula)


def predict_chinese(img):
    """Recognise a Chinese captcha; the image is resized to 120x40."""
    # Size updated for two Chinese captcha types.
    return _ctc_predict(img, (120, 40), chinese_decode, chinese_characters)


def predict_english(img):
    """Recognise an English captcha; the image is resized to 200x70."""
    return _ctc_predict(img, (200, 70), english_decode, english_characters)


if __name__ == "__main__":
    import glob
    import time
    import sys
    import shutil

    # Ad-hoc evaluation: the label is the part of the file name before the
    # first '_'. Mismatches are printed and counted; correctly recognised
    # 4-character samples are copied to english_imgs/.
    neg = []
    files = glob.glob(r'E:\linuxPro\captcha_pro\FileInfo0526\标注样本\shensexiansandian\*.jpg')[-3000:]
    t1 = time.time()
    for path in files:
        file = path.split('\\')[-1]
        label = file.split('_')[0]
        # Context manager closes the underlying file once prediction is done.
        with Image.open(path) as raw:
            img = raw if raw.mode == "RGB" else raw.convert("RGB")
            pre = predict_english(img)
        if label != pre:
            print(file, label, pre)
            neg.append(file)
        elif len(label) == 4:
            if os.path.exists(path):
                try:
                    shutil.copy(path, 'english_imgs/' + file)
                except IOError as e:
                    print('Unable to copy file %s' % e)
                except Exception:
                    # Call exc_info() so the actual exception is printed.
                    print('Unexcepted error', sys.exc_info())
    print(len(neg), time.time() - t1)