123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- #!/usr/bin/python3
- # -*- coding: utf-8 -*-
- # @Author : bidikeji
- # @Time : 2019/11/25 0025 9:54
- import os
- os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
- os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
- from tensorflow.keras import models
- import tensorflow.keras.backend as K
- import tensorflow as tf
- from PIL import Image
- import numpy as np
- import string
- import re
# Module-level counters; nothing in the visible part of this file updates them.
total_num = 0
neg_num = 0

# Default TensorFlow graph captured at import time. The predict_* functions
# re-enter it with ``graph.as_default()`` before running a decoder.
graph = tf.get_default_graph()

# --- Alphabets and recognition models ------------------------------------
# Each alphabet is indexed by the label ids produced by the matching decoder.
digit_characters = string.digits
digit_base_model = models.load_model('gru_digit_base_model.h5')

# Alphabet history:
#   '0123456789+*-%'
#   '0123456789+?-×='   (2021/11/17: several arithmetic captcha types added)
# 2022/6/21: division added, two more captcha types, captchas with two operators.
arith_characters = '0123456789+?-×/='
arith_base_model = models.load_model('gru_arith_base_model.h5')  # 2021/11/17: several arithmetic captcha types added

# Previously a short hard-coded string of Chinese characters; now read from a file.
with open('chinese_characters.txt', encoding='utf-8') as f:
    chinese_characters = f.read().strip()  # 2020-07-28: extended to 524 Chinese characters
chinese_base_model = models.load_model('gru_chinese_base_model.h5')  # added 2019-12-19; 2020-07-28: extended to 524 Chinese characters

# Previously: string.digits + string.ascii_uppercase + string.ascii_lowercase
english_characters = string.ascii_lowercase + string.digits  # 2020-07-28: all lowercase, several captcha types
english_base_model = models.load_model('gru_english_base_model.h5')  # added 2020-05-18; 2020-07-28: all lowercase, several captcha types

up_low_case_characters = string.ascii_uppercase + string.ascii_lowercase + string.digits
up_low_case_model = models.load_model('gru_up_low_case_base_model.h5')  # 2025-01-10: case-sensitive captchas


def _build_ctc_decoder(base_model):
    """Attach a CTC decoding step to ``base_model``.

    Returns a tuple ``(y_pred, length_scale, decoder)``:

    * ``y_pred`` is ``base_model.output``.
    * ``length_scale`` is an int32 input with one entry per batch item. It is
      multiplied by the size of axis 1 of ``y_pred`` to form the
      ``input_length`` handed to ``K.ctc_decode`` (the predict_* functions
      feed ones here; axis 1 is presumably the time-step axis -- confirm
      against the model definitions).
    * ``decoder`` is a ``K.function`` taking ``[images, length_scale]`` and
      returning a one-element list holding the first decoded label tensor.
    """
    y_pred = base_model.output
    length_scale = tf.keras.Input(batch_shape=[None], dtype='int32')
    decoded = K.ctc_decode(y_pred=y_pred, input_length=length_scale * K.shape(y_pred)[1])
    decoder = K.function([base_model.input, length_scale], [decoded[0][0]])
    return y_pred, length_scale, decoder


# --- Decoders --------------------------------------------------------------
# The *_input / *_input_length names are kept at module level alongside the
# decoder callables so existing references to them keep working.
digit_input, digit_input_length, digit_decode = _build_ctc_decoder(digit_base_model)
arith_input, arith_input_length, arith_decode = _build_ctc_decoder(arith_base_model)
chinese_input, chinese_input_length, chinese_decode = _build_ctc_decoder(chinese_base_model)
english_input, english_input_length, english_decode = _build_ctc_decoder(english_base_model)
up_low_case_input, up_low_case_input_length, up_low_case_decode = _build_ctc_decoder(up_low_case_model)
- # def decode_arith(arith = '2×?=12'):
- # arith = arith.replace('×', '*')
- # items = re.split('=', arith)
- # if len(items)==2:
- # if items[-1] in ['?', '']:
- # return eval(items[0])
- # l = re.split('-|\+|\*', items[0])
- # signs = re.findall('-|\+|\*', items[0])
- # if len(l)==2 and len(signs)==1:
- # if l[1] == '?':
- # if signs[0] == '+':
- # return eval('%s-%s'%(items[-1], l[0]))
- # elif signs[0] == '-':
- # return eval('%s-%s'%(l[0],items[-1]))
- # elif signs[0] == '*':
- # return int(eval('%s/%s'%(items[-1], l[0])))
- # elif l[0] == '?':
- # if signs[0] == '+':
- # return eval('%s-%s'%(items[-1], l[1]))
- # elif signs[0] == '-':
- # return eval('%s+%s'%(l[1],items[-1]))
- # elif signs[0] == '*':
- # return int(eval('%s/%s'%(items[-1], l[1])))
- # return ''
def decode_arith(arith='2×?=12'):
    """Solve a recognised arithmetic captcha and return the answer as a string.

    Supported forms (``×`` is treated as ``*``):

    * ``<expr>=?`` or ``<expr>=`` -- ``<expr>`` is a chain of integers joined
      by ``+ - * /``; the whole expression is evaluated.
    * ``a<op>b=c`` with exactly one of ``a`` / ``b`` replaced by ``?`` -- the
      equation is solved for the unknown.

    The numeric result is truncated with ``int()`` before being converted to
    text, so ``7/2=?`` yields ``'3'``.

    Args:
        arith: Text of the captcha as produced by the recogniser.

    Returns:
        The answer as a decimal string, or ``''`` when the text is malformed,
        contains more than one ``?``, uses an unsupported layout, or cannot
        be evaluated (for example division by zero, or a non-string input).
    """
    # Maps an operator to the one that undoes it.
    inverse = {'+': '-', '-': '+', '*': '/', '/': '*'}
    try:
        arith = arith.replace('×', '*')

        well_formed = re.search(r'^(\d+|\?)([\+\-\*/](\d+|\?))+=(\d+|\?)?$', arith)
        if not well_formed or arith.count('?') > 1:
            return ''

        # NOTE(review): eval() runs on recogniser output. The pattern above
        # restricts the text to digits, '?', '=' and single + - * / operators,
        # so nothing but plain arithmetic can reach it; anything eval()
        # rejects (e.g. a stray '?') ends up in the except clause below.
        if arith[-1] == '?':
            # "<expr>=?": drop the trailing "=?".
            return str(int(eval(arith[:-2])))
        if arith[-1] == '=':
            # "<expr>=": drop the trailing "=".
            return str(int(eval(arith[:-1])))

        if not re.search(r'^(\d+|\?)[\+\-\*/](\d+|\?)=\d+$', arith):
            # An unknown inside a longer chain (e.g. "2+?+3=8") is not supported.
            print('公式出错:', arith)
            return ''

        a, sign, b, _, quest = re.split(r'(\+|\-|\*|/|=)', arith)
        if a == '?':
            # ? op b = q   =>   ? = q inverse(op) b
            left, op, right = quest, inverse[sign], b
        elif b == '?':
            if sign in ('+', '*'):
                # a op ? = q   =>   ? = q inverse(op) a
                left, op, right = quest, inverse[sign], a
            else:
                # a - ? = q  =>  ? = a - q ;  a / ? = q  =>  ? = a / q
                left, op, right = a, sign, quest
        else:
            # No unknown at all (e.g. "2+3=5"): report it, then evaluate the
            # left-hand side as written.
            print('公式出错:', arith)
            left, op, right = a, sign, b
        return str(int(eval('%s%s%s' % (left, op, right))))
    except Exception:
        # Best effort: any failure means "no answer". Unlike a bare except,
        # this lets KeyboardInterrupt / SystemExit propagate.
        return ''
def predict_digit(img):
    """Recognise a digit captcha and return the decoded digits as a string.

    ``img`` is a PIL image. It is resized to (100, 50) with bilinear
    resampling, scaled by 1/255 and passed to ``digit_decode`` as a batch of
    one; the returned label ids are mapped through ``digit_characters``.
    """
    resized = img.resize((100, 50), Image.BILINEAR)
    pixels = np.array(resized) / 255.0
    batch = np.array([pixels])
    length_scale = np.ones(batch.shape[0])
    # The decoder was built on the module-level graph, so run it there.
    with graph.as_default():
        label_ids = digit_decode([batch, length_scale])[0]
    return ''.join(digit_characters[idx] for idx in label_ids[0])
def predict_arith(img):
    """Recognise an arithmetic captcha and return its computed answer.

    ``img`` is a PIL image. It is resized to (200, 64) with bilinear
    resampling, scaled by 1/255 and decoded with ``arith_decode``; the label
    ids are mapped through ``arith_characters`` and the resulting text is
    handed to ``decode_arith``.

    Returns:
        The answer produced by ``decode_arith``, or ``''`` if solving raises.
    """
    # Size history: (100, 50) originally; changed on 2021-11-17; changed from
    # 100x32 to 200x64 on 2022-06-21.
    img_arr = np.array(img.resize((200, 64), Image.BILINEAR)) / 255.0
    X_test = np.array([img_arr])
    # The decoder was built on the module-level graph, so run it there.
    with graph.as_default():
        out_pre = arith_decode([X_test, np.ones(X_test.shape[0])])[0]
    out = ''.join([arith_characters[x] for x in out_pre[0]])
    try:
        out = decode_arith(out)
    except Exception:
        # Keep the best-effort contract without trapping KeyboardInterrupt /
        # SystemExit the way a bare ``except:`` would.
        out = ""
    return out
def predict_chinese(img):
    """Recognise a Chinese-character captcha and return the decoded text.

    ``img`` is a PIL image. It is resized to (120, 40) with bilinear
    resampling (previously (100, 50); changed when two more Chinese captcha
    types were added), scaled by 1/255 and decoded with ``chinese_decode``.
    Label ids are mapped through ``chinese_characters``.
    """
    target_size = (120, 40)
    scaled = np.array(img.resize(target_size, Image.BILINEAR)) / 255.0
    batch = np.array([scaled])
    # The decoder was built on the module-level graph, so run it there.
    with graph.as_default():
        outputs = chinese_decode([batch, np.ones(batch.shape[0])])
    first_sample = outputs[0][0]
    chars = []
    for label_id in first_sample:
        chars.append(chinese_characters[label_id])
    return ''.join(chars)
def predict_english(img):
    """Recognise a lowercase-alphanumeric captcha and return the decoded text.

    ``img`` is a PIL image. It is resized to (200, 70), scaled by 1/255 and
    decoded with ``english_decode``; label ids are mapped through
    ``english_characters``.
    """
    # Resampling filter in use is BILINEAR (the original comment also lists NEAREST).
    resized = img.resize((200, 70), Image.BILINEAR)
    batch = np.array([np.array(resized) / 255.0])
    ones = np.ones(batch.shape[0])
    # The decoder was built on the module-level graph, so run it there.
    with graph.as_default():
        decoded = english_decode([batch, ones])[0]
    text = ''
    for label_id in decoded[0]:
        text += english_characters[label_id]
    return text
def predict_up_low_english(img):
    """Recognise a case-sensitive alphanumeric captcha and return the text.

    ``img`` is a PIL image. It is resized to (200, 70), scaled by 1/255 and
    decoded with ``up_low_case_decode``; label ids are mapped through
    ``up_low_case_characters``.
    """
    # Resampling filter in use is BILINEAR (the original comment also lists NEAREST).
    normalised = np.array(img.resize((200, 70), Image.BILINEAR)) / 255.0
    samples = np.array([normalised])
    sample_count = samples.shape[0]
    # The decoder was built on the module-level graph, so run it there.
    with graph.as_default():
        decoded_batch = up_low_case_decode([samples, np.ones(sample_count)])[0]
    symbols = [up_low_case_characters[label_id] for label_id in decoded_batch[0]]
    return ''.join(symbols)
if __name__ == "__main__":
    # Ad-hoc accuracy check for the case-sensitive model: every image in the
    # sample directory is named after its expected text (the part of the file
    # name before the first '.').
    import glob
    import time

    files = glob.glob('E:/captcha_pic/up_low_case/*.jpg')
    t1 = time.time()
    neg = []  # file names whose prediction did not match the label
    pos = 0   # number of correct predictions
    for path in files:
        # os.path.basename handles the separators of the running platform, so
        # the file name and the label are derived consistently (the original
        # split on '/' for one and on '\\' for the other).
        file = os.path.basename(path)
        label = file.split('.')[0]
        # Close the file handle as soon as the prediction is done.
        with Image.open(path) as img:
            if img.mode != "RGB":
                img = img.convert("RGB")
            pre = predict_up_low_english(img)
        if label != pre:
            print(file,label, pre)
            neg.append(file)
        else:
            pos += 1

    total = len(neg) + pos
    print(len(neg), pos, time.time()-t1)
    if total:
        print('准确率:%.4f'%(pos/total))
    else:
        # Nothing matched the glob pattern; avoid dividing by zero.
        print('No sample images found; accuracy not computed.')
|