Jelajahi Sumber

补充区分英文大小写模型;更新数字英文模型

lsm 3 minggu lalu
induk
komit
883aa3811e

+ 13 - 20
captcha_flask_server.py

@@ -12,11 +12,11 @@ import time
 import logging
 
 coun_dic = {'shuzi':{'total_num':0, 'neg_num':0}, 'suanshu':{'total_num':0, 'neg_num':0}
-               ,'yingwen':{'total_num':0, 'neg_num':0},'hanzi':{'total_num':0, 'neg_num':0}}
+               ,'yingwen':{'total_num':0, 'neg_num':0},'hanzi':{'total_num':0, 'neg_num':0}
+                , 'uplowcase':{'total_num':0, 'neg_num':0}}
 
 app = Flask(__name__)
 
-
 @app.route("/getlog", methods=["POST"])
 def get_acc():
     clear = request.form.get('clear_log', 'no')
@@ -29,27 +29,22 @@ def get_acc():
             return 'clear_log error'
     return jsonify(coun_dic)
 
-
 @app.route("/errorlog", methods=["POST"])
 def save_error():
     """receive not success image and save """
     code_type = request.form.get('code', 'unkown')
     base64pic = request.form.get('base64pic')
     file_obj = request.files.get("pic")
-    data = {'save_success': False}
-    if code_type is None or str(code_type) not in ['shuzi', 'suanshu', 'yingwen', 'hanzi', '1', '2', '3', '4', '5', '6']:
-        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi or number 1-6'}
+    data = {'save_success':False}
+    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi','uplowcase']:
+        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi'}
         return jsonify(data)
-
     if base64pic is not None:
         try:
             src = base64.b64decode(base64pic.split(',')[-1])
             img = Image.open(BytesIO(src))
             time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
-            if code_type in ['1', '2', '3', '4', '5', '6']:
-                img.save('pic2/'+str(code_type)+'_'+time_tr+'.jpg')
-            else:
-                img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
             data['save_success'] = True
             coun_dic[code_type]['neg_num'] += 1
             return jsonify(data)
@@ -59,10 +54,7 @@ def save_error():
         try:
             img = Image.open(file_obj)
             time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
-            if code_type in ['1', '2', '3', '4', '5', '6']:
-                img.save('pic2/'+str(code_type)+'_'+time_tr+'.jpg')
-            else:
-                img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
             data['save_success'] = True
             coun_dic[code_type]['neg_num'] += 1
             return jsonify(data)
@@ -71,7 +63,6 @@ def save_error():
     else:
         return 'please check you parameter '
 
-
 @app.route("/upload", methods=["POST"])
 def upload():
     start_time = time.time()
@@ -80,8 +71,8 @@ def upload():
     base64pic = request.form.get('base64pic')
     file_obj = request.files.get("pic")
     data = {'success':False}
-    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi']:
-        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi'}
+    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi','uplowcase']:
+        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi/uplowcase'}
         return jsonify(data)
     if base64pic is not None:
         try:
@@ -93,11 +84,12 @@ def upload():
                 pre = predict_digit(img)
             elif code_type == 'suanshu':
                 pre = predict_arith(img)
-                # pre = str(eval(pre))
             elif code_type == 'hanzi':
                 pre = predict_chinese(img)
             elif code_type == 'yingwen':
                 pre = predict_english(img)
+            elif code_type == 'uplowcase':
+                pre = predict_up_low_english(img)
             data['predict'] = pre
             data['success'] = True
             coun_dic[code_type]['total_num'] +=1
@@ -115,11 +107,12 @@ def upload():
                 pre = predict_digit(img)
             elif code_type == 'suanshu':
                 pre = predict_arith(img)
-                # pre = str(eval(pre))
             elif code_type == 'hanzi':
                 pre = predict_chinese(img)
             elif code_type == 'yingwen':
                 pre = predict_english(img)
+            elif code_type == 'uplowcase':
+                pre = predict_up_low_english(img)
             data['success'] = True
             data['predict'] = pre
             coun_dic[code_type]['total_num'] += 1

BINER
gru_english_base_model.h5


BINER
gru_up_low_case_base_model.h5


+ 36 - 13
predict_model.py

@@ -33,6 +33,9 @@ chinese_base_model = models.load_model('gru_chinese_base_model.h5') # 20191219 
 english_characters = string.ascii_lowercase + string.digits  # 20200728 更新为全部小写多种验证码
 english_base_model = models.load_model('gru_english_base_model.h5') # 20200518 新增  20200728 更新为全部小写多种验证码
 
+up_low_case_characters = string.ascii_uppercase + string.ascii_lowercase + string.digits
+up_low_case_model = models.load_model('gru_up_low_case_base_model.h5') # 20250110 区分大小写验证码
+
 digit_input = digit_base_model.output
 digit_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
 digit_decode = K.ctc_decode(y_pred=digit_input, input_length=digit_input_length * K.shape(digit_input)[1])
@@ -53,6 +56,11 @@ english_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
 english_decode = K.ctc_decode(y_pred=english_input, input_length=english_input_length * K.shape(english_input)[1])
 english_decode = K.function([english_base_model.input, english_input_length], [english_decode[0][0]])
 
+up_low_case_input = up_low_case_model.output
+up_low_case_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
+up_low_case_decode = K.ctc_decode(y_pred=up_low_case_input, input_length=up_low_case_input_length * K.shape(up_low_case_input)[1])
+up_low_case_decode = K.function([up_low_case_model.input, up_low_case_input_length], [up_low_case_decode[0][0]])
+
 # def decode_arith(arith = '2×?=12'):
 #     arith = arith.replace('×', '*')
 #     items = re.split('=', arith)
@@ -164,32 +172,47 @@ def predict_english(img):
     out = ''.join([english_characters[x] for x in out_pre[0]])
     return out
 
+def predict_up_low_english(img):
+    img_arr = np.array(img.resize((200, 70), Image.BILINEAR)) / 255.0  #BILINEAR  NEAREST
+    X_test = np.array([img_arr])
+    with graph.as_default():
+        out_pre = up_low_case_decode([X_test, np.ones(X_test.shape[0])])[0]
+    out = ''.join([up_low_case_characters[x] for x in out_pre[0]])
+    return out
+
 if __name__ == "__main__":
     import glob
     import time
     import sys
     import shutil
     neg = []
-    files = glob.glob(r'E:\linuxPro\captcha_pro\FileInfo0526\标注样本\shensexiansandian\*.jpg')[-3000:]
+    # files = glob.glob(r'E:\linuxPro\captcha_pro\FileInfo0526\标注样本\shensexiansandian\*.jpg')[-3000:]
+
+    files = glob.glob('E:/captcha_pic/up_low_case/*.jpg')
+
     t1 = time.time()
+    pos = 0
     for i in range(len(files)):
-        file = files[i].split('\\')[-1]
-        label = files[i].split('\\')[-1].split('_')[0]
+        file = files[i].split('/')[-1]
+        label = files[i].split('\\')[-1].split('.')[0]
         img = Image.open(files[i])
         if img.mode != "RGB":
             img = img.convert("RGB")
-        pre = predict_english(img)
+        pre = predict_up_low_english(img)
         if label!=pre:
             print(file,label, pre)
             neg.append(file)
-        elif len(label) == 4:
-            if os.path.exists(files[i]):
-                try:
-                    shutil.copy(files[i], 'english_imgs/' + file)
-                except IOError as e:
-                    print('Unable to copy file %s' % e)
-                except:
-                    print('Unexcepted error', sys.exc_info)
-    print(len(neg), time.time()-t1)
+        else:
+            pos += 1
+        # elif len(label) == 4:
+        #     if os.path.exists(files[i]):
+        #         try:
+        #             shutil.copy(files[i], 'english_imgs/' + file)
+        #         except IOError as e:
+        #             print('Unable to copy file %s' % e)
+        #         except:
+        #             print('Unexcepted error', sys.exc_info)
+    print(len(neg), pos, time.time()-t1)
+    print('准确率:%.4f'%(pos/(len(neg)+pos)))
 
 

Diff berkas disembunyikan karena terlalu besar
+ 4 - 4
train/captcha_DigitAndEnglish.ipynb


+ 1 - 1
train/captcha_digit.ipynb

@@ -1644,7 +1644,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.0"
+   "version": "3.6.13"
   }
  },
  "nbformat": 4,

Diff berkas disembunyikan karena terlalu besar
+ 70 - 0
train/数字英文验证码区分大小写.ipynb


Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini