Эх сурвалжийг харах

更新最新数据及训练模型

lsm 3 жил өмнө
parent
commit
9295bbd3ca

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 112 - 28
.idea/workspace.xml

@@ -1,26 +1,85 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="ChangeListManager">
-    <list default="true" id="3ec584ae-a8ab-4f1a-b0cb-1b73775cb899" name="Default" comment="" />
+    <list default="true" id="3ec584ae-a8ab-4f1a-b0cb-1b73775cb899" name="Default" comment="">
+      <change afterPath="$PROJECT_DIR$/train/captcha_DigitAndEnglish.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_EngAndNum.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_arithmetic.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_arithmetic0616.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_chinese.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_chinese_524char.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_cnn.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_digit.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_english.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/captcha_gen_train 自动编码器实现去噪.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/gan_captcha.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/百度通用文字识别接口调用.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/自动编码器实现去噪.ipynb" afterDir="false" />
+      <change afterPath="$PROJECT_DIR$/train/验证码图片服务器.ipynb" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/gru_arith_base_model.h5" beforeDir="false" afterPath="$PROJECT_DIR$/gru_arith_base_model.h5" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/predict_model.py" beforeDir="false" afterPath="$PROJECT_DIR$/predict_model.py" afterDir="false" />
+    </list>
     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
-    <option name="TRACKING_ENABLED" value="true" />
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
     <option name="LAST_RESOLUTION" value="IGNORE" />
   </component>
+  <component name="FUSProjectUsageTrigger">
+    <session id="-68870454">
+      <usages-collector id="statistics.lifecycle.project">
+        <counts>
+          <entry key="project.open.time.0" value="1" />
+          <entry key="project.opened" value="1" />
+        </counts>
+      </usages-collector>
+      <usages-collector id="statistics.file.extensions.open">
+        <counts>
+          <entry key="py" value="3" />
+        </counts>
+      </usages-collector>
+      <usages-collector id="statistics.file.types.open">
+        <counts>
+          <entry key="Python" value="3" />
+        </counts>
+      </usages-collector>
+    </session>
+  </component>
+  <component name="FileEditorManager">
+    <leaf>
+      <file pinned="false" current-in-tab="false">
+        <entry file="file://$PROJECT_DIR$/predict_model.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="2439">
+              <caret line="134" column="14" selection-start-line="134" selection-start-column="14" selection-end-line="134" selection-end-column="14" />
+            </state>
+          </provider>
+        </entry>
+      </file>
+      <file pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/captcha_flask_server.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="691">
+              <caret line="133" column="33" selection-start-line="133" selection-start-column="28" selection-end-line="133" selection-end-column="33" />
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
   <component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
   <component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
   <component name="JsGulpfileManager">
     <detection-done>true</detection-done>
     <sorting>DEFINITION_ORDER</sorting>
   </component>
-  <component name="NodePackageJsonFileManager">
-    <packageJsonPaths />
-  </component>
-  <component name="ProjectFrameBounds" extendedState="7">
-    <option name="x" value="65" />
-    <option name="y" value="224" />
+  <component name="ProjectFrameBounds" extendedState="6">
+    <option name="x" value="358" />
+    <option name="y" value="110" />
     <option name="width" value="1326" />
     <option name="height" value="515" />
   </component>
@@ -29,23 +88,23 @@
       <foldersAlwaysOnTop value="true" />
     </navigator>
     <panes>
+      <pane id="Scope" />
       <pane id="ProjectPane">
         <subPane>
           <expand>
             <path>
-              <item name="captcha_pro" type="b2602c69:ProjectViewProjectNode" />
-              <item name="captcha_pro" type="462c0819:PsiDirectoryNode" />
+              <item name="BIDI_CAPTCHA" type="b2602c69:ProjectViewProjectNode" />
+              <item name="BIDI_CAPTCHA" type="462c0819:PsiDirectoryNode" />
             </path>
           </expand>
           <select />
         </subPane>
       </pane>
-      <pane id="Scope" />
     </panes>
   </component>
   <component name="PropertiesComponent">
     <property name="WebServerToolWindowFactoryState" value="true" />
-    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="last_opened_file_path" value="F:/captcha_server" />
     <property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
     <property name="nodejs_npm_path_reset_for_default_project" value="true" />
   </component>
@@ -75,6 +134,7 @@
       <option name="ADD_SOURCE_ROOTS" value="true" />
       <EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
       <option name="launchJavascriptDebuger" value="false" />
+      <method v="2" />
     </configuration>
   </component>
   <component name="SvnConfiguration">
@@ -91,30 +151,31 @@
     <servers />
   </component>
   <component name="ToolWindowManager">
-    <frame x="-8" y="-8" width="1456" height="876" extended-state="7" />
+    <frame x="-8" y="-8" width="1936" height="1056" extended-state="6" />
+    <editor active="true" />
     <layout>
+      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.17430703" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info id="Favorites" order="2" side_tool="true" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info anchor="bottom" id="Run" order="2" weight="0.34705076" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.5489933" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
       <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
+      <window_info anchor="bottom" id="Version Control" order="8" show_stripe_button="false" />
       <window_info anchor="bottom" id="Event Log" order="9" side_tool="true" />
       <window_info anchor="bottom" id="Database Changes" order="10" show_stripe_button="false" />
-      <window_info anchor="bottom" id="Version Control" order="8" show_stripe_button="false" />
-      <window_info anchor="bottom" id="Python Console" order="12" weight="0.32751676" />
-      <window_info anchor="bottom" id="Run" order="2" weight="0.34705076" />
       <window_info anchor="bottom" id="Terminal" order="11" />
+      <window_info anchor="bottom" id="Python Console" order="12" weight="0.32751676" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
       <window_info anchor="right" id="Remote Host" order="3" />
-      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.1260745" />
-      <window_info anchor="bottom" id="Docker" order="7" show_stripe_button="false" />
       <window_info anchor="right" id="Database" order="4" />
       <window_info anchor="right" id="SciView" order="5" weight="0.3252149" />
-      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
-      <window_info id="Favorites" order="2" side_tool="true" />
-      <window_info anchor="bottom" id="Debug" order="3" weight="0.5489933" />
-      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
-      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
-      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
-      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
-      <window_info anchor="bottom" id="Find" order="1" />
-      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
-      <window_info anchor="bottom" id="Message" order="0" />
     </layout>
   </component>
   <component name="TypeScriptGeneratedFilesManager">
@@ -123,4 +184,27 @@
   <component name="VcsContentAnnotationSettings">
     <option name="myLimit" value="2678400000" />
   </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/flash_server.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="672">
+          <caret line="94" column="33" lean-forward="true" selection-start-line="94" selection-start-column="33" selection-end-line="94" selection-end-column="33" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/predict_model.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="2439">
+          <caret line="134" column="14" selection-start-line="134" selection-start-column="14" selection-end-line="134" selection-end-column="14" />
+        </state>
+      </provider>
+    </entry>
+    <entry file="file://$PROJECT_DIR$/captcha_flask_server.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="691">
+          <caret line="133" column="33" selection-start-line="133" selection-start-column="28" selection-end-line="133" selection-end-column="33" />
+        </state>
+      </provider>
+    </entry>
+  </component>
 </project>

+ 135 - 0
captcha_flask_server.py

@@ -0,0 +1,135 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# @Author  : bidikeji
+# @Time    : 2019/11/21 0021 15:55
+
+from predict_model import *
+from flask import request, Flask, jsonify
+from PIL import Image
+from io import BytesIO
+import base64
+import time
+import logging
+
+coun_dic = {'shuzi':{'total_num':0, 'neg_num':0}, 'suanshu':{'total_num':0, 'neg_num':0}
+               ,'yingwen':{'total_num':0, 'neg_num':0},'hanzi':{'total_num':0, 'neg_num':0}}
+
+app = Flask(__name__)
+
+@app.route("/getlog", methods=["POST"])
+def get_acc():
+    clear = request.form.get('clear_log', 'no')
+    if clear == 'yes':
+        try:
+            with open('upload_num_log.txt', 'a', encoding='utf=8') as f:
+                f.write(str(coun_dic))
+                f.write('\n')
+        except:
+            return 'clear_log error'
+    return jsonify(coun_dic)
+
+@app.route("/errorlog", methods=["POST"])
+def save_error():
+    """receive not success image and save """
+    code_type = request.form.get('code', 'unkown')
+    base64pic = request.form.get('base64pic')
+    file_obj = request.files.get("pic")
+    data = {'save_success':False}
+    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi']:
+        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi'}
+        return jsonify(data)
+    if base64pic is not None:
+        try:
+            src = base64.b64decode(base64pic.split(',')[-1])
+            img = Image.open(BytesIO(src))
+            time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            data['save_success'] = True
+            coun_dic[code_type]['neg_num'] += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+    if file_obj is not None:
+        try:
+            img = Image.open(file_obj)
+            time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            data['save_success'] = True
+            coun_dic[code_type]['neg_num'] += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+    else:
+        return 'please check you parameter '
+
+@app.route("/upload", methods=["POST"])
+def upload():
+    start_time = time.time()
+    """receive image and predict """
+    code_type = request.form.get('code')
+    base64pic = request.form.get('base64pic')
+    file_obj = request.files.get("pic")
+    data = {'success':False}
+    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi']:
+        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi'}
+        return jsonify(data)
+    if base64pic is not None:
+        try:
+            src = base64.b64decode(base64pic.split(',')[-1])
+            img = Image.open(BytesIO(src))
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            if code_type == 'shuzi':
+                pre = predict_digit(img)
+            elif code_type == 'suanshu':
+                pre = predict_arith(img)
+                # pre = str(eval(pre))
+            elif code_type == 'hanzi':
+                pre = predict_chinese(img)
+            elif code_type == 'yingwen':
+                pre = predict_english(img)
+            data['predict'] = pre
+            data['success'] = True
+            coun_dic[code_type]['total_num'] +=1
+            app.logger.info("success ,use time:%.4f" %(time.time() - start_time))
+            return jsonify(data)
+        except:
+            app.logger.info("except error,use time:%.4f" %(time.time() - start_time))
+            return jsonify(data)
+    if file_obj is not None:
+        try:
+            img = Image.open(file_obj)
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            if code_type == 'shuzi':
+                pre = predict_digit(img)
+            elif code_type == 'suanshu':
+                pre = predict_arith(img)
+                # pre = str(eval(pre))
+            elif code_type == 'hanzi':
+                pre = predict_chinese(img)
+            elif code_type == 'yingwen':
+                pre = predict_english(img)
+            data['success'] = True
+            data['predict'] = pre
+            coun_dic[code_type]['total_num'] += 1
+            app.logger.info("success ,use time:%.4f" %(time.time() - start_time))
+            # print('graph_node_num',len(tf.get_default_graph().as_graph_def().node))
+            return jsonify(data)
+        except:
+            app.logger.info("except error, use time:%.4f" %(time.time() - start_time))
+            return jsonify(data)
+
+    return 'please check you post '
+
+
+if __name__ == '__main__':
+    handler = logging.FileHandler('flask.log', encoding='UTF-8')
+    app.logger.setLevel("INFO")
+    logging_format = logging.Formatter(
+        '%(asctime)s - %(levelname)s - %(filename)s -%(lineno)s - %(message)s'
+    )
+    handler.setFormatter(logging_format)
+    app.logger.addHandler(handler)
+    app.run("0.0.0.0", port=17052, debug=False) # 2.177 本地IP
+

+ 117 - 0
captcha_flask_server.py_bak

@@ -0,0 +1,117 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# @Author  : bidikeji
+# @Time    : 2019/11/21 0021 15:55
+
+from predict_model import *
+from flask import request, Flask, jsonify
+from PIL import Image
+from io import BytesIO
+import base64
+import time
+
+total_num = 0
+neg_num = 0
+
+app = Flask(__name__)
+
+@app.route("/getlog", methods=["POST"])
+def get_acc():
+    global total_num
+    global neg_num
+    data = {'total_num':total_num, 'neg_numative':neg_num}
+    clear = request.form.get('clear_log', 'no')
+    if clear == 'yes':
+        with open('upload_num_log.txt', 'a', encoding='utf=8') as f:
+            f.write('total_number:%d,\t error_number:%d\n'%(total_num, neg_num))
+        total_num = 0
+        neg_num = 0
+    return jsonify(data)
+
+@app.route("/errorlog", methods=["POST"])
+def save_error():
+    """receive not success image and save """
+    global total_num
+    global neg_num
+    code_type = request.form.get('code', 'unknow')
+    base64pic = request.form.get('base64pic')
+    file_obj = request.files.get("pic")
+    data = {'save_success':False}
+    if base64pic is not None:
+        try:
+            src = base64.b64decode(base64pic.split(',')[-1])
+            img = Image.open(BytesIO(src))
+            time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            data['save_success'] = True
+            neg_num += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+    if file_obj is not None:
+        try:
+            img = Image.open(file_obj)
+            time_tr = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
+            img.save('pic/'+str(code_type)+'_'+time_tr+'.jpg')
+            data['save_success'] = True
+            neg_num += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+
+@app.route("/upload", methods=["POST"])
+def upload():
+    global total_num
+    global neg_num
+    """receive image and predict """
+    code_type = request.form.get('code')
+    base64pic = request.form.get('base64pic')
+    file_obj = request.files.get("pic")
+    data = {'success':False}
+    if code_type is None or str(code_type) not in ['shuzi', 'suanshu','yingwen','hanzi']:
+        data = {'errorinfo':'please check you param:code, code must be in shuzi/suanshu/yingwen/hanzi'}
+        return jsonify(data)
+    if base64pic is not None:
+        try:
+            src = base64.b64decode(base64pic.split(',')[-1])
+            img = Image.open(BytesIO(src))
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            if code_type == 'shuzi':
+                pre = predict_digit(img)
+            elif code_type == 'suanshu':
+                pre = predict_arith(img)
+                pre = str(eval(pre))
+            elif code_type == 'hanzi':
+                pre = predict_chinese(img)
+            data['predict'] = pre
+            data['success'] = True
+            total_num += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+    if file_obj is not None:
+        try:
+            img = Image.open(file_obj)
+            if img.mode != "RGB":
+                img = img.convert("RGB")
+            if code_type == 'shuzi':
+                pre = predict_digit(img)
+            elif code_type == 'suanshu':
+                pre = predict_arith(img)
+                pre = str(eval(pre))
+            elif code_type == 'hanzi':
+                pre = predict_chinese(img)
+            data['success'] = True
+            data['predict'] = pre
+            total_num += 1
+            return jsonify(data)
+        except:
+            return jsonify(data)
+
+    return 'please check you post '
+
+
+if __name__ == '__main__':
+    # Start the Flask server on host 0.0.0.0, port 17052, with debug mode off.
+    app.run("0.0.0.0", port=17052, debug=False)
+

+ 1 - 0
chinese_characters.txt

@@ -0,0 +1 @@
+特啦同爱手使清弟时还睡近线却文政完么展便工在不办入强路安起理斥队何听农验它哦呢座口刚叔沙信內笑装往用更站社亲房跑啊章屋马直众立前都其造该瓜片论水夜习白感离下神送常当只认干发到思席研画领极指吗讲九越仗过受这已乐穿原才包丛满岸第千限今色种头被要活或转紧没后向气咯者跟想你吧学如问忙界解坚科一确非经张快顶由争目雪仔拿分小照般表牛命上北各他部之新孩令黑爬无级友睛脸关忽给动嘴南事写应光将情八晚成紫步所呀些飞息似车开边敢匆阵伯志禾四着够底处道衣说难七音伟仪儿通术面形停胜尤二谁题您赶热万深历导帮反收行传少生度侯员位斗会流五岁村因决但轿歌菜地找古再作物力总她样字围性准苦和怎人咱六候冲叶空加付提望外而熟共坐战连读吃点把是让心打于丘那间河整劳老建倒数治女本十接每高竺世身单山土城个敌明类士己失乎觉合门区轻究子定中我雨记体从至必甩半任告就得册民草姑产什报现青算钱太比大压见师国运石取们出怕像句唱知话家旧主眼自野去变化哪重花急火然哥并星别乡多书法月阶回相早意系以天很破的件业带跳仙机了林印声先代旁风渐长进许名晴块阳船也放几实年团此果最军史际脚革又公树呼婆切饭群两做全里平有庄等改未拉来根亮叫次百刻响海结三落走场识教且为义内条慢方寻真观看东日利答兴服枪掉量住好对可背细

BIN
gru_arith_base_model.h5


BIN
gru_chinese_base_model.h5


BIN
gru_english_base_model.h5


BIN
gru_english_base_model_2.h5


+ 159 - 8
predict_model.py

@@ -12,6 +12,7 @@ import tensorflow as tf
 from PIL import Image
 import numpy as np
 import string
+import re
 
 global graph
 total_num = 0
@@ -20,25 +21,175 @@ graph = tf.get_default_graph()
 
 digit_characters = string.digits
 digit_base_model = models.load_model('gru_digit_base_model.h5')
-arith_characters = '0123456789+*-%'
-arith_base_model = models.load_model('gru_arith_base_model.h5')
+# arith_characters = '0123456789+*-%'
+# arith_characters = '0123456789+?-×=' #2021/11/17新增几种算术验证码
+arith_characters = '0123456789+?-×/='  #2022/6/21 新增除法,新增两种验证码,两个算术符验证码
+arith_base_model = models.load_model('gru_arith_base_model.h5')  #2021/11/17新增几种算术验证码
+# chinese_characters = '四生乐句付仗斥令仔乎白仙甩他瓜们用丘仪失丛代印册匆禾'
+with open('chinese_characters.txt', encoding='utf-8') as f:
+    chinese_characters = f.read().strip()  # 20200728 更新到524个中文
+chinese_base_model = models.load_model('gru_chinese_base_model.h5') # 20191219 新增 20200728 更新到524个中文
+# english_characters = string.digits + string.ascii_uppercase + string.ascii_lowercase
+english_characters = string.ascii_lowercase + string.digits  # 20200728 更新为全部小写多种验证码
+english_base_model = models.load_model('gru_english_base_model.h5') # 20200518 新增  20200728 更新为全部小写多种验证码
+
+# For each model, build one reusable backend function that takes
+# [image batch, per-sample length factor] and returns the first sequence set
+# produced by K.ctc_decode. The length placeholder is multiplied by dimension 1
+# of the model output (presumably the number of time steps -- confirm), so the
+# predict_* functions pass np.ones(batch_size) to decode the full output.
+
+# Digit captcha decoder.
+digit_input = digit_base_model.output
+digit_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
+digit_decode = K.ctc_decode(y_pred=digit_input, input_length=digit_input_length * K.shape(digit_input)[1])
+digit_decode = K.function([digit_base_model.input, digit_input_length], [digit_decode[0][0]])
+
+# Arithmetic captcha decoder.
+arith_input = arith_base_model.output
+arith_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
+arith_decode = K.ctc_decode(y_pred=arith_input, input_length=arith_input_length * K.shape(arith_input)[1])
+arith_decode = K.function([arith_base_model.input, arith_input_length], [arith_decode[0][0]])
+
+# Chinese-character captcha decoder.
+chinese_input = chinese_base_model.output
+chinese_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
+chinese_decode = K.ctc_decode(y_pred=chinese_input, input_length=chinese_input_length * K.shape(chinese_input)[1])
+chinese_decode = K.function([chinese_base_model.input, chinese_input_length], [chinese_decode[0][0]])
+
+# English (lowercase letters + digits) captcha decoder.
+english_input = english_base_model.output
+english_input_length = tf.keras.Input(batch_shape=[None], dtype='int32')
+english_decode = K.ctc_decode(y_pred=english_input, input_length=english_input_length * K.shape(english_input)[1])
+english_decode = K.function([english_base_model.input, english_input_length], [english_decode[0][0]])
+
+# def decode_arith(arith = '2×?=12'):
+#     arith = arith.replace('×', '*')
+#     items = re.split('=', arith)
+#     if len(items)==2:
+#         if items[-1] in ['?', '']:
+#             return eval(items[0])
+#         l = re.split('-|\+|\*', items[0])
+#         signs = re.findall('-|\+|\*', items[0])
+#         if len(l)==2 and len(signs)==1:
+#             if l[1] == '?':
+#                 if signs[0] == '+':
+#                     return eval('%s-%s'%(items[-1], l[0]))
+#                 elif signs[0] == '-':
+#                     return eval('%s-%s'%(l[0],items[-1]))
+#                 elif signs[0] == '*':
+#                     return int(eval('%s/%s'%(items[-1], l[0])))
+#             elif l[0] == '?':
+#                 if signs[0] == '+':
+#                     return eval('%s-%s'%(items[-1], l[1]))
+#                 elif signs[0] == '-':
+#                     return eval('%s+%s'%(l[1],items[-1]))
+#                 elif signs[0] == '*':
+#                     return int(eval('%s/%s'%(items[-1], l[1])))
+#     return ''
+
+
+def decode_arith(arith='2×?=12'):
+    try:
+        arith = arith.replace('×', '*')
+        if re.search('^(\d+|\?)([\+\-\*/](\d+|\?))+=(\d+|\?)?$', arith) and len(re.findall('\?', arith)) <= 1:
+            if arith[-1] == '?':
+                answer = str(int(eval(arith[:-2])))
+            elif arith[-1] == '=':
+                answer = str(int(eval(arith[:-1])))
+            elif re.search('^(\d+|\?)[\+\-\*/](\d+|\?)=\d+$', arith):
+                a, sign, b, _, quest = re.split('(\+|\-|\*|×|/|=)', arith)
+                if a == '?':
+                    if sign == "+":
+                        sign = '-'
+                    elif sign == '-':
+                        sign = '+'
+                    elif sign == "*":
+                        sign = '/'
+                    elif sign == '/':
+                        sign = '*'
+                    a, quest = quest, a
+                elif b == '?':
+                    if sign == "+":
+                        sign = '-'
+                        b, quest = quest, b
+                        a, b = b, a
+                    elif sign == '-':
+                        b, quest = quest, b
+                    elif sign == "*":
+                        sign = '/'
+                        b, quest = quest, b
+                        a, b = b, a
+                    elif sign == '/':
+                        b, quest = quest, b
+                else:
+                    print('公式出错:', arith)
+                answer = str(int(eval('%s%s%s' % (a, sign, b))))
+            else:
+                print('公式出错:', arith)
+        else:
+            answer = ''
+        return answer
+    except:
+        answer = ''
+        return answer
 
 def predict_digit(img):
+    """Recognise a digit captcha image and return the decoded characters as a string.
+
+    The image is resized to 100x50 (bilinear), divided by 255.0, wrapped in a
+    batch of one and passed to ``digit_decode``; each decoded index is looked
+    up in ``digit_characters``.
+    """
     img_arr = np.array(img.resize((100, 50), Image.BILINEAR)) / 255.0
     X_test = np.array([img_arr])
     with graph.as_default():
-        y_pred = digit_base_model.predict(X_test)
-        out_pre = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1])[0][0])[:, :6]
+        out_pre = digit_decode([X_test, np.ones(X_test.shape[0])])[0]
+        # y_pred = digit_base_model.predict(X_test)
+        # out_pre = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1])[0][0])[:, :6]
     out = ''.join([digit_characters[x] for x in out_pre[0]])
     return out
 
-
 def predict_arith(img):
+    """Recognise an arithmetic captcha image and return its solved answer.
+
+    The image is resized to 200x64, divided by 255.0 and decoded with
+    ``arith_decode``; the recognised expression is then passed to
+    ``decode_arith``. Returns "" if solving raises.
+    """
-    img_arr = np.array(img.resize((100, 50), Image.BILINEAR)) / 255.0
+    # img_arr = np.array(img.resize((100, 50), Image.BILINEAR)) / 255.0
+    img_arr = np.array(img.resize((200, 64), Image.BILINEAR)) / 255.0  # 2021-11-17: image size changed; 2022-06-21: changed from 100,32 to 200,64
     X_test = np.array([img_arr])
     with graph.as_default():
-        y_pred = arith_base_model.predict(X_test)
-        out_pre = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1])[0][0])[:, :6]
+        out_pre = arith_decode([X_test, np.ones(X_test.shape[0])])[0]
     out = ''.join([arith_characters[x] for x in out_pre[0]])
+    try:
+        out = decode_arith(out)
+    except:
+        out = ""
     return out
 
+def predict_chinese(img):
+    # img_arr = np.array(img.resize((100, 50), Image.BILINEAR)) / 255.0
+    img_arr = np.array(img.resize((120, 40), Image.BILINEAR)) / 255.0 # 更新两种中文验证码
+    X_test = np.array([img_arr])
+    with graph.as_default():
+        out_pre = chinese_decode([X_test, np.ones(X_test.shape[0])])[0]
+    out = ''.join([chinese_characters[x] for x in out_pre[0]])
+    return out
+
+def predict_english(img):
+    img_arr = np.array(img.resize((200, 70), Image.BILINEAR)) / 255.0  #BILINEAR  NEAREST
+    X_test = np.array([img_arr])
+    with graph.as_default():
+        out_pre = english_decode([X_test, np.ones(X_test.shape[0])])[0]
+    out = ''.join([english_characters[x] for x in out_pre[0]])
+    return out
+
+if __name__ == "__main__":
+    import glob
+    import time
+    import sys
+    import shutil
+    neg = []
+    files = glob.glob(r'E:\linuxPro\captcha_pro\FileInfo0526\标注样本\shensexiansandian\*.jpg')[-3000:]
+    t1 = time.time()
+    for i in range(len(files)):
+        file = files[i].split('\\')[-1]
+        label = files[i].split('\\')[-1].split('_')[0]
+        img = Image.open(files[i])
+        if img.mode != "RGB":
+            img = img.convert("RGB")
+        pre = predict_english(img)
+        if label!=pre:
+            print(file,label, pre)
+            neg.append(file)
+        elif len(label) == 4:
+            if os.path.exists(files[i]):
+                try:
+                    shutil.copy(files[i], 'english_imgs/' + file)
+                except IOError as e:
+                    print('Unable to copy file %s' % e)
+                except:
+                    print('Unexcepted error', sys.exc_info)
+    print(len(neg), time.time()-t1)
+
+

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 70 - 0
train/captcha_DigitAndEnglish.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 174 - 0
train/captcha_EngAndNum.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 55 - 0
train/captcha_arithmetic.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 89 - 0
train/captcha_arithmetic0616.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 154 - 0
train/captcha_chinese.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 249 - 0
train/captcha_chinese_524char.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 313 - 0
train/captcha_cnn.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 472 - 0
train/captcha_digit.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 79 - 0
train/captcha_english.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 83 - 0
train/captcha_gen_train 自动编码器实现去噪.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 6413 - 0
train/gan_captcha.ipynb


BIN
train/gru_arithmetic_ctc_best_20220617.h5


+ 155 - 0
train/百度通用文字识别接口调用.ipynb

@@ -0,0 +1,155 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import base64\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "24.d1553d0054c592e9a54671bb20c7ff98.2592000.1579349472.282335-15518595\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 获取token\n",
+    "# host 中的 client_id   client_secret 为注册成功后百度给的key\n",
+    "host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=bwnlUhy0DFLVGq72dQGs8Ao8&client_secret=E55Xu9YGMteKFG9AZWnWZGpGUXEriAXL'\n",
+    "headers = {\n",
+    "    'Content-Type': 'application/json;charset=UTF-8',\n",
+    "}\n",
+    "response = requests.get(url=host, headers=headers)\n",
+    "\n",
+    "# print(response.content)\n",
+    "if response:\n",
+    "    access_token=response.json()['access_token']\n",
+    "    print(access_token)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{\"log_id\": 6154236719442914002, \"words_result_num\": 1, \"words_result\": [{\"location\": {\"width\": 92, \"top\": 15, \"left\": 7, \"height\": 24}, \"words\": \"847=\"}]}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 获取数据\n",
+    "# access_token = '24.5578dd7759f6e662c7beea352e3667fb.2592000.1579258726.282335-15518595'\n",
+    "url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=' + access_token\n",
+    "f = open(r'test.jpg', 'rb')\n",
+    "imgR = base64.b64encode(f.read())\n",
+    "params={'image':imgR}\n",
+    "headers = {\n",
+    "    'Content-Type':'application/x-www-form-urlencoded'\n",
+    "}\n",
+    "response = requests.post(url, params=params, headers=headers)\n",
+    "result = response.content.decode('utf-8')\n",
+    "print(result)\n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "847=\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 解析json\n",
+    "dic = json.loads(result)\n",
+    "for item in dic['words_result']:\n",
+    "    print(item['words'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'words_result_num': 1, 'words_result': [{'words': '四仪御', 'location': {'width': 91, 'top': 0, 'left': 8, 'height': 24}}], 'log_id': 1380845182423524947}\n",
+      "['四仪御']\n"
+     ]
+    }
+   ],
+   "source": [
+    "def baidu_ocr(imgpath):\n",
+    "    import requests\n",
+    "    import base64\n",
+    "    import json\n",
+    "#     access_token = '24.5578dd7759f6e662c7beea352e3667fb.2592000.1579258726.282335-15518595'\n",
+    "#     url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=' + access_token  # 普通版\n",
+    "#     url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic?access_token=' + access_token  # 高精度版\n",
+    "    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/accurate?access_token=' + access_token  # 高精度含位置版\n",
+    "    f = open(imgpath, 'rb')\n",
+    "    imgR = base64.b64encode(f.read())\n",
+    "    params={'image':imgR}\n",
+    "    headers = {\n",
+    "        'Content-Type':'application/x-www-form-urlencoded'\n",
+    "    }\n",
+    "    response = requests.post(url, params=params, headers=headers)\n",
+    "    result = response.content.decode('utf-8')\n",
+    "    dic = json.loads(result)\n",
+    "    print(dic)\n",
+    "    words_list = []\n",
+    "    for item in dic['words_result']:\n",
+    "        words_list.append(item['words'])\n",
+    "    return words_list\n",
+    "pic = '../FileInfo1031/057b6bb5-fbce-11e9-9bc7-408d5cd36814_四仪乐印.jpg'\n",
+    "# pic = '../FileInfo1031/0cd3bc65-fbc6-11e9-9bc7-408d5cd36814_5302.jpg'\n",
+    "print(baidu_ocr(pic))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 392 - 0
train/自动编码器实现去噪.ipynb


Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 462 - 0
train/验证码图片服务器.ipynb


Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно