from keras import Input, Model
import keras.backend as K
import tensorflow as tf
import numpy as np
from keras.layers import (Conv2D, BatchNormalization, LeakyReLU, MaxPooling2D,
                          UpSampling2D, concatenate, Lambda, Bidirectional,
                          GRU, Dense)

from chinese_equation_denoise.loss import ctc_lambda_func


def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """CRNN recognizer: a CNN encoder followed by two bidirectional GRUs.

    With is_train=True the returned model takes [image, labels, input_length,
    label_length] and outputs the CTC loss; otherwise it maps an image to
    per-timestep softmax probabilities over class_num classes.
    """
    _input = Input(input_shape)
    use_bias = False

    # Conv block 1: 32x192 -> 16x96
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    # Conv block 2: 16x96 -> 8x48
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    # Conv block 3: 8x48 -> 4x24
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    # Valid 4x4 conv collapses the height: 4x24 -> 1x21
    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)

    # Drop the singleton height axis: (batch, 1, 21, 64) -> (batch, 21, 64)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                          name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length],
                      outputs=loss_out)
    model.summary()
    return model


def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """Wider variant of crnn_ctc_equation: doubled conv filters and GRU units."""
    _input = Input(input_shape)
    use_bias = False

    # Conv block 1: 32x192 -> 16x96
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    # Conv block 2: 16x96 -> 8x48
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    # Conv block 3: 8x48 -> 4x24
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    # Valid 4x4 conv collapses the height: 4x24 -> 1x21
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)

    # Drop the singleton height axis: (batch, 1, 21, 128) -> (batch, 21, 128)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                          name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length],
                      outputs=loss_out)
    model.summary()
    return model
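# --- Illustrative usage (a sketch, not part of the original module) ----------
# The training graph already emits the CTC loss from its 'ctc' Lambda layer, so
# the usual Keras pattern is to compile with a pass-through loss. The shapes
# below assume ctc_lambda_func wraps K.ctc_batch_cost in the standard CRNN
# fashion (its actual definition lives in chinese_equation_denoise.loss and is
# not shown here); with the default 32x192 input the RNN sees 21 time steps.
def _demo_train_crnn_ctc():
    model = crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32,
                              is_train=True)
    # y_pred is already the loss value; the compiled loss just passes it on.
    model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})

    batch, max_label_len, time_steps = 4, 8, 21
    images = np.random.rand(batch, 32, 192, 3).astype('float32')
    # Class 31 (class_num - 1) is reserved as the CTC blank by K.ctc_batch_cost.
    labels = np.random.randint(0, 31, size=(batch, max_label_len)).astype('float32')
    input_length = np.full((batch, 1), time_steps, dtype='int64')
    label_length = np.full((batch, 1), max_label_len, dtype='int64')

    model.fit([images, labels, input_length, label_length],
              np.zeros((batch, 1)), batch_size=batch, epochs=1)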
def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    """Small U-Net that maps a noisy 32x192 equation crop to a cleaned image
    with class_num output channels (sigmoid activation)."""
    inputs = Input(shape=input_shape)
    use_bias = False

    # Encoder: 32x192
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    # 16x96
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    # 8x48
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)

    # Bottleneck: 4x24
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)

    # Decoder: back to 8x48
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)

    # 16x96
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)

    # 32x192
    up1 = UpSampling2D((2, 2))(up2)
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)

    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model


def ctc_decode(image, model):
    """Beam-search CTC decoding for a batch of images using the inference
    CRNN; returns the decoded label indices of the first sample."""
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1],
                           greedy=False, beam_width=6)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    out = decode([image, np.ones(image.shape[0])])[0][0]
    return out
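# --- Illustrative usage (a sketch, not part of the original module) ----------
# A plausible inference pipeline: denoise the 32x192 equation crop with the
# U-Net, then recognize it with the inference CRNN plus beam-search CTC
# decoding. The .h5 weight paths are hypothetical, for illustration only.
def _demo_denoise_then_recognize():
    denoiser = u_net_denoise(input_shape=(32, 192, 3), class_num=3)
    # denoiser.load_weights('u_net_denoise.h5')        # hypothetical path
    recognizer = crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32,
                                   is_train=False)
    # recognizer.load_weights('crnn_ctc_equation.h5')  # hypothetical path

    image = np.random.rand(1, 32, 192, 3).astype('float32')
    cleaned = denoiser.predict(image)           # sigmoid output in [0, 1]
    sequence = ctc_decode(cleaned, recognizer)  # label indices of sample 0
    return sequence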
class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            raise ValueError("vgg19_npy_path is required: pass the path to a "
                             "vgg19.npy weights file")
        self.data_dict = np.load(vgg19_npy_path, encoding='latin1',
                                 allow_pickle=True).item()

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG graph.

        :param bgr: BGR image tensor [batch, height, width, 3] with values
            scaled to [0, 1]
        """
        bgr = bgr * 255.0
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
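# --- Illustrative usage (a sketch, not part of the original module) ----------
# Vgg19 builds a TF1-style graph from a pre-trained vgg19.npy file (the
# Caffe-converted checkpoint this class expects; the path below is an
# assumption). A common use is extracting fixed conv features, e.g. for a
# perceptual loss between a denoised image and its clean target. Eager
# execution is disabled because build() relies on graph-mode constructs.
def _demo_vgg19_perceptual_features(npy_path='vgg19.npy'):
    tf.compat.v1.disable_eager_execution()
    images = tf.compat.v1.placeholder(tf.float32, [None, 32, 192, 3])
    vgg = Vgg19(npy_path)
    vgg.build(images)
    # vgg.conv2_2, vgg.conv3_4, ... are tensors that can be evaluated in a
    # tf.compat.v1.Session and compared between noisy and clean inputs.
    return vgg.conv2_2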