from keras import Input, Model
import keras.backend as K
import tensorflow as tf
import numpy as np
from keras.layers import Conv2D, BatchNormalization, LeakyReLU, MaxPooling2D, UpSampling2D, concatenate, Lambda, \
    Bidirectional, GRU, Dense, Dropout, Add

from chinese_equation_denoise.loss import ctc_lambda_func
from chinese_equation_recognize.loss import ctc_decode_mse_loss2
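

# Hedged reference sketch (an assumption, not this project's code): `ctc_lambda_func`
# is imported above; the conventional Keras wrapper it is assumed to resemble simply
# forwards its arguments to K.ctc_batch_cost so the CTC loss can live inside a Lambda layer.
def _ctc_lambda_func_reference(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)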


def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """CRNN recognizer: small conv backbone -> bidirectional GRUs -> per-step softmax,
    trained with an in-graph CTC loss."""
    _input = Input(input_shape)
    use_bias = False
    # Conv block 0: 16 filters, then 2x2 max-pool (32x192 -> 16x96).
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    # Conv block 1: 32 filters (16x96 -> 8x48).
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # Conv block 2: 64 filters (8x48 -> 4x24).
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # Valid 4x4 conv collapses the height to 1 (4x24 -> 1x21).
    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    # Drop the singleton height axis to get a (time, features) sequence for the RNNs.
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        # CTC is computed inside the graph: the training model takes the labels and
        # sequence lengths as extra inputs and outputs the loss itself.
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
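

# Hedged usage sketch: because the training model already outputs the CTC loss from
# the layer named 'ctc', it is typically compiled with a dummy loss that passes that
# output straight through. The optimizer choice here is an assumption.
def _train_equation_model_example():
    model = crnn_ctc_equation(is_train=True)
    model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
    return model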


def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """Wider CRNN variant: doubles the conv filters and GRU units of crnn_ctc_equation."""
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
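

# Shape check (illustrative): with the default 32x192 input, three 2x2 poolings give a
# 4x24 feature map and the valid 4x4 conv reduces it to 1x21, so the squeezed RNN input
# has 21 time steps; CTC label sequences must fit within that length.
def _check_time_steps():
    model = crnn_ctc_equation_large(is_train=False)
    print(model.output_shape)  # expected: (None, 21, class_num)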


def crnn_ctc_equation_loss(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """Large CRNN with an extra Dense+Dropout head, trained on CTC plus a decode-MSE term."""
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(rate=0.2)(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Two in-graph loss terms: standard CTC plus an auxiliary decode-MSE term,
        # summed into a single output named 'ctc'.
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        # loss_out_2 = CtcDecodeMseLoss(name='ctc')([x, labels, input_length, label_length])
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary(line_length=130)
    return model
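

# Note: Add keeps the combined output named 'ctc', so the dummy-loss compile shown in
# _train_equation_model_example applies unchanged to this CTC + decode-MSE objective.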


def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    """Small U-Net that maps a noisy image to class_num sigmoid output channels."""
    inputs = Input(shape=input_shape)
    use_bias = False
    # Encoder stage 1: 16 filters.
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # Encoder stage 2: 32 filters.
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # Encoder stage 3: 64 filters.
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    # Bottleneck.
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    # Decoder stage 3: upsample and fuse with the matching encoder features.
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    # Decoder stage 2.
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    # Decoder stage 1. Use the concatenate layer rather than K.concatenate:
    # backend ops can't join Keras symbolic tensors in the functional API.
    up1 = UpSampling2D((2, 2))(up2)
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(class_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model
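

# Hedged usage sketch: with class_num=3 sigmoid channels, the U-Net is assumed to
# regress clean RGB images in [0, 1], so a pixel-wise loss such as MAE fits; both the
# loss and optimizer choices here are assumptions.
def _train_denoiser_example():
    model = u_net_denoise(input_shape=(32, 192, 3), class_num=3)
    model.compile(optimizer='adam', loss='mae')
    return model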


def ctc_decode(image, model):
    """Beam-search decode a batch of images with the inference-mode recognizer."""
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    # Every sample uses the full sequence length: ones scaled by the time-step count.
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1], greedy=False, beam_width=6)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    # Returns the best path for the first sample in the batch.
    out = decode([image, np.ones(image.shape[0])])[0][0]
    return out
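

# Hedged usage sketch: decoding expects the inference-mode recognizer (is_train=False,
# raw softmax output); the weights path below is hypothetical.
def _decode_example(image_batch):
    model = crnn_ctc_equation(is_train=False)
    model.load_weights('crnn_ctc.h5')  # hypothetical path
    return ctc_decode(image_batch, model)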


class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            raise ValueError("vgg19_npy_path is required: no VGG-19 .npy weight file was given")
        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()

    def build(self, bgr):
        """Load variables from the .npy file and build the VGG-19 conv stack.

        :param bgr: BGR image batch [batch, height, width, 3], values scaled to [0, 1]
        """
        bgr = bgr * 255.0  # rescale to the 0-255 range the pretrained weights expect
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))
        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
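

# Hedged usage sketch: the weights load as tf.constant, so Vgg19 acts as a frozen
# feature extractor (e.g. for a perceptual loss). Inputs are BGR in [0, 1]; the .npy
# path is hypothetical, and TF1-style graph execution is assumed to match the
# tf.compat.v1.variable_scope usage above.
def _vgg_features_example(images_bgr):
    vgg = Vgg19('vgg19.npy')  # hypothetical weights path
    vgg.build(images_bgr)
    return vgg.conv3_3  # mid-level features, a common perceptual-loss choice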


if __name__ == '__main__':
    crnn_ctc_equation_loss()