model.py

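# Models for the chinese_equation_denoise project: two CRNN+CTC recognizers
# for equation images, a small U-Net denoiser, a CTC decoding helper, and a
# VGG19 feature extractor built from pretrained .npy weights.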
from keras import Input, Model
import keras.backend as K
import tensorflow as tf
import numpy as np
from keras.layers import Conv2D, BatchNormalization, LeakyReLU, MaxPooling2D, UpSampling2D, concatenate, Lambda, \
    Bidirectional, GRU, Dense
from chinese_equation_denoise.loss import ctc_lambda_func

def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """CRNN recognizer: conv feature extractor -> BiGRU x2 -> per-step softmax.

    In training mode the CTC loss is wrapped as an extra Lambda output.
    """
    _input = Input(input_shape)
    use_bias = False
    # 32x192 -> 16x96
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    # 16x96 -> 8x48
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # 8x48 -> 4x24
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # Valid 4x4 conv collapses the height to 1 (4x24 -> 1x21)
    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    # Drop the singleton height axis to get a (batch, 21, 64) sequence
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        # Extra inputs carry the ground-truth labels and the sequence lengths
        # that the CTC loss needs.
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
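
# crnn_ctc_equation_large below is the same architecture with all widths
# doubled (32/64/128 conv filters, GRU(64)).
#
# A minimal training sketch (the optimizer and the pass-through loss are
# common CTC practice, not taken from this repository):
#
#   train_model = crnn_ctc_equation(is_train=True)
#   # The 'ctc' Lambda layer already outputs the loss value, so the Keras
#   # loss function just passes y_pred through.
#   train_model.compile(optimizer='adam',
#                       loss={'ctc': lambda y_true, y_pred: y_pred})
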
def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
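
# U-Net-style denoiser: three-level encoder/decoder with skip connections;
# a 1x1 conv follows each 3x3 conv to keep the model light. The sigmoid head
# predicts a class_num-channel image the same size as the input.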
def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    inputs = Input(shape=input_shape)
    use_bias = False
    # 32x192
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # 16x96
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # 8x48
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    # 4x24 (bottleneck)
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    # 8x48
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    # 16x96
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    # 32x192
    up1 = UpSampling2D((2, 2))(up2)
    # Layer-level concatenate, not K.concatenate: backend ops would break
    # the functional-API graph here.
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model
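
# A minimal usage sketch for the denoiser (the loss and optimizer choices
# here are assumptions, not taken from this repository):
#
#   denoiser = u_net_denoise()
#   denoiser.compile(optimizer='adam', loss='mae')
#   denoiser.fit(noisy_images, clean_images, batch_size=32, epochs=10)
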
def ctc_decode(image, model):
    """Beam-search decode the model's softmax output for a batch of images."""
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    # Every sample uses the full time dimension of the prediction as its length
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1], greedy=False, beam_width=6)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    out = decode([image, np.ones(image.shape[0])])[0][0]
    return out
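
# A minimal decoding sketch (assumes `batch` is a preprocessed
# (N, 32, 192, 3) float array; the weights filename is hypothetical):
#
#   infer_model = crnn_ctc_equation(is_train=False)
#   infer_model.load_weights('crnn_ctc.h5')
#   label_ids = ctc_decode(batch, infer_model)
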
class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            raise ValueError("there is no vgg19 npy file!")
        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()

    def build(self, bgr):
        """
        Load variables from the npy file and build the VGG19 graph.
        :param bgr: BGR image [batch, height, width, 3], values scaled to [0, 1]
        """
        bgr = bgr * 255.0
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))
        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
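

# A minimal smoke test (not part of the original file): build the Keras
# models and print their layer summaries. Vgg19 is skipped because it needs
# a pretrained .npy weights file.
if __name__ == '__main__':
    crnn_ctc_equation(is_train=False)        # prints its own summary
    crnn_ctc_equation_large(is_train=False)  # prints its own summary
    u_net_denoise().summary()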