model.py

from keras import Input, Model
import keras.backend as K
import tensorflow as tf
import numpy as np
from keras.layers import Conv2D, BatchNormalization, LeakyReLU, MaxPooling2D, UpSampling2D, concatenate, Lambda, \
    Bidirectional, GRU, Dense, Dropout, Add

from chinese_equation_denoise.loss import ctc_lambda_func
from chinese_equation_recognize.loss import ctc_decode_mse_loss2

def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """Small CRNN: three conv blocks, a height-collapsing conv, two BiGRUs, CTC."""
    _input = Input(input_shape)
    use_bias = False
    # Conv block 0: 32x192 -> 16x96
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    # Conv block 1: 16x96 -> 8x48
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # Conv block 2: 8x48 -> 4x24
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # Valid 4x4 conv collapses the height axis: 4x24 -> 1x21
    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    # Squeeze out the singleton height to get a (batch, time, features) sequence
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        # Training graph: CTC loss is computed in-graph by a Lambda layer
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
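

# Usage sketch (an assumption, not part of this file): with in-graph CTC the
# 'ctc' output already is the loss, so compile with a pass-through loss
# function; the optimizer choice is illustrative.
def _compile_for_training_example():
    model = crnn_ctc_equation(is_train=True)
    model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
    return model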


def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """Wider variant of crnn_ctc_equation: doubled conv filters and GRU units."""
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model


def crnn_ctc_equation_loss(input_shape=(32, 192, 3), class_num=32, is_train=True):
    """crnn_ctc_equation_large with a ReLU/Dropout head and a combined
    training loss: CTC plus a decode-MSE term, summed by an Add layer."""
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(rate=0.2)(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        # loss_out_2 = CtcDecodeMseLoss(name='ctc')([x, labels, input_length, label_length])
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary(line_length=130)
    return model


def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    """Small U-Net that maps a noisy crop to a cleaned image of the same size."""
    inputs = Input(shape=input_shape)
    use_bias = False
    # Encoder 1: 32x192
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # Encoder 2: 16x96
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # Encoder 3: 8x48
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    # Bottleneck: 4x24
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    # Decoder 3: 8x48, skip connection from down3
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    # Decoder 2: 16x96, skip connection from down2
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    # Decoder 1: 32x192, skip connection from down1
    up1 = UpSampling2D((2, 2))(up2)
    # Use the Keras concatenate layer (not K.concatenate) so the result stays
    # part of a valid functional-API graph
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model
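

# Usage sketch (assumption: a plain pixel-reconstruction objective; the MAE
# loss is illustrative, not taken from this repository).
def _compile_denoiser_example():
    model = u_net_denoise(input_shape=(32, 192, 3), class_num=3)
    model.compile(optimizer='adam', loss='mae')
    return model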


def ctc_decode(image, model):
    """Beam-search decode a prediction model's softmax output for one batch."""
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    # Every sample spans the full time axis, so scale a vector of ones by the
    # number of time steps
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1], greedy=False, beam_width=6)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    out = decode([image, np.ones(image.shape[0])])[0][0]
    return out
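

# Inference sketch: build the prediction-only graph and decode one dummy
# batch; the weight path is hypothetical, and real inputs would be
# preprocessed 32x192 crops.
def _decode_example():
    infer_model = crnn_ctc_equation(is_train=False)
    infer_model.load_weights('crnn_ctc_equation.h5')  # hypothetical path
    batch = np.zeros((1, 32, 192, 3), dtype='float32')  # placeholder image
    return ctc_decode(batch, infer_model)  # int class ids; -1 marks padding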


class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            raise ValueError("vgg19_npy_path is required: no VGG19 .npy weight file was given")
        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG19 graph.
        :param bgr: BGR image [batch, height, width, 3] with values scaled to [0, 1]
        """
        bgr = bgr * 255.0
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))
        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
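

# Usage sketch (assumptions: a local 'vgg19.npy' weight file and TF1-style
# graph mode, which build() requires because of variable_scope):
def _vgg_features_example(npy_path='vgg19.npy'):
    tf.compat.v1.disable_eager_execution()
    images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = Vgg19(npy_path)
    vgg.build(images)
    return vgg.conv3_3  # intermediate features, e.g. for a perceptual loss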


if __name__ == '__main__':
    crnn_ctc_equation_loss()