model.py

import math
import time
from functools import wraps

import numpy as np
import tensorflow as tf
from keras import Input, Model
from keras.engine.base_layer import Layer
from keras.layers import (Lambda, Dense, Conv2D, Reshape, GlobalAveragePooling2D, BatchNormalization,
                          Activation, Add, Multiply, DepthwiseConv2D, LeakyReLU, MaxPooling2D,
                          UpSampling2D, Concatenate, Dropout, concatenate, Embedding, LSTM,
                          Bidirectional, CuDNNLSTM, Conv1D, MaxPooling1D, GlobalMaxPooling1D,
                          GlobalMaxPooling2D, GRU)
import keras.backend as K
from keras.regularizers import l2

from click_captcha.loss import ctc_lambda_func, ctc_decode_mse_loss, CtcDecodeMseLoss, ctc_decode_mse_loss2
from click_captcha.utils import compose

def yolo_net(input_shape, anchors, num_classes, load_pretrained=True,
             weights_path='models/tiny_yolo_weights.h5'):
    """Create the training model for Tiny YOLOv3."""
    from click_captcha.loss import yolo_loss
    # get a new session
    # ops.reset_default_graph()
    K.clear_session()
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)
    # two output scales: stride 32 and stride 16
    y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l],
                           num_anchors // 2, num_classes + 5)) for l in range(2)]
    model_body = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
    if load_pretrained:
        model_body.load_weights(weights_path)
        print('Load weights {}.'.format(weights_path))
    # compute the loss inside the graph so the model's single output is the loss value
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 1.})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)
    model.summary(120)
    return model
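
# Usage sketch (an assumption, not something this file defines): because the loss
# is produced by the 'yolo_loss' Lambda layer itself, the training model is
# typically compiled with an identity loss on that output, e.g.
#   model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})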

def mobile_net(input_shape, output_shape=5710):
    model = MobileNetV3Small(input_shape, output_shape).build()
    model.summary()
    return model

def cnn_net(input_shape, output_shape=5710):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    conv = Conv2D(256, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(256, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    dense = Dense(128, activation="relu")(rl)
    drop = Dropout(0.2)(dense)
    dense = Dense(output_shape, activation="softmax")(drop)
    x = Reshape((output_shape,))(dense)
    model = Model(_input, x)
    model.summary()
    return model

def cnn_net_small(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    conv = Conv2D(128, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(128, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)
    model = Model(_input, x)
    model.summary()
    return model

def cnn_net_tiny(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    conv = Conv2D(64, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(64, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)
    # dense = Dense(16, activation="relu")(rl)
    # drop = Dropout(0.2)(dense)
    # dense = Dense(output_shape, activation="softmax")(drop)
    # drop = Dropout(0.2)(dense)
    # x = Reshape((output_shape,))(drop)
    model = Model(_input, x)
    model.summary()
    return model

def cnn_net_tiny_dropout(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = Dropout(0.2)(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = Dropout(0.2)(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = Dropout(0.2)(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = Dropout(0.2)(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = Dropout(0.2)(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = Dropout(0.2)(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    conv = Conv2D(64, (3, 3))(down2_pool)
    drop = Dropout(0.2)(conv)
    rl = LeakyReLU(alpha=0.1)(drop)
    conv = Conv2D(64, (3, 3))(rl)
    drop = Dropout(0.2)(conv)
    rl = LeakyReLU(alpha=0.1)(drop)
    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)
    # dense = Dense(16, activation="relu")(rl)
    # drop = Dropout(0.2)(dense)
    # dense = Dense(output_shape, activation="softmax")(drop)
    # drop = Dropout(0.2)(dense)
    # x = Reshape((output_shape,))(drop)
    model = Model(_input, x)
    model.summary()
    return model

def cnn_net_drag(input_shape, output_shape=260):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(16, (3, 3), use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(32, (3, 3), use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(64, (3, 3), use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(64, (3, 3), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (3, 3), use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    gap = GlobalAveragePooling2D()(down3_pool)
    dense = Dense(32, activation="relu")(gap)
    drop = Dropout(0.2)(dense)
    dense = Dense(output_shape, activation="softmax")(drop)
    model = Model(_input, dense)
    model.summary()
    return model

def u_net_drag(input_shape, output_shape=260, cls_num=2):
    inputs = Input(shape=input_shape)
    use_bias = False
    # 128
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # 64
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # 32
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    # 16
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    # 32
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    # 64
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    # 128
    up1 = UpSampling2D((2, 2))(up2)
    # use the concatenate layer (not K.concatenate) so the tensor keeps its Keras history
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    classify = Conv2D(1, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    model.summary(line_length=100)
    return model
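
# Usage sketch (assumption): the single-channel sigmoid output is a per-pixel
# mask, so this model would typically be compiled with a binary cross-entropy
# loss, e.g. model.compile(optimizer='adam', loss='binary_crossentropy').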

def lstm_phrase(input_shape, output_shape=1):
    # input_shape is expected to be (sequence_length, vocab_size)
    inputs = Input(shape=(input_shape[0],))
    x = Embedding(input_shape[1] + 1, 16, input_length=input_shape[0])(inputs)
    # x = Dropout(0.2)(x)
    x = Bidirectional(LSTM(32))(x)
    # x = Dropout(0.2)(x)
    x = Dense(16)(x)
    x = Dense(output_shape, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=x)
    model.summary(line_length=100)
    return model

def text_cnn_phrase(input_shape, output_shape=1):
    # input_shape is expected to be (sequence_length, vocab_size)
    inputs = Input(shape=(input_shape[0],))
    x = Embedding(input_shape[1] + 1, 50, input_length=input_shape[0])(inputs)
    x1 = Conv1D(64, 3, activation="relu", padding="same")(x)
    x1 = GlobalMaxPooling1D()(x1)
    x2 = Conv1D(64, 4, activation="relu", padding="same")(x)
    x2 = GlobalMaxPooling1D()(x2)
    x3 = Conv1D(64, 5, activation="relu", padding="same")(x)
    x3 = GlobalMaxPooling1D()(x3)
    x = Concatenate()([x1, x2, x3])
    x = Dense(output_shape, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=x)
    model.summary(line_length=100)
    return model
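
# Note: this is the classic TextCNN pattern (Kim, 2014): parallel Conv1D branches
# with window sizes 3/4/5 over the embedded sequence, each max-pooled over time,
# then concatenated for classification.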

def siamese_net(input_shape, output_shape=2):
    input_image_1 = Input(shape=input_shape, name="input_1")
    input_image_2 = Input(shape=input_shape, name="input_2")
    input_init = Input(shape=input_shape, name="input3")
    model = mobile_net_v3_tiny(input_init, n_class=500)
    model1 = model(input_image_1)
    model2 = model(input_image_2)
    # note: despite the name, this computes the element-wise *squared* difference
    l1_distance_layer = Lambda(lambda tensors: K.square(tensors[0] - tensors[1]))
    l1_distance = l1_distance_layer([model1, model2])
    out = Dense(100, activation='relu')(l1_distance)
    out = Dense(output_shape, activation='softmax', name='output')(out)
    model = Model([input_image_1, input_image_2], out)
    model.summary()
    return model
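
# Usage sketch (assumption): the two branches share the same mobile_net_v3_tiny
# weights, so the model is trained on image pairs with a 2-class softmax target
# (same / different).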

def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    # height is 1 here (32 -> 4 after three 2x2 poolings -> 1 after the valid 4x4
    # conv), so squeeze it away to get a (time, features) sequence for the RNNs
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
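
# Usage sketch (assumption): the training variant already carries the CTC loss
# as its output, so it is typically compiled with an identity loss, e.g.
#   model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})
# and fed [images, labels, input_length, label_length] plus a dummy target.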

def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    # note: with the final softmax left commented out, class_num is unused and
    # the CTC loss is computed over the 64-dim ReLU features
    # x = Dropout(rate=0.2)(x)
    # x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model

def crnn_ctc_equation_loss(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(rate=0.2)(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        # loss_out_2 = CtcDecodeMseLoss(name='ctc')([x, labels, input_length, label_length])
        # total loss: CTC plus the decode-MSE auxiliary term
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary(130)
    return model

def crnn_ctc_equation_less(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    down3 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    down4 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    down4 = BatchNormalization()(down4)
    down4 = LeakyReLU(alpha=0.1)(down4)
    down4_pool = MaxPooling2D((2, 2), strides=(1, 1))(down4)
    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down4_pool)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(rate=0.3)(x)
    x = Dense(class_num, activation='softmax')(x)
    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model

def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    inputs = Input(shape=input_shape)
    use_bias = False
    # 128
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)
    # 64
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)
    # 32
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)
    # 16
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    # 32
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    # 64
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    # 128
    up1 = UpSampling2D((2, 2))(up2)
    # use the concatenate layer (not K.concatenate) so the tensor keeps its Keras history
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)
    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model

def ctc_decode(image, model):
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1], greedy=False, beam_width=100)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    out = decode([image, np.ones(image.shape[0])])
    # out is a one-element list holding the decoded label array (the best beam)
    return out
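
# Usage sketch (assumption): build the inference variant of a CRNN first, e.g.
#   infer_model = crnn_ctc_equation(is_train=False)
#   labels = ctc_decode(batch_of_images, infer_model)[0]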

class Vgg16:
    def __init__(self, vgg16_npy_path="./vgg16.npy"):
        if vgg16_npy_path is None:
            # path = inspect.getfile(Vgg16)
            # path = os.path.abspath(os.path.join(path, os.pardir))
            # path = os.path.join(path, "vgg16.npy")
            # vgg16_npy_path = path
            # print(path)
            raise ValueError("there is no vgg16 npy file!")
        self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
        print("npy file loaded")

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG.
        :param bgr: bgr image [batch, height, width, 3], values scaled to [0, 1]
        """
        start_time = time.time()
        print("build model started")
        bgr_scaled = bgr * 255.0
        # Convert RGB to BGR
        # red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
        # print("red", red)
        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        # bgr = tf.concat(axis=3, values=[
        #     blue - VGG_MEAN[0],
        #     green - VGG_MEAN[1],
        #     red - VGG_MEAN[2],
        # ])
        # assert bgr.get_shape().as_list()[1:] == [224, 224, 3]
        self.conv1_1 = self.conv_layer(bgr_scaled, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')
        self.fc6 = self.fc_layer(self.pool5, "fc6")
        # assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.relu(self.fc6)
        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)
        self.fc8 = self.fc_layer(self.relu7, "fc8")
        self.prob = tf.nn.softmax(self.fc8, name="prob")
        # self.data_dict = None
        print("build model finished: %ds" % (time.time() - start_time))
        return self.prob

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
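
# Usage sketch (assumption): Vgg16 builds a TF graph from pretrained .npy
# weights, e.g. prob = Vgg16("vgg16.npy").build(images) with images as a
# [0, 1]-scaled float tensor of shape [batch, 224, 224, 3].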

class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            raise ValueError("there is no vgg19 npy file!")
        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG.
        :param bgr: bgr image [batch, height, width, 3], values scaled to [0, 1]
        """
        bgr = bgr * 255.0
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))
        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")

def my_conv(inputs, output_shape=100):
    x = Conv2D(64, (3, 3), padding='same')(inputs)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(64, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = Conv2D(128, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(128, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = Conv2D(256, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(256, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = Conv2D(output_shape, (1, 1), activation='relu')(x)
    x = MaxPooling2D((5, 5))(x)
    x = Reshape((output_shape,))(x)
    model = Model(inputs, x)
    return model

def mobile_net_v3_tiny(inputs, n_class=1000):
    # inputs = Input(shape)
    # 224,224,3 -> 112,112,16
    x = conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')
    # 112,112,16 -> 56,56,16
    x = bottleneck(x, 16, (3, 3), up_dim=16, stride=2, sq=True, nl='RE')
    # 56,56,16 -> 28,28,24
    x = bottleneck(x, 24, (3, 3), up_dim=32, stride=2, sq=False, nl='RE')
    x = bottleneck(x, 24, (3, 3), up_dim=32, stride=1, sq=False, nl='RE')
    # 28,28,24 -> 14,14,40
    x = bottleneck(x, 40, (5, 5), up_dim=64, stride=2, sq=True, nl='HS')
    x = bottleneck(x, 40, (5, 5), up_dim=64, stride=1, sq=True, nl='HS')
    # 14,14,40 -> 14,14,48
    x = bottleneck(x, 48, (5, 5), up_dim=128, stride=1, sq=True, nl='HS')
    x = bottleneck(x, 48, (5, 5), up_dim=128, stride=1, sq=True, nl='HS')
    x = conv_block(x, 256, (1, 1), strides=(1, 1), nl='HS')
    x = GlobalAveragePooling2D()(x)
    x = Reshape((1, 1, 256))(x)
    x = Conv2D(256, (1, 1), padding='same')(x)
    x = return_activation(x, 'HS')
    x = Conv2D(n_class, (1, 1), padding='same', activation='relu')(x)
    x = Reshape((n_class,))(x)
    model = Model(inputs, x)
    return model

class MobileNetBase:
    def __init__(self, shape, n_class, alpha=1.0):
        """Init.
        # Arguments
            shape: An integer or tuple/list of 3 integers, shape
                of the input tensor.
            n_class: Integer, number of classes.
            alpha: Float, width multiplier.
        """
        self.shape = shape
        self.n_class = n_class
        self.alpha = alpha

    def _relu6(self, x):
        """ReLU capped at 6."""
        return K.relu(x, max_value=6.0)

    def _hard_swish(self, x):
        """Hard swish."""
        return x * K.relu(x + 3.0, max_value=6.0) / 6.0

    def _return_activation(self, x, nl):
        """Activation choice.
        # Arguments
            x: Tensor, input tensor of the activation.
            nl: String, nonlinearity activation type.
        # Returns
            Output tensor.
        """
        if nl == 'HS':
            x = Activation(self._hard_swish)(x)
        if nl == 'RE':
            x = Activation(self._relu6)(x)
        return x

    def _conv_block(self, inputs, filters, kernel, strides, nl):
        """Convolution Block.
        This function defines a 2D convolution operation with BN and activation.
        # Arguments
            inputs: Tensor, input tensor of the conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            strides: An integer or tuple/list of 2 integers, specifying the
                strides of the convolution along the width and height.
                Can be a single integer to specify the same value for
                all spatial dimensions.
            nl: String, nonlinearity activation type.
        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
        x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
        x = BatchNormalization(axis=channel_axis)(x)
        return self._return_activation(x, nl)

    def _squeeze(self, inputs):
        """Squeeze and Excitation.
        This function defines a squeeze structure.
        # Arguments
            inputs: Tensor, input tensor of the conv layer.
        """
        input_channels = int(inputs.shape[-1])
        x = GlobalAveragePooling2D()(inputs)
        x = Dense(input_channels, activation='relu')(x)
        x = Dense(input_channels, activation='hard_sigmoid')(x)
        x = Reshape((1, 1, input_channels))(x)
        x = Multiply()([inputs, x])
        return x

    def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl):
        """Bottleneck.
        This function defines a basic bottleneck structure.
        # Arguments
            inputs: Tensor, input tensor of the conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            e: Integer, number of expanded channels produced by the
                initial 1x1 convolution.
            s: An integer or tuple/list of 2 integers, specifying the strides
                of the convolution along the width and height. Can be a single
                integer to specify the same value for all spatial dimensions.
            squeeze: Boolean, whether to use the squeeze-and-excitation block.
            nl: String, nonlinearity activation type.
        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
        input_shape = K.int_shape(inputs)
        tchannel = int(e)
        cchannel = int(self.alpha * filters)
        # residual connection only when the stride is 1 and channel counts match
        r = s == 1 and input_shape[3] == filters
        x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl)
        x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)
        x = self._return_activation(x, nl)
        if squeeze:
            x = self._squeeze(x)
        x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)
        if r:
            x = Add()([x, inputs])
        return x

    def build(self):
        pass

class MobileNetV3Small(MobileNetBase):
    def __init__(self, shape, n_class, alpha=1.0, include_top=True):
        """Init.
        # Arguments
            shape: An integer or tuple/list of 3 integers, shape
                of the input tensor.
            n_class: Integer, number of classes.
            alpha: Float, width multiplier.
            include_top: Boolean, whether to include the classification layers.
        # Returns
            MobileNetV3 model.
        """
        super(MobileNetV3Small, self).__init__(shape, n_class, alpha)
        self.include_top = include_top

    def build(self):
        """Build MobileNetV3 Small.
        # Returns
            model: Model, the Keras model.
        """
        inputs = Input(shape=self.shape)
        x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')
        x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE')
        x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE')
        x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS')
        x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
        x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS')
        x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS')
        x = GlobalAveragePooling2D()(x)
        x = Reshape((1, 1, 576))(x)
        x = Conv2D(1280, (1, 1), padding='same')(x)
        x = self._return_activation(x, 'HS')
        if self.include_top:
            x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x)
            x = Reshape((self.n_class,))(x)
        model = Model(inputs, x)
        return model
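
# Usage sketch: model = MobileNetV3Small((224, 224, 3), n_class=1000).build()
# returns a Keras Model; with include_top=False the final class conv/reshape
# head is skipped and the 1x1x1280 feature tensor is the output.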

def bottleneck(inputs, filters, kernel, up_dim, stride, sq, nl):
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    input_shape = K.int_shape(inputs)
    tchannel = int(up_dim)
    alpha = 1
    cchannel = int(alpha * filters)
    r = stride == 1 and input_shape[3] == filters
    # 1x1 convolution to expand the channel count
    x = conv_block(inputs, tchannel, (1, 1), (1, 1), nl)
    # depthwise separable convolution
    x = DepthwiseConv2D(kernel, strides=(stride, stride), depth_multiplier=1, padding='same')(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = return_activation(x, nl)
    # squeeze-and-excitation attention
    if sq:
        x = squeeze(x)
    # 1x1 convolution to reduce the channel count
    x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x)
    x = BatchNormalization(axis=channel_axis)(x)
    if r:
        x = Add()([x, inputs])
    return x


def squeeze(inputs):
    # squeeze-and-excitation attention unit
    input_channels = int(inputs.shape[-1])
    x = GlobalAveragePooling2D()(inputs)
    x = Dense(int(input_channels / 4))(x)
    x = Activation(relu6)(x)
    x = Dense(input_channels)(x)
    x = Activation(hard_swish)(x)
    x = Reshape((1, 1, input_channels))(x)
    x = Multiply()([inputs, x])
    return x


def conv_block(inputs, filters, kernel, strides, nl):
    # one convolution unit: Conv2D + BatchNormalization + activation
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
    x = BatchNormalization(axis=channel_axis)(x)
    return return_activation(x, nl)


def return_activation(x, nl):
    # select which activation function to apply
    if nl == 'HS':
        x = Activation(hard_swish)(x)
    if nl == 'RE':
        x = Activation(relu6)(x)
    return x


def relu6(x):
    # ReLU capped at 6
    return K.relu(x, max_value=6.0)


def hard_swish(x):
    # hard swish: x times a ReLU-based approximation of sigmoid
    return x * K.relu(x + 3.0, max_value=6.0) / 6.0

def tiny_yolo_body(inputs, num_anchors, num_classes):
    """Create the Tiny YOLOv3 model CNN body in Keras."""
    x1 = compose(
        DarknetConv2D_BN_Leaky(16, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(128, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
    x2 = compose(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
    y1 = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)
    x2 = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x2)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])
    return Model(inputs, [y1, y2])
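
# y1 is the coarse stride-32 detection head and y2 the finer stride-16 head,
# matching the {0: 32, 1: 16} grid shapes used for y_true in yolo_net above.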

@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for Convolution2D."""
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4),
                           'padding': 'valid' if kwargs.get('strides') == (2, 2) else 'same'}
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)


def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))

if __name__ == "__main__":
    crnn_ctc_equation_less()