import math
import time
from functools import wraps

import numpy as np
import tensorflow as tf
from keras import Input, Model
from keras.engine.base_layer import Layer
from keras.layers import Lambda, Dense, Conv2D, Reshape, GlobalAveragePooling2D, BatchNormalization, Activation, Add, \
    Multiply, DepthwiseConv2D, LeakyReLU, MaxPooling2D, UpSampling2D, Concatenate, Dropout, concatenate, Embedding, \
    LSTM, \
    Bidirectional, CuDNNLSTM, Conv1D, MaxPooling1D, GlobalMaxPooling1D, GlobalMaxPooling2D, GRU
import keras.backend as K
from keras.regularizers import l2

from click_captcha.loss import ctc_lambda_func, ctc_decode_mse_loss, CtcDecodeMseLoss, ctc_decode_mse_loss2
from click_captcha.utils import compose


def yolo_net(input_shape, anchors, num_classes, load_pretrained=True, weights_path='models/tiny_yolo_weights.h5'):
    """Create the training model for Tiny YOLOv3."""
    from click_captcha.loss import yolo_loss

    # get a new session
    # ops.reset_default_graph()
    K.clear_session()

    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
    num_anchors = len(anchors)

    y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5))
              for l in range(2)]

    model_body = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
    print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        model_body.load_weights(weights_path)
        print('Load weights {}.'.format(weights_path))

    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 1.})(
        [*model_body.output, *y_true])
    model = Model([model_body.input, *y_true], model_loss)
    model.summary(120)
    return model


def mobile_net(input_shape, output_shape=5710):
    model = MobileNetV3Small(input_shape, output_shape).build()
    model.summary()
    return model


def cnn_net(input_shape, output_shape=5710):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    conv = Conv2D(256, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(256, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)

    dense = Dense(128, activation="relu")(rl)
    drop = Dropout(0.2)(dense)
    dense = Dense(output_shape, activation="softmax")(drop)
    x = Reshape((output_shape,))(dense)

    model = Model(_input, x)
    model.summary()
    return model
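
# Illustrative usage sketch for cnn_net (not part of the original code). With three 2x2
# poolings followed by two valid 3x3 convolutions, a 40x40 input collapses to 1x1 before
# the Dense head, which is what the final Reshape((output_shape,)) expects. The 40x40
# input size and the random batch below are assumptions for demonstration only.
def _example_train_cnn_net():
    model = cnn_net((40, 40, 3), output_shape=5710)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    x = np.random.rand(8, 40, 40, 3).astype('float32')
    y = np.zeros((8, 5710), dtype='float32')
    y[np.arange(8), np.random.randint(0, 5710, 8)] = 1.0  # one-hot character labels
    model.fit(x, y, batch_size=4, epochs=1)
    return model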
def cnn_net_small(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    conv = Conv2D(128, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(128, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)

    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)

    model = Model(_input, x)
    model.summary()
    return model


def cnn_net_tiny(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    conv = Conv2D(64, (3, 3))(down2_pool)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(64, (3, 3))(rl)
    bn = BatchNormalization()(conv)
    rl = LeakyReLU(alpha=0.1)(bn)

    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)

    #
    # dense = Dense(16, activation="relu")(rl)
    # drop = Dropout(0.2)(dense)
    # dense = Dense(output_shape, activation="softmax")(drop)
    # drop = Dropout(0.2)(dense)
    # x = Reshape((output_shape,))(drop)

    model = Model(_input, x)
    model.summary()
    return model
def cnn_net_tiny_dropout(input_shape, output_shape=6270):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = Dropout(0.2)(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = Dropout(0.2)(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = Dropout(0.2)(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = Dropout(0.2)(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = Dropout(0.2)(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = Dropout(0.2)(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    conv = Conv2D(64, (3, 3))(down2_pool)
    bn = Dropout(0.2)(conv)
    rl = LeakyReLU(alpha=0.1)(bn)
    conv = Conv2D(64, (3, 3))(rl)
    bn = Dropout(0.2)(conv)
    rl = LeakyReLU(alpha=0.1)(bn)

    conv = Conv2D(output_shape, (1, 1), activation='softmax')(rl)
    pool = GlobalAveragePooling2D()(conv)
    x = Reshape((output_shape,))(pool)

    #
    # dense = Dense(16, activation="relu")(rl)
    # drop = Dropout(0.2)(dense)
    # dense = Dense(output_shape, activation="softmax")(drop)
    # drop = Dropout(0.2)(dense)
    # x = Reshape((output_shape,))(drop)

    model = Model(_input, x)
    model.summary()
    return model


def cnn_net_drag(input_shape, output_shape=260):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(16, (3, 3), use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(32, (3, 3), use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(64, (3, 3), use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    down3 = Conv2D(64, (3, 3), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (3, 3), use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)

    gap = GlobalAveragePooling2D()(down3_pool)
    dense = Dense(32, activation="relu")(gap)
    drop = Dropout(0.2)(dense)
    dense = Dense(output_shape, activation="softmax")(drop)

    model = Model(_input, dense)
    model.summary()
    return model
def u_net_drag(input_shape, output_shape=260, cls_num=2):
    inputs = Input(shape=input_shape)
    use_bias = False

    # 128
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    # 64
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    # 32
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)

    # 16
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)

    # 32
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)

    # 64
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)

    # 128
    up1 = UpSampling2D((2, 2))(up2)
    # layer-level concatenate keeps the functional graph valid (K.concatenate would not)
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)

    classify = Conv2D(1, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)

    model = Model(inputs=inputs, outputs=classify)
    model.summary(line_length=100)
    return model


def lstm_phrase(input_shape, output_shape=1):
    # input_shape is expected to be (sequence_length, vocab_size); Input needs a shape tuple
    inputs = Input(shape=(input_shape[0],))
    x = Embedding(input_shape[1] + 1, 16, input_length=input_shape[0])(inputs)
    # x = Dropout(0.2)(x)
    x = Bidirectional(LSTM(32))(x)
    # x = Dropout(0.2)(x)
    x = Dense(16)(x)
    x = Dense(output_shape, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=x)
    model.summary(line_length=100)
    return model


def text_cnn_phrase(input_shape, output_shape=1):
    # input_shape is expected to be (sequence_length, vocab_size)
    inputs = Input(shape=(input_shape[0],))
    x = Embedding(input_shape[1] + 1, 50, input_length=input_shape[0])(inputs)
    x1 = Conv1D(64, 3, activation="relu", padding="same")(x)
    x1 = GlobalMaxPooling1D()(x1)
    x2 = Conv1D(64, 4, activation="relu", padding="same")(x)
    x2 = GlobalMaxPooling1D()(x2)
    x3 = Conv1D(64, 5, activation="relu", padding="same")(x)
    x3 = GlobalMaxPooling1D()(x3)
    x = Concatenate()([x1, x2, x3])
    x = Dense(output_shape, activation="sigmoid")(x)
    model = Model(inputs=inputs, outputs=x)
    model.summary(line_length=100)
    return model


def siamese_net(input_shape, output_shape=2):
    input_image_1 = Input(shape=input_shape, name="input_1")
    input_image_2 = Input(shape=input_shape, name="input_2")
    input_init = Input(shape=input_shape, name="input3")

    model = mobile_net_v3_tiny(input_init, n_class=500)
    model1 = model(input_image_1)
    model2 = model(input_image_2)

    l1_distance_layer = Lambda(lambda tensors: K.square(tensors[0] - tensors[1]))
    l1_distance = l1_distance_layer([model1, model2])

    out = Dense(100, activation='relu')(l1_distance)
    out = Dense(output_shape, activation='softmax', name='output')(out)

    model = Model([input_image_1, input_image_2], out)
    model.summary()
    return model
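
# Illustrative usage sketch for siamese_net (not part of the original code). The 64x64 RGB
# crop size and the hypothetical weights path are assumptions; the network outputs a
# 2-class softmax over the pair.
def _example_siamese_predict(left_images, right_images):
    model = siamese_net((64, 64, 3), output_shape=2)
    # model.load_weights('models/siamese.h5')  # hypothetical weights file
    probs = model.predict([left_images, right_images])  # shape (n, 2)
    return probs.argmax(axis=-1)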
def crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    down3 = Conv2D(64, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)

    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(32, return_sequences=True))(sq)
    x = Bidirectional(GRU(32, return_sequences=True))(x)
    x = Dense(class_num, activation='softmax')(x)

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model


def crnn_ctc_equation_large(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)

    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    # x = Dropout(rate=0.2)(x)
    x = Dense(class_num, activation='softmax')(x)  # class posteriors expected by the CTC loss/decoder

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model
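
# Illustrative training wiring for the CTC models above (a sketch, not part of the original
# code; the batch data, label padding and the 21-step estimate are assumptions). Because the
# Lambda layer named 'ctc' already outputs the per-sample CTC loss, the Keras loss simply
# passes predictions through.
def _example_train_crnn_ctc(batch_images, batch_labels, label_lengths):
    model = crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=True)
    model.compile(optimizer='adam', loss={'ctc': lambda y_true, y_pred: y_pred})

    batch_size = batch_images.shape[0]
    # assumed time-step count: 192 // 8 = 24 columns, minus 3 lost to the (4, 4) valid conv
    input_length = np.full((batch_size, 1), 21, dtype='int64')
    dummy_targets = np.zeros((batch_size, 1))

    model.fit([batch_images, batch_labels, input_length, label_lengths],
              dummy_targets, batch_size=batch_size, epochs=1)
    return model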
def crnn_ctc_equation_loss(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    down3 = Conv2D(128, (4, 4), use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)

    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down3)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(rate=0.2)(x)
    x = Dense(class_num, activation='softmax')(x)

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        # loss_out_2 = CtcDecodeMseLoss(name='ctc')([x, labels, input_length, label_length])
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary(130)
    return model
def crnn_ctc_equation_less(input_shape=(32, 192, 3), class_num=32, is_train=True):
    _input = Input(input_shape)
    use_bias = False

    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(_input)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down0)
    down0 = BatchNormalization()(down0)
    down0 = LeakyReLU(alpha=0.1)(down0)
    down0_pool = MaxPooling2D((2, 2), strides=(2, 2))(down0)

    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down0_pool)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    down3 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)

    down4 = Conv2D(128, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    down4 = BatchNormalization()(down4)
    down4 = LeakyReLU(alpha=0.1)(down4)
    down4_pool = MaxPooling2D((2, 2), strides=(1, 1))(down4)

    sq = Lambda(lambda x: K.squeeze(x, axis=1))(down4_pool)
    x = Bidirectional(GRU(64, return_sequences=True))(sq)
    x = Bidirectional(GRU(64, return_sequences=True))(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(rate=0.3)(x)
    x = Dense(class_num, activation='softmax')(x)

    if not is_train:
        model = Model(inputs=_input, outputs=x)
    else:
        labels = Input(name='the_labels', shape=[None], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        loss_out_1 = Lambda(ctc_lambda_func, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out_2 = Lambda(ctc_decode_mse_loss2, output_shape=(1,))([x, labels, input_length, label_length])
        loss_out = Add(name='ctc')([loss_out_1, loss_out_2])
        model = Model(inputs=[_input, labels, input_length, label_length], outputs=loss_out)
    model.summary()
    return model


def u_net_denoise(input_shape=(32, 192, 3), class_num=3):
    inputs = Input(shape=input_shape)
    use_bias = False

    # 128
    down1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(inputs)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(down1)
    down1 = BatchNormalization()(down1)
    down1 = LeakyReLU(alpha=0.1)(down1)
    down1_pool = MaxPooling2D((2, 2), strides=(2, 2))(down1)

    # 64
    down2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(down1_pool)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(down2)
    down2 = BatchNormalization()(down2)
    down2 = LeakyReLU(alpha=0.1)(down2)
    down2_pool = MaxPooling2D((2, 2), strides=(2, 2))(down2)

    # 32
    down3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down2_pool)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(down3)
    down3 = BatchNormalization()(down3)
    down3 = LeakyReLU(alpha=0.1)(down3)
    down3_pool = MaxPooling2D((2, 2), strides=(2, 2))(down3)

    # 16
    center = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(down3_pool)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)
    center = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(center)
    center = BatchNormalization()(center)
    center = LeakyReLU(alpha=0.1)(center)

    # 32
    up3 = UpSampling2D((2, 2))(center)
    up3 = concatenate([down3, up3], axis=3)
    up3 = Conv2D(64, (3, 3), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)
    up3 = Conv2D(64, (1, 1), padding='same', use_bias=use_bias)(up3)
    up3 = BatchNormalization()(up3)
    up3 = LeakyReLU(alpha=0.1)(up3)

    # 64
    up2 = UpSampling2D((2, 2))(up3)
    up2 = concatenate([down2, up2], axis=3)
    up2 = Conv2D(32, (3, 3), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)
    up2 = Conv2D(32, (1, 1), padding='same', use_bias=use_bias)(up2)
    up2 = BatchNormalization()(up2)
    up2 = LeakyReLU(alpha=0.1)(up2)

    # 128
    up1 = UpSampling2D((2, 2))(up2)
    # layer-level concatenate keeps the functional graph valid (K.concatenate would not)
    up1 = concatenate([down1, up1], axis=3)
    up1 = Conv2D(16, (3, 3), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)
    up1 = Conv2D(16, (1, 1), padding='same', use_bias=use_bias)(up1)
    up1 = BatchNormalization()(up1)
    up1 = LeakyReLU(alpha=0.1)(up1)

    classify = Conv2D(class_num, (1, 1), activation='sigmoid')(up1)
    # classify = Dense(cls_num, activation="softmax")(up1)

    model = Model(inputs=inputs, outputs=classify)
    # model.summary()
    return model


def ctc_decode(image, model):
    x = model.output
    input_length = Input(batch_shape=[None], dtype='int32')
    decoded = K.ctc_decode(x, input_length=input_length * K.shape(x)[1], greedy=False, beam_width=100)
    decode = K.function([model.input, input_length], [decoded[0][0]])
    # decoded[0][0] is the best path as a dense tensor of label indices, padded with -1
    out = decode([image, np.ones(image.shape[0])])
    # print(len(out))
    # print(len(out[0]))
    # print(len(out[0][0]))
    # print(out[0][0])
    # print(len(out[0][0][0]))
    # print(out[0][0][0])
    # print(out[0][0][0][0].shape)
    # print(out[0][0][0][0])
    return out
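
# Illustrative decoding sketch (not in the original code): run the inference variant of the
# CRNN and decode it with ctc_decode above. The input shape and the hypothetical weights
# path are assumptions.
def _example_decode_equation(batch_images):
    pred_model = crnn_ctc_equation(input_shape=(32, 192, 3), class_num=32, is_train=False)
    # pred_model.load_weights('models/equation.h5')  # hypothetical weights file
    out = ctc_decode(batch_images, pred_model)
    return out[0]  # decoded label indices per sample, padded with -1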
class Vgg16:
    def __init__(self, vgg16_npy_path="./vgg16.npy"):
        if vgg16_npy_path is None:
            # path = inspect.getfile(Vgg16)
            # path = os.path.abspath(os.path.join(path, os.pardir))
            # path = os.path.join(path, "vgg16.npy")
            # vgg16_npy_path = path
            # print(path)
            print("there is no vgg_16_npy!")
            raise ValueError("vgg16_npy_path is required")

        self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
        print("npy file loaded")

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG.

        :param bgr: image batch [batch, height, width, 3], values scaled to [0, 1]
        """
        start_time = time.time()
        print("build model started")
        bgr_scaled = bgr * 255.0

        # Convert RGB to BGR
        # red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
        # print("red", red)
        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        # bgr = tf.concat(axis=3, values=[
        #     blue - VGG_MEAN[0],
        #     green - VGG_MEAN[1],
        #     red - VGG_MEAN[2],
        # ])
        # assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

        self.conv1_1 = self.conv_layer(bgr_scaled, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, "fc6")
        # assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)

        self.fc8 = self.fc_layer(self.relu7, "fc8")
        self.prob = tf.nn.softmax(self.fc8, name="prob")

        # self.data_dict = None
        print(("build model finished: %ds" % (time.time() - start_time)))
        return self.prob

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
class Vgg19:
    def __init__(self, vgg19_npy_path=None):
        if vgg19_npy_path is None:
            print("there is no vgg_19_npy!")
            raise ValueError("vgg19_npy_path is required")

        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()

    def build(self, bgr):
        """
        Load variables from the npy file to build the VGG.

        :param bgr: image batch [batch, height, width, 3], values scaled to [0, 1]
        """
        bgr = bgr * 255.0
        # bgr = bgr - np.array(VGG_MEAN).reshape((1, 1, 1, 3))

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
        self.pool3 = self.max_pool(self.conv3_4, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
        self.pool4 = self.max_pool(self.conv4_4, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
        self.pool5 = self.max_pool(self.conv5_4, 'pool5')

    def avg_pool(self, bottom, name):
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        with tf.compat.v1.variable_scope(name):
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer. Note that the '+' operation automatically
            # broadcasts the biases.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        return tf.constant(self.data_dict[name][0], name="weights")
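
# Illustrative sketch for Vgg16 (not in the original code). It assumes a local ./vgg16.npy
# weights file, a 224x224 input (required by the fc6 weights), and TF1-style graph execution.
def _example_vgg16_probs(batch):
    if hasattr(tf.compat.v1, 'disable_eager_execution'):
        tf.compat.v1.disable_eager_execution()  # needed when running under TF 2.x
    images = tf.compat.v1.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = Vgg16("./vgg16.npy")
    prob = vgg.build(images)
    with tf.compat.v1.Session() as sess:
        return sess.run(prob, feed_dict={images: batch})  # batch scaled to [0, 1]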
def my_conv(inputs, output_shape=100):
    x = Conv2D(64, (3, 3), padding='same')(inputs)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(64, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = Conv2D(128, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(128, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = Conv2D(256, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Conv2D(256, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = Conv2D(output_shape, (1, 1), activation='relu')(x)
    x = MaxPooling2D((5, 5))(x)
    x = Reshape((output_shape,))(x)

    model = Model(inputs, x)
    return model


def mobile_net_v3_tiny(inputs, n_class=1000):
    # inputs = Input(shape)

    # 224,224,3 -> 112,112,16
    x = conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS')

    # 112,112,16 -> 56,56,16
    x = bottleneck(x, 16, (3, 3), up_dim=16, stride=2, sq=True, nl='RE')

    # 56,56,16 -> 28,28,24
    x = bottleneck(x, 24, (3, 3), up_dim=32, stride=2, sq=False, nl='RE')
    x = bottleneck(x, 24, (3, 3), up_dim=32, stride=1, sq=False, nl='RE')

    # 28,28,24 -> 14,14,40
    x = bottleneck(x, 40, (5, 5), up_dim=64, stride=2, sq=True, nl='HS')
    x = bottleneck(x, 40, (5, 5), up_dim=64, stride=1, sq=True, nl='HS')

    # 14,14,40 -> 14,14,48
    x = bottleneck(x, 48, (5, 5), up_dim=128, stride=1, sq=True, nl='HS')
    x = bottleneck(x, 48, (5, 5), up_dim=128, stride=1, sq=True, nl='HS')

    x = conv_block(x, 256, (1, 1), strides=(1, 1), nl='HS')
    x = GlobalAveragePooling2D()(x)
    x = Reshape((1, 1, 256))(x)

    x = Conv2D(256, (1, 1), padding='same')(x)
    x = return_activation(x, 'HS')

    x = Conv2D(n_class, (1, 1), padding='same', activation='relu')(x)
    x = Reshape((n_class,))(x)

    model = Model(inputs, x)
    return model
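
# Illustrative sketch: using the tiny MobileNetV3 as a standalone classifier (not part of
# the original code; the 64x64 input size and 500 classes are assumptions).
def _example_mobile_net_v3_tiny():
    inp = Input(shape=(64, 64, 3))
    model = mobile_net_v3_tiny(inp, n_class=500)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model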
class MobileNetBase:
    def __init__(self, shape, n_class, alpha=1.0):
        """Init

        # Arguments
            shape: An integer or tuple/list of 3 integers, shape of input tensor.
            n_class: Integer, number of classes.
            alpha: Integer, width multiplier.
        """
        self.shape = shape
        self.n_class = n_class
        self.alpha = alpha

    def _relu6(self, x):
        """Relu 6"""
        return K.relu(x, max_value=6.0)

    def _hard_swish(self, x):
        """Hard swish"""
        return x * K.relu(x + 3.0, max_value=6.0) / 6.0

    def _return_activation(self, x, nl):
        """Activation choice.

        # Arguments
            x: Tensor, input tensor of conv layer.
            nl: String, nonlinearity activation type.

        # Returns
            Output tensor.
        """
        if nl == 'HS':
            x = Activation(self._hard_swish)(x)
        if nl == 'RE':
            x = Activation(self._relu6)(x)
        return x

    def _conv_block(self, inputs, filters, kernel, strides, nl):
        """Convolution Block

        This function defines a 2D convolution operation with BN and activation.

        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            strides: An integer or tuple/list of 2 integers, specifying the
                strides of the convolution along the width and height.
                Can be a single integer to specify the same value for
                all spatial dimensions.
            nl: String, nonlinearity activation type.

        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

        x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
        x = BatchNormalization(axis=channel_axis)(x)
        return self._return_activation(x, nl)

    def _squeeze(self, inputs):
        """Squeeze and Excitation.

        This function defines a squeeze structure.

        # Arguments
            inputs: Tensor, input tensor of conv layer.
        """
        input_channels = int(inputs.shape[-1])

        x = GlobalAveragePooling2D()(inputs)
        x = Dense(input_channels, activation='relu')(x)
        x = Dense(input_channels, activation='hard_sigmoid')(x)
        x = Reshape((1, 1, input_channels))(x)
        x = Multiply()([inputs, x])
        return x

    def _bottleneck(self, inputs, filters, kernel, e, s, squeeze, nl):
        """Bottleneck

        This function defines a basic bottleneck structure.

        # Arguments
            inputs: Tensor, input tensor of conv layer.
            filters: Integer, the dimensionality of the output space.
            kernel: An integer or tuple/list of 2 integers, specifying the
                width and height of the 2D convolution window.
            e: Integer, expansion factor, always applied to the input size.
            s: An integer or tuple/list of 2 integers, specifying the strides
                of the convolution along the width and height. Can be a single
                integer to specify the same value for all spatial dimensions.
            squeeze: Boolean, whether to use the squeeze-and-excitation block.
            nl: String, nonlinearity activation type.

        # Returns
            Output tensor.
        """
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
        input_shape = K.int_shape(inputs)

        tchannel = int(e)
        cchannel = int(self.alpha * filters)

        r = s == 1 and input_shape[3] == filters

        x = self._conv_block(inputs, tchannel, (1, 1), (1, 1), nl)

        x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)
        x = self._return_activation(x, nl)

        if squeeze:
            x = self._squeeze(x)

        x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x)
        x = BatchNormalization(axis=channel_axis)(x)

        if r:
            x = Add()([x, inputs])

        return x

    def build(self):
        pass


class MobileNetV3Small(MobileNetBase):
    def __init__(self, shape, n_class, alpha=1.0, include_top=True):
        """Init.

        # Arguments
            shape: An integer or tuple/list of 3 integers, shape of input tensor.
            n_class: Integer, number of classes.
            alpha: Integer, width multiplier.
            include_top: Boolean, whether to include the classification layer.

        # Returns
            MobileNetV3 model.
        """
        super(MobileNetV3Small, self).__init__(shape, n_class, alpha)
        self.include_top = include_top

    def build(self):
        """Build MobileNetV3 Small.

        # Returns
            model: Model, model.
        """
""" inputs = Input(shape=self.shape) x = self._conv_block(inputs, 16, (3, 3), strides=(2, 2), nl='HS') x = self._bottleneck(x, 16, (3, 3), e=16, s=2, squeeze=True, nl='RE') x = self._bottleneck(x, 24, (3, 3), e=72, s=2, squeeze=False, nl='RE') x = self._bottleneck(x, 24, (3, 3), e=88, s=1, squeeze=False, nl='RE') x = self._bottleneck(x, 40, (5, 5), e=96, s=2, squeeze=True, nl='HS') x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS') x = self._bottleneck(x, 40, (5, 5), e=240, s=1, squeeze=True, nl='HS') x = self._bottleneck(x, 48, (5, 5), e=120, s=1, squeeze=True, nl='HS') x = self._bottleneck(x, 48, (5, 5), e=144, s=1, squeeze=True, nl='HS') x = self._bottleneck(x, 96, (5, 5), e=288, s=2, squeeze=True, nl='HS') x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS') x = self._bottleneck(x, 96, (5, 5), e=576, s=1, squeeze=True, nl='HS') x = self._conv_block(x, 576, (1, 1), strides=(1, 1), nl='HS') x = GlobalAveragePooling2D()(x) x = Reshape((1, 1, 576))(x) x = Conv2D(1280, (1, 1), padding='same')(x) x = self._return_activation(x, 'HS') if self.include_top: x = Conv2D(self.n_class, (1, 1), padding='same', activation='softmax')(x) x = Reshape((self.n_class,))(x) model = Model(inputs, x) return model def bottleneck(inputs, filters, kernel, up_dim, stride, sq, nl): channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 input_shape = K.int_shape(inputs) tchannel = int(up_dim) alpha = 1 cchannel = int(alpha * filters) r = stride == 1 and input_shape[3] == filters # 1x1卷积调整通道数,通道数上升 x = conv_block(inputs, tchannel, (1, 1), (1, 1), nl) # 进行3x3深度可分离卷积 x = DepthwiseConv2D(kernel, strides=(stride, stride), depth_multiplier=1, padding='same')(x) x = BatchNormalization(axis=channel_axis)(x) x = return_activation(x, nl) # 引入注意力机制 if sq: x = squeeze(x) # 下降通道数 x = Conv2D(cchannel, (1, 1), strides=(1, 1), padding='same')(x) x = BatchNormalization(axis=channel_axis)(x) if r: x = Add()([x, inputs]) return x def squeeze(inputs): # 注意力机制单元 input_channels = int(inputs.shape[-1]) x = GlobalAveragePooling2D()(inputs) x = Dense(int(input_channels/4))(x) x = Activation(relu6)(x) x = Dense(input_channels)(x) x = Activation(hard_swish)(x) x = Reshape((1, 1, input_channels))(x) x = Multiply()([inputs, x]) return x def conv_block(inputs, filters, kernel, strides, nl): # 一个卷积单元,也就是conv2d + batchnormalization + activation channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs) x = BatchNormalization(axis=channel_axis)(x) return return_activation(x, nl) def return_activation(x, nl): # 用于判断使用哪个激活函数 if nl == 'HS': x = Activation(hard_swish)(x) if nl == 'RE': x = Activation(relu6)(x) return x def relu6(x): # relu函数 return K.relu(x, max_value=6.0) def hard_swish(x): # 利用relu函数乘上x模拟sigmoid return x * K.relu(x + 3.0, max_value=6.0) / 6.0 def tiny_yolo_body(inputs, num_anchors, num_classes): """Create Tiny YOLO_v3 model CNN body in keras.""" x1 = compose( DarknetConv2D_BN_Leaky(16, (3, 3), ), MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), DarknetConv2D_BN_Leaky(32, (3, 3)), MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), DarknetConv2D_BN_Leaky(64, (3, 3)), MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), DarknetConv2D_BN_Leaky(128, (3, 3)), MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs) x2 = compose( MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), DarknetConv2D_BN_Leaky(512, 
def tiny_yolo_body(inputs, num_anchors, num_classes):
    """Create Tiny YOLOv3 model CNN body in Keras."""
    x1 = compose(
        DarknetConv2D_BN_Leaky(16, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(32, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(64, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(128, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
    x2 = compose(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
    y1 = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    x2 = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(x2)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])

    return Model(inputs, [y1, y2])


@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for Convolution2D."""
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4),
                           'padding': 'valid' if kwargs.get('strides') == (2, 2) else 'same'}
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)


def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))


if __name__ == "__main__":
    crnn_ctc_equation_less()
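

# Illustrative compile sketch for yolo_net (not part of the original code; the 416x416 input
# size is an assumption and `anchors` is expected to be the 6 Tiny-YOLOv3 anchor boxes). As
# with the CTC models, the 'yolo_loss' Lambda already outputs the loss, so the Keras loss
# just passes it through.
def _example_compile_yolo(anchors, num_classes):
    model = yolo_net((416, 416), anchors, num_classes, load_pretrained=False)
    model.compile(optimizer='adam', loss={'yolo_loss': lambda y_true, y_pred: y_pred})
    return model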