import torch.nn as nn
import torch.nn.functional as F
from torch.nn import init


class BidirectionalLSTM(nn.Module):
    # nIn: input feature size, nHidden: LSTM hidden units, nOut: output size
    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()

        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output


class CRNN(nn.Module):
    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        # feature-map sizes in the comments below assume a 1x32x128 input
        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x33
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x34
        convRelu(6, True)  # 512x1x33

        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        # conv features
        conv = self.cnn(input)
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)        # [b, 512, w]
        conv = conv.permute(2, 0, 1)  # [w, b, c]

        # rnn features: log-probabilities over classes, as expected by CTC
        output = F.log_softmax(self.rnn(conv), dim=2)

        return output
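
# --- Illustrative sketch (not part of the original module): a quick shape
# check for CRNN. The values are assumptions for the example: nclass=37
# (36 symbols + CTC blank) and a batch of two 1x32x100 grayscale crops.
# With the conv/pool schedule above, a 100-pixel-wide input yields T = 26
# time steps, so the output is [26, 2, 37] log-probabilities for CTC.
def _crnn_shape_check():
    import torch
    model = CRNN(imgH=32, nc=1, nclass=37, nh=256)
    x = torch.randn(2, 1, 32, 100)
    y = model(x)
    print(y.size())  # expected: torch.Size([26, 2, 37])
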
class SeModule(nn.Module):
    def __init__(self, in_size, reduction=4):
        super(SeModule, self).__init__()
        expand_size = max(in_size // reduction, 8)  # squeezed channel count, floor of 8
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_size, expand_size, kernel_size=1, bias=False),
            nn.BatchNorm2d(expand_size),
            nn.ReLU(inplace=True),
            nn.Conv2d(expand_size, in_size, kernel_size=1, bias=False),
            nn.Hardsigmoid()
        )

    def forward(self, x):
        # channel-wise reweighting (squeeze-and-excite)
        return x * self.se(x)


class Block(nn.Module):
    '''expand + depthwise + pointwise'''

    def __init__(self, kernel_size, in_size, expand_size, out_size, act, se, stride):
        super(Block, self).__init__()
        self.stride = stride

        # 1x1 pointwise expansion
        self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(expand_size)
        self.act1 = act(inplace=True)

        # depthwise convolution (carries the stride)
        self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size,
                               stride=stride, padding=kernel_size // 2,
                               groups=expand_size, bias=False)
        self.bn2 = nn.BatchNorm2d(expand_size)
        self.act2 = act(inplace=True)

        self.se = SeModule(expand_size) if se else nn.Identity()

        # 1x1 pointwise projection
        self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_size)
        self.act3 = act(inplace=True)

        # shortcut path; stays None (identity) for stride 1 with matching channels
        self.skip = None
        if stride == 1 and in_size != out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_size, out_size, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_size)
            )

        if stride == 2 and in_size != out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels=in_size, out_channels=in_size, kernel_size=3,
                          groups=in_size, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(in_size),
                # note: this bias is redundant before BatchNorm
                nn.Conv2d(in_size, out_size, kernel_size=1, bias=True),
                nn.BatchNorm2d(out_size)
            )

        if stride == 2 and in_size == out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels=in_size, out_channels=out_size, kernel_size=3,
                          groups=in_size, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(out_size)
            )

    def forward(self, x):
        skip = x

        out = self.act1(self.bn1(self.conv1(x)))
        out = self.act2(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))

        if self.skip is not None:
            skip = self.skip(skip)
        return self.act3(out + skip)
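
# --- Illustrative sketch (not part of the original module): how Block's
# shortcut path resolves for each (stride, channels) case. All sizes here
# are example choices.
def _block_skip_demo():
    import torch
    x = torch.randn(1, 16, 32, 32)
    # stride 1, in == out  -> identity shortcut (self.skip is None)
    b0 = Block(3, 16, 64, 16, nn.ReLU, se=False, stride=1)
    # stride 1, in != out  -> 1x1 conv + BN projection on the shortcut
    b1 = Block(3, 16, 64, 24, nn.ReLU, se=False, stride=1)
    # stride 2, in != out  -> depthwise 3x3 downsample, then 1x1 projection
    b2 = Block(3, 16, 64, 24, nn.ReLU, se=False, stride=2)
    # stride 2, in == out  -> depthwise 3x3 downsample only
    b3 = Block(3, 16, 64, 16, nn.ReLU, se=False, stride=2)
    for blk in (b0, b1, b2, b3):
        print(blk(x).size())
    # expected: [1,16,32,32], [1,24,32,32], [1,24,16,16], [1,16,16,16]
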
class MobileNetV3_Large(nn.Module):
    def __init__(self, num_classes=1000, act=nn.Hardswish):
        super(MobileNetV3_Large, self).__init__()
        self.num_classes = num_classes

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.hs1 = act(inplace=True)

        # MobileNetV3-Large bottlenecks:
        # Block(kernel, in, expand, out, activation, SE, stride)
        self.bneck = nn.Sequential(
            Block(3, 16, 16, 16, nn.ReLU, False, 1),
            Block(3, 16, 64, 24, nn.ReLU, False, 2),
            Block(3, 24, 72, 24, nn.ReLU, False, 1),
            Block(5, 24, 72, 40, nn.ReLU, True, 2),
            Block(5, 40, 120, 40, nn.ReLU, True, 1),
            Block(5, 40, 120, 40, nn.ReLU, True, 1),
            Block(3, 40, 240, 80, act, False, 2),
            Block(3, 80, 200, 80, act, False, 1),
            Block(3, 80, 184, 80, act, False, 1),
            Block(3, 80, 184, 80, act, False, 1),
            Block(3, 80, 480, 112, act, True, 1),
            Block(3, 112, 672, 112, act, True, 1),
            Block(5, 112, 672, 160, act, True, 2),
            Block(5, 160, 672, 160, act, True, 1),
            Block(5, 160, 960, 160, act, True, 1),
        )

        self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(960)

        # MobileNetV3-Small variant (unused):
        # self.bneck = nn.Sequential(
        #     Block(3, 16, 16, 16, nn.ReLU, True, 2),
        #     Block(3, 16, 72, 24, nn.ReLU, False, 2),
        #     Block(3, 24, 88, 24, nn.ReLU, False, 1),
        #     Block(5, 24, 96, 40, act, True, 2),
        #     Block(5, 40, 240, 40, act, True, 1),
        #     Block(5, 40, 240, 40, act, True, 1),
        #     Block(5, 40, 120, 48, act, True, 1),
        #     Block(5, 48, 144, 48, act, True, 1),
        #     Block(5, 48, 288, 96, act, True, 2),
        #     Block(5, 96, 576, 96, act, True, 1),
        #     Block(5, 96, 576, 96, act, True, 1),
        # )
        # self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
        # self.bn2 = nn.BatchNorm2d(576)

        self.hs2 = act(inplace=True)

        # Classification head (unused; the BiLSTM sequence head below replaces it):
        # self.gap = nn.AdaptiveAvgPool2d(1)
        # self.linear3 = nn.Linear(576, 1280, bias=False)
        # self.bn3 = nn.BatchNorm1d(1280)
        # self.hs3 = act(inplace=True)
        # self.drop = nn.Dropout(0.2)
        # self.linear4 = nn.Linear(1280, num_classes)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)  # unused in forward
        self.rnn = nn.Sequential(
            BidirectionalLSTM(960, 256, 256),
            BidirectionalLSTM(256, 256, self.num_classes))

        self.init_params()

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.hs1(self.bn1(self.conv1(x)))
        out = self.bneck(out)
        out = self.hs2(self.bn2(self.conv2(out)))

        # the overall stride is 32, so a 32-pixel-high input collapses to H == 1
        B, C, H, W = out.shape
        assert H == 1, "the height of the feature map must be 1"
        out = out.squeeze(2)        # [b, 960, w]
        out = out.permute(2, 0, 1)  # [w, b, c]

        out = self.rnn(out)
        # note: nn.CTCLoss expects log-probabilities (F.log_softmax, as in CRNN)
        out = F.softmax(out, dim=2)
        return out


def weights_init(m):
    # applied via model.apply(); overrides init_params for Conv/BatchNorm layers
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


def get_crnn(config):
    # model = CRNN(config.MODEL.IMAGE_SIZE.H, 1, config.MODEL.NUM_CLASSES + 1, config.MODEL.NUM_HIDDEN)
    model = MobileNetV3_Large(config.MODEL.NUM_CLASSES + 1)
    model.apply(weights_init)

    return model


if __name__ == '__main__':
    from lib.utils.utils import model_info

    model = MobileNetV3_Large(1000)
    model_info(model)
    model = CRNN(32, 1, 1000, 256)
    model_info(model)
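
# --- Illustrative sketch (not part of the original module): wiring the
# [T, b, nclass] sequence output to nn.CTCLoss. The sizes are assumed example
# values. nn.CTCLoss expects log-probabilities, which is why CRNN ends in
# F.log_softmax; a model returning F.softmax would need a .log() (or
# log_softmax) before this loss.
def _ctc_loss_demo():
    import torch
    T, b, nclass, S = 26, 2, 37, 10   # time steps, batch, classes, target length
    log_probs = torch.randn(T, b, nclass).log_softmax(2)
    targets = torch.randint(1, nclass, (b, S), dtype=torch.long)  # 0 is the blank
    input_lengths = torch.full((b,), T, dtype=torch.long)
    target_lengths = torch.full((b,), S, dtype=torch.long)
    criterion = nn.CTCLoss(blank=0)
    print(criterion(log_probs, targets, input_lengths, target_lengths).item())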