import torch.nn as nn
import torch.nn.functional as F


class BidirectionalLSTM(nn.Module):
    # nIn: input feature size, nHidden: LSTM hidden size, nOut: output size
    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)

    def forward(self, input):
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)

        return output
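# Usage sketch (illustrative, not part of the original API): the module maps a
# sequence of shape [T, B, nIn] to [T, B, nOut], concatenating both LSTM
# directions before the linear projection, e.g. (assuming `import torch`):
#   layer = BidirectionalLSTM(nIn=512, nHidden=256, nOut=256)
#   y = layer(torch.randn(26, 4, 512))   # -> [26, 4, 256]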
class CRNN(nn.Module):

    def __init__(self, imgH, nc, nclass, nh, n_rnn=2, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'

        ks = [3, 3, 3, 3, 3, 3, 2]
        ps = [1, 1, 1, 1, 1, 1, 0]
        ss = [1, 1, 1, 1, 1, 1, 1]
        nm = [64, 128, 256, 256, 512, 512, 512]

        cnn = nn.Sequential()

        def convRelu(i, batchNormalization=False):
            nIn = nc if i == 0 else nm[i - 1]
            nOut = nm[i]
            cnn.add_module('conv{0}'.format(i),
                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
            if batchNormalization:
                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
            if leakyRelu:
                cnn.add_module('relu{0}'.format(i),
                               nn.LeakyReLU(0.2, inplace=True))
            else:
                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))

        convRelu(0)
        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
        convRelu(1)
        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
        convRelu(2, True)
        convRelu(3)
        cnn.add_module('pooling{0}'.format(2),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
        convRelu(4, True)
        convRelu(5)
        cnn.add_module('pooling{0}'.format(3),
                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
        convRelu(6, True)  # 512x1x16

        self.cnn = cnn
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))

    def forward(self, input):
        # conv features
        conv = self.cnn(input)
        print('size', conv.size())
        b, c, h, w = conv.size()
        print("b, c, h, w :", b, c, h, w)
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)  # b * 512 * width
        print('size', conv.size())
        conv = conv.permute(2, 0, 1)  # [w, b, c]
        print('size', conv.size())

        output = F.log_softmax(self.rnn(conv), dim=2)
        print('size', output.size())

        return output
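# Usage sketch (illustrative; assumes a 32-pixel-high grayscale input, nc=1):
#   model = CRNN(imgH=32, nc=1, nclass=37, nh=256)
#   logp = model(torch.randn(4, 1, 32, 160))   # -> [T, 4, 37] log-probabilities
# The [T, B, nclass] log-softmax output is the layout nn.CTCLoss expects.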
class SeModule(nn.Module):
    def __init__(self, in_size, reduction=4):
        super(SeModule, self).__init__()
        expand_size = max(in_size // reduction, 8)
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_size, expand_size, kernel_size=1, bias=False),
            nn.BatchNorm2d(expand_size),
            nn.ReLU(inplace=True),
            nn.Conv2d(expand_size, in_size, kernel_size=1, bias=False),
            nn.Hardsigmoid()
        )

    def forward(self, x):
        return x * self.se(x)
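# The SE branch produces a per-channel gate of shape [B, C, 1, 1] (global average
# pool -> bottleneck -> Hardsigmoid) that rescales the input feature map, e.g.:
#   se = SeModule(in_size=40)
#   y = se(torch.randn(2, 40, 8, 40))   # same shape, channel-wise recalibrated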
class Block(nn.Module):
    '''expand + depthwise + pointwise'''

    def __init__(self, kernel_size, in_size, expand_size, out_size, act, se, stride):
        super(Block, self).__init__()
        self.stride = stride

        self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(expand_size)
        self.act1 = act(inplace=True)

        self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride,
                               padding=kernel_size // 2, groups=expand_size, bias=False)
        self.bn2 = nn.BatchNorm2d(expand_size)
        self.act2 = act(inplace=True)

        self.se = SeModule(expand_size) if se else nn.Identity()

        self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_size)
        self.act3 = act(inplace=True)

        self.skip = None
        if stride == 1 and in_size != out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_size, out_size, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_size)
            )

        if stride == 2 and in_size != out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels=in_size, out_channels=in_size, kernel_size=3, groups=in_size,
                          stride=2, padding=1, bias=False),
                nn.BatchNorm2d(in_size),
                nn.Conv2d(in_size, out_size, kernel_size=1, bias=True),
                nn.BatchNorm2d(out_size)
            )

        if stride == 2 and in_size == out_size:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels=in_size, out_channels=out_size, kernel_size=3, groups=in_size,
                          stride=2, padding=1, bias=False),
                nn.BatchNorm2d(out_size)
            )

    def forward(self, x):
        skip = x

        out = self.act1(self.bn1(self.conv1(x)))
        out = self.act2(self.bn2(self.conv2(out)))
        out = self.se(out)
        out = self.bn3(self.conv3(out))

        if self.skip is not None:
            skip = self.skip(skip)
        return self.act3(out + skip)
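# Note: the reference MobileNetV3 only adds a residual when stride == 1 and
# in_size == out_size; this Block always adds a shortcut, using a strided
# depthwise / 1x1 projection whenever the shapes differ, e.g.:
#   blk = Block(kernel_size=3, in_size=16, expand_size=64, out_size=24,
#               act=nn.ReLU, se=False, stride=2)
#   y = blk(torch.randn(2, 16, 32, 32))   # -> [2, 24, 16, 16]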
from torch.nn import init


class MobileNetV3_Large(nn.Module):
    def __init__(self, num_classes=1000, act=nn.Hardswish):
        super(MobileNetV3_Large, self).__init__()
        self.num_classes = num_classes

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.hs1 = act(inplace=True)

        self.bneck = nn.Sequential(
            Block(3, 16, 16, 16, nn.ReLU, False, 1),
            Block(3, 16, 64, 24, nn.ReLU, False, 2),
            Block(3, 24, 72, 24, nn.ReLU, False, 1),
            Block(5, 24, 72, 40, nn.ReLU, True, 2),
            Block(5, 40, 120, 40, nn.ReLU, True, 1),
            Block(5, 40, 120, 40, nn.ReLU, True, 1),
            Block(3, 40, 240, 80, act, False, 2),
            Block(3, 80, 200, 80, act, False, 1),
            Block(3, 80, 184, 80, act, False, 1),
            Block(3, 80, 184, 80, act, False, 1),
            Block(3, 80, 480, 112, act, True, 1),
            Block(3, 112, 672, 112, act, True, 1),
            Block(5, 112, 672, 160, act, True, 2),
            Block(5, 160, 672, 160, act, True, 1),
            Block(5, 160, 960, 160, act, True, 1)
            # Block(3, 160, 960, 160, act, True, 1),
        )

        self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn2 = nn.BatchNorm2d(960)

        # Alternative (smaller) backbone configuration kept commented out:
        # self.bneck = nn.Sequential(
        #     Block(3, 16, 16, 16, nn.ReLU, True, 2),
        #     Block(3, 16, 72, 24, nn.ReLU, False, 2),
        #     Block(3, 24, 88, 24, nn.ReLU, False, 1),
        #     Block(5, 24, 96, 40, act, True, 2),
        #     Block(5, 40, 240, 40, act, True, 1),
        #     Block(5, 40, 240, 40, act, True, 1),
        #     Block(5, 40, 120, 48, act, True, 1),
        #     Block(5, 48, 144, 48, act, True, 1),
        #     Block(5, 48, 288, 96, act, True, 2),
        #     Block(5, 96, 576, 96, act, True, 1),
        #     Block(5, 96, 576, 96, act, True, 1),
        # )
        # self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False)
        # self.bn2 = nn.BatchNorm2d(576)

        self.hs2 = act(inplace=True)

        # Classification-head layers kept commented out (unused in this CRNN-style model):
        # self.gap = nn.AdaptiveAvgPool2d(1)
        # self.gap = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # self.linear3 = nn.Linear(576, 1280, bias=False)
        # self.linear3 = nn.Linear(1920, num_classes, bias=False)
        # self.bn3 = nn.BatchNorm1d(1280)
        # self.bn3 = nn.BatchNorm1d(10)
        # self.hs3 = act(inplace=True)
        # self.drop = nn.Dropout(0.2)
        # self.linear4 = nn.Linear(1280, num_classes)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.rnn = nn.Sequential(
            BidirectionalLSTM(960, 256, 256),
            BidirectionalLSTM(256, 256, self.num_classes))

        self.init_params()

    def init_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        out = self.hs1(self.bn1(self.conv1(x)))
        # out = self.conv1(x)
        # print('size1', out.size())
        out = self.bneck(out)
        # print('size2', out.size())
        out = self.hs2(self.bn2(self.conv2(out)))
        # out = self.conv2(out)
        # print('size3', out.size())
        # out = out.permute(0, 3, 1, 2)
        # out = self.pool(out)

        B, C, H, W = out.shape
        assert H == 1, "the height of the conv feature map must be 1"
        out = out.squeeze(2)          # [b, c, w]
        # print('size', out.size())
        out = out.permute(2, 0, 1)    # [w, b, c]
        # out = out.permute(0, 2, 1)  # [b, w, c]
        # print('size', out.size())
        out = self.rnn(out)
        # print('size', out.size())

        # Remnants of the original classification head, kept commented out:
        # out = self.gap(out).flatten(2)
        # out = out.flatten(2)
        # out_size = out.size(1)
        # out = self.gap(out)
        # out = self.drop(self.hs3(self.bn3(self.linear3(out))))
        # out = self.drop(self.hs3(self.bn3(nn.BatchNorm1d(out_size)(out))))
        # return self.linear4(out)

        # NOTE: nn.CTCLoss expects log-probabilities (F.log_softmax, as in
        # CRNN.forward above); plain softmax is what this code returns as written.
        # return F.log_softmax(out, dim=2)
        out = F.softmax(out, dim=2)
        return out
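# Shape note (derived from the strides above): conv1 plus the four stride-2
# bottleneck blocks downsample both height and width by 32, so the H == 1
# assertion in forward() requires 32-pixel-high images, and an input of width W
# yields W // 32 time steps for the recurrent head, e.g.:
#   net = MobileNetV3_Large(num_classes=37)
#   probs = net(torch.randn(2, 3, 32, 320))   # -> [10, 2, 37] after softmax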
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
# def get_crnn(config):
#
#     model = CRNN(config.MODEL.IMAGE_SIZE.H, 1, config.MODEL.NUM_CLASSES + 1, config.MODEL.NUM_HIDDEN)
#     model.apply(weights_init)
#
#     return model


def get_crnn(config):
    # model = CRNN(config.MODEL.IMAGE_SIZE.H, 1, config.MODEL.NUM_CLASSES + 1, config.MODEL.NUM_HIDDEN)
    model = MobileNetV3_Large(config.MODEL.NUM_CLASSES + 1)
    model.apply(weights_init)

    return model
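# Usage sketch with a hypothetical config object: the active path only reads
# config.MODEL.NUM_CLASSES, while the commented-out CRNN path would also need
# MODEL.IMAGE_SIZE.H and MODEL.NUM_HIDDEN.
#   from types import SimpleNamespace
#   cfg = SimpleNamespace(MODEL=SimpleNamespace(NUM_CLASSES=36))
#   model = get_crnn(cfg)   # 36 + 1 outputs (the extra class is likely the CTC blank)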
if __name__ == '__main__':
    from lib.utils.utils import model_info

    model = MobileNetV3_Large(1000)
    model_info(model)

    model = CRNN(32, 1, 1000, 256)
    model_info(model)
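    # Forward-pass smoke test (a sketch; assumes 32-pixel-high inputs so that the
    # height-1 assertions in both forward() methods hold).
    import torch
    with torch.no_grad():
        x_rgb = torch.randn(2, 3, 32, 320)   # MobileNetV3_Large expects 3-channel input
        x_gray = torch.randn(2, 1, 32, 160)  # the CRNN above was built with nc=1
        print('MobileNetV3_Large output:', MobileNetV3_Large(37)(x_rgb).shape)  # [W', B, 37]
        print('CRNN output:', CRNN(32, 1, 37, 256)(x_gray).shape)               # [W'', B, 37]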
    # These timm imports appear unused in this file.
    from timm.models import MobileNetV3
    from timm.models import MobileNetV3Features
|