# DetGhostNet.py
# 2020.06.09-Changed for building GhostNet
# Huawei Technologies Co., Ltd. <foss@huawei.com>
"""
Creates a GhostNet model as defined in:
GhostNet: More Features from Cheap Operations, by Kai Han, Yunhe Wang, Qi Tian, Jianyuan Guo, Chunjing Xu, Chang Xu.
https://arxiv.org/abs/1911.11907
Modified from https://github.com/d-li14/mobilenetv3.pytorch and https://github.com/rwightman/pytorch-image-models
"""
import os
import math
import logging
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F

from torchocr.networks.CommonModules import CBAM


def _make_divisible(v, divisor, min_value=None):
    """
    Taken from the original TensorFlow repo. It ensures that every layer has a
    channel number that is divisible by `divisor`:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
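# Rounding examples: _make_divisible(16 * 1.0, 4) -> 16 and _make_divisible(46, 4) -> 48;
# if plain rounding would drop the value below 90% of the original, one extra
# divisor step is added back.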


def hard_sigmoid(x, inplace: bool = False):
    if inplace:
        return x.add_(3.).clamp_(0., 6.).div_(6.)
    else:
        return F.relu6(x + 3.) / 6.
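# hard_sigmoid approximates a sigmoid with relu6(x + 3) / 6 (as in MobileNetV3);
# it is used below as the gating function of the SE block, where it is cheaper
# than computing a true sigmoid.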


class SqueezeExcite(nn.Module):
    def __init__(self, in_chs, se_ratio=0.25, reduced_base_chs=None,
                 act_layer=nn.ReLU, gate_fn=hard_sigmoid, divisor=4, **_):
        super(SqueezeExcite, self).__init__()
        self.gate_fn = gate_fn
        reduced_chs = _make_divisible((reduced_base_chs or in_chs) * se_ratio, divisor)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)

    def forward(self, x):
        x_se = self.avg_pool(x)
        x_se = self.conv_reduce(x_se)
        x_se = self.act1(x_se)
        x_se = self.conv_expand(x_se)
        x = x * self.gate_fn(x_se)
        return x


class ConvBnAct(nn.Module):
    def __init__(self, in_chs, out_chs, kernel_size,
                 stride=1, act_layer=nn.ReLU):
        super(ConvBnAct, self).__init__()
        self.conv = nn.Conv2d(in_chs, out_chs, kernel_size, stride, kernel_size // 2, bias=False)
        self.bn1 = nn.BatchNorm2d(out_chs)
        self.act1 = act_layer(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn1(x)
        x = self.act1(x)
        return x


class GhostModule(nn.Module):
    def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3, stride=1, relu=True):
        super(GhostModule, self).__init__()
        self.oup = oup
        init_channels = math.ceil(oup / ratio)
        new_channels = init_channels * (ratio - 1)

        self.primary_conv = nn.Sequential(
            nn.Conv2d(inp, init_channels, kernel_size, stride, kernel_size // 2, bias=False),
            nn.BatchNorm2d(init_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

        self.cheap_operation = nn.Sequential(
            nn.Conv2d(init_channels, new_channels, dw_size, 1, dw_size // 2, groups=init_channels, bias=False),
            nn.BatchNorm2d(new_channels),
            nn.ReLU(inplace=True) if relu else nn.Sequential(),
        )

    def forward(self, x):
        x1 = self.primary_conv(x)
        x2 = self.cheap_operation(x1)
        out = torch.cat([x1, x2], dim=1)
        return out[:, :self.oup, :, :]
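# GhostModule: the primary conv (1x1 by default) produces ceil(oup / ratio) "intrinsic"
# feature maps, and the cheap depth-wise conv generates the remaining ghost maps from
# them; since init_channels * ratio can exceed oup, the concatenation is sliced back to oup.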


class GhostBottleneck(nn.Module):
    """Ghost bottleneck w/ optional SE."""

    def __init__(self, in_chs, mid_chs, out_chs, dw_kernel_size=3,
                 stride=1, act_layer=nn.ReLU, se_ratio=0.):
        super(GhostBottleneck, self).__init__()
        has_se = se_ratio is not None and se_ratio > 0.
        self.stride = stride

        # Point-wise expansion
        self.ghost1 = GhostModule(in_chs, mid_chs, relu=True)

        # Depth-wise convolution (only built when down-sampling)
        if self.stride > 1:
            self.conv_dw = nn.Conv2d(mid_chs, mid_chs, dw_kernel_size, stride=stride,
                                     padding=(dw_kernel_size - 1) // 2,
                                     groups=mid_chs, bias=False)
            self.bn_dw = nn.BatchNorm2d(mid_chs)

        # Squeeze-and-excitation
        if has_se:
            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio)
            # self.se = CBAM(mid_chs, mid_chs)
        else:
            self.se = None

        # Point-wise linear projection
        self.ghost2 = GhostModule(mid_chs, out_chs, relu=False)

        # Shortcut
        if in_chs == out_chs and self.stride == 1:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_chs, in_chs, dw_kernel_size, stride=stride,
                          padding=(dw_kernel_size - 1) // 2, groups=in_chs, bias=False),
                nn.BatchNorm2d(in_chs),
                nn.Conv2d(in_chs, out_chs, 1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(out_chs),
            )

    def forward(self, x):
        residual = x

        # 1st ghost module (expansion)
        x = self.ghost1(x)

        # Depth-wise convolution
        if self.stride > 1:
            x = self.conv_dw(x)
            x = self.bn_dw(x)

        # Squeeze-and-excitation
        if self.se is not None:
            x = self.se(x)

        # 2nd ghost module (projection)
        x = self.ghost2(x)

        x += self.shortcut(residual)
        return x
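# Note on the shortcut above: whenever the spatial size or channel count changes
# (stride > 1 or in_chs != out_chs), the residual branch is a depth-wise conv followed
# by a 1x1 conv, so the addition in forward() is always shape-compatible.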


class GhostNet(nn.Module):
    def __init__(self, cfgs, num_classes=1000, width=1.0, dropout=0.2, pretrained=True, **kwargs):
        super(GhostNet, self).__init__()
        # setting of inverted residual blocks
        model_name = kwargs.get('model_name', 'default')
        # NOTE: disable_se only filters SE weights out of the checkpoint below;
        # the SE modules themselves are still built.
        self.disable_se = kwargs.get('disable_se', False)
        if model_name == 'default':
            self.cfgs = [
                # kernel_size, expansion_channels, output_channels, se_ratio, stride
                # stage1
                [[3, 16, 16, 0, 1]],
                # stage2
                [[3, 48, 24, 0, 2]],
                [[3, 72, 24, 0, 1]],
                # stage3
                [[5, 72, 40, 0.25, 2]],
                [[5, 120, 40, 0.25, 1]],
                # stage4
                [[3, 240, 80, 0, 2]],
                [[3, 200, 80, 0, 1],
                 [3, 184, 80, 0, 1],
                 [3, 184, 80, 0, 1],
                 [3, 480, 112, 0.25, 1],
                 [3, 672, 112, 0.25, 1]
                 ],
                # stage5
                [[5, 672, 160, 0.25, 2]],
                [[5, 960, 160, 0, 1],
                 [5, 960, 160, 0.25, 1],
                 [5, 960, 160, 0, 1],
                 [5, 960, 160, 0.25, 1]
                 ]
            ]
        else:
            self.cfgs = cfgs
        # self.dropout = dropout

        # building first layer
        output_channel = _make_divisible(16 * width, 4)  # 16
        self.conv_stem = nn.Conv2d(3, output_channel, 3, 2, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(output_channel)
        self.act1 = nn.ReLU(inplace=True)
        input_channel = output_channel

        # building inverted residual blocks
        stages = []
        block = GhostBottleneck
        self.keep_stages = []
        self.out_channels = []
        i = 0
        for cfg in self.cfgs:
            layers = []
            for k, exp_size, c, se_ratio, s in cfg:
                if s == 2 and i > 2:
                    self.out_channels.append(input_channel)
                output_channel = _make_divisible(c * width, 4)
                hidden_channel = _make_divisible(exp_size * width, 4)
                layers.append(block(input_channel, hidden_channel, output_channel, k, s,
                                    se_ratio=se_ratio))
                input_channel = output_channel
                i += 1
            stages.append(nn.Sequential(*layers))
        output_channel = _make_divisible(exp_size * width, 4)
        stages.append(nn.Sequential(ConvBnAct(input_channel, output_channel, 1)))
        input_channel = output_channel
        self.out_channels.append(input_channel)

        self.blocks = nn.Sequential(*stages)

        # building last several layers (classification head, unused for detection)
        # output_channel = 1280
        # self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
        # self.conv_head = nn.Conv2d(input_channel, output_channel, 1, 1, 0, bias=True)
        # self.act2 = nn.ReLU(inplace=True)
        # self.classifier = nn.Linear(output_channel, num_classes)

        if pretrained:
            ckpt_path = './weights/state_dict_73.98.pth'
            logger = logging.getLogger('torchocr')
            if os.path.exists(ckpt_path):
                logger.info('loading ImageNet pretrained weights')
                dic_ckpt = torch.load(ckpt_path, map_location='cpu')
                filtered_dict = OrderedDict()
                for key in dic_ckpt.keys():
                    is_se_key = key.find('se') != -1
                    if self.disable_se and is_se_key:
                        continue
                    filtered_dict[key] = dic_ckpt[key]
                # strict=False: the dict may be filtered (disable_se) and the checkpoint
                # may contain the classification head that is commented out above.
                self.load_state_dict(filtered_dict, strict=False)
            else:
                logger.info(f'{ckpt_path} does not exist')

    def forward(self, x):
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        out = []
        for stage in self.blocks:
            x = stage(x)
            out.append(x)
        return [out[2], out[4], out[6], out[9]]
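# forward() returns the feature maps after stages 2, 4, 6 and 9, i.e. at strides
# 4, 8, 16 and 32 relative to the input; self.out_channels ([24, 40, 112, 960] at
# width=1.0) records their channel counts for downstream use (e.g. an FPN-style
# detection neck).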


def ghostnet(**kwargs):
    """
    Constructs a GhostNet model.
    """
    cfgs = [
        # kernel_size, expansion_channels, output_channels, se_ratio, stride
        # stage1
        [[3, 16, 16, 0, 1]],
        # stage2
        [[3, 48, 24, 0, 2]],
        [[3, 72, 24, 0, 1]],
        # stage3
        [[5, 72, 40, 0.25, 2]],
        [[5, 120, 40, 0.25, 1]],
        # stage4
        [[3, 240, 80, 0, 2]],
        [[3, 200, 80, 0, 1],
         [3, 184, 80, 0, 1],
         [3, 184, 80, 0, 1],
         [3, 480, 112, 0.25, 1],
         [3, 672, 112, 0.25, 1]
         ],
        # stage5
        [[5, 672, 160, 0.25, 2]],
        [[5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1],
         [5, 960, 160, 0, 1],
         [5, 960, 160, 0.25, 1]
         ]
    ]
    return GhostNet(cfgs, **kwargs)


if __name__ == '__main__':
    model = ghostnet()
    model.eval()
    # print(model)
    x = torch.randn(32, 3, 320, 256)
    y = model(x)
    # forward() returns a list of four feature maps, so print each shape
    for feat in y:
        print(feat.size())
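    # Sanity check (based on the default config above): the four feature maps should
    # come out at strides 4, 8, 16 and 32 with 24, 40, 112 and 960 channels at width=1.0.
    for feat, channels, stride in zip(y, [24, 40, 112, 960], [4, 8, 16, 32]):
        assert feat.shape[1:] == (channels, 320 // stride, 256 // stride)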