resnet_vd.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. # copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. import paddle.nn.functional as F
  17. from paddleseg.cvlibs import manager
  18. from paddleseg.models import layers
  19. import ppmatting
  20. __all__ = [
  21. "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
  22. ]
  23. class ConvBNLayer(nn.Layer):
  24. def __init__(
  25. self,
  26. in_channels,
  27. out_channels,
  28. kernel_size,
  29. stride=1,
  30. dilation=1,
  31. groups=1,
  32. is_vd_mode=False,
  33. act=None, ):
  34. super(ConvBNLayer, self).__init__()
  35. self.is_vd_mode = is_vd_mode
  36. self._pool2d_avg = nn.AvgPool2D(
  37. kernel_size=2, stride=2, padding=0, ceil_mode=True)
  38. self._conv = nn.Conv2D(
  39. in_channels=in_channels,
  40. out_channels=out_channels,
  41. kernel_size=kernel_size,
  42. stride=stride,
  43. padding=(kernel_size - 1) // 2 if dilation == 1 else 0,
  44. dilation=dilation,
  45. groups=groups,
  46. bias_attr=False)
  47. self._batch_norm = layers.SyncBatchNorm(out_channels)
  48. self._act_op = layers.Activation(act=act)
  49. def forward(self, inputs):
  50. if self.is_vd_mode:
  51. inputs = self._pool2d_avg(inputs)
  52. y = self._conv(inputs)
  53. y = self._batch_norm(y)
  54. y = self._act_op(y)
  55. return y
  56. class BottleneckBlock(nn.Layer):
  57. def __init__(self,
  58. in_channels,
  59. out_channels,
  60. stride,
  61. shortcut=True,
  62. if_first=False,
  63. dilation=1):
  64. super(BottleneckBlock, self).__init__()
  65. self.conv0 = ConvBNLayer(
  66. in_channels=in_channels,
  67. out_channels=out_channels,
  68. kernel_size=1,
  69. act='relu')
  70. self.dilation = dilation
  71. self.conv1 = ConvBNLayer(
  72. in_channels=out_channels,
  73. out_channels=out_channels,
  74. kernel_size=3,
  75. stride=stride,
  76. act='relu',
  77. dilation=dilation)
  78. self.conv2 = ConvBNLayer(
  79. in_channels=out_channels,
  80. out_channels=out_channels * 4,
  81. kernel_size=1,
  82. act=None)
  83. if not shortcut:
  84. self.short = ConvBNLayer(
  85. in_channels=in_channels,
  86. out_channels=out_channels * 4,
  87. kernel_size=1,
  88. stride=1,
  89. is_vd_mode=False if if_first or stride == 1 else True)
  90. self.shortcut = shortcut
  91. def forward(self, inputs):
  92. y = self.conv0(inputs)
  93. ####################################################################
  94. # If given dilation rate > 1, using corresponding padding.
  95. # The performance drops down without the follow padding.
  96. if self.dilation > 1:
  97. padding = self.dilation
  98. y = F.pad(y, [padding, padding, padding, padding])
  99. #####################################################################
  100. conv1 = self.conv1(y)
  101. conv2 = self.conv2(conv1)
  102. if self.shortcut:
  103. short = inputs
  104. else:
  105. short = self.short(inputs)
  106. y = paddle.add(x=short, y=conv2)
  107. y = F.relu(y)
  108. return y
  109. class BasicBlock(nn.Layer):
  110. def __init__(self,
  111. in_channels,
  112. out_channels,
  113. stride,
  114. shortcut=True,
  115. if_first=False):
  116. super(BasicBlock, self).__init__()
  117. self.stride = stride
  118. self.conv0 = ConvBNLayer(
  119. in_channels=in_channels,
  120. out_channels=out_channels,
  121. kernel_size=3,
  122. stride=stride,
  123. act='relu')
  124. self.conv1 = ConvBNLayer(
  125. in_channels=out_channels,
  126. out_channels=out_channels,
  127. kernel_size=3,
  128. act=None)
  129. if not shortcut:
  130. self.short = ConvBNLayer(
  131. in_channels=in_channels,
  132. out_channels=out_channels,
  133. kernel_size=1,
  134. stride=1,
  135. is_vd_mode=False if if_first or stride == 1 else True)
  136. self.shortcut = shortcut
  137. def forward(self, inputs):
  138. y = self.conv0(inputs)
  139. conv1 = self.conv1(y)
  140. if self.shortcut:
  141. short = inputs
  142. else:
  143. short = self.short(inputs)
  144. y = paddle.add(x=short, y=conv1)
  145. y = F.relu(y)
  146. return y
  147. class ResNet_vd(nn.Layer):
  148. """
  149. The ResNet_vd implementation based on PaddlePaddle.
  150. The original article refers to Jingdong
  151. Tong He, et, al. "Bag of Tricks for Image Classification with Convolutional Neural Networks"
  152. (https://arxiv.org/pdf/1812.01187.pdf).
  153. Args:
  154. layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
  155. output_stride (int, optional): The stride of output features compared to input images. It is 8 or 16. Default: 8.
  156. multi_grid (tuple|list, optional): The grid of stage4. Defult: (1, 1, 1).
  157. pretrained (str, optional): The path of pretrained model.
  158. """
  159. def __init__(self,
  160. input_channels=3,
  161. layers=50,
  162. output_stride=32,
  163. multi_grid=(1, 1, 1),
  164. pretrained=None):
  165. super(ResNet_vd, self).__init__()
  166. self.conv1_logit = None # for gscnn shape stream
  167. self.layers = layers
  168. supported_layers = [18, 34, 50, 101, 152, 200]
  169. assert layers in supported_layers, \
  170. "supported layers are {} but input layer is {}".format(
  171. supported_layers, layers)
  172. if layers == 18:
  173. depth = [2, 2, 2, 2]
  174. elif layers == 34 or layers == 50:
  175. depth = [3, 4, 6, 3]
  176. elif layers == 101:
  177. depth = [3, 4, 23, 3]
  178. elif layers == 152:
  179. depth = [3, 8, 36, 3]
  180. elif layers == 200:
  181. depth = [3, 12, 48, 3]
  182. num_channels = [64, 256, 512,
  183. 1024] if layers >= 50 else [64, 64, 128, 256]
  184. num_filters = [64, 128, 256, 512]
  185. # for channels of four returned stages
  186. self.feat_channels = [c * 4 for c in num_filters
  187. ] if layers >= 50 else num_filters
  188. self.feat_channels = [64] + self.feat_channels
  189. dilation_dict = None
  190. if output_stride == 8:
  191. dilation_dict = {2: 2, 3: 4}
  192. elif output_stride == 16:
  193. dilation_dict = {3: 2}
  194. self.conv1_1 = ConvBNLayer(
  195. in_channels=input_channels,
  196. out_channels=32,
  197. kernel_size=3,
  198. stride=2,
  199. act='relu')
  200. self.conv1_2 = ConvBNLayer(
  201. in_channels=32,
  202. out_channels=32,
  203. kernel_size=3,
  204. stride=1,
  205. act='relu')
  206. self.conv1_3 = ConvBNLayer(
  207. in_channels=32,
  208. out_channels=64,
  209. kernel_size=3,
  210. stride=1,
  211. act='relu')
  212. self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
  213. # self.block_list = []
  214. self.stage_list = []
  215. if layers >= 50:
  216. for block in range(len(depth)):
  217. shortcut = False
  218. block_list = []
  219. for i in range(depth[block]):
  220. if layers in [101, 152] and block == 2:
  221. if i == 0:
  222. conv_name = "res" + str(block + 2) + "a"
  223. else:
  224. conv_name = "res" + str(block + 2) + "b" + str(i)
  225. else:
  226. conv_name = "res" + str(block + 2) + chr(97 + i)
  227. ###############################################################################
  228. # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
  229. dilation_rate = dilation_dict[
  230. block] if dilation_dict and block in dilation_dict else 1
  231. # Actually block here is 'stage', and i is 'block' in 'stage'
  232. # At the stage 4, expand the the dilation_rate if given multi_grid
  233. if block == 3:
  234. dilation_rate = dilation_rate * multi_grid[i]
  235. ###############################################################################
  236. bottleneck_block = self.add_sublayer(
  237. 'bb_%d_%d' % (block, i),
  238. BottleneckBlock(
  239. in_channels=num_channels[block]
  240. if i == 0 else num_filters[block] * 4,
  241. out_channels=num_filters[block],
  242. stride=2 if i == 0 and block != 0 and
  243. dilation_rate == 1 else 1,
  244. shortcut=shortcut,
  245. if_first=block == i == 0,
  246. dilation=dilation_rate))
  247. block_list.append(bottleneck_block)
  248. shortcut = True
  249. self.stage_list.append(block_list)
  250. else:
  251. for block in range(len(depth)):
  252. shortcut = False
  253. block_list = []
  254. for i in range(depth[block]):
  255. conv_name = "res" + str(block + 2) + chr(97 + i)
  256. basic_block = self.add_sublayer(
  257. 'bb_%d_%d' % (block, i),
  258. BasicBlock(
  259. in_channels=num_channels[block]
  260. if i == 0 else num_filters[block],
  261. out_channels=num_filters[block],
  262. stride=2 if i == 0 and block != 0 else 1,
  263. shortcut=shortcut,
  264. if_first=block == i == 0))
  265. block_list.append(basic_block)
  266. shortcut = True
  267. self.stage_list.append(block_list)
  268. self.pretrained = pretrained
  269. self.init_weight()
  270. def forward(self, inputs):
  271. feat_list = []
  272. y = self.conv1_1(inputs)
  273. y = self.conv1_2(y)
  274. y = self.conv1_3(y)
  275. feat_list.append(y)
  276. y = self.pool2d_max(y)
  277. # A feature list saves the output feature map of each stage.
  278. for stage in self.stage_list:
  279. for block in stage:
  280. y = block(y)
  281. feat_list.append(y)
  282. return feat_list
  283. def init_weight(self):
  284. ppmatting.utils.load_pretrained_model(self, self.pretrained)
  285. @manager.BACKBONES.add_component
  286. def ResNet18_vd(**args):
  287. model = ResNet_vd(layers=18, **args)
  288. return model
  289. @manager.BACKBONES.add_component
  290. def ResNet34_vd(**args):
  291. model = ResNet_vd(layers=34, **args)
  292. return model
  293. @manager.BACKBONES.add_component
  294. def ResNet50_vd(**args):
  295. model = ResNet_vd(layers=50, **args)
  296. return model
  297. @manager.BACKBONES.add_component
  298. def ResNet101_vd(**args):
  299. model = ResNet_vd(layers=101, **args)
  300. return model
  301. def ResNet152_vd(**args):
  302. model = ResNet_vd(layers=152, **args)
  303. return model
  304. def ResNet200_vd(**args):
  305. model = ResNet_vd(layers=200, **args)
  306. return model