yolov5s中加入DCNv2(可变形卷积v2)

Requirement

torch>=1.8.1
torchvision>=0.9.1

实现步骤

1.测试环境是否满足要求

import math

import torch
from torch import nn


class DCNv2(nn.Module):
    """Modulated deformable convolution (DCNv2) on top of torchvision.

    A regular convolution (``conv_offset_mask``) predicts, for every output
    location, ``2 * k * k`` sampling offsets and ``k * k`` modulation values
    (``k`` = ``kernel_size``). They are passed, together with this module's
    own ``weight`` and ``bias``, to ``torchvision.ops.deform_conv2d``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the layer.
        kernel_size: Side length of the square kernel. Must be an ``int``
            because it is squared and used directly as a tensor dimension.
        stride: ``int`` or ``tuple``; an ``int`` is expanded to
            ``(stride, stride)`` for the deformable convolution.
        padding: Padding used by both the offset/mask convolution and the
            deformable convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # isinstance (not an exact type comparison) so tuple subclasses are
        # also passed through unchanged.
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding

        # Main convolution parameters. They are allocated uninitialised and
        # filled in by reset_parameters() below.
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = nn.Parameter(torch.empty(out_channels))

        # Predicts offsets and mask in one go: 3 * k * k output channels
        # (2 * k * k for the x/y offsets, k * k for the mask).
        self.conv_offset_mask = nn.Conv2d(
            in_channels,
            3 * kernel_size * kernel_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            bias=True,
        )

        self.reset_parameters()
        self._init_weight()

    def reset_parameters(self):
        """Draw ``weight`` from U(-stdv, stdv), stdv = 1/sqrt(fan_in); zero ``bias``."""
        fan_in = self.in_channels * (self.kernel_size ** 2)
        stdv = 1.0 / math.sqrt(fan_in)
        nn.init.uniform_(self.weight, -stdv, stdv)
        nn.init.zeros_(self.bias)

    def _init_weight(self):
        """Zero the offset/mask convolution.

        With all-zero weights and bias the predicted offsets are 0 and the
        mask is ``sigmoid(0) = 0.5``, so sampling starts on the regular grid.
        """
        nn.init.constant_(self.conv_offset_mask.weight, 0.0)
        nn.init.constant_(self.conv_offset_mask.bias, 0.0)

    def forward(self, x):
        """Apply the modulated deformable convolution to ``x``."""
        # Imported here so the class stays self-contained when it is copied
        # into a module that does not import torchvision itself.
        from torchvision.ops import deform_conv2d

        out = self.conv_offset_mask(x)
        # Split the prediction into three equal channel groups: the first two
        # form the offsets, the last one is the modulation mask.
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return deform_conv2d(
            input=x,
            offset=offset,
            weight=self.weight,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            mask=mask,
        )


if __name__ == "__main__":
    # Small stack of DCNv2 layers used purely as an environment smoke test:
    # if it runs, torch/torchvision provide a working deform_conv2d with
    # mask support.
    model = nn.Sequential(
        DCNv2(3, 32, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, 2),
        DCNv2(32, 32, kernel_size=3, stride=1, padding=1),
        DCNv2(32, 64, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, 2),
        DCNv2(64, 64, kernel_size=3, stride=1, padding=1),
        DCNv2(64, 128, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, 2),
        DCNv2(128, 128, kernel_size=3, stride=1, padding=1),
        DCNv2(128, 256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(2, 2),
    )

    x = torch.randn(2, 3, 64, 64)
    y = model(x)
    print(x.size())
    print(y.size())
    # Expected output:
    # torch.Size([2, 3, 64, 64])
    # torch.Size([2, 256, 4, 4])

如果能正常输出以上两行张量尺寸,则说明环境满足要求。

2.修改models/yolov5s.yaml

# YOLOv5 by Ultralytics, GPL-3.0 license

# Parameters
nc: 1  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 v6.0 backbone
# The stride-2 layers 1, 3, 5 and 7 use DCNv2 with args
# [out_channels, kernel_size, stride].
backbone:
  # [from, number, module, args]
  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, DCNv2, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],  # 2
   [-1, 1, DCNv2, [256, 3, 2]],  # 3-P3/8
   [-1, 6, C3, [256]],  # 4
   [-1, 1, DCNv2, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],  # 6
   [-1, 1, DCNv2, [1024, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [1024]],  # 8
   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],  # 10
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],  # 11
   [[-1, 6], 1, Concat, [1]],  # 12 cat backbone P4
   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],  # 14
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],  # 15
   [[-1, 4], 1, Concat, [1]],  # 16 cat backbone P3
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],  # 18
   [[-1, 14], 1, Concat, [1]],  # 19 cat head P4
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],  # 21
   [[-1, 10], 1, Concat, [1]],  # 22 cat head P5
   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

3.修改models/common.py

# --------------------------DCNv2 start--------------------------
class DCNv2(nn.Module):
    """Modulated deformable convolution (DCNv2) on top of torchvision.

    A regular convolution (``conv_offset_mask``) predicts, for every output
    location, ``2 * k * k`` sampling offsets and ``k * k`` modulation values
    (``k`` = ``kernel_size``). They are passed, together with this module's
    own ``weight`` and ``bias``, to ``torchvision.ops.deform_conv2d``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the layer.
        kernel_size: Side length of the square kernel. Must be an ``int``
            because it is squared and used directly as a tensor dimension.
        stride: ``int`` or ``tuple``; an ``int`` is expanded to
            ``(stride, stride)`` for the deformable convolution.
        padding: Padding used by both the offset/mask convolution and the
            deformable convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # isinstance (not an exact type comparison) so tuple subclasses are
        # also passed through unchanged.
        self.stride = stride if isinstance(stride, tuple) else (stride, stride)
        self.padding = padding

        # Main convolution parameters. They are allocated uninitialised and
        # filled in by reset_parameters() below.
        self.weight = nn.Parameter(
            torch.empty(out_channels, in_channels, kernel_size, kernel_size)
        )
        self.bias = nn.Parameter(torch.empty(out_channels))

        # Predicts offsets and mask in one go: 3 * k * k output channels
        # (2 * k * k for the x/y offsets, k * k for the mask).
        self.conv_offset_mask = nn.Conv2d(
            in_channels,
            3 * kernel_size * kernel_size,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            bias=True,
        )

        self.reset_parameters()
        self._init_weight()

    def reset_parameters(self):
        """Draw ``weight`` from U(-stdv, stdv), stdv = 1/sqrt(fan_in); zero ``bias``."""
        fan_in = self.in_channels * (self.kernel_size ** 2)
        stdv = 1.0 / math.sqrt(fan_in)
        nn.init.uniform_(self.weight, -stdv, stdv)
        nn.init.zeros_(self.bias)

    def _init_weight(self):
        """Zero the offset/mask convolution.

        With all-zero weights and bias the predicted offsets are 0 and the
        mask is ``sigmoid(0) = 0.5``, so sampling starts on the regular grid.
        """
        nn.init.constant_(self.conv_offset_mask.weight, 0.0)
        nn.init.constant_(self.conv_offset_mask.bias, 0.0)

    def forward(self, x):
        """Apply the modulated deformable convolution to ``x``."""
        # Function-scope import: this snippet is meant to be pasted into an
        # existing module, which may not import torchvision at the top.
        from torchvision.ops import deform_conv2d

        out = self.conv_offset_mask(x)
        # Split the prediction into three equal channel groups: the first two
        # form the offsets, the last one is the modulation mask.
        o1, o2, mask = torch.chunk(out, 3, dim=1)
        offset = torch.cat((o1, o2), dim=1)
        mask = torch.sigmoid(mask)
        return deform_conv2d(
            input=x,
            offset=offset,
            weight=self.weight,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            mask=mask,
        )
# ---------------------------DCNv2 end---------------------------

4.修改models/yolo.py

# 修改前:
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
         BottleneckCSP, C3]:

# 修改后:在列表中加入 DCNv2
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
         BottleneckCSP, C3, DCNv2]:

完成以上操作后,在train.py中通过--cfg参数指定修改后的yaml文件,并确认其余训练参数,即可开始训练。

参考

【1】 https://blog.csdn.net/shuaijieer/article/details/126249088
【2】 https://github.com/yjh0410/PyTorch_DCNv2