• 本文为365天深度学习训练营中的学习记录博客
  • 参考文章:Pytorch实战 | 第P8天:YOLOv5-C3模块实现(训练营内部成员可读)
  • 原作者:K同学啊|接辅导、项目定制

了解C3的结构,方便后续YOLOv5算法的学习。采用的数据集是天气识别的数据集。

一、 前期准备

1. 设置GPU

# Standard library
import os
import pathlib
import warnings

# Third-party
import PIL
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchvision import transforms, datasets

# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

输出:cuda

2. 导入数据

import os
import pathlib
import random

import PIL

# Root folder of the dataset; each entry directly inside it is treated as a class.
data_dir = './data/'
data_dir = pathlib.Path(data_dir)

data_paths = list(data_dir.glob('*'))

# Take the last path component as the class name. Splitting the string form of
# the path on a backslash only works with Windows separators and raises
# IndexError on POSIX systems, whereas ``Path.name`` is portable.
classeNames = [path.name for path in data_paths]
print(classeNames)

图像变换:使用 torchvision.transforms.Compose() 组合各个预处理步骤,并输出类别名称与索引的对应关系:

# Per-channel statistics used to standardise the images.
# NOTE(review): the original note says these were estimated by randomly sampling
# the dataset; the values match the commonly used ImageNet statistics - confirm.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Pre-processing for the training data.
# Random horizontal flip augmentation is currently disabled.
train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),   # bring every image to the same size
    transforms.ToTensor(),           # PIL Image / ndarray -> tensor scaled to [0, 1]
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),  # standardise each channel to help convergence
])

# Pre-processing for evaluation data: same steps as above.
test_transform = transforms.Compose([
    transforms.Resize([224, 224]),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
])

# Every sub-folder of ./data/ becomes one class.
total_data = datasets.ImageFolder("./data/", transform=train_transforms)
print(total_data.class_to_idx)

输出:{'cloudy': 0, 'rain': 1, 'shine': 2, 'sunrise': 3}

3. 划分数据集

# Hold out 20% of the images for testing; the remaining 80% are used for training.
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])

batch_size = 32

# Training batches are reshuffled every epoch.
train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=0)

# Evaluation gains nothing from shuffling; a fixed order keeps the batch
# composition (and therefore the per-batch averaged loss) identical between passes.
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=0)

# Show the shape of one batch as a sanity check.
for X, y in test_dl:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break

二、搭建YOLOv5-C3模型

1.搭建模型

import torch.nn.functional as F


def autopad(k, p=None):  # kernel, padding
    """Return the padding for kernel size ``k``.

    When ``p`` is None the padding is ``k // 2`` (element-wise when ``k`` is a
    sequence); otherwise ``p`` is returned unchanged.
    """
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


class Conv(nn.Module):
    """Convolution -> BatchNorm -> activation.

    Args:
        c1: input channels.
        c2: output channels.
        k: kernel size.
        s: stride.
        p: padding; ``None`` lets :func:`autopad` choose it.
        g: convolution groups.
        act: ``True`` selects SiLU, an ``nn.Module`` is used as given,
            anything else disables the activation.
    """

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        # Skips the BatchNorm layer (presumably for use once BN has been folded
        # into the convolution weights).
        return self.act(self.conv(x))


class Bottleneck(nn.Module):
    """1x1 Conv followed by a 3x3 Conv, with an optional residual connection.

    Args:
        c1: input channels.
        c2: output channels.
        shortcut: request the residual add; it is only applied when ``c1 == c2``.
        g: groups of the 3x3 convolution.
        e: ratio of hidden channels to ``c2``.
    """

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    """CSP bottleneck with three convolutions.

    One branch is ``cv1`` followed by ``n`` stacked Bottlenecks, the other is
    ``cv2`` alone; the two are concatenated along the channel axis and merged
    by ``cv3``.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked Bottlenecks.
        shortcut: forwarded to every Bottleneck.
        g: groups forwarded to every Bottleneck.
        e: ratio of hidden channels to ``c2``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class model_K(nn.Module):
    """Small classifier: one stride-2 Conv, one C3 block and a two-layer MLP head."""

    def __init__(self):
        super(model_K, self).__init__()

        # Convolution stem: 3 -> 32 channels, stride 2.
        self.Conv = Conv(3, 32, 3, 2)

        # C3 block with three Bottlenecks. The original call passed ``2``
        # positionally as ``shortcut``; it is truthy, so this is the same setting
        # spelled explicitly.
        self.C3_1 = C3(32, 64, n=3, shortcut=True)

        # Classification head. 802816 = 64 channels * 112 * 112, i.e. the
        # flattened C3 output for a 224x224 input (the stride-2 stem halves it).
        self.classifier = nn.Sequential(
            nn.Linear(in_features=802816, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=4)
        )

    def forward(self, x):
        x = self.Conv(x)
        x = self.C3_1(x)
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x


# The extracted snippet was cut off after ``device = "cuda"``. The later
# snippets use ``model``, so pick the device and instantiate the network here.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model_K().to(device)

2.查看模型详情

统计模型参数量以及其他指标

import torchsummary as summary

# Shape of a single input sample: 3 channels, 224 x 224 pixels.
input_shape = (3, 224, 224)

# Print the layer-by-layer summary of the model for that input shape.
summary.summary(model, input_shape)

三、 训练模型

1. 编写训练和测试函数

和之前cnn网络、vgg一样

# Training loop
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the module-level ``device``, pushed through the
    model, and used for one backward pass and one optimizer step.

    Returns:
        ``(accuracy, mean_loss)``: correct predictions divided by the dataset
        size, and the summed batch losses divided by the number of batches.
    """
    sample_count = len(dataloader.dataset)  # number of training samples
    batch_count = len(dataloader)           # number of batches

    correct_total = 0
    loss_total = 0

    for inputs, labels in dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss against the ground-truth labels.
        logits = model(inputs)
        batch_loss = loss_fn(logits, labels)

        # Backward pass: clear old gradients, back-propagate, update weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Book-keeping for accuracy and loss.
        hits = (logits.argmax(1) == labels).type(torch.float).sum()
        correct_total += hits.item()
        loss_total += batch_loss.item()

    return correct_total / sample_count, loss_total / batch_count
def test(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Batches are moved to the module-level ``device`` before the forward pass.

    Returns:
        ``(accuracy, mean_loss)``: correct predictions divided by the dataset
        size, and the summed batch losses divided by the number of batches.
    """
    sample_count = len(dataloader.dataset)  # number of test samples
    batch_count = len(dataloader)           # number of batches

    loss_total = 0
    correct_total = 0

    # No parameter updates happen here, so gradient tracking is switched off
    # to save memory.
    with torch.no_grad():
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_imgs)
            batch_loss = loss_fn(logits, batch_labels)

            loss_total += batch_loss.item()
            hits = (logits.argmax(1) == batch_labels).type(torch.float).sum()
            correct_total += hits.item()

    return correct_total / sample_count, loss_total / batch_count

2. 正式训练

这里也设置了优化器,结合前几次实验的经验,使用Adam优化器。

import copy

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()  # loss function

epochs = 20

train_loss = []
train_acc = []
test_loss = []
test_acc = []

best_acc = 0       # best test accuracy seen so far; selects the best model
best_model = None  # snapshot of the model that reached ``best_acc``

for epoch in range(epochs):
    # A custom learning-rate schedule would be applied here, e.g.
    # adjust_learning_rate(optimizer, epoch, learn_rate).

    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
    # When using an official scheduler, call scheduler.step() here.

    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)

    # Keep a copy of the best model. The first epoch always produces a snapshot,
    # so ``best_model`` exists even if the test accuracy never rises above 0.
    if best_model is None or epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        best_model = copy.deepcopy(model)

    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)

    # Current learning rate of the first parameter group.
    lr = optimizer.state_dict()['param_groups'][0]['lr']

    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
                          epoch_test_acc * 100, epoch_test_loss, lr))

# With zero epochs there is no snapshot; fall back to the current model.
if best_model is None:
    best_model = model

# Save the weights of the best model. The original saved the last-epoch
# ``model`` here, which is not necessarily the best one.
PATH = './best_model.pth'  # file name for the saved parameters
torch.save(best_model.state_dict(), PATH)

print('Done')

遇到了问题:RuntimeError: CUDA out of memory。这个在之前也遇到过,我显卡(3050ti)性能一般,但是可以把batch_size减小一半,本实验由32改为16即可运行。

四、 结果可视化

1. Loss与Accuracy图

import warnings

import matplotlib.pyplot as plt

# Hide warnings in the output.
warnings.filterwarnings("ignore")

plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly
plt.rcParams['figure.dpi'] = 100              # figure resolution

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))

# One entry per subplot: title, legend position and the curves to draw.
panels = (
    ('Training and Validation Accuracy', 'lower right',
     (('Training Accuracy', train_acc), ('Test Accuracy', test_acc))),
    ('Training and Validation Loss', 'upper right',
     (('Training Loss', train_loss), ('Test Loss', test_loss))),
)

for panel_index, (panel_title, legend_loc, curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    for curve_label, curve_values in curves:
        plt.plot(epochs_range, curve_values, label=curve_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)

plt.show()

2. 指定图片进行预测

from PIL import Image

# Class names in index order (keys of the ImageFolder class-to-index mapping).
classes = list(total_data.class_to_idx)


def predict_one_image(image_path, model, transform, classes):
    """Show one image, classify it with ``model`` and print the predicted class.

    Args:
        image_path: path of the image file to classify.
        model: network used for the prediction; it is switched to eval mode.
        transform: callable turning the RGB PIL image into an input tensor.
        classes: sequence of class names indexed by the predicted class id.
    """
    # Open inside a context manager so the file handle is released;
    # ``convert`` returns an independent, fully loaded image.
    with Image.open(image_path) as raw_img:
        test_img = raw_img.convert('RGB')
    plt.imshow(test_img)  # display the image being classified

    test_img = transform(test_img)
    img = test_img.to(device).unsqueeze(0)  # add the batch dimension

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(img)

    _, pred = torch.max(output, 1)
    pred_class = classes[pred.item()]
    print(f'预测结果是:{pred_class}')


# Predict one picture from the dataset, using the evaluation pre-processing.
predict_one_image(image_path='./data/sunrise/sunrise8.jpg',
                  model=model,
                  transform=test_transform,
                  classes=classes)

3. 模型评估

以往都是看最后几轮的准确率,但准确率波动较大时不容易找出最高的那一轮,所以这里用训练过程中保存的最佳模型(best_model)在测试集上重新评估,直接得到最佳准确率。

# Evaluate the best snapshot kept during training on the test loader.
best_model.eval()
best_metrics = test(test_dl, best_model, loss_fn)
epoch_test_acc, epoch_test_loss = best_metrics

print(*best_metrics)    # accuracy followed by loss
print(epoch_test_acc)   # accuracy on its own

五、优化模型

C3模块
作用:
1 在新版yolov5中,作者将BottleneckCSP(瓶颈层)模块转变为了C3模块,其结构作用基本相同均为CSP架构,只是在修正单元的选择上有所不同,其包含了3个标准卷积层以及多个Bottleneck模块(数量由配置文件.yaml的n和depth_multiple参数乘积决定)

2 C3相对于BottleneckCSP模块不同的是,经历过残差输出后的Conv模块被去掉了,concat后的标准卷积模块中的激活函数也由LeakyRelu变为了SiLU(同上)。

3 该模块是对残差特征进行学习的主要模块,其结构分为两支,一支使用了上述指定多个Bottleneck堆叠和3个标准卷积层,另一支仅经过一个基本卷积模块,最后将两支进行concat操作。

class C3(nn.Module):
    """CSP bottleneck with three convolutions.

    ``cv1`` feeds a stack of ``n`` Bottlenecks, ``cv2`` forms a parallel branch,
    and ``cv3`` merges the channel-wise concatenation of both branches.

    Args:
        c1: input channels.
        c2: output channels.
        n: number of stacked Bottlenecks.
        shortcut: forwarded to every Bottleneck.
        g: groups forwarded to every Bottleneck.
        e: ratio of hidden channels to ``c2``.
    """

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        hidden = int(c2 * e)  # channels inside the block

        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)

        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*stack)

    def forward(self, x):
        stacked_branch = self.m(self.cv1(x))  # cv1 followed by the Bottleneck stack
        plain_branch = self.cv2(x)            # single convolution branch
        merged = torch.cat((stacked_branch, plain_branch), dim=1)
        return self.cv3(merged)

提升:

将C3模块中Bottleneck的堆叠数量(参数n)由3改为4,即把模型中的 C3(32, 64, 3, 2) 改为 C3(32, 64, 4, 2)。

最后准确率提升了7%左右