睿智的目标检测64——目标检测中的MixUp数据增强方法

  • 学习前言
  • 代码下载
  • 什么是MixUp数据增强方法
  • 实现思路
  • 全部代码
    • 1、数据增强与MixUp
    • 2、调用代码

学习前言

哈哈哈!我再来一次数据增强!

代码下载

https://github.com/bubbliiiing/object-detection-augmentation

什么是MixUp数据增强方法

MixUp数据增强方法在最新的几个Yolo算法中得到了广泛的应用,特别是在YoloX中,s、m、l、x四个型号的网络都使用了MixUp数据增强。nano和tiny由于模型的拟合能力有限,没有使用MixUp,这也从侧面说明了MixUp是一种很强的数据增强方法。

MixUp的思路较为简单,主要是将两张图像按比例进行混合,如图所示:

图片混合完成后,原来两幅图片的真实框此时也位于一幅图像上。

实现思路

1、每次读取两张图片。


2、分别对两张图片进行翻转、缩放、色域变化等数据增强。

3、将两张图片按比例(代码中各取0.5)混合,并将二者的真实框堆叠到一起。

全部代码

1、数据增强与MixUp

该部分为普通数据增强与MixUp的代码

import cv2import numpy as npfrom PIL import Image, ImageDrawdef rand(a=0, b=1):    return np.random.rand()*(b-a) + adef get_random_data(annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):    line    = annotation_line.split()    #------------------------------#    #   读取图像并转换成RGB图像    #------------------------------#    image   = Image.open(line[0])    image   = image.convert('RGB')    #------------------------------#    #   获得图像的高宽与目标高宽    #------------------------------#    iw, ih  = image.size    h, w    = input_shape    #------------------------------#    #   获得预测框    #------------------------------#    box     = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])    if not random:        scale = min(w/iw, h/ih)        nw = int(iw*scale)        nh = int(ih*scale)        dx = (w-nw)//2        dy = (h-nh)//2        #---------------------------------#        #   将图像多余的部分加上灰条        #---------------------------------#        image       = image.resize((nw,nh), Image.BICUBIC)        new_image   = Image.new('RGB', (w,h), (128,128,128))        new_image.paste(image, (dx, dy))        image_data  = np.array(new_image, np.float32)        #---------------------------------#        #   对真实框进行调整        #---------------------------------#        if len(box)>0:            np.random.shuffle(box)            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy            box[:, 0:2][box[:, 0:2]<0] = 0            box[:, 2][box[:, 2]>w] = w            box[:, 3][box[:, 3]>h] = h            box_w = box[:, 2] - box[:, 0]            box_h = box[:, 3] - box[:, 1]            box = box[np.logical_and(box_w>1, box_h>1)] # discard invalid box        return image_data, box                #------------------------------------------#    #   对图像进行缩放并且进行长和宽的扭曲    #------------------------------------------#    new_ar = iw/ih * rand(1-jitter,1+jitter) / rand(1-jitter,1+jitter)    scale = rand(.25, 2)    if 
new_ar < 1:        nh = int(scale*h)        nw = int(nh*new_ar)    else:        nw = int(scale*w)        nh = int(nw/new_ar)    image = image.resize((nw,nh), Image.BICUBIC)    #------------------------------------------#    #   将图像多余的部分加上灰条    #------------------------------------------#    dx = int(rand(0, w-nw))    dy = int(rand(0, h-nh))    new_image = Image.new('RGB', (w,h), (128,128,128))    new_image.paste(image, (dx, dy))    image = new_image    #------------------------------------------#    #   翻转图像    #------------------------------------------#    flip = rand()<.5    if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)    image_data      = np.array(image, np.uint8)    #---------------------------------#    #   对图像进行色域变换    #   计算色域变换的参数    #---------------------------------#    r               = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1    #---------------------------------#    #   将图像转到HSV上    #---------------------------------#    hue, sat, val   = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))    dtype           = image_data.dtype    #---------------------------------#    #   应用变换    #---------------------------------#    x       = np.arange(0, 256, dtype=r.dtype)    lut_hue = ((x * r[0]) % 180).astype(dtype)    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)    image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))    image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)    #---------------------------------#    #   对真实框进行调整    #---------------------------------#    if len(box)>0:        np.random.shuffle(box)        box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx        box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy        if flip: box[:, [0,2]] = w - box[:, [2,0]]        box[:, 0:2][box[:, 0:2]<0] = 0        box[:, 2][box[:, 2]>w] = w        box[:, 3][box[:, 3]>h] = h        box_w = box[:, 2] - box[:, 0]        box_h = box[:, 3] - box[:, 1]        
box = box[np.logical_and(box_w>1, box_h>1)]         return image_data, boxdef get_random_data_with_MixUp(image_1, box_1, image_2, box_2):    new_image = np.array(image_1, np.float32) * 0.5 + np.array(image_2, np.float32) * 0.5    new_boxes = np.concatenate([box_1, box_2], axis=0)    return new_image, new_boxes

2、调用代码

该部分为调用代码

import os
from random import sample

import numpy as np
from PIL import Image, ImageDraw

from utils.random_data import get_random_data, get_random_data_with_MixUp
from utils.utils import convert_annotation, get_classes

#-----------------------------------------------------------------------------------#
#   Origin_VOCdevkit_path   Path to the original dataset.
#-----------------------------------------------------------------------------------#
Origin_VOCdevkit_path   = "VOCdevkit_Origin"
#-----------------------------------------------------------------------------------#
#   input_shape             Size of the generated image.
#-----------------------------------------------------------------------------------#
input_shape             = [640, 640]

if __name__ == "__main__":
    Origin_JPEGImages_path  = os.path.join(Origin_VOCdevkit_path, "VOC2007/JPEGImages")
    Origin_Annotations_path = os.path.join(Origin_VOCdevkit_path, "VOC2007/Annotations")

    #---------------------------#
    #   Collect the annotation files
    #---------------------------#
    # Keep only XML files so that stray directory entries are never sampled.
    xml_names = [
        name for name in os.listdir(Origin_Annotations_path)
        if name.lower().endswith(".xml")
    ]
    if len(xml_names) < 2:
        raise ValueError(
            "MixUp needs at least two annotation files in " + Origin_Annotations_path
        )

    #------------------------------#
    #   Pick two images and their labels
    #------------------------------#
    sample_xmls     = sample(xml_names, 2)
    unique_labels   = get_classes(sample_xmls, Origin_Annotations_path)

    jpg_name_1  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[0])[0] + '.jpg')
    jpg_name_2  = os.path.join(Origin_JPEGImages_path, os.path.splitext(sample_xmls[1])[0] + '.jpg')
    xml_name_1  = os.path.join(Origin_Annotations_path, sample_xmls[0])
    xml_name_2  = os.path.join(Origin_Annotations_path, sample_xmls[1])

    # NOTE(review): convert_annotation is presumably returning an annotation
    # line in the format get_random_data parses -- confirm in utils.utils.
    line_1 = convert_annotation(jpg_name_1, xml_name_1, unique_labels)
    line_2 = convert_annotation(jpg_name_2, xml_name_2, unique_labels)

    #------------------------------#
    #   Augment each image on its own
    #------------------------------#
    image_1, box_1  = get_random_data(line_1, input_shape)
    image_2, box_2  = get_random_data(line_2, input_shape)

    #------------------------------#
    #   Merge the two with MixUp
    #------------------------------#
    image_data, box_data = get_random_data_with_MixUp(image_1, box_1, image_2, box_2)

    #------------------------------#
    #   Draw the merged boxes and show the result
    #------------------------------#
    img = Image.fromarray(image_data.astype(np.uint8))
    draw = ImageDraw.Draw(img)
    thickness = 3
    for box in box_data:
        left, top, right, bottom = box[0:4]
        # Nested one-pixel rectangles emulate a thicker outline.
        for i in range(thickness):
            draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
    img.show()