YOLOv5/YOLOv8 系列:labelme 数据标注与数据集生成终极教程

  • 一.数据集准备
  • 二.转换为yolo 数据集

一.数据集准备

  1. 创建一个data 文件夹
  2. 在data文件夹下创建一个images 文件夹
  3. 将所有图片数据放入images文件夹下

使用labelme标注数据

  1. python环境下使用 pip install labelme 安装labelme
  2. 在 cmd 中输入 labelme 命令打开软件
  3. 进行标注
  4. 将标注文件和原图都放在images文件夹

标注完成后 images 文件夹下 存在原图和标注的json 文件

二.转换为yolo 数据集

在data文件夹下,根据labelme标签创建一个classes.txt 类别文件(每行一个类别名称)
然后在data文件夹下运行makedataset.py,直接生成labels 标签文件夹以及train.txt、val.txt 文件

"""Convert labelme rectangle annotations into a YOLO detection dataset.

Run from the dataset root (the folder that contains ``images/`` and
``classes.txt``). The script writes one label file per annotation JSON into
``labels/`` and lists the corresponding image paths in ``train.txt`` and
``val.txt`` (90% / 10% random split).
"""
import glob
import json
import os

import numpy as np

try:
    import cv2
except ImportError:
    # OpenCV is only needed by cv_imread(); the conversion itself does not
    # use it, so a missing install must not stop the script.
    cv2 = None


def split_by_ratio(arr, *ratios):
    """Randomly split a sequence into consecutive parts of the given ratios.

    :param arr: sequence to split; it is shuffled (as a copy) first.
    :param ratios: one fraction per requested part, e.g. ``0.5, 0.5`` yields
        two parts holding 50% of the items each.
    :return: a list with ``len(ratios)`` lists.
    """
    arr = np.random.permutation(arr)
    # Cumulative item counts are the cut positions handed to np.split.
    ind = np.add.accumulate(np.array(ratios) * len(arr)).astype(int)
    # np.split returns one more piece than there are cut positions; drop it.
    return [x.tolist() for x in np.split(arr, ind)][:len(ratios)]


def cv_imread(file_path):
    """Read an image whose path may contain non-ASCII (e.g. Chinese) characters.

    The file is loaded into a byte buffer with numpy and decoded by OpenCV
    with flag ``-1`` (image returned unchanged).

    :raises ImportError: if OpenCV is not installed.
    """
    if cv2 is None:
        raise ImportError("cv_imread requires OpenCV (pip install opencv-python)")
    cv_img = cv2.imdecode(np.fromfile(file_path, dtype=np.uint8), -1)
    return cv_img


def load_class_names(path):
    """Return the class names stored one per line in ``path`` (UTF-8)."""
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f.readlines()]


def shape_to_yolo_line(shape, img_width, img_height, class_names):
    """Format one labelme shape as a YOLO label line.

    Only the first two points of the shape are used, i.e. the shape is
    treated as a rectangle given by two opposite corners.

    :param shape: labelme shape dict with ``label`` and ``points`` keys.
    :param img_width: image width in pixels.
    :param img_height: image height in pixels.
    :param class_names: ordered class names; the index is the class id.
    :return: ``"<class_id> <x_center> <y_center> <width> <height>\\n"`` with
        all box values normalized by the image size.
    :raises ValueError: if the shape's label is not in ``class_names``.
    """
    if shape['label'] not in class_names:
        raise ValueError(f"Error: {shape['label']} not found in {class_names}")
    class_id = class_names.index(shape['label'])
    x1, y1 = shape['points'][0]
    x2, y2 = shape['points'][1]
    x_center = (x1 + x2) / 2 / img_width
    y_center = (y1 + y2) / 2 / img_height
    # Keep the normalized size in separate names so the pixel dimensions are
    # never overwritten between shapes.
    box_width = abs(x2 - x1) / img_width
    box_height = abs(y2 - y1) / img_height
    return f"{class_id} {x_center} {y_center} {box_width} {box_height}\n"


def convert_split(json_files, list_path, class_names):
    """Write YOLO labels for ``json_files`` and list their images.

    For every annotation file a ``labels/<name>.txt`` file is written, and
    ``data/images/<name>.jpg`` is appended to ``list_path``.

    :param json_files: paths of labelme JSON files belonging to this split.
    :param list_path: output text file listing the images of the split.
    :param class_names: ordered class names; the index is the class id.
    """
    with open(list_path, 'w') as f:
        for json_path in json_files:
            # Accept both "/" and "\\" separators, and strip only the final
            # extension so names such as "a.b.json" keep their full stem.
            file_name = os.path.basename(json_path.replace("\\", "/"))
            basename = os.path.splitext(file_name)[0]
            with open(json_path, 'r', encoding='utf-8') as ft:
                data = json.load(ft)
            with open("labels/" + basename + ".txt", 'w') as fa:
                for shape in data['shapes']:
                    # Image size is taken from each file's own JSON.
                    fa.write(shape_to_yolo_line(
                        shape, data["imageWidth"], data["imageHeight"], class_names))
            # NOTE: the image is assumed to be a .jpg next to the JSON file.
            f.write("data/images/" + basename + ".jpg\n")


def main():
    """Split the annotation files 90/10 and generate labels and list files."""
    # split_by_ratio shuffles, so no separate shuffle is needed here.
    json_list = glob.glob("images/*.json")
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)
    # Output folder for the YOLO label files.
    os.makedirs("labels", exist_ok=True)
    class_names = load_class_names("classes.txt")
    convert_split(trains, 'train.txt', class_names)
    convert_split(vals, 'val.txt', class_names)


if __name__ == "__main__":
    main()