1. 데이터를 training, val set으로 8:2 분할
- 1. 라이브러리 로딩
import os
import shutil
from sklearn.model_selection import train_test_split
import json
-----------------------------------------------------------------------
- 2. 폴더 경로 지정
# Root of the raw dataset; the split script expects one sub-folder per
# denomination directly underneath it.
data_folder = '/content/Dataset'

# Destination folders for the image files of each split.
train_images_folder, val_images_folder = (
    '/content/Dataset/images/train',
    '/content/Dataset/images/val',
)

# Destination folders for the JSON label files of each split.
train_labels_folder, val_labels_folder = (
    '/content/Dataset/labels/train',
    '/content/Dataset/labels/val',
)
-----------------------------------------------------------------------
- 3. 데이터셋 분할하기
# Denominations; each one has its own sub-folder under data_folder.
money = [10, 100, 1000, 10000, 50, 500, 5000, 50000]
# Split names (the original referenced undefined names `train` and `val`).
data_set_list = ['train', 'val']

# shutil.copy does not create missing directories, so make sure every
# destination exists before copying anything.
for split_folder in (train_images_folder, val_images_folder,
                     train_labels_folder, val_labels_folder):
    os.makedirs(split_folder, exist_ok=True)

for money_list in money:
    file_folder = os.path.join(data_folder, str(money_list))
    file_list = sorted(os.listdir(file_folder))

    # Collect names per denomination. The collections are rebuilt on every
    # iteration so that files from a previous folder are never looked up in
    # the current one, and they are not called `json` so the imported json
    # module is not shadowed.
    jpg_stems = {os.path.splitext(name)[0]
                 for name in file_list if name.endswith('.jpg')}
    json_stems = {os.path.splitext(name)[0]
                  for name in file_list if name.endswith('.json')}

    # Only image/label pairs are usable; report anything that has no partner.
    paired_stems = sorted(jpg_stems & json_stems)
    unpaired_stems = sorted(jpg_stems ^ json_stems)
    if unpaired_stems:
        print(f'{file_folder}: skipping {len(unpaired_stems)} file(s) '
              f'without a matching .jpg/.json partner: {unpaired_stems}')

    # train_test_split raises ValueError when a split would end up empty.
    if len(paired_stems) < 2:
        print(f'{file_folder}: not enough paired samples to split, skipping')
        continue

    # Split the stems once so an image and its label always land in the
    # same split, instead of relying on two independent splits lining up.
    train_data, val_data = train_test_split(
        paired_stems, test_size=0.2, random_state=2023)

    for stems, images_folder, labels_folder in (
        (train_data, train_images_folder, train_labels_folder),
        (val_data, val_images_folder, val_labels_folder),
    ):
        for stem in stems:
            image_name = stem + '.jpg'
            label_name = stem + '.json'
            shutil.copy(os.path.join(file_folder, image_name),
                        os.path.join(images_folder, image_name))
            shutil.copy(os.path.join(file_folder, label_name),
                        os.path.join(labels_folder, label_name))
-----------------------------------------------------------------------
- 4. 분할된 파일 개수 확인
# Count the validation images and show the result; assigning to
# file_count alone displays nothing.
folder_path = '/content/Dataset/images/val'
folder_contents = os.listdir(folder_path)
file_count = len(folder_contents)
print(f'{folder_path}: {file_count} files')

# Also report the other three split folders so the 8:2 ratio and the
# image/label pairing can be checked at a glance.
for split_folder in (train_images_folder, train_labels_folder,
                     val_labels_folder):
    print(f'{split_folder}: {len(os.listdir(split_folder))} files')
-----------------------------------------------------------------------
- 5. json에서 필요한 내용으로 text 파일 만들기
- 필요 항목 :
위치 정보 : x1, x2, y1, y2
박스 정보 : shape_type
클래스 정보 : labels
# Convert every JSON label file in the train/val label folders into a small
# text file holding the class label, shape type, point values and image size.

# Re-import so that `json` is guaranteed to be the standard-library module
# here, even if the name was rebound to something else earlier in the session.
import json

json_path = '/content/Dataset/labels/'
temp_list = ['train', 'val']

for temp_list_2 in temp_list:
    folder_path = json_path + temp_list_2

    # Output goes to a sibling folder (e.g. .../labels/train_text); open()
    # cannot create it, so make it up front.
    text_folder = folder_path + '_text'
    os.makedirs(text_folder, exist_ok=True)

    for file_name in os.listdir(folder_path):
        # Ignore anything that is not a JSON label (sub-folders, stray files).
        if not file_name.endswith('.json'):
            continue

        file_path = os.path.join(folder_path, file_name)
        try:
            with open(file_path, 'r', encoding='utf-8') as json_file:
                data = json.load(json_file)
        except json.JSONDecodeError as e:
            print(f'JSON 파일 파싱 오류: {e}')
            # Without this, `data` would be undefined or left over from the
            # previously processed file.
            continue

        shapes = data.get('shapes', [])
        if not shapes:
            continue

        # Only the first annotated shape is used.
        shape = shapes[0]
        first_point = shape['points'][0]
        second_point = shape['points'][1]

        # NOTE(review): these are the differences between the two points
        # divided by 10 (an extent), not a midpoint, despite the variable
        # names - confirm whether `+` was intended. Left unchanged.
        x_center = (second_point[0] - first_point[0]) / 10
        y_center = (second_point[1] - first_point[1]) / 10

        # Fall back to 'N/A' when the size is missing instead of dividing
        # the placeholder string by 5.
        image_width = data.get('imageWidth')
        image_height = data.get('imageHeight')
        Width_norm = image_width / 5 if image_width is not None else 'N/A'
        Height_norm = image_height / 5 if image_height is not None else 'N/A'

        shape_type = shape.get('shape_type', 'N/A')
        # The class label is the part of imagePath before the first '_'.
        labels = data.get('imagePath', 'N/A').split('_')[0]

        out_file_name = os.path.splitext(file_name)[0] + '.txt'
        out_file_path = os.path.join(text_folder, out_file_name)

        # Append mode is kept from the original; re-running this cell adds a
        # second copy of the record to an existing text file.
        with open(out_file_path, 'a', encoding='utf-8') as txt_file:
            txt_file.write(f'File: {file_name}\n')
            txt_file.write(f'label: {labels}\n')
            txt_file.write(f'shape_type: {shape_type}\n')
            txt_file.write(f'X_point: {x_center}\n')
            txt_file.write(f'Y_point: {y_center}\n')
            txt_file.write(f'Width: {Width_norm}\n')
            txt_file.write(f'Height: {Height_norm}\n')