# Script that counts the number of instances per class in a YOLO-format dataset and prints the result.
# Usage: python count_yolo_label.py <source_folder>
import os
import sys
from collections import Counter
def read_yolo_dataset_from_folder(source_folder):
    """Count label instances per class id in every ``.txt`` file under a folder.

    The folder is walked recursively with ``os.walk``. Every file whose name
    ends with ``.txt`` is read line by line; a line counts as one instance
    when it has at least two whitespace-separated fields and the first field
    parses as an integer (the class id). Lines starting with ``#``, lines
    with fewer than two fields and lines whose first field is not an integer
    are skipped.

    Args:
        source_folder: Root directory to scan.

    Returns:
        A ``collections.Counter`` mapping integer class id to the number of
        counted lines.
    """
    class_distribution = Counter()
    for root, _dirs, files in os.walk(source_folder):
        for file in files:
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            # utf-8-sig drops a leading BOM, which would otherwise glue itself
            # to the first class id and make int() reject that line.
            # errors="replace" keeps one stray non-UTF-8 .txt file from
            # aborting the whole scan; replaced characters can never form a
            # valid integer, so they cannot create false counts.
            with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('#'):
                        continue
                    parts = line.split()
                    if len(parts) < 2:
                        continue
                    try:
                        class_id = int(parts[0])
                    except ValueError:
                        # First field is not a class id: not a label line.
                        continue
                    class_distribution[class_id] += 1
    return class_distribution
def read_class_names(class_names_file):
    """Load class names from a text file, one name per line.

    Args:
        class_names_file: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, in file order, each with
        leading and trailing whitespace removed. Blank lines are kept as
        empty strings so that list positions still match line numbers.
    """
    with open(class_names_file, 'r') as handle:
        # Iterating the handle yields the same lines readlines() would.
        return list(map(str.strip, handle))
def main():
    """Count class instances in a YOLO dataset folder, print and save a table.

    The folder path is taken from ``sys.argv[1]``. If a ``class_names.txt``
    file exists directly inside that folder, its lines are used as class
    names, looked up by class id as list index. The resulting table is
    printed to stdout and also written to ``Class_distribution.txt`` inside
    the same folder.

    Returns:
        None. On a wrong argument count or a missing folder a message is
        printed and the function returns early.
    """
    if len(sys.argv) != 2:
        print("Usage: python count_yolo_label.py <source_folder>")
        return

    source_folder = sys.argv[1]
    # isdir rather than exists: a regular file passes an existence check but
    # then fails when the report is opened "inside" it.
    if not os.path.isdir(source_folder):
        print("Source folder not found.")
        return

    class_names_file = os.path.join(source_folder, 'class_names.txt')
    if os.path.exists(class_names_file):
        class_names = read_class_names(class_names_file)
    else:
        class_names = []

    class_distribution = read_yolo_dataset_from_folder(source_folder)

    # Build each row once so the console and the file cannot drift apart.
    rows = []
    for class_id, count in sorted(class_distribution.items()):
        # The lower bound matters: a negative id would otherwise index the
        # name list from the end, or raise IndexError when the list is empty.
        if 0 <= class_id < len(class_names):
            rows.append(f"|{class_id:4d} | {class_names[class_id]:10s} |{count:6d} instances |")
        else:
            rows.append(f"|{class_id:4d} |{count:6d} instances |")

    print("Class distribution:")
    for row in rows:
        print(row)

    output_file = os.path.join(source_folder, 'Class_distribution.txt')
    with open(output_file, 'w') as f:
        f.write("Class distribution:\n")
        # One row per line; without the newline the table collapses into a
        # single unreadable line.
        f.writelines(f"{row}\n" for row in rows)
    print(f"Class distribution saved to {output_file}")
# Run the command-line entry point only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()