"""Command-line helper that prints a summary of an indexed segmentation dataset.

The script builds the dataset index via ``lib.data.build_dataset_index``,
prints the total sample count, per-split and per-class tallies, and then the
first few records as plain dictionaries.
"""

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from collections import Counter

# Three directory levels above this file; put on sys.path before the
# ``lib`` import below.
# NOTE(review): presumably this is the repository root that contains the
# ``lib`` package -- confirm against the project layout.
ROOT_DIR = Path(__file__).resolve().parents[3]
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from lib.data import build_dataset_index  # noqa: E402  (must follow the sys.path tweak)


def _non_negative_int(value: str) -> int:
    """Convert an argparse string to an int, rejecting negative numbers.

    A negative ``--show`` would otherwise be used as a slice bound and
    silently print "all but the last N" records.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not an integer or is < 0.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def _describe_record(record) -> dict:
    """Return the printable dictionary for one indexed record.

    Paths are rendered as strings; a missing mask or annotation path stays
    ``None`` rather than becoming the string ``"None"``.
    """
    mask = record.mask_path
    annotation = record.annotation_path
    return {
        "sample_id": record.sample_id,
        "image": str(record.image_path),
        "mask": str(mask) if mask is not None else None,
        "annotation": str(annotation) if annotation is not None else None,
        "split": record.split,
        "class_name": record.class_name,
        "meta": record.meta,
    }


def main() -> None:
    """Parse command-line arguments and print the dataset summary to stdout.

    Flags:
        --dataset: dataset name passed to ``build_dataset_index`` (required).
        --root: dataset root path passed to ``build_dataset_index`` (required).
        --show: how many leading records to print (non-negative, default 5).
    """
    parser = argparse.ArgumentParser(description="Inspect indexed segmentation dataset samples.")
    parser.add_argument("--dataset", required=True, help="Dataset name, e.g. BUS-UCLM, TN3K, TG3K, BUSI")
    parser.add_argument("--root", required=True, help="Dataset root path")
    parser.add_argument(
        "--show",
        type=_non_negative_int,
        default=5,
        help="How many sample records to print",
    )
    args = parser.parse_args()

    records = build_dataset_index(args.dataset, args.root)
    print(f"dataset={args.dataset} samples={len(records)}")

    # Falsy split / class values (None, "") are tallied under the "none" key.
    split_counter = Counter(record.split or "none" for record in records)
    class_counter = Counter(record.class_name or "none" for record in records)
    print("splits:", dict(split_counter))
    print("classes:", dict(class_counter))

    # --show is validated as >= 0, so this slice always takes a prefix.
    for record in records[: args.show]:
        print(_describe_record(record))


if __name__ == "__main__":
    main()