inspect_dataset_index.py 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from __future__ import annotations
  2. import argparse
  3. import sys
  4. from pathlib import Path
  5. from collections import Counter
  6. ROOT_DIR = Path(__file__).resolve().parents[3]
  7. if str(ROOT_DIR) not in sys.path:
  8. sys.path.insert(0, str(ROOT_DIR))
  9. from lib.data import build_dataset_index
  10. def main() -> None:
  11. parser = argparse.ArgumentParser(description="Inspect indexed segmentation dataset samples.")
  12. parser.add_argument("--dataset", required=True, help="Dataset name, e.g. BUS-UCLM, TN3K, TG3K, BUSI")
  13. parser.add_argument("--root", required=True, help="Dataset root path")
  14. parser.add_argument("--show", type=int, default=5, help="How many sample records to print")
  15. args = parser.parse_args()
  16. records = build_dataset_index(args.dataset, args.root)
  17. print(f"dataset={args.dataset} samples={len(records)}")
  18. split_counter = Counter([record.split or "none" for record in records])
  19. class_counter = Counter([record.class_name or "none" for record in records])
  20. print("splits:", dict(split_counter))
  21. print("classes:", dict(class_counter))
  22. for record in records[: args.show]:
  23. print(
  24. {
  25. "sample_id": record.sample_id,
  26. "image": str(record.image_path),
  27. "mask": str(record.mask_path) if record.mask_path is not None else None,
  28. "annotation": str(record.annotation_path) if record.annotation_path is not None else None,
  29. "split": record.split,
  30. "class_name": record.class_name,
  31. "meta": record.meta,
  32. }
  33. )
# Run the CLI only when this file is executed as a script; importing the
# module must not trigger argument parsing or dataset indexing.
if __name__ == "__main__":
    main()