2 weeks ago · 5f33d6e8b4
--- a/.gitignore
+++ b/.gitignore
@@ -1,58 +1,60 @@
 
															-# Python

														
 
															-__pycache__/

														
 
															-*.py[cod]

														
 
															-*$py.class

														
 
															-*.so

														
 
															-*.egg

														
 
															-*.egg-info/

														
 
															-dist/

														
 
															-build/

														
 
															-*.whl

														
 
															-

														
 
															-# TypeScript

														
 
															-lib/sam2/demo/

														
 
															-

														
 
															-

														
 
															-# IDE

														
 
															-.idea/

														
 
															-.vscode/

														
 
															-*.swp

														
 
															-*.swo

														
 
															-

														
 
															-# OS

														
 
															-.DS_Store

														
 
															-Thumbs.db

														
 
															-

														
 
															-# Reference code & papers (do not upload)

														
 
															-ref/

														
 
															-tmp/

														
 
															-

														
 
															-# Weights & checkpoints

														
 
															-*.pth

														
 
															-*.pt

														
 
															-*.ckpt

														
 
															-*.onnx

														
 
															-

														
 
															-# Logs & outputs

														
 
															-*.log

														
 
															-outputs/

														
 
															-results/

														
 
															-runs/

														
 
															-lightning_logs/

														
 
															-swanlog/

														
 
															-

														
 
															-# Jupyter

														
 
															-.ipynb_checkpoints/

														
 
															-

														
 
															-# Environment

														
 
															-.env

														
 
															-.venv/

														
 
															-venv/

														
 
															-

														
 
															-# data

														
 
															-data/

														
 
															-cache/

														
 
															-

														
 
															-# Codex and .gitignore

														
 
															-.codex

														
 
															-.gitignore

														
 
															+# Python
														
 
															+__pycache__/
														
 
															+*.py[cod]
														
 
															+*$py.class
														
 
															+*.so
														
 
															+*.egg
														
 
															+*.egg-info/
														
 
															+dist/
														
 
															+build/
														
 
															+*.whl
														
 
															+
														
 
															+# TypeScript
														
 
															+lib/sam2/demo/
														
 
															+
														
 
															+
														
 
															+# IDE
														
 
															+.idea/
														
 
															+.vscode/
														
 
															+*.swp
														
 
															+*.swo
														
 
															+
														
 
															+# OS
														
 
															+.DS_Store
														
 
															+Thumbs.db
														
 
															+
														
 
															+# Reference code & papers (do not upload)
														
 
															+ref/
														
 
															+tmp/
														
 
															+
														
 
															+# Weights & checkpoints
														
 
															+*.pth
														
 
															+*.pt
														
 
															+*.ckpt
														
 
															+*.onnx
														
 
															+
														
 
															+# Logs & outputs
														
 
															+*.log
														
 
															+outputs/
														
 
															+results/
														
 
															+runs/
														
 
															+lightning_logs/
														
 
															+swanlog/
														
 
															+
														
 
															+# Jupyter
														
 
															+.ipynb_checkpoints/
														
 
															+
														
 
															+# Environment
														
 
															+.env
														
 
															+.venv/
														
 
															+venv/
														
 
															+
														
 
															+# Data and generated documentation
														
 
															+/data/
														
 
															+cache/
														
 
															+docs/
														
 
															+!lib/data/
														
 
															+
														
 
															+# Codex and .gitignore
														
 
															+.codex
														
 
															+.gitignore
														
--- a/configs/segmentation/train_sup_us_template.yaml
+++ b/configs/segmentation/train_sup_us_template.yaml
@@ -81,6 +81,9 @@ model:
 
															   ssm_forward_type: v3
														
 
															   ssm_backend: auto
														
 
															   use_frequency_refine: true
														
 
															+  low_freq_radius_h: 0.25
														
 
															+  low_freq_radius_w: 0.25
														
 
															+  learnable_low_freq_radius: true
														
 
															   guide_mode: affine
														
 
															   out_channels: null
														
--- a/configs/segmentation/us_exp_sup_busi.yaml
+++ b/configs/segmentation/us_exp_sup_busi.yaml
@@ -71,6 +71,9 @@ model:
 
															   ssm_forward_type: v3
														
 
															   ssm_backend: auto
														
 
															   use_frequency_refine: true
														
 
															+  low_freq_radius_h: 0.25
														
 
															+  low_freq_radius_w: 0.25
														
 
															+  learnable_low_freq_radius: true
														
 
															   guide_mode: affine
														
 
															   out_channels: null
														
--- a/configs/segmentation/us_exp_sup_busi_ablation.yaml
+++ b/configs/segmentation/us_exp_sup_busi_ablation.yaml
@@ -71,6 +71,9 @@ model:
 
															   ssm_forward_type: v3
														
 
															   ssm_backend: auto
														
 
															   use_frequency_refine: false
														
 
															+  low_freq_radius_h: 0.25
														
 
															+  low_freq_radius_w: 0.25
														
 
															+  learnable_low_freq_radius: true
														
 
															   guide_mode: affine
														
 
															   out_channels: null
														
--- a/lib/data/__init__.py
+++ b/lib/data/__init__.py
@@ -0,0 +1,51 @@
 
															+from .augment import SegmentationAugmentation, build_segmentation_augmentation
														
 
															+from .builder import DATASET_REGISTRY, build_dataset_index
														
 
															+from .collate import record_collate_fn
														
 
															+from .datasets import SegmentationRecordDataset, default_image_loader, default_mask_loader
														
 
															+from .ddti import build_ddti_mask, parse_ddti_xml
														
 
															+from .loaders import (
														
 
															+    OFFICIAL_SPLIT_FILES,
														
 
															+    apply_official_split,
														
 
															+    build_dataloader,
														
 
															+    build_record_dataset,
														
 
															+    get_official_split_file,
														
 
															+    list_supported_splits,
														
 
															+)
														
 
															+from .project_splits import (
														
 
															+    PROJECT_SPLIT_DATASETS,
														
 
															+    PROJECT_SPLIT_ROOT,
														
 
															+    generate_project_splits,
														
 
															+    get_project_split_file,
														
 
															+    load_project_split_ids,
														
 
															+    select_project_split_base_records,
														
 
															+)
														
 
															+from .records import SegSampleRecord
														
 
															+from .splits import load_id_txt, load_json_split
														
 
															+
														
 
															+__all__ = [
														
 
															+    "DATASET_REGISTRY",
														
 
															+    "OFFICIAL_SPLIT_FILES",
														
 
															+    "PROJECT_SPLIT_DATASETS",
														
 
															+    "PROJECT_SPLIT_ROOT",
														
 
															+    "SegmentationAugmentation",
														
 
															+    "SegSampleRecord",
														
 
															+    "build_segmentation_augmentation",
														
 
															+    "build_dataset_index",
														
 
															+    "record_collate_fn",
														
 
															+    "SegmentationRecordDataset",
														
 
															+    "default_image_loader",
														
 
															+    "default_mask_loader",
														
 
															+    "parse_ddti_xml",
														
 
															+    "build_ddti_mask",
														
 
															+    "load_id_txt",
														
 
															+    "load_json_split",
														
 
															+    "apply_official_split",
														
 
															+    "build_record_dataset",
														
 
															+    "build_dataloader",
														
 
															+    "get_official_split_file",
														
 
															+    "generate_project_splits",
														
 
															+    "get_project_split_file",
														
 
															+    "load_project_split_ids",
														
 
															+    "list_supported_splits",
														
 
															+    "select_project_split_base_records",
														
 
															+]
														
--- a/lib/data/augment.py
+++ b/lib/data/augment.py
@@ -0,0 +1,73 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from typing import Any
														
 
															+
														
 
															+import torch
														
 
															+
														
 
															+
														
 
															+def _rand_uniform(low: float, high: float) -> float:
														
 
															+    return float(torch.empty(1).uniform_(low, high).item())
														
 
															+
														
 
															+
														
 
															+class SegmentationAugmentation:
														
 
															+    def __init__(self, config: dict[str, Any] | None = None) -> None:
														
 
															+        self.config = config or {}
														
 
															+
														
 
															+    def __call__(
														
 
															+        self,
														
 
															+        image: torch.Tensor,
														
 
															+        mask: torch.Tensor | None = None,
														
 
															+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
														
 
															+        image, mask = self._apply_spatial(image, mask)
														
 
															+        image = self._apply_intensity(image)
														
 
															+        return image, mask
														
 
															+
														
 
															+    def _apply_spatial(
														
 
															+        self,
														
 
															+        image: torch.Tensor,
														
 
															+        mask: torch.Tensor | None,
														
 
															+    ) -> tuple[torch.Tensor, torch.Tensor | None]:
														
 
															+        if bool(self.config.get("random_flip", False)):
														
 
															+            if torch.rand(1).item() < 0.5:
														
 
															+                image = torch.flip(image, dims=(-1,))
														
 
															+                if mask is not None:
														
 
															+                    mask = torch.flip(mask, dims=(-1,))
														
 
															+            if torch.rand(1).item() < 0.5:
														
 
															+                image = torch.flip(image, dims=(-2,))
														
 
															+                if mask is not None:
														
 
															+                    mask = torch.flip(mask, dims=(-2,))
														
 
															+
														
 
															+        if bool(self.config.get("random_rotate_90", False)):
														
 
															+            k = int(torch.randint(0, 4, (1,)).item())
														
 
															+            if k > 0:
														
 
															+                image = torch.rot90(image, k=k, dims=(-2, -1))
														
 
															+                if mask is not None:
														
 
															+                    mask = torch.rot90(mask, k=k, dims=(-2, -1))
														
 
															+
														
 
															+        return image, mask
														
 
															+
														
 
															+    def _apply_intensity(self, image: torch.Tensor) -> torch.Tensor:
														
 
															+        if bool(self.config.get("random_brightness_contrast", False)):
														
 
															+            brightness = float(self.config.get("brightness_limit", 0.15))
														
 
															+            contrast = float(self.config.get("contrast_limit", 0.15))
														
 
															+            brightness_factor = _rand_uniform(1.0 - brightness, 1.0 + brightness)
														
 
															+            contrast_factor = _rand_uniform(1.0 - contrast, 1.0 + contrast)
														
 
															+            mean = image.mean(dim=(-2, -1), keepdim=True)
														
 
															+            image = (image - mean) * contrast_factor + mean
														
 
															+            image = image * brightness_factor
														
 
															+
														
 
															+        if bool(self.config.get("random_gaussian_noise", False)):
														
 
															+            std = float(self.config.get("gaussian_noise_std", 0.03))
														
 
															+            if std > 0:
														
 
															+                image = image + torch.randn_like(image) * std
														
 
															+
														
 
															+        return image.clamp(0.0, 1.0)
														
 
															+
														
 
															+
														
 
															+def build_segmentation_augmentation(config: dict[str, Any] | None):
														
 
															+    if not config:
														
 
															+        return None
														
 
															+    return SegmentationAugmentation(config)
														
 
															+
														
 
															+
														
 
															+__all__ = ["SegmentationAugmentation", "build_segmentation_augmentation"]
														
--- a/lib/data/builder.py
+++ b/lib/data/builder.py
@@ -0,0 +1,151 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from pathlib import Path
														
 
															+from typing import Callable
														
 
															+
														
 
															+from .indexers import (
														
 
															+    build_filename_matched_records,
														
 
															+    build_paired_folder_records,
														
 
															+    build_prefixed_paired_records,
														
 
															+    build_pre_split_records,
														
 
															+    build_stem_paired_records,
														
 
															+    build_xml_annotation_records,
														
 
															+)
														
 
															+from .records import SegSampleRecord
														
 
															+
														
 
															+
														
 
															+def _build_bus_uclm(root: Path) -> list[SegSampleRecord]:
														
 
															+    return build_paired_folder_records(
														
 
															+        dataset_name="BUS-UCLM",
														
 
															+        image_dir=root / "images",
														
 
															+        mask_dir=root / "masks",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_tg3k(root: Path) -> list[SegSampleRecord]:
														
 
															+    return build_paired_folder_records(
														
 
															+        dataset_name="TG3K",
														
 
															+        image_dir=root / "thyroid-image",
														
 
															+        mask_dir=root / "thyroid-mask",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_tn3k(root: Path) -> list[SegSampleRecord]:
														
 
															+    return build_pre_split_records(
														
 
															+        dataset_name="TN3K",
														
 
															+        train_image_dir=root / "trainval-image",
														
 
															+        train_mask_dir=root / "trainval-mask",
														
 
															+        test_image_dir=root / "test-image",
														
 
															+        test_mask_dir=root / "test-mask",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_otu_2d(root: Path) -> list[SegSampleRecord]:
														
 
															+    return build_stem_paired_records(
														
 
															+        dataset_name="OTU_2d",
														
 
															+        image_dir=root / "images",
														
 
															+        mask_dir=root / "annotations",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_busi(root: Path) -> list[SegSampleRecord]:
														
 
															+    base = root / "Dataset_BUSI_with_GT"
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for class_name in ["benign", "malignant", "normal"]:
														
 
															+        class_dir = base / class_name
														
 
															+        records.extend(
														
 
															+            build_filename_matched_records(
														
 
															+                dataset_name="BUSI",
														
 
															+                folder=class_dir,
														
 
															+                class_name=class_name,
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def _build_bus_bra(root: Path) -> list[SegSampleRecord]:
														
 
															+    image_dir = root / "BUSBRA" / "BUSBRA" / "Images"
														
 
															+    mask_dir = root / "BUSBRA" / "BUSBRA" / "Masks"
														
 
															+    return build_prefixed_paired_records(
														
 
															+        dataset_name="BUS-BRA",
														
 
															+        image_dir=image_dir,
														
 
															+        mask_dir=mask_dir,
														
 
															+        image_prefix_to_strip="bus_",
														
 
															+        mask_prefix_to_strip="mask_",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_bus_uc(root: Path) -> list[SegSampleRecord]:
														
 
															+    base = root / "BUS_UC" / "BUS_UC"
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+
														
 
															+    records.extend(
														
 
															+        build_paired_folder_records(
														
 
															+            dataset_name="BUS_UC",
														
 
															+            image_dir=base / "All" / "images",
														
 
															+            mask_dir=base / "All" / "masks",
														
 
															+            split="all",
														
 
															+            class_name="all",
														
 
															+        )
														
 
															+    )
														
 
															+    records.extend(
														
 
															+        build_paired_folder_records(
														
 
															+            dataset_name="BUS_UC",
														
 
															+            image_dir=base / "Benign" / "images",
														
 
															+            mask_dir=base / "Benign" / "masks",
														
 
															+            class_name="benign",
														
 
															+        )
														
 
															+    )
														
 
															+    records.extend(
														
 
															+        build_paired_folder_records(
														
 
															+            dataset_name="BUS_UC",
														
 
															+            image_dir=base / "Malignant" / "images",
														
 
															+            mask_dir=base / "Malignant" / "masks",
														
 
															+            class_name="malignant",
														
 
															+        )
														
 
															+    )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def _build_ccaui(root: Path) -> list[SegSampleRecord]:
														
 
															+    base = root / "Common Carotid Artery Ultrasound Images"
														
 
															+    return build_paired_folder_records(
														
 
															+        dataset_name="CCAUI",
														
 
															+        image_dir=base / "US images",
														
 
															+        mask_dir=base / "Expert mask images",
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _build_ddti(root: Path) -> list[SegSampleRecord]:
														
 
															+    return build_xml_annotation_records(
														
 
															+        dataset_name="DDTI",
														
 
															+        root=root,
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+DATASET_REGISTRY: dict[str, Callable[[Path], list[SegSampleRecord]]] = {
														
 
															+    "BUS-UCLM": _build_bus_uclm,
														
 
															+    "TG3K": _build_tg3k,
														
 
															+    "TN3K": _build_tn3k,
														
 
															+    "OTU_2d": _build_otu_2d,
														
 
															+    "BUSI": _build_busi,
														
 
															+    "BUS-BRA": _build_bus_bra,
														
 
															+    "BUS_UC": _build_bus_uc,
														
 
															+    "CCAUI": _build_ccaui,
														
 
															+    "DDTI": _build_ddti,
														
 
															+}
														
 
															+
														
 
															+
														
 
															+def build_dataset_index(dataset_name: str, root: str | Path) -> list[SegSampleRecord]:
														
 
															+    builder = DATASET_REGISTRY.get(dataset_name)
														
 
															+    if builder is None:
														
 
															+        raise ValueError(
														
 
															+            f"Unsupported dataset '{dataset_name}'. Expected one of: {', '.join(DATASET_REGISTRY)}."
														
 
															+        )
														
 
															+    root = Path(root)
														
 
															+    if not root.exists():
														
 
															+        raise FileNotFoundError(f"Dataset root not found: {root}")
														
 
															+    return builder(root)
														
 
															+
														
 
															+
														
 
															+__all__ = ["DATASET_REGISTRY", "build_dataset_index"]
														
--- a/lib/data/collate.py
+++ b/lib/data/collate.py
@@ -0,0 +1,40 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from collections.abc import Sequence
														
 
															+from typing import Any
														
 
															+
														
 
															+import torch
														
 
															+
														
 
															+
														
 
															+def record_collate_fn(batch: Sequence[dict[str, Any]]) -> dict[str, Any]:
														
 
															+    if not batch:
														
 
															+        raise ValueError("Empty batch is not allowed.")
														
 
															+
														
 
															+    collated: dict[str, Any] = {}
														
 
															+    keys = batch[0].keys()
														
 
															+    for key in keys:
														
 
															+        values = [sample[key] for sample in batch]
														
 
															+        first = values[0]
														
 
															+
														
 
															+        if torch.is_tensor(first):
														
 
															+            shapes = [tuple(value.shape) for value in values]
														
 
															+            if all(shape == shapes[0] for shape in shapes):
														
 
															+                collated[key] = torch.stack(values, dim=0)
														
 
															+            else:
														
 
															+                collated[key] = values
														
 
															+            continue
														
 
															+
														
 
															+        if first is None:
														
 
															+            collated[key] = values
														
 
															+            continue
														
 
															+
														
 
															+        if isinstance(first, (str, int, float, dict)):
														
 
															+            collated[key] = values
														
 
															+            continue
														
 
															+
														
 
															+        collated[key] = values
														
 
															+
														
 
															+    return collated
														
 
															+
														
 
															+
														
 
															+__all__ = ["record_collate_fn"]
														
--- a/lib/data/datasets.py
+++ b/lib/data/datasets.py
@@ -0,0 +1,82 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from collections.abc import Callable
														
 
															+from pathlib import Path
														
 
															+from typing import Any
														
 
															+
														
 
															+import numpy as np
														
 
															+import torch
														
 
															+from PIL import Image
														
 
															+from torch.utils.data import Dataset
														
 
															+
														
 
															+from .ddti import build_ddti_mask
														
 
															+from .records import SegSampleRecord
														
 
															+
														
 
															+
														
 
															+def default_image_loader(path: str | Path) -> torch.Tensor:
														
 
															+    image = Image.open(path).convert("RGB")
														
 
															+    array = np.asarray(image, dtype=np.float32) / 255.0
														
 
															+    return torch.from_numpy(array).permute(2, 0, 1).contiguous()
														
 
															+
														
 
															+
														
 
															+def default_mask_loader(path: str | Path) -> torch.Tensor:
														
 
															+    mask = Image.open(path).convert("L")
														
 
															+    array = (np.asarray(mask, dtype=np.float32) > 0).astype(np.float32)
														
 
															+    return torch.from_numpy(array).unsqueeze(0).contiguous()
														
 
															+
														
 
															+
														
 
															+class SegmentationRecordDataset(Dataset):
														
 
															+    def __init__(
														
 
															+            self,
														
 
															+            records: list[SegSampleRecord],
														
 
															+            image_loader: Callable[[str | Path], torch.Tensor] | None = None,
														
 
															+            mask_loader: Callable[[str | Path], torch.Tensor] | None = None,
														
 
															+            joint_transform: Callable[[torch.Tensor, torch.Tensor | None], tuple[torch.Tensor, torch.Tensor | None]] | None = None,
														
 
															+            image_transform: Callable[[torch.Tensor], torch.Tensor] | None = None,
														
 
															+            mask_transform: Callable[[torch.Tensor], torch.Tensor] | None = None,
														
 
															+    ) -> None:
														
 
															+        self.records = records
														
 
															+        self.image_loader = image_loader or default_image_loader
														
 
															+        self.mask_loader = mask_loader or default_mask_loader
														
 
															+        self.joint_transform = joint_transform
														
 
															+        self.image_transform = image_transform
														
 
															+        self.mask_transform = mask_transform
														
 
															+
														
 
															+    def __len__(self) -> int:
														
 
															+        return len(self.records)
														
 
															+
														
 
															+    def __getitem__(self, index: int) -> dict[str, Any]:
														
 
															+        record = self.records[index]
														
 
															+        image = self.image_loader(record.image_path)
														
 
															+
														
 
															+        mask = None
														
 
															+        if record.mask_path is not None:
														
 
															+            mask = self.mask_loader(record.mask_path)
														
 
															+        elif record.annotation_path is not None and record.dataset_name == "DDTI":
														
 
															+            ddti_mask = build_ddti_mask(record.image_path, record.annotation_path)
														
 
															+            mask_array = (np.asarray(ddti_mask, dtype=np.float32) > 0).astype(np.float32)
														
 
															+            mask = torch.from_numpy(mask_array).unsqueeze(0).contiguous()
														
 
															+
														
 
															+        if self.joint_transform is not None:
														
 
															+            image, mask = self.joint_transform(image, mask)
														
 
															+        if self.image_transform is not None:
														
 
															+            image = self.image_transform(image)
														
 
															+        if mask is not None and self.mask_transform is not None:
														
 
															+            mask = self.mask_transform(mask)
														
 
															+
														
 
															+        return {
														
 
															+            "image": image,
														
 
															+            "mask": mask,
														
 
															+            "dataset_name": record.dataset_name,
														
 
															+            "sample_id": record.sample_id,
														
 
															+            "split": record.split,
														
 
															+            "class_name": record.class_name,
														
 
															+            "meta": record.meta,
														
 
															+        }
														
 
															+
														
 
															+
														
 
															+__all__ = [
														
 
															+    "SegmentationRecordDataset",
														
 
															+    "default_image_loader",
														
 
															+    "default_mask_loader",
														
 
															+]
														
--- a/lib/data/ddti.py
+++ b/lib/data/ddti.py
@@ -0,0 +1,81 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+import json
														
 
															+from pathlib import Path
														
 
															+import xml.etree.ElementTree as ET
														
 
															+
														
 
															+from PIL import Image, ImageDraw
														
 
															+
														
 
															+
														
 
															+def parse_ddti_xml(annotation_path: str | Path) -> dict[int, list[list[tuple[int, int]]]]:
														
 
															+    """
														
 
															+    解析 DDTI 的 xml 标注。
														
 
															+
														
 
															+    Returns:
														
 
															+        {image_index: [polygon1, polygon2, ...]}
														
 
															+    """
														
 
															+    annotation_path = Path(annotation_path)
														
 
															+    root = ET.parse(annotation_path).getroot()
														
 
															+    image_to_polygons: dict[int, list[list[tuple[int, int]]]] = {}
														
 
															+
														
 
															+    for mark in root.findall("mark"):
														
 
															+        image_text = mark.findtext("image")
														
 
															+        svg_text = mark.findtext("svg")
														
 
															+        if not image_text or not svg_text:
														
 
															+            continue
														
 
															+
														
 
															+        image_index = int(image_text)
														
 
															+        try:
														
 
															+            shapes = json.loads(svg_text)
														
 
															+        except json.JSONDecodeError:
														
 
															+            continue
														
 
															+
														
 
															+        polygons: list[list[tuple[int, int]]] = []
														
 
															+        for shape in shapes:
														
 
															+            points = shape.get("points", [])
														
 
															+            polygon = []
														
 
															+            for point in points:
														
 
															+                x = int(round(point["x"]))
														
 
															+                y = int(round(point["y"]))
														
 
															+                polygon.append((x, y))
														
 
															+            if len(polygon) >= 3:
														
 
															+                polygons.append(polygon)
														
 
															+
														
 
															+        if polygons:
														
 
															+            image_to_polygons[image_index] = polygons
														
 
															+
														
 
															+    return image_to_polygons
														
 
															+
														
 
															+
														
 
															+def build_ddti_mask(
														
 
															+        image_path: str | Path,
														
 
															+        annotation_path: str | Path,
														
 
															+        image_index: int | None = None,
														
 
															+        fill_value: int = 255,
														
 
															+) -> Image.Image:
														
 
															+    """
														
 
															+    根据 DDTI 的 xml 为指定图像生成二值掩膜。
														
 
															+    """
														
 
															+    image_path = Path(image_path)
														
 
															+    annotation_path = Path(annotation_path)
														
 
															+    image = Image.open(image_path)
														
 
															+    width, height = image.size
														
 
															+
														
 
															+    if image_index is None:
														
 
															+        stem = image_path.stem
														
 
															+        if "_" not in stem:
														
 
															+            raise ValueError(f"Cannot infer image index from file name: {image_path.name}")
														
 
															+        _, image_idx_str = stem.split("_", 1)
														
 
															+        image_index = int(image_idx_str)
														
 
															+
														
 
															+    polygons_map = parse_ddti_xml(annotation_path)
														
 
															+    polygons = polygons_map.get(int(image_index), [])
														
 
															+
														
 
															+    mask = Image.new("L", (width, height), 0)
														
 
															+    draw = ImageDraw.Draw(mask)
														
 
															+    for polygon in polygons:
														
 
															+        draw.polygon(polygon, outline=fill_value, fill=fill_value)
														
 
															+    return mask
														
 
															+
														
 
															+
														
 
															+__all__ = ["parse_ddti_xml", "build_ddti_mask"]
														
--- a/lib/data/indexers.py
+++ b/lib/data/indexers.py
@@ -0,0 +1,225 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from pathlib import Path
														
 
															+
														
 
															+from .records import SegSampleRecord
														
 
															+from .utils import list_image_files, stem_without_mask_suffix
														
 
															+
														
 
															+
														
 
															+def build_paired_folder_records(
														
 
															+        dataset_name: str,
														
 
															+        image_dir: Path,
														
 
															+        mask_dir: Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        class_name: str | None = None,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    images = list_image_files(image_dir)
														
 
															+    masks = list_image_files(mask_dir)
														
 
															+    mask_map = {mask.name: mask for mask in masks}
														
 
															+
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for image in images:
														
 
															+        mask = mask_map.get(image.name)
														
 
															+        if mask is None:
														
 
															+            continue
														
 
															+        records.append(
														
 
															+            SegSampleRecord(
														
 
															+                dataset_name=dataset_name,
														
 
															+                image_path=image,
														
 
															+                mask_path=mask,
														
 
															+                split=split,
														
 
															+                sample_id=image.stem,
														
 
															+                class_name=class_name,
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def build_prefixed_paired_records(
														
 
															+        dataset_name: str,
														
 
															+        image_dir: Path,
														
 
															+        mask_dir: Path,
														
 
															+        *,
														
 
															+        image_prefix_to_strip: str = "",
														
 
															+        mask_prefix_to_strip: str = "",
														
 
															+        split: str | None = None,
														
 
															+        class_name: str | None = None,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    images = list_image_files(image_dir)
														
 
															+    masks = list_image_files(mask_dir)
														
 
															+
														
 
															+    def _normalize(path: Path, prefix: str) -> str:
														
 
															+        name = path.name
														
 
															+        if prefix and name.startswith(prefix):
														
 
															+            name = name[len(prefix):]
														
 
															+        return name
														
 
															+
														
 
															+    mask_map = {_normalize(mask, mask_prefix_to_strip): mask for mask in masks}
														
 
															+
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for image in images:
														
 
															+        key = _normalize(image, image_prefix_to_strip)
														
 
															+        mask = mask_map.get(key)
														
 
															+        if mask is None:
														
 
															+            continue
														
 
															+        records.append(
														
 
															+            SegSampleRecord(
														
 
															+                dataset_name=dataset_name,
														
 
															+                image_path=image,
														
 
															+                mask_path=mask,
														
 
															+                split=split,
														
 
															+                sample_id=image.stem,
														
 
															+                class_name=class_name,
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def build_stem_paired_records(
														
 
															+        dataset_name: str,
														
 
															+        image_dir: Path,
														
 
															+        mask_dir: Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        class_name: str | None = None,
														
 
															+        prefer_plain_mask: bool = True,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    images = list_image_files(image_dir)
														
 
															+    masks = list_image_files(mask_dir)
														
 
															+
														
 
															+    grouped_masks: dict[str, list[Path]] = {}
														
 
															+    for mask in masks:
														
 
															+        key = stem_without_mask_suffix(mask.name)
														
 
															+        grouped_masks.setdefault(key, []).append(mask)
														
 
															+
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for image in images:
														
 
															+        key = image.stem
														
 
															+        candidates = sorted(grouped_masks.get(key, []))
														
 
															+        if not candidates:
														
 
															+            continue
														
 
															+
														
 
															+        mask = candidates[0]
														
 
															+        if prefer_plain_mask:
														
 
															+            plain = [candidate for candidate in candidates if "_binary" not in candidate.stem.lower()]
														
 
															+            if plain:
														
 
															+                mask = plain[0]
														
 
															+
														
 
															+        records.append(
														
 
															+            SegSampleRecord(
														
 
															+                dataset_name=dataset_name,
														
 
															+                image_path=image,
														
 
															+                mask_path=mask,
														
 
															+                split=split,
														
 
															+                sample_id=key,
														
 
															+                class_name=class_name,
														
 
															+                meta={"mask_candidates": str(len(candidates))},
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def build_filename_matched_records(
														
 
															+        dataset_name: str,
														
 
															+        folder: Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        class_name: str | None = None,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    files = list_image_files(folder)
														
 
															+    image_map: dict[str, Path] = {}
														
 
															+    mask_map: dict[str, list[Path]] = {}
														
 
															+
														
 
															+    for path in files:
														
 
															+        key = stem_without_mask_suffix(path.name)
														
 
															+        if "_mask" in path.stem:
														
 
															+            mask_map.setdefault(key, []).append(path)
														
 
															+        else:
														
 
															+            image_map[key] = path
														
 
															+
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for key, image in sorted(image_map.items()):
														
 
															+        masks = sorted(mask_map.get(key, []))
														
 
															+        if not masks:
														
 
															+            continue
														
 
															+        records.append(
														
 
															+            SegSampleRecord(
														
 
															+                dataset_name=dataset_name,
														
 
															+                image_path=image,
														
 
															+                mask_path=masks[0],
														
 
															+                split=split,
														
 
															+                sample_id=key,
														
 
															+                class_name=class_name,
														
 
															+                meta={"mask_count": str(len(masks))},
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def build_pre_split_records(
														
 
															+        dataset_name: str,
														
 
															+        train_image_dir: Path,
														
 
															+        train_mask_dir: Path,
														
 
															+        test_image_dir: Path,
														
 
															+        test_mask_dir: Path,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    records = []
														
 
															+    records.extend(
														
 
															+        build_paired_folder_records(
														
 
															+            dataset_name=dataset_name,
														
 
															+            image_dir=train_image_dir,
														
 
															+            mask_dir=train_mask_dir,
														
 
															+            split="trainval",
														
 
															+        )
														
 
															+    )
														
 
															+    records.extend(
														
 
															+        build_paired_folder_records(
														
 
															+            dataset_name=dataset_name,
														
 
															+            image_dir=test_image_dir,
														
 
															+            mask_dir=test_mask_dir,
														
 
															+            split="test",
														
 
															+        )
														
 
															+    )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+def build_xml_annotation_records(
														
 
															+        dataset_name: str,
														
 
															+        root: Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        class_name: str | None = None,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    xml_map = {path.stem: path for path in sorted(root.glob("*.xml"))}
														
 
															+    image_files = sorted(root.glob("*.jpg"))
														
 
															+
														
 
															+    records: list[SegSampleRecord] = []
														
 
															+    for image in image_files:
														
 
															+        sample_key = image.stem.split("_")[0]
														
 
															+        annotation = xml_map.get(sample_key)
														
 
															+        if annotation is None:
														
 
															+            continue
														
 
															+        records.append(
														
 
															+            SegSampleRecord(
														
 
															+                dataset_name=dataset_name,
														
 
															+                image_path=image,
														
 
															+                mask_path=None,
														
 
															+                annotation_path=annotation,
														
 
															+                split=split,
														
 
															+                sample_id=image.stem,
														
 
															+                class_name=class_name,
														
 
															+                meta={"annotation_type": "xml"},
														
 
															+            )
														
 
															+        )
														
 
															+    return records
														
 
															+
														
 
															+
														
 
															+__all__ = [
														
 
															+    "build_paired_folder_records",
														
 
															+    "build_prefixed_paired_records",
														
 
															+    "build_stem_paired_records",
														
 
															+    "build_filename_matched_records",
														
 
															+    "build_pre_split_records",
														
 
															+    "build_xml_annotation_records",
														
 
															+]
														
--- a/lib/data/loaders.py
+++ b/lib/data/loaders.py
@@ -0,0 +1,224 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from pathlib import Path
														
 
															+from typing import Any
														
 
															+
														
 
															+from torch.utils.data import DataLoader
														
 
															+
														
 
															+from .augment import build_segmentation_augmentation
														
 
															+from .builder import build_dataset_index
														
 
															+from .collate import record_collate_fn
														
 
															+from .datasets import SegmentationRecordDataset
														
 
															+from .project_splits import (
														
 
															+    PROJECT_SPLIT_DATASETS,
														
 
															+    get_project_split_file,
														
 
															+    load_project_split_ids,
														
 
															+    select_project_split_base_records,
														
 
															+)
														
 
															+from .records import SegSampleRecord
														
 
															+from .splits import load_id_txt, load_json_split
														
 
															+
														
 
															+OFFICIAL_SPLIT_FILES: dict[str, dict[str, str]] = {
														
 
															+    "OTU_2d": {
														
 
															+        "train": "train.txt",
														
 
															+        "val": "val.txt",
														
 
															+    },
														
 
															+    "TN3K": {
														
 
															+        "train": "tn3k-trainval.json",
														
 
															+        "val": "tn3k-trainval.json",
														
 
															+        "test": "tn3k-trainval.json",
														
 
															+    },
														
 
															+    "TG3K": {
														
 
															+        "train": "tg3k-trainval.json",
														
 
															+        "val": "tg3k-trainval.json",
														
 
															+        "test": "tg3k-trainval.json",
														
 
															+    },
														
 
															+}
														
 
															+
														
 
															+
														
 
															+def _normalize_id_set(values: list[str]) -> set[str]:
														
 
															+    normalized = set()
														
 
															+    for item in values:
														
 
															+        normalized.add(item)
														
 
															+        try:
														
 
															+            normalized.add(f"{int(item):04d}")
														
 
															+        except ValueError:
														
 
															+            pass
														
 
															+    return normalized
														
 
															+
														
 
															+
														
 
															+def _as_exact_id_set(values: list[str]) -> set[str]:
														
 
															+    return {item for item in values}
														
 
															+
														
 
															+
														
 
															+def _clone_record(record: SegSampleRecord, split_name: str | None) -> SegSampleRecord:
														
 
															+    return SegSampleRecord(
														
 
															+        dataset_name=record.dataset_name,
														
 
															+        image_path=record.image_path,
														
 
															+        mask_path=record.mask_path,
														
 
															+        annotation_path=record.annotation_path,
														
 
															+        split=split_name,
														
 
															+        sample_id=record.sample_id,
														
 
															+        class_name=record.class_name,
														
 
															+        meta=dict(record.meta),
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def _filter_by_sample_ids(records: list[SegSampleRecord], sample_ids: set[str], split_name: str) -> list[SegSampleRecord]:
														
 
															+    filtered = []
														
 
															+    for record in records:
														
 
															+        if record.sample_id in sample_ids:
														
 
															+            filtered.append(_clone_record(record, split_name))
														
 
															+    return filtered
														
 
															+
														
 
															+
														
 
															+def _filter_by_existing_split(records: list[SegSampleRecord], split: str) -> list[SegSampleRecord]:
														
 
															+    return [_clone_record(record, split) for record in records if record.split == split]
														
 
															+
														
 
															+
														
 
															+def get_official_split_file(
														
 
															+        dataset_name: str,
														
 
															+        root: str | Path,
														
 
															+        split: str,
														
 
															+) -> Path | None:
														
 
															+    split_map = OFFICIAL_SPLIT_FILES.get(dataset_name)
														
 
															+    if split_map is None:
														
 
															+        return None
														
 
															+
														
 
															+    relative_path = split_map.get(split)
														
 
															+    if relative_path is None:
														
 
															+        return None
														
 
															+    return Path(root) / relative_path
														
 
															+
														
 
															+
														
 
															+def list_supported_splits(dataset_name: str) -> list[str]:
														
 
															+    official = OFFICIAL_SPLIT_FILES.get(dataset_name)
														
 
															+    if official is not None:
														
 
															+        return list(official.keys())
														
 
															+    if dataset_name in PROJECT_SPLIT_DATASETS:
														
 
															+        return ["train", "val"]
														
 
															+    return []
														
 
															+
														
 
															+
														
 
															+def apply_official_split(
														
 
															+        dataset_name: str,
														
 
															+        root: str | Path,
														
 
															+        records: list[SegSampleRecord],
														
 
															+        split: str,
														
 
															+        *,
														
 
															+        split_file: str | Path | None = None,
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    root = Path(root)
														
 
															+
														
 
															+    if dataset_name == "OTU_2d":
														
 
															+        if split not in {"train", "val"}:
														
 
															+            raise ValueError("OTU_2d currently supports official splits: train, val.")
														
 
															+        split_path = Path(split_file) if split_file is not None else get_official_split_file(dataset_name, root, split)
														
 
															+        ids = _normalize_id_set(load_id_txt(split_path))
														
 
															+        return _filter_by_sample_ids(records, ids, split_name=split)
														
 
															+
														
 
															+    if dataset_name == "TN3K":
														
 
															+        if split == "test":
														
 
															+            return _filter_by_existing_split(records, "test")
														
 
															+        split_path = Path(split_file) if split_file is not None else get_official_split_file(dataset_name, root, split)
														
 
															+        split_map = load_json_split(split_path)
														
 
															+        if split not in split_map:
														
 
															+            raise ValueError(f"Split '{split}' not found in {split_path}.")
														
 
															+        ids = _normalize_id_set(split_map[split])
														
 
															+        trainval_records = [record for record in records if record.split == "trainval"]
														
 
															+        return _filter_by_sample_ids(trainval_records, ids, split_name=split)
														
 
															+
														
 
															+    if dataset_name == "TG3K":
														
 
															+        split_path = Path(split_file) if split_file is not None else get_official_split_file(dataset_name, root, split)
														
 
															+        split_map = load_json_split(split_path)
														
 
															+        if split not in split_map:
														
 
															+            raise ValueError(f"Split '{split}' not found in {split_path}.")
														
 
															+        ids = _normalize_id_set(split_map[split])
														
 
															+        return _filter_by_sample_ids(records, ids, split_name=split)
														
 
															+
														
 
															+    if dataset_name in PROJECT_SPLIT_DATASETS:
														
 
															+        if split not in {"train", "val"}:
														
 
															+            raise ValueError(
														
 
															+                f"{dataset_name} currently supports project splits: train, val."
														
 
															+            )
														
 
															+        records = select_project_split_base_records(dataset_name, records)
														
 
															+        split_path = Path(split_file) if split_file is not None else get_project_split_file(root, split)
														
 
															+        ids = _as_exact_id_set(load_project_split_ids(root, split) if split_file is None else load_id_txt(split_path))
														
 
															+        return _filter_by_sample_ids(records, ids, split_name=split)
														
 
															+
														
 
															+    filtered = _filter_by_existing_split(records, split)
														
 
															+    if filtered:
														
 
															+        return filtered
														
 
															+    raise ValueError(
														
 
															+        f"No split handler registered for dataset '{dataset_name}' and split '{split}'."
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def build_record_dataset(
														
 
															+        dataset_name: str,
														
 
															+        root: str | Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        split_file: str | Path | None = None,
														
 
															+        augmentation_config: dict[str, Any] | None = None,
														
 
															+        image_transform=None,
														
 
															+        mask_transform=None,
														
 
															+) -> SegmentationRecordDataset:
														
 
															+    records = build_dataset_index(dataset_name, root)
														
 
															+    if split is not None:
														
 
															+        records = apply_official_split(
														
 
															+            dataset_name=dataset_name,
														
 
															+            root=root,
														
 
															+            records=records,
														
 
															+            split=split,
														
 
															+            split_file=split_file,
														
 
															+        )
														
 
															+    return SegmentationRecordDataset(
														
 
															+        records=records,
														
 
															+        joint_transform=build_segmentation_augmentation(augmentation_config),
														
 
															+        image_transform=image_transform,
														
 
															+        mask_transform=mask_transform,
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+def build_dataloader(
														
 
															+        dataset_name: str,
														
 
															+        root: str | Path,
														
 
															+        *,
														
 
															+        split: str | None = None,
														
 
															+        split_file: str | Path | None = None,
														
 
															+        batch_size: int = 4,
														
 
															+        shuffle: bool = False,
														
 
															+        num_workers: int = 0,
														
 
															+        augmentation_config: dict[str, Any] | None = None,
														
 
															+        image_transform=None,
														
 
															+        mask_transform=None,
														
 
															+        **loader_kwargs: Any,
														
 
															+) -> DataLoader:
														
 
															+    dataset = build_record_dataset(
														
 
															+        dataset_name=dataset_name,
														
 
															+        root=root,
														
 
															+        split=split,
														
 
															+        split_file=split_file,
														
 
															+        augmentation_config=augmentation_config,
														
 
															+        image_transform=image_transform,
														
 
															+        mask_transform=mask_transform,
														
 
															+    )
														
 
															+    return DataLoader(
														
 
															+        dataset,
														
 
															+        batch_size=batch_size,
														
 
															+        shuffle=shuffle,
														
 
															+        num_workers=num_workers,
														
 
															+        collate_fn=loader_kwargs.pop("collate_fn", record_collate_fn),
														
 
															+        **loader_kwargs,
														
 
															+    )
														
 
															+
														
 
															+
														
 
															+__all__ = [
														
 
															+    "OFFICIAL_SPLIT_FILES",
														
 
															+    "apply_official_split",
														
 
															+    "build_record_dataset",
														
 
															+    "build_dataloader",
														
 
															+    "get_official_split_file",
														
 
															+    "list_supported_splits",
														
 
															+]
														
--- a/lib/data/project_splits.py
+++ b/lib/data/project_splits.py
@@ -0,0 +1,159 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+import random
														
 
															+from collections import defaultdict
														
 
															+from pathlib import Path
														
 
															+
														
 
															+from .builder import build_dataset_index
														
 
															+from .records import SegSampleRecord
														
 
															+
														
 
															+
														
 
															+PROJECT_SPLIT_ROOT = Path("splits") / "project"
														
 
															+PROJECT_SPLIT_DATASETS = {"BUS-UCLM", "BUSI", "BUS-BRA", "BUS_UC", "CCAUI", "DDTI"}
														
 
															+
														
 
															+
														
 
															+def _project_split_dir(root: str | Path) -> Path:
														
 
															+    return Path(root) / PROJECT_SPLIT_ROOT
														
 
															+
														
 
															+
														
 
															+def get_project_split_file(
														
 
															+        root: str | Path,
														
 
															+        split: str,
														
 
															+) -> Path:
														
 
															+    return _project_split_dir(root) / f"{split}.txt"
														
 
															+
														
 
															+
														
 
															+def load_project_split_ids(
														
 
															+        root: str | Path,
														
 
															+        split: str,
														
 
															+) -> list[str]:
														
 
															+    path = get_project_split_file(root, split)
														
 
															+    if not path.exists():
														
 
															+        raise FileNotFoundError(f"Project split file not found: {path}")
														
 
															+    return [
														
 
															+        line.strip()
														
 
															+        for line in path.read_text(encoding="utf-8", errors="ignore").splitlines()
														
 
															+        if line.strip()
														
 
															+    ]
														
 
															+
														
 
															+
														
 
															+def _write_split_ids(path: Path, sample_ids: list[str]) -> None:
														
 
															+    path.parent.mkdir(parents=True, exist_ok=True)
														
 
															+    if sample_ids:
														
 
															+        path.write_text("\n".join(sample_ids) + "\n", encoding="utf-8")
														
 
															+    else:
														
 
															+        path.write_text("", encoding="utf-8")
														
 
															+
														
 
															+
														
 
															+def _deduplicate_records(
														
 
															+        dataset_name: str,
														
 
															+        records: list[SegSampleRecord],
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    if dataset_name != "BUS_UC":
														
 
															+        return records
														
 
															+
														
 
															+    # BUS_UC 的 All 与 Benign/Malignant 是重复样本，默认只保留 All 作为正式划分基底。
														
 
															+    all_records = [record for record in records if record.class_name == "all"]
														
 
															+    return all_records if all_records else records
														
 
															+
														
 
															+
														
 
															+def select_project_split_base_records(
														
 
															+        dataset_name: str,
														
 
															+        records: list[SegSampleRecord],
														
 
															+) -> list[SegSampleRecord]:
														
 
															+    return _deduplicate_records(dataset_name, records)
														
 
															+
														
 
															+
														
 
															+def _group_records_for_split(
														
 
															+        records: list[SegSampleRecord],
														
 
															+) -> dict[str, list[SegSampleRecord]]:
														
 
															+    groups: dict[str, list[SegSampleRecord]] = defaultdict(list)
														
 
															+    for record in records:
														
 
															+        key = record.class_name or "__default__"
														
 
															+        groups[key].append(record)
														
 
															+    return groups
														
 
															+
														
 
															+
														
 
															+def _split_group(
														
 
															+        group_records: list[SegSampleRecord],
														
 
															+        *,
														
 
															+        val_ratio: float,
														
 
															+        rng: random.Random,
														
 
															+) -> tuple[list[SegSampleRecord], list[SegSampleRecord]]:
														
 
															+    shuffled = list(group_records)
														
 
															+    rng.shuffle(shuffled)
														
 
															+
														
 
															+    val_count = int(round(len(shuffled) * val_ratio))
														
 
															+    if len(shuffled) >= 2:
														
 
															+        val_count = max(1, min(len(shuffled) - 1, val_count))
														
 
															+    elif len(shuffled) == 1:
														
 
															+        val_count = 0
														
 
															+
														
 
															+    val_records = shuffled[:val_count]
														
 
															+    train_records = shuffled[val_count:]
														
 
															+    return train_records, val_records
														
 
															+
														
 
															+
														
 
															+def generate_project_splits(
														
 
															+        dataset_name: str,
														
 
															+        root: str | Path,
														
 
															+        *,
														
 
															+        val_ratio: float = 0.2,
														
 
															+        seed: int = 42,
														
 
															+        stratify_by_class: bool = True,
														
 
															+        reuse_existing: bool = True,
														
 
															+) -> dict[str, list[str]]:
														
 
															+    if dataset_name not in PROJECT_SPLIT_DATASETS:
														
 
															+        raise ValueError(
														
 
															+            f"Dataset '{dataset_name}' is not enabled for project split generation."
														
 
															+        )
														
 
															+    if not 0.0 < val_ratio < 1.0:
														
 
															+        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio}.")
														
 
															+
														
 
															+    train_path = get_project_split_file(root, "train")
														
 
															+    val_path = get_project_split_file(root, "val")
														
 
															+    if reuse_existing and train_path.exists() and val_path.exists():
														
 
															+        return {
														
 
															+            "train": load_project_split_ids(root, "train"),
														
 
															+            "val": load_project_split_ids(root, "val"),
														
 
															+        }
														
 
															+
														
 
															+    records = build_dataset_index(dataset_name, root)
														
 
															+    records = _deduplicate_records(dataset_name, records)
														
 
															+    rng = random.Random(seed)
														
 
															+
														
 
															+    train_records: list[SegSampleRecord] = []
														
 
															+    val_records: list[SegSampleRecord] = []
														
 
															+
														
 
															+    if stratify_by_class:
														
 
															+        groups = _group_records_for_split(records)
														
 
															+        for group_records in groups.values():
														
 
															+            group_train, group_val = _split_group(group_records, val_ratio=val_ratio, rng=rng)
														
 
															+            train_records.extend(group_train)
														
 
															+            val_records.extend(group_val)
														
 
															+    else:
														
 
															+        train_records, val_records = _split_group(records, val_ratio=val_ratio, rng=rng)
														
 
															+
														
 
															+    train_ids = sorted(record.sample_id for record in train_records if record.sample_id is not None)
														
 
															+    val_ids = sorted(record.sample_id for record in val_records if record.sample_id is not None)
														
 
															+
														
 
															+    split_dir = _project_split_dir(root)
														
 
															+    split_dir.mkdir(parents=True, exist_ok=True)
														
 
															+
														
 
															+    _write_split_ids(train_path, train_ids)
														
 
															+    _write_split_ids(val_path, val_ids)
														
 
															+
														
 
															+    return {
														
 
															+        "train": train_ids,
														
 
															+        "val": val_ids,
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+__all__ = [
														
 
															+    "PROJECT_SPLIT_DATASETS",
														
 
															+    "PROJECT_SPLIT_ROOT",
														
 
															+    "generate_project_splits",
														
 
															+    "get_project_split_file",
														
 
															+    "load_project_split_ids",
														
 
															+    "select_project_split_base_records",
														
 
															+]
														
--- a/lib/data/records.py
+++ b/lib/data/records.py
@@ -0,0 +1,25 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from dataclasses import dataclass, field
														
 
															+from pathlib import Path
														
 
															+
														
 
															+
														
 
															+@dataclass(slots=True)
														
 
															+class SegSampleRecord:
														
 
															+    """
														
 
															+    统一的分割样本记录格式。
														
 
															+
														
 
															+    这一层只负责“索引”，不负责真正读取图像。
														
 
															+    """
														
 
															+
														
 
															+    dataset_name: str
														
 
															+    image_path: Path
														
 
															+    mask_path: Path | None = None
														
 
															+    annotation_path: Path | None = None
														
 
															+    split: str | None = None
														
 
															+    sample_id: str | None = None
														
 
															+    class_name: str | None = None
														
 
															+    meta: dict[str, str] = field(default_factory=dict)
														
 
															+
														
 
															+
														
 
															+__all__ = ["SegSampleRecord"]
														
--- a/lib/data/splits.py
+++ b/lib/data/splits.py
@@ -0,0 +1,21 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+import json
														
 
															+from pathlib import Path
														
 
															+
														
 
															+
														
 
															+def load_id_txt(path: str | Path) -> list[str]:
														
 
															+    path = Path(path)
														
 
															+    return [line.strip() for line in path.read_text(encoding="utf-8", errors="ignore").splitlines() if line.strip()]
														
 
															+
														
 
															+
														
 
															+def load_json_split(path: str | Path) -> dict[str, list[str]]:
														
 
															+    path = Path(path)
														
 
															+    obj = json.loads(path.read_text(encoding="utf-8"))
														
 
															+    result: dict[str, list[str]] = {}
														
 
															+    for key, value in obj.items():
														
 
															+        result[key] = [str(item) for item in value]
														
 
															+    return result
														
 
															+
														
 
															+
														
 
															+__all__ = ["load_id_txt", "load_json_split"]
														
--- a/lib/data/utils.py
+++ b/lib/data/utils.py
@@ -0,0 +1,36 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from pathlib import Path
														
 
															+import re
														
 
															+
														
 
															+
														
 
															+IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff"}
														
 
															+
														
 
															+
														
 
															+def is_image_file(path: Path) -> bool:
														
 
															+    return path.is_file() and path.suffix.lower() in IMAGE_EXTENSIONS
														
 
															+
														
 
															+
														
 
															+def list_image_files(folder: Path) -> list[Path]:
														
 
															+    if not folder.exists():
														
 
															+        raise FileNotFoundError(f"Folder not found: {folder}")
														
 
															+    return sorted([path for path in folder.iterdir() if is_image_file(path)])
														
 
															+
														
 
															+
														
 
															+def stem_without_mask_suffix(name: str) -> str:
														
 
															+    stem = Path(name).stem
														
 
															+    stem = re.sub(r"_mask(_\d+)?$", "", stem)
														
 
															+    return stem
														
 
															+
														
 
															+
														
 
															+def relative_stem(path: Path) -> str:
														
 
															+    return path.stem
														
 
															+
														
 
															+
														
 
															+__all__ = [
														
 
															+    "IMAGE_EXTENSIONS",
														
 
															+    "is_image_file",
														
 
															+    "list_image_files",
														
 
															+    "stem_without_mask_suffix",
														
 
															+    "relative_stem",
														
 
															+]
														
--- a/lib/modules/xnet_2d.py
+++ b/lib/modules/xnet_2d.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
															 from collections.abc import Sequence
														
 
															+import ptwt
														
 
															 import torch
														
 
															 import torch.nn as nn
														
 
															 import torch.nn.functional as F
														
@@ -11,6 +12,7 @@ from .lib_mamba.vmamba import SS2D as VMambaSS2D
 
															 class XNetStem2d(nn.Module):
														
 
															+    # Stem reduces spatial size by 4x while lifting features into encoder stage 1.
														
 
															     def __init__(self, in_channels: int, stem_channels: int, out_channels: int) -> None:
														
 
															         super().__init__()
														
 
															         self.block = nn.Sequential(
														
@@ -43,6 +45,7 @@ class XNetDownsample2d(nn.Module):
 
															 class XLocalBranch2d(nn.Module):
														
 
															+    # Parallel depthwise branches capture short-range texture at two kernel scales.
														
 
															     def __init__(self, channels: int) -> None:
														
 
															         super().__init__()
														
 
															         self.branch3 = nn.Sequential(
														
@@ -60,45 +63,36 @@ class XLocalBranch2d(nn.Module):
 
															         return self.branch3(x) + self.branch5(x)
														
 
															-class XHaarWaveletTransform2d(nn.Module):
														
 
															-    def __init__(self, channels: int) -> None:
														
 
															+class XWaveletTransform2d(nn.Module):
														
 
															+    # ptwt-based wavelet decomposition/reconstruction with explicit crop so odd
														
 
															+    # input sizes round-trip to the exact original spatial shape.
														
 
															+    def __init__(
														
 
															+        self, channels: int, wavelet_type: str = "haar", wavelet_level: int = 1
														
 
															+    ) -> None:
														
 
															         super().__init__()
														
 
															-        ll = torch.tensor([[0.5, 0.5], [0.5, 0.5]], dtype=torch.float32)
														
 
															-        lh = torch.tensor([[-0.5, -0.5], [0.5, 0.5]], dtype=torch.float32)
														
 
															-        hl = torch.tensor([[-0.5, 0.5], [-0.5, 0.5]], dtype=torch.float32)
														
 
															-        hh = torch.tensor([[0.5, -0.5], [-0.5, 0.5]], dtype=torch.float32)
														
 
															-        filt = torch.stack([ll, lh, hl, hh], dim=0).unsqueeze(1)
														
 
															-        self.register_buffer(
														
 
															-            "analysis_filter", filt.repeat(channels, 1, 1, 1), persistent=False
														
 
															-        )
														
 
															-        self.register_buffer(
														
 
															-            "synthesis_filter", filt.repeat(channels, 1, 1, 1), persistent=False
														
 
															-        )
														
 
															         self.channels = channels
														
 
															+        self.wavelet_type = wavelet_type
														
 
															+        self.wavelet_level = wavelet_level
														
 
															     def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
														
 
															-        b, c, h, w = x.shape
														
 
															-        pad_h = h % 2
														
 
															-        pad_w = w % 2
														
 
															-        if pad_h or pad_w:
														
 
															-            x = F.pad(x, (0, pad_w, 0, pad_h))
														
 
															-        y = F.conv2d(x, self.analysis_filter, stride=2, groups=self.channels)
														
 
															-        y = y.view(b, c, 4, y.shape[-2], y.shape[-1])
														
 
															-        ll = y[:, :, 0]
														
 
															-        high = y[:, :, 1:].reshape(b, c * 3, y.shape[-2], y.shape[-1])
														
 
															+        coeffs = ptwt.wavedec2(x, self.wavelet_type, level=self.wavelet_level)
														
 
															+        ll = coeffs[0]
														
 
															+        high_parts = coeffs[1]
														
 
															+        high = torch.cat(high_parts, dim=1)
														
 
															         return ll, high
														
 
															     def inverse(
														
 
															         self, ll: torch.Tensor, high: torch.Tensor, output_size: tuple[int, int]
														
 
															     ) -> torch.Tensor:
														
 
															-        b, c, h, w = ll.shape
														
 
															-        high = high.view(b, c, 3, h, w)
														
 
															-        y = torch.cat([ll.unsqueeze(2), high], dim=2).reshape(b, c * 4, h, w)
														
 
															-        x = F.conv_transpose2d(y, self.synthesis_filter, stride=2, groups=self.channels)
														
 
															+        lh, hl, hh = torch.chunk(high, 3, dim=1)
														
 
															+        coeffs = [ll, (lh, hl, hh)]
														
 
															+        x = ptwt.waverec2(coeffs, self.wavelet_type)
														
 
															         return x[:, :, : output_size[0], : output_size[1]]
														
 
															 class XWaveletBranch2d(nn.Module):
														
 
															+    # The wavelet branch learns on low/high-frequency components separately and
														
 
															+    # then reconstructs back to the original feature size.
														
 
															     def __init__(
														
 
															         self, channels: int, wavelet_type: str = "haar", wavelet_level: int = 1
														
 
															     ) -> None:
														
@@ -109,7 +103,9 @@ class XWaveletBranch2d(nn.Module):
 
															             raise ValueError(
														
 
															                 "Initial XNet implementation only supports wavelet_level=1."
														
 
															             )
														
 
															-        self.wavelet = XHaarWaveletTransform2d(channels)
														
 
															+        self.wavelet = XWaveletTransform2d(
														
 
															+            channels, wavelet_type=wavelet_type, wavelet_level=wavelet_level
														
 
															+        )
														
 
															         self.ll_proj = nn.Sequential(
														
 
															             Conv2dBN(channels, channels, 3, 1, 1),
														
 
															             nn.ReLU(inplace=True),
														
@@ -134,6 +130,7 @@ class XWaveletBranch2d(nn.Module):
 
															 class XSSMGlobalBranch2d(nn.Module):
														
 
															+    # The global branch wraps VMamba and switches scan backend at runtime.
														
 
															     def __init__(
														
 
															         self,
														
 
															         channels: int,
														
@@ -240,6 +237,7 @@ class XBranchFusion2d(nn.Module):
 
															 class XTEB2d(nn.Module):
														
 
															+    # XTEB fuses local, wavelet, and global branches with residual post/ffn blocks.
														
 
															     def __init__(
														
 
															         self,
														
 
															         channels: int,
														
@@ -333,6 +331,7 @@ class XNetEncoderStage2d(nn.Module):
 
															 class XNetEncoder2d(nn.Module):
														
 
															+    # The encoder is a 4-stage feature pyramid with optional stage-1 global branch.
														
 
															     def __init__(
														
 
															         self,
														
 
															         in_channels: int,
														
@@ -416,6 +415,7 @@ class XNetEncoder2d(nn.Module):
 
															 class XGuideProjector2d(nn.Module):
														
 
															+    # Guides are projected from encoder features and aligned to decoder resolution.
														
 
															     def __init__(
														
 
															         self, in_channels: int, out_channels: int, mode: str = "affine"
														
 
															     ) -> None:
														
@@ -450,6 +450,7 @@ class XGuideProjector2d(nn.Module):
 
															 class XSkipFusion2d(nn.Module):
														
 
															+    # Decoder input and skip feature are aligned, projected, and fused together.
														
 
															     def __init__(self, in_channels: int, skip_channels: int, out_channels: int) -> None:
														
 
															         super().__init__()
														
 
															         self.input_proj = nn.Sequential(
														
@@ -473,6 +474,7 @@ class XSkipFusion2d(nn.Module):
 
															 class XGuideModulation2d(nn.Module):
														
 
															+    # Apply either direct affine guide or feature-to-affine modulation.
														
 
															     def __init__(self, channels: int, guide_mode: str = "affine") -> None:
														
 
															         super().__init__()
														
 
															         self.guide_mode = guide_mode
														
@@ -493,15 +495,23 @@ class XGuideModulation2d(nn.Module):
 
															 class XFrequencyRefine2d(nn.Module):
														
 
															-    def __init__(self, channels: int) -> None:
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        channels: int,
														
 
															+        low_freq_radius_h: float = 0.25,
														
 
															+        low_freq_radius_w: float = 0.25,
														
 
															+        learnable_low_freq_radius: bool = True,
														
 
															+    ) -> None:
														
 
															         super().__init__()
														
 
															+        if low_freq_radius_h <= 0.0 or low_freq_radius_w <= 0.0:
														
 
															+            raise ValueError("Low-frequency radii must be positive.")
														
 
															+        # Gates are predicted from half-spectrum magnitude statistics instead of
														
 
															+        # directly reusing spatial-domain pooled features.
														
 
															         self.low_gate = nn.Sequential(
														
 
															-            nn.AdaptiveAvgPool2d(1),
														
 
															             nn.Conv2d(channels, channels, kernel_size=1, bias=True),
														
 
															             nn.Sigmoid(),
														
 
															         )
														
 
															         self.high_gate = nn.Sequential(
														
 
															-            nn.AdaptiveAvgPool2d(1),
														
 
															             nn.Conv2d(channels, channels, kernel_size=1, bias=True),
														
 
															             nn.Sigmoid(),
														
 
															         )
														
@@ -510,25 +520,77 @@ class XFrequencyRefine2d(nn.Module):
 
															             nn.ReLU(inplace=True),
														
 
															             Conv2dBN(channels, channels, 1, 1, 0),
														
 
															         )
														
 
															+        self.learnable_low_freq_radius = learnable_low_freq_radius
														
 
															+        if learnable_low_freq_radius:
														
 
															+            self.low_freq_radius_h = nn.Parameter(
														
 
															+                torch.tensor(low_freq_radius_h, dtype=torch.float32)
														
 
															+            )
														
 
															+            self.low_freq_radius_w = nn.Parameter(
														
 
															+                torch.tensor(low_freq_radius_w, dtype=torch.float32)
														
 
															+            )
														
 
															+        else:
														
 
															+            self.register_buffer(
														
 
															+                "low_freq_radius_h",
														
 
															+                torch.tensor(low_freq_radius_h, dtype=torch.float32),
														
 
															+                persistent=False,
														
 
															+            )
														
 
															+            self.register_buffer(
														
 
															+                "low_freq_radius_w",
														
 
															+                torch.tensor(low_freq_radius_w, dtype=torch.float32),
														
 
															+                persistent=False,
														
 
															+            )
														
 
															+
														
 
															+    def _resolve_radius(
														
 
															+        self, value: torch.Tensor, max_ratio: float, device: torch.device
														
 
															+    ) -> torch.Tensor:
														
 
															+        radius = value.to(device=device, dtype=torch.float32)
														
 
															+        if self.learnable_low_freq_radius:
														
 
															+            radius = torch.sigmoid(radius) * max_ratio
														
 
															+        return torch.clamp(radius, min=1.0e-3, max=max_ratio)
														
 
															+
														
 
															+    def _build_low_frequency_mask(
														
 
															+        self, h_freq: int, w_freq: int, device: torch.device
														
 
															+    ) -> torch.Tensor:
														
 
															+        y = torch.arange(h_freq, device=device, dtype=torch.float32)
														
 
															+        x = torch.arange(w_freq, device=device, dtype=torch.float32)
														
 
															+        y = torch.minimum(y, h_freq - y)
														
 
															+        radius_h = self._resolve_radius(self.low_freq_radius_h, 0.5, device) * max(
														
 
															+            h_freq, 1
														
 
															+        )
														
 
															+        radius_w = self._resolve_radius(self.low_freq_radius_w, 1.0, device) * max(
														
 
															+            w_freq, 1
														
 
															+        )
														
 
															+        y = y / torch.clamp(radius_h, min=1.0)
														
 
															+        x = x / torch.clamp(radius_w, min=1.0)
														
 
															+        y_grid, x_grid = torch.meshgrid(y, x, indexing="ij")
														
 
															+        mask = (y_grid.square() + x_grid.square()) <= 1.0
														
 
															+        return mask.unsqueeze(0).unsqueeze(0)
														
 
															     def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															         input_dtype = x.dtype
														
 
															         if x.dtype != torch.float32:
														
 
															             x = x.to(torch.float32)
														
 
															         fft = torch.fft.rfft2(x, norm="ortho")
														
 
															-        low = fft.clone()
														
 
															-        h_freq, w_freq = low.shape[-2], low.shape[-1]
														
 
															-        low[:, :, h_freq // 4 :, :] = 0
														
 
															-        low[:, :, :, w_freq // 4 :] = 0
														
 
															+        h_freq, w_freq = fft.shape[-2], fft.shape[-1]
														
 
															+        low_mask = self._build_low_frequency_mask(h_freq, w_freq, fft.device).to(
														
 
															+            dtype=x.dtype
														
 
															+        )
														
 
															+        low = fft * low_mask
														
 
															         high = fft - low
														
 
															-        low = low * self.low_gate(x)
														
 
															-        high = high * self.high_gate(x)
														
 
															+
														
 
															+        magnitude = fft.abs()
														
 
															+        low_stats = (magnitude * low_mask).mean(dim=(-2, -1), keepdim=True)
														
 
															+        high_stats = (magnitude * (1.0 - low_mask)).mean(dim=(-2, -1), keepdim=True)
														
 
															+
														
 
															+        low = low * self.low_gate(low_stats)
														
 
															+        high = high * self.high_gate(high_stats)
														
 
															         out = torch.fft.irfft2(low + high, s=x.shape[-2:], norm="ortho")
														
 
															         out = out.to(dtype=input_dtype)
														
 
															         return self.refine(out)
														
 
															 class XCRB2d(nn.Module):
														
 
															+    # Decoder block: skip fusion -> guide modulation -> frequency refine -> residual output.
														
 
															     def __init__(
														
 
															         self,
														
 
															         in_channels: int,
														
@@ -537,12 +599,22 @@ class XCRB2d(nn.Module):
 
															         out_channels: int,
														
 
															         guide_mode: str = "affine",
														
 
															         use_frequency_refine: bool = True,
														
 
															+        low_freq_radius_h: float = 0.25,
														
 
															+        low_freq_radius_w: float = 0.25,
														
 
															+        learnable_low_freq_radius: bool = True,
														
 
															     ) -> None:
														
 
															         super().__init__()
														
 
															         self.skip_fusion = XSkipFusion2d(in_channels, skip_channels, out_channels)
														
 
															         self.guide_modulation = XGuideModulation2d(out_channels, guide_mode=guide_mode)
														
 
															         self.frequency_refine = (
														
 
															-            XFrequencyRefine2d(out_channels) if use_frequency_refine else nn.Identity()
														
 
															+            XFrequencyRefine2d(
														
 
															+                out_channels,
														
 
															+                low_freq_radius_h=low_freq_radius_h,
														
 
															+                low_freq_radius_w=low_freq_radius_w,
														
 
															+                learnable_low_freq_radius=learnable_low_freq_radius,
														
 
															+            )
														
 
															+            if use_frequency_refine
														
 
															+            else nn.Identity()
														
 
															         )
														
 
															         self.out_refine = nn.Sequential(
														
 
															             Conv2dBN(out_channels, out_channels, 3, 1, 1),
														
@@ -586,6 +658,9 @@ class XNetDecoder2d(nn.Module):
 
															         decoder_channels: Sequence[int] = (128, 64, 32),
														
 
															         guide_mode: str = "affine",
														
 
															         use_frequency_refine: bool = True,
														
 
															+        low_freq_radius_h: float = 0.25,
														
 
															+        low_freq_radius_w: float = 0.25,
														
 
															+        learnable_low_freq_radius: bool = True,
														
 
															         out_channels: int | None = None,
														
 
															     ) -> None:
														
 
															         super().__init__()
														
@@ -605,6 +680,9 @@ class XNetDecoder2d(nn.Module):
 
															             d4,
														
 
															             guide_mode=guide_mode,
														
 
															             use_frequency_refine=use_frequency_refine,
														
 
															+            low_freq_radius_h=low_freq_radius_h,
														
 
															+            low_freq_radius_w=low_freq_radius_w,
														
 
															+            learnable_low_freq_radius=learnable_low_freq_radius,
														
 
															         )
														
 
															         self.dec3 = XCRB2d(
														
 
															             d4,
														
@@ -613,6 +691,9 @@ class XNetDecoder2d(nn.Module):
 
															             d3,
														
 
															             guide_mode=guide_mode,
														
 
															             use_frequency_refine=use_frequency_refine,
														
 
															+            low_freq_radius_h=low_freq_radius_h,
														
 
															+            low_freq_radius_w=low_freq_radius_w,
														
 
															+            learnable_low_freq_radius=learnable_low_freq_radius,
														
 
															         )
														
 
															         self.dec2 = XCRB2d(
														
 
															             d3,
														
@@ -621,6 +702,9 @@ class XNetDecoder2d(nn.Module):
 
															             d2,
														
 
															             guide_mode=guide_mode,
														
 
															             use_frequency_refine=use_frequency_refine,
														
 
															+            low_freq_radius_h=low_freq_radius_h,
														
 
															+            low_freq_radius_w=low_freq_radius_w,
														
 
															+            learnable_low_freq_radius=learnable_low_freq_radius,
														
 
															         )
														
 
															         self.head_refine = XNetHeadRefine2d(d2, out_channels or d2)
														
 
															         self.out_channels = out_channels or d2
														
@@ -680,6 +764,9 @@ class XNet2d(nn.Module):
 
															         ssm_forward_type: str = "v3",
														
 
															         ssm_backend: str = "auto",
														
 
															         use_frequency_refine: bool = True,
														
 
															+        low_freq_radius_h: float = 0.25,
														
 
															+        low_freq_radius_w: float = 0.25,
														
 
															+        learnable_low_freq_radius: bool = True,
														
 
															         guide_mode: str = "affine",
														
 
															         out_channels: int | None = None,
														
 
															     ) -> None:
														
@@ -720,6 +807,9 @@ class XNet2d(nn.Module):
 
															             decoder_channels=decoder_channels,
														
 
															             guide_mode=guide_mode,
														
 
															             use_frequency_refine=use_frequency_refine,
														
 
															+            low_freq_radius_h=low_freq_radius_h,
														
 
															+            low_freq_radius_w=low_freq_radius_w,
														
 
															+            learnable_low_freq_radius=learnable_low_freq_radius,
														
 
															             out_channels=out_channels,
														
 
															         )
														
 
															         head_in_channels = self.decoder.out_channels
														
--- a/lib/modules/xnet_2d_zh.py
+++ b/lib/modules/xnet_2d_zh.py
@@ -0,0 +1,980 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+from collections.abc import Sequence
														
 
															+
														
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+import torch.nn.functional as F
														
 
															+
														
 
															+import ptwt
														
 
															+
														
 
															+from .layers_2d import Conv2dBN
														
 
															+from .lib_mamba.vmamba import SS2D as VMambaSS2D
														
 
															+
														
 
															+"""
														
 
															+## 完成的修改
														
 
															+
														
 
															+### 1. 小波变换模块迁移至 ptwt
														
 
															+- **替换 `XHaarWaveletTransform2d` → `XWaveletTransform2d`**：使用 `ptwt.wavedec2` / `ptwt.waverec2` 实现可逆小波变换
														
 
															+- **优势**：
														
 
															+  - 支持任意 pywt 兼容小波（haar, db4, sym2, db6 等），通过 `wavelet_type` 参数切换
														
 
															+  - 自动处理边界对齐，无需手动 padding/cropping
														
 
															+  - 代码更简洁，无手工卷积滤波器
														
 
															+- **`XWaveletBranch2d`** 已更新引用新类，移除了 wavelet 类型限制检查
														
 
															+
														
 
															+### 2. XFrequencyRefine2d 频率域精炼模块分析
														
 
															+
														
 
															+**发现的问题与修复：**
														
 
															+- **原代码 FFT 低频掩码位置错误**：未使用 `fftshift`，直接在左上角做十字掩码，与真实低频位置（四角）不匹配
														
 
															+- **已修复**：使用 `fftshift` → 圆形低频掩码 → `ifftshift` 还原的正确流程
														
 
															+
														
 
															+**设计合理性评估：**
														
 
															+| 方面 | 评价 |
														
 
															+|------|------|
														
 
															+| 低频/高频分离 | ✅ 圆形掩码合理，可调节半径 |
														
 
															+| 门控机制 | ⚠️ 门控值来自空间域而非频域，可能损失频域选择性 |
														
 
															+| 通道注意力 | ✅ 每个通道独立门控，灵活 |
														
 
															+| 重建精度 | ✅ 正交归一化 FFT + 完整频域保留 |
														
 
															+| 计算开销 | ⚠️ meshgrid 每步计算，可缓存优化 |
														
 
															+
														
 
															+**改进建议：**
														
 
															+1. 门控可改为频域计算（对 `|fft|` 做平均池化）而非空间域
														
 
															+2. 低频半径可改为可学习参数
														
 
															+3. meshgrid 可缓存为 buffer 避免重复计算
														
 
															+
														
 
															+### 验证结果
														
 
															+所有模块测试通过，小波分解→重建误差 < 1e-4，输出形状一致。
														
 
															+"""
														
 
															+
														
 
															+# ============================================================
														
 
															+# 核心架构：XNet2D 医学图像分割网络
														
 
															+# 业务意图：针对超声等医学图像分割任务，融合局部纹理、频率域、全局序列建模三重能力
														
 
															+# 设计约束：
														
 
															+#   - 2D 张量通道优先 (N,C,H,W)
														
 
															+#   - 所有可逆变换需支持 inverse 恢复原始空间尺寸
														
 
															+#   - SSM 后端可切换：GPU→oflex，CPU→torch
														
 
															+# ============================================================
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetStem2d：输入茎（Stem）
														
 
															+# 为什么：将单张输入图快速降采样 4 倍 (H/4, W/4)，并逐步提升通道维度
														
 
															+# 关键行为：
														
 
															+#   - 两次步幅为 2 的卷积实现 4 倍下采样
														
 
															+#   - 中间嵌入 depthwise 卷积增强局部通道交互
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetStem2d(nn.Module):
														
 
															+    def __init__(self, in_channels: int, stem_channels: int, out_channels: int) -> None:
														
 
															+        super().__init__()
														
 
															+        self.block = nn.Sequential(
														
 
															+            Conv2dBN(in_channels, stem_channels, 3, 2, 1),  # 首次下采样 H/2, W/2
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(
														
 
															+                stem_channels, stem_channels, 3, 1, 1, groups=stem_channels
														
 
															+            ),  # depthwise 局部特征增强
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(stem_channels, out_channels, 1, 1, 0),  # 通道升维
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(out_channels, out_channels, 3, 2, 1),  # 二次下采样 H/4, W/4
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.block(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetDownsample2d：阶段间下采样器
														
 
															+# 为什么：在编码器各阶段之间平滑过渡，降低空间分辨率同时增加通道数
														
 
															+# 关键行为：
														
 
															+#   - 仅支持 conv 模式（扩展点由子类控制）
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetDownsample2d(nn.Module):
														
 
															+    def __init__(self, in_channels: int, out_channels: int, mode: str = "conv") -> None:
														
 
															+        super().__init__()
														
 
															+        if mode != "conv":
														
 
															+            raise ValueError(f"Unsupported downsample mode: {mode}")
														
 
															+        self.block = nn.Sequential(
														
 
															+            Conv2dBN(in_channels, out_channels, 3, 2, 1),  # H/2, W/2 下采样
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.block(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XLocalBranch2d：局部感受野分支
														
 
															+# 为什么：并行捕获 3×3 和 5×5 多尺度局部纹理，对医学图像边缘/细微结构敏感
														
 
															+# 关键行为：
														
 
															+#   - 两组 depthwise 卷积 + 1×1 通道压缩
														
 
															+#   - 输出直接相加（残差式局部特征累积）
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XLocalBranch2d(nn.Module):
														
 
															+    def __init__(self, channels: int) -> None:
														
 
															+        super().__init__()
														
 
															+        self.branch3 = nn.Sequential(
														
 
															+            Conv2dBN(
														
 
															+                channels, channels, 3, 1, 1, groups=channels
														
 
															+            ),  # 3×3 depthwise 局部感受野
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),  # 1×1 通道重映射
														
 
															+        )
														
 
															+        self.branch5 = nn.Sequential(
														
 
															+            Conv2dBN(
														
 
															+                channels, channels, 5, 1, 2, groups=channels
														
 
															+            ),  # 5×5 depthwise 更大感受野
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.branch3(x) + self.branch5(x)  # 多尺度局部特征融合
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XWaveletTransform2d：基于 ptwt 的 2D 小波变换
														
 
															+# 为什么：将特征分解为低频近似 (LL) 与高频细节 (LH, HL, HH)，便于频率域操作
														
 
															+# 关键行为：
														
 
															+#   - 使用 ptwt.wavedec2 / ptwt.waverec2 实现可逆小波分解与重建
														
 
															+#   - 支持任意 pywt 兼容小波（haar, db4, sym2 等）
														
 
															+#   - 输出格式：(ll_coeff, (lh_coeff, hl_coeff, hh_coeff))
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XWaveletTransform2d(nn.Module):
														
 
															+    def __init__(self, wavelet: str = "haar", level: int = 1) -> None:
														
 
															+        super().__init__()
														
 
															+        self.wavelet = wavelet
														
 
															+        self.level = level
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
														
 
															+        """
														
 
															+        分解输入张量。
														
 
															+        Returns:
														
 
															+            ll: 低频近似系数 [B, C, H', W']
														
 
															+            high: 高频细节张量，拼接 LH/HL/HH 为 [B, C*3, H', W']
														
 
															+        """
														
 
															+        coeffs = ptwt.wavedec2(x, self.wavelet, level=self.level)
														
 
															+        ll = coeffs[0]  # 低频近似
														
 
															+        detail_tuple = coeffs[1]  # (lh, hl, hh) 元组
														
 
															+        high = torch.cat([detail_tuple[0], detail_tuple[1], detail_tuple[2]], dim=1)
														
 
															+        return ll, high
														
 
															+
														
 
															+    def inverse(
														
 
															+        self, ll: torch.Tensor, high: torch.Tensor, output_size: tuple[int, int]
														
 
															+    ) -> torch.Tensor:
														
 
															+        """
														
 
															+        从低频和高频系数重建原始张量。
														
 
															+        Args:
														
 
															+            ll: 低频近似系数
														
 
															+            high: 高频细节张量 [B, C*3, H', W']
														
 
															+            output_size: 目标输出尺寸 (H, W)
														
 
															+        """
														
 
															+        lh = high[:, 0 : high.shape[1] // 3]
														
 
															+        hl = high[:, high.shape[1] // 3 : 2 * high.shape[1] // 3]
														
 
															+        hh = high[:, 2 * high.shape[1] // 3 :]
														
 
															+        coeffs = [ll, (lh, hl, hh)]
														
 
															+        # ptwt.waverec2 自动处理边界对齐，无需手动裁剪
														
 
															+        return ptwt.waverec2(coeffs, self.wavelet)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XWaveletBranch2d：小波分支
														
 
															+# 为什么：对小波分解后的低频和高频分别做特征学习，再重建回空间域
														
 
															+# 关键行为：
														
 
															+#   - 当前仅支持 Haar 小波和 level=1（设计约束）
														
 
															+#   - 高频通道数 = channels * 3，需单独投影
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XWaveletBranch2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self, channels: int, wavelet_type: str = "haar", wavelet_level: int = 1
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.wavelet = XWaveletTransform2d(wavelet=wavelet_type, level=wavelet_level)
														
 
															+        # 低频通道投影
														
 
															+        self.ll_proj = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+        # 高频通道投影（depthwise 处理多高频分量）
														
 
															+        self.high_proj = nn.Sequential(
														
 
															+            Conv2dBN(channels * 3, channels * 3, 3, 1, 1, groups=channels * 3),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels * 3, channels * 3, 1, 1, 0),
														
 
															+        )
														
 
															+        # 重建后输出投影
														
 
															+        self.out_proj = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        output_size = x.shape[-2:]
														
 
															+        ll, high = self.wavelet(x)  # 分解
														
 
															+        ll = self.ll_proj(ll)
														
 
															+        high = self.high_proj(high)
														
 
															+        x = self.wavelet.inverse(ll, high, output_size=output_size)  # 重建
														
 
															+        return self.out_proj(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XSSMGlobalBranch2d：SSM 全局分支（核心：VMamba SS2D）
														
 
															+# 为什么：用 State Space Model 捕获长程依赖，弥补卷积局部感受野不足
														
 
															+# 关键行为：
														
 
															+#   - 自动选择后端：CUDA→oflex（快速），否则→torch（兼容）
														
 
															+#   - 通过 monkey-patch forward_core 动态切换 scan 策略
														
 
															+#   - 用完后恢复原始 forward_core 避免状态污染
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XSSMGlobalBranch2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        channels: int,
														
 
															+        global_ratio: float = 2.0,
														
 
															+        d_state: int = 16,
														
 
															+        forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        hidden_ratio = max(global_ratio, 1.0)  # SSM 隐层缩放比例
														
 
															+        self.backend = ssm_backend
														
 
															+        self.pre = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),  # 预投影归一化
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+        self.ssm = VMambaSS2D(
														
 
															+            d_model=channels,
														
 
															+            d_state=d_state,
														
 
															+            ssm_ratio=hidden_ratio,
														
 
															+            d_conv=3,
														
 
															+            dropout=0.0,
														
 
															+            initialize="v0",
														
 
															+            forward_type=forward_type,
														
 
															+            channel_first=True,
														
 
															+        )
														
 
															+        self.post = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),  # 后投影归一化
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        x = self.pre(x)
														
 
															+        prev_backend = None
														
 
															+        backend = self.backend.lower()
														
 
															+        if backend == "auto":
														
 
															+            backend = "oflex" if x.is_cuda else "torch"
														
 
															+
														
 
															+        # 动态切换 SSM 后端（避免修改全局配置）
														
 
															+        if backend == "oflex" and hasattr(self.ssm, "forward_core"):
														
 
															+            prev_backend = self.ssm.forward_core
														
 
															+            self.ssm.forward_core = lambda z, _core=prev_backend: _core(
														
 
															+                z,
														
 
															+                selective_scan_backend="oflex",
														
 
															+                scan_force_torch=False,
														
 
															+            )
														
 
															+        elif backend == "torch" and hasattr(self.ssm, "forward_core"):
														
 
															+            prev_backend = self.ssm.forward_core
														
 
															+            self.ssm.forward_core = lambda z, _core=prev_backend: _core(
														
 
															+                z,
														
 
															+                selective_scan_backend="torch",
														
 
															+                scan_force_torch=True,
														
 
															+            )
														
 
															+        try:
														
 
															+            x = self.ssm(x)  # SSM 全局建模
														
 
															+        finally:
														
 
															+            if prev_backend is not None:
														
 
															+                self.ssm.forward_core = prev_backend  # 恢复原始后端
														
 
															+        return self.post(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XGlobalBranch2d：全局分支包装器
														
 
															+# 为什么：提供统一接口，将 SSM 分支暴露为可开关的模块
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XGlobalBranch2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        channels: int,
														
 
															+        global_ratio: float = 2.0,
														
 
															+        ssm_d_state: int = 16,
														
 
															+        ssm_forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.ssm_branch = XSSMGlobalBranch2d(
														
 
															+            channels=channels,
														
 
															+            global_ratio=global_ratio,
														
 
															+            d_state=ssm_d_state,
														
 
															+            forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.ssm_branch(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XBranchFusion2d：多分支特征融合
														
 
															+# 为什么：将局部/小波/全局三个分支的输出自适应加权融合
														
 
															+# 关键行为：
														
 
															+#   - 通道拼接 → 1×1 压缩 → 通道注意力门控（Channel Attention Gate）
														
 
															+#   - 门控值经 Sigmoid 后与融合特征逐元素相乘
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XBranchFusion2d(nn.Module):
														
 
															+    def __init__(self, channels: int, num_branches: int = 3) -> None:
														
 
															+        super().__init__()
														
 
															+        fused_channels = channels * num_branches
														
 
															+        hidden_channels = max(channels // 4, 8)  # 门控网络隐藏维度
														
 
															+        self.fuse = nn.Sequential(
														
 
															+            Conv2dBN(fused_channels, channels, 1, 1, 0),  # 通道降维融合
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+        # 通道注意力门控
														
 
															+        self.gate = nn.Sequential(
														
 
															+            nn.AdaptiveAvgPool2d(1),  # 全局平均池化 → 空间不变
														
 
															+            nn.Conv2d(fused_channels, hidden_channels, kernel_size=1, bias=True),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            nn.Conv2d(hidden_channels, channels, kernel_size=1, bias=True),
														
 
															+            nn.Sigmoid(),  # 门控值 [0, 1]
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, branch_outputs: Sequence[torch.Tensor]) -> torch.Tensor:
														
 
															+        x_cat = torch.cat(list(branch_outputs), dim=1)  # 拼接所有分支
														
 
															+        x_fused = self.fuse(x_cat)
														
 
															+        gate = self.gate(x_cat)  # 计算通道门控
														
 
															+        return x_fused * gate  # 门控加权融合
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XTEB2d：X-Tri-Enhance-Block (2D) — 核心构建块
														
 
															+# 为什么：将局部、小波、全局三个分支并行融合，并叠加 FFN 残差
														
 
															+# 关键行为：
														
 
															+#   - pre_norm：先做 1×1 投影再输入多分支
														
 
															+#   - fusion：XBranchFusion2d 自适应融合三分支
														
 
															+#   - post + FFN：双层残差连接（post-fusion + FFN）
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XTEB2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        channels: int,
														
 
															+        global_ratio: float = 2.0,
														
 
															+        wavelet_type: str = "haar",
														
 
															+        wavelet_level: int = 1,
														
 
															+        use_wavelet_branch: bool = True,
														
 
															+        use_global_branch: bool = True,
														
 
															+        ssm_d_state: int = 16,
														
 
															+        ssm_forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.pre_norm = Conv2dBN(channels, channels, 1, 1, 0)  # 预投影
														
 
															+        self.local_branch = XLocalBranch2d(channels)  # 局部分支（始终启用）
														
 
															+        # 小波分支（可开关）
														
 
															+        self.wavelet_branch = (
														
 
															+            XWaveletBranch2d(
														
 
															+                channels, wavelet_type=wavelet_type, wavelet_level=wavelet_level
														
 
															+            )
														
 
															+            if use_wavelet_branch
														
 
															+            else nn.Identity()
														
 
															+        )
														
 
															+        # 全局 SSM 分支（可开关）
														
 
															+        self.global_branch = (
														
 
															+            XGlobalBranch2d(
														
 
															+                channels,
														
 
															+                global_ratio=global_ratio,
														
 
															+                ssm_d_state=ssm_d_state,
														
 
															+                ssm_forward_type=ssm_forward_type,
														
 
															+                ssm_backend=ssm_backend,
														
 
															+            )
														
 
															+            if use_global_branch
														
 
															+            else nn.Identity()
														
 
															+        )
														
 
															+        self.fusion = XBranchFusion2d(channels, num_branches=3)  # 三分支融合
														
 
															+        # 后处理残差块
														
 
															+        self.post = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0, bn_weight_init=0.0),  # 零初始化
														
 
															+        )
														
 
															+        # FFN 残差块
														
 
															+        self.ffn = nn.Sequential(
														
 
															+            Conv2dBN(channels, channels * 2, 1, 1, 0),  # 通道扩展
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels * 2, channels, 1, 1, 0, bn_weight_init=0.0),  # 零初始化
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        x_in = x
														
 
															+        x = self.pre_norm(x)
														
 
															+        # 三分支并行 + 融合 + 残差
														
 
															+        x = x_in + self.post(
														
 
															+            self.fusion(
														
 
															+                [self.local_branch(x), self.wavelet_branch(x), self.global_branch(x)]
														
 
															+            )
														
 
															+        )
														
 
															+        # FFN 残差
														
 
															+        return x + self.ffn(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetEncoderStage2d：编码器阶段
														
 
															+# 为什么：堆叠多个 XTEB2d 块作为单一编码器层级
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetEncoderStage2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        channels: int,
														
 
															+        depth: int,
														
 
															+        global_ratio: float = 2.0,
														
 
															+        wavelet_type: str = "haar",
														
 
															+        wavelet_level: int = 1,
														
 
															+        use_wavelet_branch: bool = True,
														
 
															+        use_global_branch: bool = True,
														
 
															+        ssm_d_state: int = 16,
														
 
															+        ssm_forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.blocks = nn.Sequential(
														
 
															+            *[
														
 
															+                XTEB2d(
														
 
															+                    channels=channels,
														
 
															+                    global_ratio=global_ratio,
														
 
															+                    wavelet_type=wavelet_type,
														
 
															+                    wavelet_level=wavelet_level,
														
 
															+                    use_wavelet_branch=use_wavelet_branch,
														
 
															+                    use_global_branch=use_global_branch,
														
 
															+                    ssm_d_state=ssm_d_state,
														
 
															+                    ssm_forward_type=ssm_forward_type,
														
 
															+                    ssm_backend=ssm_backend,
														
 
															+                )
														
 
															+                for _ in range(depth)
														
 
															+            ]
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.blocks(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetEncoder2d：完整编码器
														
 
															+# 为什么：Stem + 4 个阶段 + 3 个下采样 → 多尺度特征金字塔 [e1, e2, e3, e4]
														
 
															+# 关键约束：
														
 
															+#   - 阶段数固定为 4（由构造函数校验）
														
 
															+#   - Stage1 默认关闭全局 SSM（浅层特征不适合长程建模）
														
 
															+#   - stage_channels 属性暴露各阶段输出通道数
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetEncoder2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        in_channels: int,
														
 
															+        stem_channels: int,
														
 
															+        encoder_channels: Sequence[int],
														
 
															+        encoder_depths: Sequence[int],
														
 
															+        global_ratio: float = 2.0,
														
 
															+        wavelet_type: str = "haar",
														
 
															+        wavelet_level: int = 1,
														
 
															+        use_wavelet_branch: bool = True,
														
 
															+        use_global_branch_stage1: bool = False,
														
 
															+        ssm_d_state: int = 16,
														
 
															+        ssm_forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        if len(encoder_channels) != 4 or len(encoder_depths) != 4:
														
 
															+            raise ValueError("XNetEncoder2d expects 4 encoder stages.")
														
 
															+        c1, c2, c3, c4 = encoder_channels
														
 
															+        d1, d2, d3, d4 = encoder_depths
														
 
															+        self.stem = XNetStem2d(in_channels, stem_channels, c1)
														
 
															+        # Stage 1：浅层，可选关闭全局分支
														
 
															+        self.stage1 = XNetEncoderStage2d(
														
 
															+            c1,
														
 
															+            d1,
														
 
															+            global_ratio,
														
 
															+            wavelet_type,
														
 
															+            wavelet_level,
														
 
															+            use_wavelet_branch=use_wavelet_branch,
														
 
															+            use_global_branch=use_global_branch_stage1,
														
 
															+            ssm_d_state=ssm_d_state,
														
 
															+            ssm_forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+        self.down1 = XNetDownsample2d(c1, c2)
														
 
															+        # Stage 2-4：始终启用全局分支
														
 
															+        self.stage2 = XNetEncoderStage2d(
														
 
															+            c2,
														
 
															+            d2,
														
 
															+            global_ratio,
														
 
															+            wavelet_type,
														
 
															+            wavelet_level,
														
 
															+            use_wavelet_branch,
														
 
															+            True,
														
 
															+            ssm_d_state=ssm_d_state,
														
 
															+            ssm_forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+        self.down2 = XNetDownsample2d(c2, c3)
														
 
															+        self.stage3 = XNetEncoderStage2d(
														
 
															+            c3,
														
 
															+            d3,
														
 
															+            global_ratio,
														
 
															+            wavelet_type,
														
 
															+            wavelet_level,
														
 
															+            use_wavelet_branch,
														
 
															+            True,
														
 
															+            ssm_d_state=ssm_d_state,
														
 
															+            ssm_forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+        self.down3 = XNetDownsample2d(c3, c4)
														
 
															+        self.stage4 = XNetEncoderStage2d(
														
 
															+            c4,
														
 
															+            d4,
														
 
															+            global_ratio,
														
 
															+            wavelet_type,
														
 
															+            wavelet_level,
														
 
															+            use_wavelet_branch,
														
 
															+            True,
														
 
															+            ssm_d_state=ssm_d_state,
														
 
															+            ssm_forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+        self.stage_channels = list(encoder_channels)  # 暴露各阶段通道数
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
														
 
															+        e1 = self.stage1(self.stem(x))  # 浅层特征
														
 
															+        e2 = self.stage2(self.down1(e1))  # 中层特征
														
 
															+        e3 = self.stage3(self.down2(e2))  # 深层特征
														
 
															+        e4 = self.stage4(self.down3(e3))  # 最深特征
														
 
															+        return [e1, e2, e3, e4]  # 多尺度特征金字塔
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XGuideProjector2d：引导投影器
														
 
															+# 为什么：从编码器特征生成引导信号（guide），用于解码器的自适应调制
														
 
															+# 关键行为：
														
 
															+#   - affine 模式：输出 (gamma, beta) 用于仿射调制
														
 
															+#   - feature 模式：直接输出特征
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XGuideProjector2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self, in_channels: int, out_channels: int, mode: str = "affine"
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.mode = mode
														
 
															+        if mode == "affine":
														
 
															+            # 输出双倍通道 → 后续拆分为 gamma 和 beta
														
 
															+            self.proj = nn.Sequential(
														
 
															+                Conv2dBN(in_channels, out_channels * 2, 1, 1, 0),
														
 
															+                nn.ReLU(inplace=True),
														
 
															+                nn.Conv2d(out_channels * 2, out_channels * 2, kernel_size=1, bias=True),
														
 
															+            )
														
 
															+        elif mode == "feature":
														
 
															+            self.proj = nn.Sequential(
														
 
															+                Conv2dBN(in_channels, out_channels, 1, 1, 0),
														
 
															+                nn.ReLU(inplace=True),
														
 
															+            )
														
 
															+        else:
														
 
															+            raise ValueError(f"Unsupported guide mode: {mode}")
														
 
															+
														
 
															+    def forward(
														
 
															+        self,
														
 
															+        x: torch.Tensor,
														
 
															+        target_size: tuple[int, int],
														
 
															+    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
														
 
															+        # 插值到目标尺寸（guide 需要与解码器特征空间对齐）
														
 
															+        x = F.interpolate(x, size=target_size, mode="bilinear", align_corners=False)
														
 
															+        x = self.proj(x)
														
 
															+        if self.mode == "affine":
														
 
															+            gamma, beta = torch.chunk(x, 2, dim=1)  # 拆分为仿射参数
														
 
															+            gamma = torch.sigmoid(gamma) + 0.5  # gamma 偏置到 [0.5, 1.5]
														
 
															+            return gamma, beta
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XSkipFusion2d：跳跃连接融合
														
 
															+# 为什么：将编码器特征与解码器特征融合后传入
														
 
															+# 关键行为：
														
 
															+#   - 分别投影输入和跳跃特征到相同维度
														
 
															+#   - 拼接 + 3×3 卷积融合
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XSkipFusion2d(nn.Module):
														
 
															+    def __init__(self, in_channels: int, skip_channels: int, out_channels: int) -> None:
														
 
															+        super().__init__()
														
 
															+        self.input_proj = nn.Sequential(
														
 
															+            Conv2dBN(in_channels, out_channels, 1, 1, 0),  # 解码器特征投影
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+        self.skip_proj = nn.Sequential(
														
 
															+            Conv2dBN(skip_channels, out_channels, 1, 1, 0),  # 跳跃特征投影
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+        self.fuse = nn.Sequential(
														
 
															+            Conv2dBN(out_channels * 2, out_channels, 3, 1, 1),  # 拼接后融合
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor, skip: torch.Tensor) -> torch.Tensor:
														
 
															+        # 双线性插值对齐空间尺寸
														
 
															+        x = F.interpolate(x, size=skip.shape[-2:], mode="bilinear", align_corners=False)
														
 
															+        x = self.input_proj(x)
														
 
															+        skip = self.skip_proj(skip)
														
 
															+        return self.fuse(torch.cat([x, skip], dim=1))  # 通道拼接融合
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XGuideModulation2d：引导调制器
														
 
															+# 为什么：对特征应用仿射调制 (gamma * x + beta) 或特征驱动调制
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XGuideModulation2d(nn.Module):
														
 
															+    def __init__(self, channels: int, guide_mode: str = "affine") -> None:
														
 
															+        super().__init__()
														
 
															+        self.guide_mode = guide_mode
														
 
															+        if guide_mode == "feature":
														
 
															+            # feature 模式下先将 guide 转为仿射参数
														
 
															+            self.to_affine = nn.Conv2d(channels, channels * 2, kernel_size=1, bias=True)
														
 
															+
														
 
															+    def forward(
														
 
															+        self,
														
 
															+        x: torch.Tensor,
														
 
															+        guide: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
														
 
															+    ) -> torch.Tensor:
														
 
															+        if self.guide_mode == "affine":
														
 
															+            gamma, beta = guide  # 直接使用仿射参数
														
 
															+        else:
														
 
															+            gamma, beta = torch.chunk(self.to_affine(guide), 2, dim=1)
														
 
															+            gamma = torch.sigmoid(gamma) + 0.5
														
 
															+        return gamma * x + beta  # 仿射调制
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XFrequencyRefine2d：频率域精炼
														
 
															+# 为什么：在频域对低频/高频分别应用门控，增强关键频率成分
														
 
															+# 关键行为：
														
 
															+#   - FFT → 低频中心保留 + 高频带通 → 逆 FFT
														
 
															+#   - 门控由自适应平均池化生成
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XFrequencyRefine2d(nn.Module):
														
 
															+    def __init__(self, channels: int) -> None:
														
 
															+        super().__init__()
														
 
															+        # 低频门控
														
 
															+        self.low_gate = nn.Sequential(
														
 
															+            nn.AdaptiveAvgPool2d(1),
														
 
															+            nn.Conv2d(channels, channels, kernel_size=1, bias=True),
														
 
															+            nn.Sigmoid(),
														
 
															+        )
														
 
															+        # 高频门控
														
 
															+        self.high_gate = nn.Sequential(
														
 
															+            nn.AdaptiveAvgPool2d(1),
														
 
															+            nn.Conv2d(channels, channels, kernel_size=1, bias=True),
														
 
															+            nn.Sigmoid(),
														
 
															+        )
														
 
															+        # 频域精炼后的空间域细化
														
 
															+        self.refine = nn.Sequential(
														
 
															+            Conv2dBN(
														
 
															+                channels, channels, 3, 1, 1, groups=channels
														
 
															+            ),  # depthwise 局部细化
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(channels, channels, 1, 1, 0),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        input_dtype = x.dtype
														
 
															+        if x.dtype != torch.float32:
														
 
															+            x = x.to(torch.float32)  # FFT 需要 float32 精度
														
 
															+        fft = torch.fft.rfft2(x, norm="ortho")  # 实值 FFT
														
 
															+        h_freq, w_freq = fft.shape[-2], fft.shape[-1]
														
 
															+        # 构建圆形低频掩码（中心位于四个角：FFT 未 shift 时低频在四角）
														
 
															+        # 使用 fftshift 将低频移至中心，应用掩码后再 ifftshift 还原
														
 
															+        fft_shifted = torch.fft.fftshift(fft, dim=(-2, -1))
														
 
															+        low = fft_shifted.clone()
														
 
															+        # 圆形低频掩码：保留中心区域
														
 
															+        radius_h = h_freq // 4
														
 
															+        radius_w = w_freq // 4
														
 
															+        y_grid, x_grid = torch.meshgrid(
														
 
															+            torch.arange(h_freq, device=fft.device),
														
 
															+            torch.arange(w_freq, device=fft.device),
														
 
															+            indexing="ij",
														
 
															+        )
														
 
															+        center_y, center_x = h_freq // 2, w_freq // 2
														
 
															+        mask = (y_grid - center_y) ** 2 + (x_grid - center_x) ** 2 <= max(
														
 
															+            radius_h, radius_w
														
 
															+        ) ** 2
														
 
															+        mask = mask.unsqueeze(0).unsqueeze(0).expand(fft.shape[0], fft.shape[1], -1, -1)
														
 
															+        low = low * mask  # 低频分量
														
 
															+        high = fft_shifted - low  # 高频 = 全部 - 低频
														
 
															+        # 还原到原始 FFT 坐标系
														
 
															+        low = torch.fft.ifftshift(low, dim=(-2, -1))
														
 
															+        high = torch.fft.ifftshift(high, dim=(-2, -1))
														
 
															+        # 应用通道门控（门控值来自空间域）
														
 
															+        low = low * self.low_gate(x)
														
 
															+        high = high * self.high_gate(x)
														
 
															+        out = torch.fft.irfft2(low + high, s=x.shape[-2:], norm="ortho")  # 逆 FFT
														
 
															+        out = out.to(dtype=input_dtype)
														
 
															+        return self.refine(out)  # 空间域细化
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XCRB2d：X-ResBlock with Guide (2D) — 解码器核心块
														
 
															+# 为什么：融合跳跃连接 + 引导调制 + 频率精炼，是解码器重建的基础单元
														
 
															+# 数据流：
														
 
															+#   输入特征 → SkipFusion → GuideModulation → FrequencyRefine → OutRefine
														
 
															+#   每步均有残差连接
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XCRB2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        in_channels: int,
														
 
															+        skip_channels: int,
														
 
															+        guide_channels: int,
														
 
															+        out_channels: int,
														
 
															+        guide_mode: str = "affine",
														
 
															+        use_frequency_refine: bool = True,
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.skip_fusion = XSkipFusion2d(in_channels, skip_channels, out_channels)
														
 
															+        self.guide_modulation = XGuideModulation2d(out_channels, guide_mode=guide_mode)
														
 
															+        self.frequency_refine = (
														
 
															+            XFrequencyRefine2d(out_channels) if use_frequency_refine else nn.Identity()
														
 
															+        )
														
 
															+        # 输出细化（零初始化末尾以渐进学习）
														
 
															+        self.out_refine = nn.Sequential(
														
 
															+            Conv2dBN(out_channels, out_channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(out_channels, out_channels, 3, 1, 1, bn_weight_init=0.0),
														
 
															+        )
														
 
															+        self.guide_channels = guide_channels
														
 
															+
														
 
															+    def forward(
														
 
															+        self,
														
 
															+        x: torch.Tensor,
														
 
															+        skip: torch.Tensor,
														
 
															+        guide: torch.Tensor | tuple[torch.Tensor, torch.Tensor],
														
 
															+    ) -> torch.Tensor:
														
 
															+        x = self.skip_fusion(x, skip)  # 跳跃融合
														
 
															+        x = self.guide_modulation(x, guide)  # 引导调制
														
 
															+        x = x + self.frequency_refine(x)  # 频率精炼残差
														
 
															+        return x + self.out_refine(x)  # 输出细化残差
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetHeadRefine2d：特征精炼头
														
 
															+# 为什么：在解码器末端做最后的特征增强
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetHeadRefine2d(nn.Module):
														
 
															+    def __init__(self, channels: int, out_channels: int | None = None) -> None:
														
 
															+        super().__init__()
														
 
															+        if out_channels is None:
														
 
															+            out_channels = channels
														
 
															+        self.block = nn.Sequential(
														
 
															+            Conv2dBN(channels, out_channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            Conv2dBN(out_channels, out_channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+        )
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor) -> torch.Tensor:
														
 
															+        return self.block(x)
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetDecoder2d：完整解码器
														
 
															+# 为什么：从最深特征 e4 逐步上采样，逐层引入引导信号和跳跃连接
														
 
															+# 关键数据流：
														
 
															+#   e4 → guide4 → dec4 → guide3 → dec3 → guide2 → dec2 → head_refine
														
 
															+#   返回：输出特征、所有解码特征、所有引导信号（供损失函数使用）
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetDecoder2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        encoder_channels: Sequence[int],
														
 
															+        decoder_channels: Sequence[int] = (128, 64, 32),
														
 
															+        guide_mode: str = "affine",
														
 
															+        use_frequency_refine: bool = True,
														
 
															+        out_channels: int | None = None,
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        if len(encoder_channels) != 4:
														
 
															+            raise ValueError("XNetDecoder2d expects 4 encoder stages.")
														
 
															+        if len(decoder_channels) != 3:
														
 
															+            raise ValueError("XNetDecoder2d expects 3 decoder channels.")
														
 
															+        c1, c2, c3, c4 = encoder_channels
														
 
															+        d4, d3, d2 = decoder_channels
														
 
															+        # 引导投影器（从编码器特征生成 guide）
														
 
															+        self.guide4 = XGuideProjector2d(c4, d4, mode=guide_mode)
														
 
															+        self.guide3 = XGuideProjector2d(c3, d3, mode=guide_mode)
														
 
															+        self.guide2 = XGuideProjector2d(c2, d2, mode=guide_mode)
														
 
															+        # 解码块（逐层降通道 + 跳跃融合）
														
 
															+        self.dec4 = XCRB2d(
														
 
															+            c4,
														
 
															+            c3,
														
 
															+            d4,
														
 
															+            d4,
														
 
															+            guide_mode=guide_mode,
														
 
															+            use_frequency_refine=use_frequency_refine,
														
 
															+        )
														
 
															+        self.dec3 = XCRB2d(
														
 
															+            d4,
														
 
															+            c2,
														
 
															+            d3,
														
 
															+            d3,
														
 
															+            guide_mode=guide_mode,
														
 
															+            use_frequency_refine=use_frequency_refine,
														
 
															+        )
														
 
															+        self.dec2 = XCRB2d(
														
 
															+            d3,
														
 
															+            c1,
														
 
															+            d2,
														
 
															+            d2,
														
 
															+            guide_mode=guide_mode,
														
 
															+            use_frequency_refine=use_frequency_refine,
														
 
															+        )
														
 
															+        self.head_refine = XNetHeadRefine2d(d2, out_channels or d2)
														
 
															+        self.out_channels = out_channels or d2
														
 
															+
														
 
															+    def forward(
														
 
															+        self,
														
 
															+        features: Sequence[torch.Tensor],
														
 
															+    ) -> tuple[
														
 
															+        torch.Tensor,
														
 
															+        list[torch.Tensor],
														
 
															+        list[torch.Tensor | tuple[torch.Tensor, torch.Tensor]],
														
 
															+    ]:
														
 
															+        e1, e2, e3, e4 = features
														
 
															+        # 从深到浅逐层解码
														
 
															+        g4 = self.guide4(e4, target_size=e3.shape[-2:])  # 从 e4 生成 guide
														
 
															+        d4 = self.dec4(e4, e3, g4)  # 解码 + 跳跃 e3
														
 
															+        g3 = self.guide3(e3, target_size=e2.shape[-2:])
														
 
															+        d3 = self.dec3(d4, e2, g3)  # 解码 + 跳跃 e2
														
 
															+        g2 = self.guide2(e2, target_size=e1.shape[-2:])
														
 
															+        d2 = self.dec2(d3, e1, g2)  # 解码 + 跳跃 e1
														
 
															+        d1 = self.head_refine(d2)  # 最终精炼
														
 
															+        # 返回解码输出、中间特征（用于辅助损失）、引导信号
														
 
															+        return d1, [d4, d3, d2, d1], [g4, g3, g2]
														
 
															+
														
 
															+
														
 
															+# --------------------------------------------------------------------------
														
 
															+# XNetSegHead2d：分割头
														
 
															+# 为什么：将最终特征映射为 logits 图，并上采样到原始输入尺寸
														
 
															+# --------------------------------------------------------------------------
														
 
															+class XNetSegHead2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self, in_channels: int, num_classes: int, upsample_scale: int = 4
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        self.block = nn.Sequential(
														
 
															+            Conv2dBN(in_channels, in_channels, 3, 1, 1),
														
 
															+            nn.ReLU(inplace=True),
														
 
															+            nn.Conv2d(
														
 
															+                in_channels, num_classes, kernel_size=1, bias=True
														
 
															+            ),  # 映射到类别数
														
 
															+        )
														
 
															+        self.upsample_scale = upsample_scale
														
 
															+
														
 
															+    def forward(self, x: torch.Tensor, output_size: tuple[int, int]) -> torch.Tensor:
														
 
															+        x = self.block(x)
														
 
															+        # 双线性上采样到目标尺寸（推理时传入原始输入 H, W）
														
 
															+        return F.interpolate(x, size=output_size, mode="bilinear", align_corners=False)
														
 
															+
														
 
															+
														
 
															+# ==========================================================================
														
 
															+# XNet2d：完整网络（编码器 + Bottleneck + 解码器 + 分割头）
														
 
															+# 架构概览：
														
 
															+#   输入 → Stem → [Stage1 ↓ Stage2 ↓ Stage3 ↓ Stage4] → Bottleneck
														
 
															+#         → [dec4 ← dec3 ← dec2] → Head → Logits
														
 
															+# 业务特点：
														
 
															+#   - 编码器浅层（Stage1）默认关闭 SSM 以降低计算开销
														
 
															+#   - 解码器逐层注入 guide 信号，实现自适应特征调制
														
 
															+#   - 每个解码块支持频率精炼，增强医学图像细节保留
														
 
															+# ==========================================================================
														
 
															+class XNet2d(nn.Module):
														
 
															+    def __init__(
														
 
															+        self,
														
 
															+        in_channels: int,
														
 
															+        num_classes: int,
														
 
															+        encoder_channels: Sequence[int] = (32, 64, 128, 192),
														
 
															+        encoder_depths: Sequence[int] = (2, 2, 2, 2),
														
 
															+        decoder_channels: Sequence[int] = (128, 64, 32),
														
 
															+        stem_channels: int = 24,
														
 
															+        bottleneck_depth: int = 1,
														
 
															+        global_ratio: float = 2.0,
														
 
															+        wavelet_type: str = "haar",
														
 
															+        wavelet_level: int = 1,
														
 
															+        use_wavelet_branch: bool = True,
														
 
															+        use_global_branch_stage1: bool = False,
														
 
															+        ssm_d_state: int = 16,
														
 
															+        ssm_forward_type: str = "v3",
														
 
															+        ssm_backend: str = "auto",
														
 
															+        use_frequency_refine: bool = True,
														
 
															+        guide_mode: str = "affine",
														
 
															+        out_channels: int | None = None,
														
 
															+    ) -> None:
														
 
															+        super().__init__()
														
 
															+        # 编码器：多尺度特征金字塔
														
 
															+        self.encoder = XNetEncoder2d(
														
 
															+            in_channels=in_channels,
														
 
															+            stem_channels=stem_channels,
														
 
															+            encoder_channels=encoder_channels,
														
 
															+            encoder_depths=encoder_depths,
														
 
															+            global_ratio=global_ratio,
														
 
															+            wavelet_type=wavelet_type,
														
 
															+            wavelet_level=wavelet_level,
														
 
															+            use_wavelet_branch=use_wavelet_branch,
														
 
															+            use_global_branch_stage1=use_global_branch_stage1,
														
 
															+            ssm_d_state=ssm_d_state,
														
 
															+            ssm_forward_type=ssm_forward_type,
														
 
															+            ssm_backend=ssm_backend,
														
 
															+        )
														
 
															+        # Bottleneck：最深特征进一步建模
														
 
															+        bottleneck_channels = encoder_channels[-1]
														
 
															+        self.bottleneck = nn.Sequential(
														
 
															+            *[
														
 
															+                XTEB2d(
														
 
															+                    channels=bottleneck_channels,
														
 
															+                    global_ratio=global_ratio,
														
 
															+                    wavelet_type=wavelet_type,
														
 
															+                    wavelet_level=wavelet_level,
														
 
															+                    use_wavelet_branch=use_wavelet_branch,
														
 
															+                    use_global_branch=True,  # bottleneck 始终启用全局分支
														
 
															+                    ssm_d_state=ssm_d_state,
														
 
															+                    ssm_forward_type=ssm_forward_type,
														
 
															+                    ssm_backend=ssm_backend,
														
 
															+                )
														
 
															+                for _ in range(bottleneck_depth)
														
 
															+            ]
														
 
															+        )
														
 
															+        # 解码器
														
 
															+        self.decoder = XNetDecoder2d(
														
 
															+            encoder_channels=encoder_channels,
														
 
															+            decoder_channels=decoder_channels,
														
 
															+            guide_mode=guide_mode,
														
 
															+            use_frequency_refine=use_frequency_refine,
														
 
															+            out_channels=out_channels,
														
 
															+        )
														
 
															+        # 分割头
														
 
															+        head_in_channels = self.decoder.out_channels
														
 
															+        self.segmentation_head = XNetSegHead2d(head_in_channels, num_classes)
														
 
															+
														
 
															+    def forward(
														
 
															+        self, x: torch.Tensor
														
 
															+    ) -> dict[
														
 
															+        str, torch.Tensor | list[torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]]
														
 
															+    ]:
														
 
															+        encoder_features = self.encoder(x)  # 多尺度特征 [e1, e2, e3, e4]
														
 
															+        encoder_features[-1] = self.bottleneck(encoder_features[-1])  # bottleneck
														
 
															+        decoder_out, decoder_features, guides = self.decoder(encoder_features)  # 解码
														
 
															+        output_size = x.shape[-2:]
														
 
															+        logits = self.segmentation_head(
														
 
															+            decoder_out, output_size=output_size
														
 
															+        )  # 分割 logits
														
 
															+        # 返回字典：包含 logits、中间特征（用于辅助损失）、引导信号
														
 
															+        outputs: dict[
														
 
															+            str,
														
 
															+            torch.Tensor | list[torch.Tensor] | list[tuple[torch.Tensor, torch.Tensor]],
														
 
															+        ] = {
														
 
															+            "logits": logits,
														
 
															+            "seg_logits": logits,
														
 
															+            "encoder_features": encoder_features,
														
 
															+            "decoder_features": decoder_features,
														
 
															+            "guides": guides,
														
 
															+        }
														
 
															+        return outputs
														
--- a/lib/trainers/supervised.py
+++ b/lib/trainers/supervised.py
@@ -43,6 +43,11 @@ class SupervisedSegmentationTrainer(BaseTrainer):
 
															             ssm_forward_type=str(model_cfg.get("ssm_forward_type", "v3")),
														
 
															             ssm_backend=str(model_cfg.get("ssm_backend", "auto")),
														
 
															             use_frequency_refine=bool(model_cfg.get("use_frequency_refine", True)),
														
 
															+            low_freq_radius_h=float(model_cfg.get("low_freq_radius_h", 0.25)),
														
 
															+            low_freq_radius_w=float(model_cfg.get("low_freq_radius_w", 0.25)),
														
 
															+            learnable_low_freq_radius=bool(
														
 
															+                model_cfg.get("learnable_low_freq_radius", True)
														
 
															+            ),
														
 
															             guide_mode=str(model_cfg.get("guide_mode", "affine")),
														
 
															             out_channels=model_cfg.get("out_channels"),
														
 
															         ).to(self.device)
														
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,26 @@
 
															+# Core training stack
														
 
															+torch>=2.2
														
 
															+torchvision>=0.17
														
 
															+numpy>=1.24
														
 
															+Pillow>=10.0
														
 
															+PyYAML>=6.0
														
 
															+
														
 
															+# Medical segmentation losses and metrics
														
 
															+monai>=1.3
														
 
															+
														
 
															+# XNet2d wavelet and VMamba dependencies
														
 
															+ptwt>=0.1.9
														
 
															+PyWavelets>=1.5
														
 
															+timm>=1.0
														
 
															+fvcore>=0.1.5
														
 
															+einops>=0.7
														
 
															+packaging>=23.0
														
 
															+triton>=2.2; platform_system == "Linux"
														
 
															+
														
 
															+# Experiment logging and utilities
														
 
															+swanlab>=0.6
														
 
															+tqdm>=4.66
														
 
															+matplotlib>=3.8
														
 
															+
														
 
															+# Tests
														
 
															+pytest>=8.0
														
--- a/tests/test_xnet_2d.py
+++ b/tests/test_xnet_2d.py
@@ -0,0 +1,38 @@
 
															+from __future__ import annotations
														
 
															+
														
 
															+import torch
														
 
															+from torch import nn
														
 
															+
														
 
															+from lib.modules.xnet_2d import XNet2d, XTEB2d
														
 
															+
														
 
															+
														
 
															+def test_xnet2d_forward_preserves_segmentation_shape() -> None:
														
 
															+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
														
 
															+    model = XNet2d(
														
 
															+        in_channels=3,
														
 
															+        num_classes=1,
														
 
															+        encoder_channels=(8, 16, 24, 32),
														
 
															+        encoder_depths=(1, 1, 1, 1),
														
 
															+        decoder_channels=(24, 16, 8),
														
 
															+        stem_channels=8,
														
 
															+        bottleneck_depth=1,
														
 
															+        global_ratio=1.0,
														
 
															+        use_wavelet_branch=True,
														
 
															+        use_global_branch_stage1=False,
														
 
															+        ssm_d_state=1,
														
 
															+        ssm_backend="torch",
														
 
															+        use_frequency_refine=True,
														
 
															+        learnable_low_freq_radius=False,
														
 
															+    ).to(device)
														
 
															+    if device.type == "cpu":
														
 
															+        for module in model.modules():
														
 
															+            if isinstance(module, XTEB2d):
														
 
															+                module.global_branch = nn.Identity()
														
 
															+    model.eval()
														
 
															+
														
 
															+    x = torch.randn(2, 3, 64, 64, device=device)
														
 
															+    with torch.no_grad():
														
 
															+        outputs = model(x)
														
 
															+
														
 
															+    assert outputs["seg_logits"].shape == (2, 1, 64, 64)
														
 
															+    assert outputs["logits"].shape == outputs["seg_logits"].shape
														
--- a/tmp/docs/training/当前项目详解与纯文本架构流程图.md
+++ b/tmp/docs/training/当前项目详解与纯文本架构流程图.md
@@ -1,1421 +0,0 @@
 
															-# 当前项目详解与纯文本架构流程图
														
 
															-
														
 
															-## 1. 当前项目定位
														
 
															-
														
 
															-`X_SSL_Net` 当前 active 主线是一个面向 2D 超声图像分割的全监督训练工程。
														
 
															-
														
 
															-当前真实训练链路：
														
 
															-
														
 
															-```text
														
 
															-shell script
														
 
															--> tools/train.py
														
 
															--> SupervisedSegmentationTrainer
														
 
															--> SegmentationRecordDataset / DataLoader
														
 
															--> XNet2d
														
 
															--> seg_logits
														
 
															--> DiceCE loss / BCE fallback
														
 
															--> Dice / IoU validation
														
 
															--> best.pth / last.pth
														
 
															-```
														
 
															-
														
 
															-当前真实模型主线：
														
 
															-
														
 
															-```text
														
 
															-XNet2d = X-shaped CNN-Wavelet-VMamba hybrid segmentation network
														
 
															-```
														
 
															-
														
 
															-当前主训练只使用一个分割头：
														
 
															-
														
 
															-```text
														
 
															-outputs["seg_logits"]
														
 
															-```
														
 
															-
														
 
															-当前主线不调用：
														
 
															-
														
 
															-1. `lib/sam2`
														
 
															-2. `lib/SwinTransformer`
														
 
															-3. SwinV2 segmentation config
														
 
															-4. boundary auxiliary head
														
 
															-5. semi-supervised trainer
														
 
															-
														
 
															-`lib/sam2` 与 `lib/SwinTransformer` 目前作为外部代码资产保留，不进入当前训练路径。
														
 
															-
														
 
															-## 2. 一句话总览
														
 
															-
														
 
															-当前项目可以概括为：
														
 
															-
														
 
															-```text
														
 
															-用 XNet2d 在 BUSI / DDTI / TN3K / TG3K 等 2D 超声数据集上做全监督分割训练。
														
 
															-XNet2d 的 encoder 用 local + wavelet + VMamba-style SS2D 三分支建模，
														
 
															-decoder 用同尺度 skip + 斜向 guide + 频率细化恢复 mask。
														
 
															-```
														
 
															-
														
 
															-## 3. 启动入口
														
 
															-
														
 
															-### 3.1 推荐 shell 入口
														
 
															-
														
 
															-最常用入口：
														
 
															-
														
 
															-```bash
														
 
															-DATASET=BUSI bash tools/run_us_experiments.sh
														
 
															-```
														
 
															-
														
 
															-短跑调试入口：
														
 
															-
														
 
															-```bash
														
 
															-DATASET=BUSI \
														
 
															-EXTRA_SET_ARGS="train.epochs=1 train.batch_size=8 train.val_batch_size=8 logging.use_swanlab=false checkpoint.dir=outputs/validation/xnet_oflex_b8" \
														
 
															-bash tools/run_us_experiments.sh
														
 
															-```
														
 
															-
														
 
															-### 3.2 shell 脚本职责
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-tools/run_us_experiments.sh
														
 
															-```
														
 
															-
														
 
															-它做四件事：
														
 
															-
														
 
															-1. 解析 `DATASET`
														
 
															-2. 映射数据集根目录
														
 
															-3. 对需要项目级划分的数据集生成或加载 `train/val`
														
 
															-4. 调用 `tools/train.py`
														
 
															-
														
 
															-支持的数据集名称：
														
 
															-
														
 
															-```text
														
 
															-BUS-UCLM
														
 
															-BUSI
														
 
															-BUS-BRA
														
 
															-BUS_UC
														
 
															-CCAUI
														
 
															-DDTI
														
 
															-OTU_2d
														
 
															-TN3K
														
 
															-TG3K
														
 
															-```
														
 
															-
														
 
															-数据集根目录映射：
														
 
															-
														
 
															-```text
														
 
															-BUSI      -> data/BUSI
														
 
															-DDTI      -> data/DDTI
														
 
															-TN3K      -> data/TN3K
														
 
															-TG3K      -> data/TG3K
														
 
															-BUS_UC    -> data/BUS_UC
														
 
															-...
														
 
															-```
														
 
															-
														
 
															-项目级 split 数据集：
														
 
															-
														
 
															-```text
														
 
															-BUS-UCLM, BUSI, BUS-BRA, BUS_UC, CCAUI, DDTI
														
 
															-```
														
 
															-
														
 
															-官方 split 数据集：
														
 
															-
														
 
															-```text
														
 
															-OTU_2d, TN3K, TG3K
														
 
															-```
														
 
															-
														
 
															-## 4. 从 shell 到 Python 的总流程图
														
 
															-
														
 
															-```text
														
 
															-User command
														
 
															-  |
														
 
															-  |  DATASET=BUSI EXTRA_SET_ARGS="..." bash tools/run_us_experiments.sh
														
 
															-  v
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-| tools/run_us_experiments.sh                                                       |
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-| 1. read DATASET / SEED / EXTRA_SET_ARGS                                           |
														
 
															-| 2. dataset_root(DATASET)                                                          |
														
 
															-| 3. if DATASET needs project split:                                                |
														
 
															-|      python scripts/generate_project_split.py --dataset DATASET --root ROOT       |
														
 
															-| 4. python tools/train.py                                                          |
														
 
															-|      --config configs/segmentation/train_sup_us_template.yaml                     |
														
 
															-|      --set dataset.dataset_name=DATASET dataset.root=ROOT ... EXTRA_SET_ARGS      |
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-  |
														
 
															-  v
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-| tools/train.py                                                                    |
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-| 1. parse --config / --trainer / --set                                             |
														
 
															-| 2. load yaml config                                                               |
														
 
															-| 3. apply dotlist overrides                                                        |
														
 
															-| 4. optional override trainer.name                                                 |
														
 
															-| 5. build_trainer(cfg)                                                             |
														
 
															-| 6. trainer.train()                                                                |
														
 
															-+----------------------------------------------------------------------------------+
														
 
															-```
														
 
															-
														
 
															-## 5. 配置系统
														
 
															-
														
 
															-当前主配置：
														
 
															-
														
 
															-```text
														
 
															-configs/segmentation/train_sup_us_template.yaml
														
 
															-```
														
 
															-
														
 
															-当前保留的 segmentation 配置：
														
 
															-
														
 
															-```text
														
 
															-configs/segmentation/train_sup_us_template.yaml
														
 
															-configs/segmentation/us_exp_sup_busi.yaml
														
 
															-configs/segmentation/us_exp_sup_busi_ablation.yaml
														
 
															-```
														
 
															-
														
 
															-### 5.1 配置覆盖方式
														
 
															-
														
 
															-`tools/train.py` 支持：
														
 
															-
														
 
															-```text
														
 
															---set key=value key=value ...
														
 
															-```
														
 
															-
														
 
															-例如：
														
 
															-
														
 
															-```bash
														
 
															---set train.epochs=1 train.batch_size=8 model.use_frequency_refine=false
														
 
															-```
														
 
															-
														
 
															-覆盖逻辑：
														
 
															-
														
 
															-```text
														
 
															-load_yaml_config(path)
														
 
															-  |
														
 
															-  v
														
 
															-apply_dotlist_overrides(cfg, args.set)
														
 
															-  |
														
 
															-  v
														
 
															-nested dict update
														
 
															-```
														
 
															-
														
 
															-### 5.2 当前关键配置
														
 
															-
														
 
															-训练：
														
 
															-
														
 
															-```yaml
														
 
															-train:
														
 
															-  epochs: 200
														
 
															-  batch_size: 4
														
 
															-  val_batch_size: 4
														
 
															-  amp: true
														
 
															-  num_workers: 4
														
 
															-  pin_memory: true
														
 
															-  persistent_workers: true
														
 
															-  prefetch_factor: 2
														
 
															-  device: cuda
														
 
															-  grad_clip:
														
 
															-    enabled: true
														
 
															-    max_norm: 1.0
														
 
															-```
														
 
															-
														
 
															-数据：
														
 
															-
														
 
															-```yaml
														
 
															-dataset:
														
 
															-  dataset_name: BUSI
														
 
															-  root: data/BUSI
														
 
															-  split: train
														
 
															-  val_split: val
														
 
															-  image_size: [256, 256]
														
 
															-  in_channels: 3
														
 
															-  num_classes: 1
														
 
															-```
														
 
															-
														
 
															-模型：
														
 
															-
														
 
															-```yaml
														
 
															-model:
														
 
															-  in_channels: 3
														
 
															-  encoder_channels: [32, 64, 128, 192]
														
 
															-  encoder_depths: [2, 2, 2, 2]
														
 
															-  decoder_channels: [128, 64, 32]
														
 
															-  stem_channels: 24
														
 
															-  bottleneck_depth: 1
														
 
															-  global_ratio: 2.0
														
 
															-  wavelet_type: haar
														
 
															-  wavelet_level: 1
														
 
															-  use_wavelet_branch: true
														
 
															-  use_global_branch_stage1: false
														
 
															-  ssm_d_state: 16
														
 
															-  ssm_forward_type: v3
														
 
															-  ssm_backend: auto
														
 
															-  use_frequency_refine: true
														
 
															-  guide_mode: affine
														
 
															-  out_channels: null
														
 
															-```
														
 
															-
														
 
															-优化：
														
 
															-
														
 
															-```yaml
														
 
															-optimizer:
														
 
															-  name: adamw
														
 
															-  lr: 1.0e-4
														
 
															-  weight_decay: 0.05
														
 
															-
														
 
															-scheduler:
														
 
															-  name: cosine
														
 
															-  warmup:
														
 
															-    name: linear
														
 
															-    params:
														
 
															-      start_factor: 0.1
														
 
															-      total_iters: 10
														
 
															-  params:
														
 
															-    T_max: 190
														
 
															-    eta_min: 1.0e-6
														
 
															-```
														
 
															-
														
 
															-loss 与 metric：
														
 
															-
														
 
															-```yaml
														
 
															-loss:
														
 
															-  name: dicece
														
 
															-  task_mode: binary
														
 
															-  params:
														
 
															-    include_background: true
														
 
															-    lambda_dice: 0.7
														
 
															-    lambda_ce: 0.3
														
 
															-
														
 
															-validation:
														
 
															-  threshold: 0.5
														
 
															-  metrics:
														
 
															-    task_mode: binary
														
 
															-    metrics:
														
 
															-      - name: dice
														
 
															-      - name: iou
														
 
															-```
														
 
															-
														
 
															-## 6. Trainer 构建流程
														
 
															-
														
 
															-入口：
														
 
															-
														
 
															-```text
														
 
															-lib/trainers/builder.py::build_trainer
														
 
															-```
														
 
															-
														
 
															-当前 trainer：
														
 
															-
														
 
															-```text
														
 
															-lib/trainers/supervised.py::SupervisedSegmentationTrainer
														
 
															-```
														
 
															-
														
 
															-构建流程：
														
 
															-
														
 
															-```text
														
 
															-build_trainer(cfg)
														
 
															-  |
														
 
															-  v
														
 
															-read cfg.trainer.name
														
 
															-  |
														
 
															-  v
														
 
															-TRAINER_REGISTRY["supervised_segmentation"]
														
 
															-  |
														
 
															-  v
														
 
															-trainer = SupervisedSegmentationTrainer(cfg, args)
														
 
															-  |
														
 
															-  v
														
 
															-trainer.build()
														
 
															-  |
														
 
															-  v
														
 
															-return trainer
														
 
															-```
														
 
															-
														
 
															-`SupervisedSegmentationTrainer.build()` 做：
														
 
															-
														
 
															-```text
														
 
															-1. dataset_cfg = cfg["dataset"]
														
 
															-2. model_cfg   = cfg["model"]
														
 
															-3. train_cfg   = cfg["train"]
														
 
															-
														
 
															-4. build XNet2d from model_cfg
														
 
															-5. move model to device
														
 
															-6. build optimizer
														
 
															-7. build scheduler
														
 
															-8. build loss if cfg.loss is not null
														
 
															-9. build train dataloader
														
 
															-10. build validation dataloader
														
 
															-11. maybe resume checkpoint
														
 
															-12. maybe init SwanLab
														
 
															-```
														
 
															-
														
 
															-## 7. BaseTrainer 公共职责
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-lib/trainers/base.py
														
 
															-```
														
 
															-
														
 
															-公共职责：
														
 
															-
														
 
															-```text
														
 
															-BaseTrainer
														
 
															-├─ random seed
														
 
															-├─ device selection
														
 
															-├─ output directory
														
 
															-├─ AMP GradScaler
														
 
															-├─ batch size resolution
														
 
															-├─ dataloader construction helper
														
 
															-├─ validation metric construction
														
 
															-├─ checkpoint save / resume
														
 
															-├─ early stopping
														
 
															-├─ SwanLab logging
														
 
															-├─ training setup summary
														
 
															-├─ step performance logging
														
 
															-└─ epoch finalization
														
 
															-```
														
 
															-
														
 
															-设备选择：
														
 
															-
														
 
															-```text
														
 
															-cfg.train.device == "cuda" and torch.cuda.is_available()
														
 
															-  -> cuda
														
 
															-else
														
 
															-  -> cpu
														
 
															-```
														
 
															-
														
 
															-AMP 开关：
														
 
															-
														
 
															-```text
														
 
															-cfg.train.amp == true and device == cuda
														
 
															-  -> enabled
														
 
															-else
														
 
															-  -> disabled
														
 
															-```
														
 
															-
														
 
															-当前已验证目标环境：
														
 
															-
														
 
															-```text
														
 
															-conda env: xnet_mamba
														
 
															-torch: 2.10.0+cu126
														
 
															-GPU: NVIDIA GeForce RTX 4070 Ti SUPER
														
 
															-selective_scan_cuda_oflex: available
														
 
															-```
														
 
															-
														
 
															-## 8. 数据链路
														
 
															-
														
 
															-### 8.1 数据 index 构建
														
 
															-
														
 
															-入口：
														
 
															-
														
 
															-```text
														
 
															-lib/data/builder.py::build_dataset_index
														
 
															-```
														
 
															-
														
 
															-核心 registry：
														
 
															-
														
 
															-```text
														
 
															-BUS-UCLM -> paired images/masks
														
 
															-BUSI     -> Dataset_BUSI_with_GT/{benign,malignant,normal}
														
 
															-BUS-BRA  -> prefixed image/mask matching
														
 
															-BUS_UC   -> All / Benign / Malignant folders
														
 
															-CCAUI    -> US images / Expert mask images
														
 
															-DDTI     -> XML annotation records
														
 
															-OTU_2d   -> images / annotations
														
 
															-TN3K     -> trainval/test image/mask folders
														
 
															-TG3K     -> thyroid-image / thyroid-mask
														
 
															-```
														
 
															-
														
 
															-### 8.2 split 应用
														
 
															-
														
 
															-入口：
														
 
															-
														
 
															-```text
														
 
															-lib/data/loaders.py::apply_official_split
														
 
															-```
														
 
															-
														
 
															-流程：
														
 
															-
														
 
															-```text
														
 
															-build_dataset_index(dataset_name, root)
														
 
															-  |
														
 
															-  v
														
 
															-if split is requested:
														
 
															-  |
														
 
															-  +-- OTU_2d: read train.txt / val.txt
														
 
															-  |
														
 
															-  +-- TN3K: read tn3k-trainval.json or use test folder
														
 
															-  |
														
 
															-  +-- TG3K: read tg3k-trainval.json
														
 
															-  |
														
 
															-  +-- project split dataset:
														
 
															-        read data/<dataset>/splits/project/train.txt or val.txt
														
 
															-```
														
 
															-
														
 
															-项目级 split 生成：
														
 
															-
														
 
															-```text
														
 
															-scripts/generate_project_split.py
														
 
															-  |
														
 
															-  v
														
 
															-generate_project_splits()
														
 
															-  |
														
 
															-  v
														
 
															-write:
														
 
															-  data/<dataset>/splits/project/train.txt
														
 
															-  data/<dataset>/splits/project/val.txt
														
 
															-```
														
 
															-
														
 
															-### 8.3 Dataset 读取
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-lib/data/datasets.py::SegmentationRecordDataset
														
 
															-```
														
 
															-
														
 
															-单样本读取：
														
 
															-
														
 
															-```text
														
 
															-record
														
 
															-  |
														
 
															-  +-- image_path -> PIL RGB -> float32 [3,H,W] in [0,1]
														
 
															-  |
														
 
															-  +-- mask_path  -> PIL L -> binary float32 [1,H,W]
														
 
															-  |
														
 
															-  +-- DDTI special:
														
 
															-        annotation_path XML -> build_ddti_mask() -> binary [1,H,W]
														
 
															-  |
														
 
															-  +-- joint augmentation
														
 
															-  |
														
 
															-  +-- resize image to dataset.image_size
														
 
															-  |
														
 
															-  +-- resize mask to dataset.image_size
														
 
															-  |
														
 
															-  v
														
 
															-{
														
 
															-  "image": image,
														
 
															-  "mask": mask,
														
 
															-  "dataset_name": ...,
														
 
															-  "sample_id": ...,
														
 
															-  "split": ...,
														
 
															-  "class_name": ...,
														
 
															-  "meta": ...
														
 
															-}
														
 
															-```
														
 
															-
														
 
															-### 8.4 augmentation
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-lib/data/augment.py::SegmentationAugmentation
														
 
															-```
														
 
															-
														
 
															-当前支持：
														
 
															-
														
 
															-```text
														
 
															-spatial:
														
 
															-  random horizontal flip
														
 
															-  random vertical flip
														
 
															-  random rotate 90
														
 
															-
														
 
															-intensity:
														
 
															-  random brightness / contrast
														
 
															-  random gaussian noise
														
 
															-  clamp to [0,1]
														
 
															-```
														
 
															-
														
 
															-### 8.5 collate
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-lib/data/collate.py::record_collate_fn
														
 
															-```
														
 
															-
														
 
															-逻辑：
														
 
															-
														
 
															-```text
														
 
															-if all tensor shapes same:
														
 
															-  torch.stack(values, dim=0)
														
 
															-else:
														
 
															-  keep list
														
 
															-
														
 
															-strings / dict / metadata:
														
 
															-  keep list
														
 
															-```
														
 
															-
														
 
															-最终 batch：
														
 
															-
														
 
															-```text
														
 
															-image: [B,3,256,256]
														
 
															-mask : [B,1,256,256]
														
 
															-```
														
 
															-
														
 
															-## 9. Dataloader 流程图
														
 
															-
														
 
															-```text
														
 
															-SupervisedSegmentationTrainer.build()
														
 
															-  |
														
 
															-  v
														
 
															-_build_segmentation_loader(split="train")
														
 
															-  |
														
 
															-  v
														
 
															-build_dataloader()
														
 
															-  |
														
 
															-  v
														
 
															-build_record_dataset()
														
 
															-  |
														
 
															-  v
														
 
															-build_dataset_index()
														
 
															-  |
														
 
															-  v
														
 
															-apply_official_split()
														
 
															-  |
														
 
															-  v
														
 
															-SegmentationRecordDataset(records, transforms)
														
 
															-  |
														
 
															-  v
														
 
															-DataLoader(
														
 
															-  batch_size,
														
 
															-  shuffle,
														
 
															-  num_workers,
														
 
															-  pin_memory,
														
 
															-  persistent_workers,
														
 
															-  prefetch_factor,
														
 
															-  collate_fn=record_collate_fn
														
 
															-)
														
 
															-```
														
 
															-
														
 
															-注意：`DataLoader` worker 的真实启动通常发生在第一次迭代时，也就是 `======== END TRAINING SETUP ========` 之后。若 `num_workers > 0`，第一批数据可能出现一次性等待。
														
 
															-
														
 
															-## 10. XNet2d 总体结构
														
 
															-
														
 
															-文件：
														
 
															-
														
 
															-```text
														
 
															-lib/modules/xnet_2d.py
														
 
															-```
														
 
															-
														
 
															-当前默认参数量：
														
 
															-
														
 
															-```text
														
 
															-total parameters:     9,432,129
														
 
															-trainable parameters: 9,432,129
														
 
															-```
														
 
															-
														
 
															-顶层结构：
														
 
															-
														
 
															-```text
														
 
															-XNet2d
														
 
															-├─ XNetEncoder2d
														
 
															-│  ├─ XNetStem2d
														
 
															-│  ├─ Encoder Stage 1: XTEB2d x 2
														
 
															-│  ├─ Downsample 1
														
 
															-│  ├─ Encoder Stage 2: XTEB2d x 2
														
 
															-│  ├─ Downsample 2
														
 
															-│  ├─ Encoder Stage 3: XTEB2d x 2
														
 
															-│  ├─ Downsample 3
														
 
															-│  └─ Encoder Stage 4: XTEB2d x 2
														
 
															-│
														
 
															-├─ Bottleneck: XTEB2d x 1
														
 
															-│
														
 
															-├─ XNetDecoder2d
														
 
															-│  ├─ guide4: E4 -> D4 affine guide
														
 
															-│  ├─ dec4: XCRB2d(E4, E3, guide4)
														
 
															-│  ├─ guide3: E3 -> D3 affine guide
														
 
															-│  ├─ dec3: XCRB2d(D4, E2, guide3)
														
 
															-│  ├─ guide2: E2 -> D2 affine guide
														
 
															-│  ├─ dec2: XCRB2d(D3, E1, guide2)
														
 
															-│  └─ head_refine
														
 
															-│
														
 
															-└─ XNetSegHead2d
														
 
															-```
														
 
															-
														
 
															-## 11. XNet2d 纯文本架构图
														
 
															-
														
 
															-以输入 `[B,3,256,256]` 为例，默认通道为 `[32,64,128,192]`：
														
 
															-
														
 
															-```text
														
 
															-Input
														
 
															-[B, 3, 256, 256]
														
 
															-  |
														
 
															-  v
														
 
															-XNetStem2d
														
 
															-  Conv3x3 s2:       [B, 24, 128, 128]
														
 
															-  DWConv3x3:        [B, 24, 128, 128]
														
 
															-  PWConv1x1:        [B, 32, 128, 128]
														
 
															-  Conv3x3 s2:       [B, 32,  64,  64]
														
 
															-  |
														
 
															-  v
														
 
															-E1 = Encoder Stage 1, XTEB x2
														
 
															-[B, 32, 64, 64]
														
 
															-  |
														
 
															-  v
														
 
															-Down1
														
 
															-[B, 64, 32, 32]
														
 
															-  |
														
 
															-  v
														
 
															-E2 = Encoder Stage 2, XTEB x2
														
 
															-[B, 64, 32, 32]
														
 
															-  |
														
 
															-  v
														
 
															-Down2
														
 
															-[B, 128, 16, 16]
														
 
															-  |
														
 
															-  v
														
 
															-E3 = Encoder Stage 3, XTEB x2
														
 
															-[B, 128, 16, 16]
														
 
															-  |
														
 
															-  v
														
 
															-Down3
														
 
															-[B, 192, 8, 8]
														
 
															-  |
														
 
															-  v
														
 
															-E4 = Encoder Stage 4, XTEB x2
														
 
															-[B, 192, 8, 8]
														
 
															-  |
														
 
															-  v
														
 
															-Bottleneck XTEB x1
														
 
															-[B, 192, 8, 8]
														
 
															-```
														
 
															-
														
 
															-Decoder：
														
 
															-
														
 
															-```text
														
 
															-E4 [B,192,8,8]
														
 
															-  |
														
 
															-  +-- guide4 = Phi(E4) -> resize to E3 size -> affine gamma/beta for d4
														
 
															-  |
														
 
															-  v
														
 
															-dec4 input:
														
 
															-  decoder input: E4 [B,192,8,8]
														
 
															-  same-scale skip: E3 [B,128,16,16]
														
 
															-  guide: guide4
														
 
															-  output D4 [B,128,16,16]
														
 
															-
														
 
															-D4 [B,128,16,16]
														
 
															-  |
														
 
															-  +-- guide3 = Phi(E3) -> resize to E2 size -> affine gamma/beta for d3
														
 
															-  |
														
 
															-  v
														
 
															-dec3 input:
														
 
															-  decoder input: D4 [B,128,16,16]
														
 
															-  same-scale skip: E2 [B,64,32,32]
														
 
															-  guide: guide3
														
 
															-  output D3 [B,64,32,32]
														
 
															-
														
 
															-D3 [B,64,32,32]
														
 
															-  |
														
 
															-  +-- guide2 = Phi(E2) -> resize to E1 size -> affine gamma/beta for d2
														
 
															-  |
														
 
															-  v
														
 
															-dec2 input:
														
 
															-  decoder input: D3 [B,64,32,32]
														
 
															-  same-scale skip: E1 [B,32,64,64]
														
 
															-  guide: guide2
														
 
															-  output D2 [B,32,64,64]
														
 
															-
														
 
															-D2 [B,32,64,64]
														
 
															-  |
														
 
															-  v
														
 
															-HeadRefine
														
 
															-[B,32,64,64]
														
 
															-  |
														
 
															-  v
														
 
															-SegHead + upsample to input size
														
 
															-[B,1,256,256]
														
 
															-```
														
 
															-
														
 
															-## 12. XTEB2d 详解
														
 
															-
														
 
															-`XTEB2d` 是 encoder 的基本 block。
														
 
															-
														
 
															-名字含义：
														
 
															-
														
 
															-```text
														
 
															-XTEB = XNet Tri-branch Encoding Block
														
 
															-```
														
 
															-
														
 
															-输入输出：
														
 
															-
														
 
															-```text
														
 
															-input : X [B,C,H,W]
														
 
															-output: Z [B,C,H,W]
														
 
															-```
														
 
															-
														
 
															-内部结构：
														
 
															-
														
 
															-```text
														
 
															-X
														
 
															-│
														
 
															-├─ pre_norm: 1x1 Conv2dBN
														
 
															-│
														
 
															-├─ Local branch
														
 
															-│   ├─ DWConv3x3 + PWConv1x1
														
 
															-│   └─ DWConv5x5 + PWConv1x1
														
 
															-│
														
 
															-├─ Wavelet branch
														
 
															-│   ├─ Haar DWT
														
 
															-│   │   ├─ LL
														
 
															-│   │   └─ LH/HL/HH high bands
														
 
															-│   ├─ LL projection
														
 
															-│   ├─ high-band projection
														
 
															-│   └─ inverse Haar transform
														
 
															-│
														
 
															-├─ Global branch
														
 
															-│   ├─ 1x1 pre projection
														
 
															-│   ├─ VMamba-style SS2D
														
 
															-│   └─ 1x1 post projection
														
 
															-│
														
 
															-├─ concat(local, wavelet, global)
														
 
															-├─ 1x1 fusion
														
 
															-├─ channel gate from GAP + MLP + sigmoid
														
 
															-├─ residual add
														
 
															-└─ lightweight FFN + residual add
														
 
															-```
														
 
															-
														
 
															-公式化：
														
 
															-
														
 
															-```text
														
 
															-X0 = PreNorm(X)
														
 
															-
														
 
															-L = Local(X0)
														
 
															-W = Wavelet(X0)
														
 
															-G = GlobalSS2D(X0)
														
 
															-
														
 
															-F = Fuse([L,W,G])
														
 
															-Y = X + Post(F)
														
 
															-Z = Y + FFN(Y)
														
 
															-```
														
 
															-
														
 
															-### 12.1 Local branch
														
 
															-
														
 
															-职责：
														
 
															-
														
 
															-```text
														
 
															-局部纹理、边界、短程结构
														
 
															-```
														
 
															-
														
 
															-结构：
														
 
															-
														
 
															-```text
														
 
															-DWConv3x3 -> ReLU -> PWConv1x1
														
 
															-DWConv5x5 -> ReLU -> PWConv1x1
														
 
															-sum
														
 
															-```
														
 
															-
														
 
															-### 12.2 Wavelet branch
														
 
															-
														
 
															-职责：
														
 
															-
														
 
															-```text
														
 
															-低频轮廓 + 高频边界/纹理
														
 
															-```
														
 
															-
														
 
															-结构：
														
 
															-
														
 
															-```text
														
 
															-Haar DWT:
														
 
															-  LL       -> low-frequency structure
														
 
															-  LH/HL/HH -> high-frequency directional details
														
 
															-
														
 
															-LL -> Conv projection
														
 
															-High bands -> depthwise conv + pointwise conv
														
 
															-IDWT -> output projection
														
 
															-```
														
 
															-
														
 
															-当前限制：
														
 
															-
														
 
															-```text
														
 
															-wavelet_type = haar
														
 
															-wavelet_level = 1
														
 
															-```
														
 
															-
														
 
															-### 12.3 Global SS2D branch
														
 
															-
														
 
															-职责：
														
 
															-
														
 
															-```text
														
 
															-高效长程依赖建模、全局结构一致性
														
 
															-```
														
 
															-
														
 
															-当前实现：
														
 
															-
														
 
															-```text
														
 
															-lib/modules/lib_mamba/vmamba.py::SS2D
														
 
															-```
														
 
															-
														
 
															-来源：
														
 
															-
														
 
															-```text
														
 
															-VMamba-style SS2D operator
														
 
															-```
														
 
															-
														
 
															-后端选择：
														
 
															-
														
 
															-```text
														
 
															-ssm_backend = auto
														
 
															-  |
														
 
															-  +-- if x.is_cuda:
														
 
															-        selective_scan_backend = oflex
														
 
															-        scan_force_torch = false
														
 
															-  |
														
 
															-  +-- else:
														
 
															-        selective_scan_backend = torch
														
 
															-        scan_force_torch = true
														
 
															-
														
 
															-ssm_backend = oflex
														
 
															-  -> force oflex
														
 
															-
														
 
															-ssm_backend = torch
														
 
															-  -> force torch fallback
														
 
															-```
														
 
															-
														
 
															-当前默认：
														
 
															-
														
 
															-```text
														
 
															-ssm_forward_type = v3
														
 
															-ssm_backend = auto
														
 
															-```
														
 
															-
														
 
															-在 `xnet_mamba` + RTX 4070 Ti SUPER 环境中已验证：
														
 
															-
														
 
															-```text
														
 
															-selective_scan_cuda_oflex import OK
														
 
															-WITH_SELECTIVESCAN_OFLEX = True
														
 
															-```
														
 
															-
														
 
															-## 13. XCRB2d 详解
														
 
															-
														
 
															-`XCRB2d` 是 decoder 的基本 block。
														
 
															-
														
 
															-名字含义：
														
 
															-
														
 
															-```text
														
 
															-XCRB = XNet Cross-guided Reconstruction Block
														
 
															-```
														
 
															-
														
 
															-输入：
														
 
															-
														
 
															-```text
														
 
															-decoder input: deeper decoder or bottleneck feature
														
 
															-same-scale skip: encoder feature at target scale
														
 
															-diagonal guide: deeper encoder semantic guide
														
 
															-```
														
 
															-
														
 
															-内部结构：
														
 
															-
														
 
															-```text
														
 
															-decoder input
														
 
															-  |
														
 
															-  v
														
 
															-bilinear upsample to skip size
														
 
															-  |
														
 
															-  v
														
 
															-1x1 projection
														
 
															-  |
														
 
															-  +-----------------------------+
														
 
															-                                |
														
 
															-same-scale skip                 |
														
 
															-  |                             |
														
 
															-  v                             |
														
 
															-1x1 projection                  |
														
 
															-  |                             |
														
 
															-  +----------- concat ----------+
														
 
															-                  |
														
 
															-                  v
														
 
															-             3x3 fusion
														
 
															-                  |
														
 
															-                  v
														
 
															-      guide affine modulation
														
 
															-                  |
														
 
															-                  v
														
 
															-        optional frequency refine
														
 
															-                  |
														
 
															-                  v
														
 
															-        residual spatial refine
														
 
															-```
														
 
															-
														
 
															-### 13.1 X-shaped 信息流
														
 
															-
														
 
															-当前 decoder 不只使用普通 U-Net 的横向 skip 连接。
														
 
															-
														
 
															-它同时使用：
														
 
															-
														
 
															-```text
														
 
															-same-scale path:
														
 
															-  E3 -> D4
														
 
															-  E2 -> D3
														
 
															-  E1 -> D2
														
 
															-
														
 
															-diagonal guide path:
														
 
															-  E4 -> D4
														
 
															-  E3 -> D3
														
 
															-  E2 -> D2
														
 
															-```
														
 
															-
														
 
															-纯文本示意：
														
 
															-
														
 
															-```text
														
 
															-Encoder: E1 ---------------------------> D2
														
 
															-            \                          /
														
 
															-             \                        /
														
 
															-Encoder:      E2 -------------------> D3
														
 
															-               \      guide to D2   /
														
 
															-                \                  /
														
 
															-Encoder:         E3 -------------> D4
														
 
															-                  \ guide to D3  /
														
 
															-                   \            /
														
 
															-Encoder:            E4 --------/
														
 
															-                     guide to D4
														
 
															-```
														
 
															-
														
 
															-### 13.2 Guide modulation
														
 
															-
														
 
															-默认 `guide_mode=affine`。
														
 
															-
														
 
															-流程：
														
 
															-
														
 
															-```text
														
 
															-guide feature
														
 
															-  |
														
 
															-  v
														
 
															-resize to target decoder scale
														
 
															-  |
														
 
															-  v
														
 
															-projection -> [gamma, beta]
														
 
															-  |
														
 
															-  v
														
 
															-gamma = sigmoid(gamma) + 0.5
														
 
															-  |
														
 
															-  v
														
 
															-F' = gamma * F + beta
														
 
															-```
														
 
															-
														
 
															-### 13.3 Frequency refine
														
 
															-
														
 
															-默认 `use_frequency_refine=true`。
														
 
															-
														
 
															-流程：
														
 
															-
														
 
															-```text
														
 
															-feature F
														
 
															-  |
														
 
															-  v
														
 
															-cast to float32 if needed
														
 
															-  |
														
 
															-  v
														
 
															-rfft2
														
 
															-  |
														
 
															-  +-- low frequency mask
														
 
															-  |
														
 
															-  +-- high frequency residual
														
 
															-  |
														
 
															-  v
														
 
															-low/high learnable gates
														
 
															-  |
														
 
															-  v
														
 
															-irfft2
														
 
															-  |
														
 
															-  v
														
 
															-cast back to input dtype
														
 
															-  |
														
 
															-  v
														
 
															-depthwise conv refine
														
 
															-```
														
 
															-
														
 
															-这里显式将 FFT 计算放在 `float32` 中，避免 AMP 下触发 `ComplexHalf support is experimental` warning。
														
 
															-
														
 
															-## 14. XNet2d forward 输出
														
 
															-
														
 
															-`XNet2d.forward(x)` 返回：
														
 
															-
														
 
															-```python
														
 
															-{
														
 
															-    "logits": logits,
														
 
															-    "seg_logits": logits,
														
 
															-    "encoder_features": encoder_features,
														
 
															-    "decoder_features": decoder_features,
														
 
															-    "guides": guides,
														
 
															-}
														
 
															-```
														
 
															-
														
 
															-训练只使用：
														
 
															-
														
 
															-```text
														
 
															-outputs["seg_logits"]
														
 
															-```
														
 
															-
														
 
															-其余输出用于：
														
 
															-
														
 
															-```text
														
 
															-debug
														
 
															-visualization
														
 
															-future auxiliary analysis
														
 
															-```
														
 
															-
														
 
															-当前没有边界辅助输出。
														
 
															-
														
 
															-## 15. 训练循环详解
														
 
															-
														
 
															-入口：
														
 
															-
														
 
															-```text
														
 
															-SupervisedSegmentationTrainer.train()
														
 
															-```
														
 
															-
														
 
															-流程：
														
 
															-
														
 
															-```text
														
 
															-train()
														
 
															-  |
														
 
															-  v
														
 
															-print training setup
														
 
															-  |
														
 
															-  v
														
 
															-for epoch in range(start_epoch, epochs):
														
 
															-  |
														
 
															-  +-- model.train()
														
 
															-  +-- optimizer.zero_grad()
														
 
															-  +-- for step, batch in train_loader:
														
 
															-        |
														
 
															-        +-- measure data_time
														
 
															-        |
														
 
															-        +-- image = batch["image"].to(device)
														
 
															-        +-- mask  = batch["mask"].to(device)
														
 
															-        |
														
 
															-        +-- with autocast(enabled=amp):
														
 
															-              outputs = model(image)
														
 
															-              seg_logits = outputs["seg_logits"]
														
 
															-              seg_loss = loss(seg_logits, mask)
														
 
															-              total_loss = seg_loss
														
 
															-        |
														
 
															-        +-- scaled_total_loss = total_loss / accum_steps
														
 
															-        +-- grad_scaler.scale(scaled_total_loss).backward()
														
 
															-        |
														
 
															-        +-- if an optimizer step is due:
														
 
															-              unscale gradients if grad clipping enabled
														
 
															-              clip grad norm
														
 
															-              grad_scaler.step(optimizer)
														
 
															-              grad_scaler.update()
														
 
															-              optimizer.zero_grad()
														
 
															-        |
														
 
															-        +-- log step every logging.log_interval
														
 
															-  |
														
 
															-  +-- scheduler.step()
														
 
															-  |
														
 
															-  +-- validate if enabled and interval matches
														
 
															-  |
														
 
															-  +-- finalize epoch
														
 
															-        |
														
 
															-        +-- merge train / val metrics
														
 
															-        +-- update best metric
														
 
															-        +-- save best.pth if improved
														
 
															-        +-- save last.pth if enabled
														
 
															-        +-- early stopping check
														
 
															-```
														
 
															-
														
 
															-## 16. Loss 路径
														
 
															-
														
 
															-当前配置使用：
														
 
															-
														
 
															-```text
														
 
															-MONAI DiceCELoss
														
 
															-```
														
 
															-
														
 
															-构建路径：
														
 
															-
														
 
															-```text
														
 
															-cfg.loss
														
 
															-  |
														
 
															-  v
														
 
															-lib/tools/loss.py::build_loss
														
 
															-  |
														
 
															-  v
														
 
															-DiceCELoss(sigmoid=True, include_background=True, lambda_dice=0.7, lambda_ce=0.3)
														
 
															-```
														
 
															-
														
 
															-如果 `loss: null`：
														
 
															-
														
 
															-```text
														
 
															-torch.nn.functional.binary_cross_entropy_with_logits(seg_logits, mask)
														
 
															-```
														
 
															-
														
 
															-该 fallback 适合在环境临时缺少 MONAI 时做 smoke test，不建议作为正式论文训练的默认损失。
														
 
															-
														
 
															-## 17. Validation 路径
														
 
															-
														
 
															-验证函数：
														
 
															-
														
 
															-```text
														
 
															-SupervisedSegmentationTrainer._validate()
														
 
															-```
														
 
															-
														
 
															-流程：
														
 
															-
														
 
															-```text
														
 
															-model.eval()
														
 
															-build validation metrics
														
 
															-for batch in val_loader:
														
 
															-  image -> device
														
 
															-  mask -> device
														
 
															-  outputs, losses = _compute_losses(image, mask)
														
 
															-  update loss sums
														
 
															-  update metrics with outputs["seg_logits"]
														
 
															-
														
 
															-average val loss
														
 
															-compute Dice / IoU
														
 
															-reset metric states
														
 
															-return val_metrics
														
 
															-```
														
 
															-
														
 
															-metric 输入处理：
														
 
															-
														
 
															-```text
														
 
															-binary mode:
														
 
															-  pred = sigmoid(logits) >= threshold
														
 
															-  target = target > 0
														
 
															-
														
 
															-multiclass mode:
														
 
															-  pred = argmax(logits)
														
 
															-  target = one-hot or class index
														
 
															-```
														
 
															-
														
 
															-当前默认：
														
 
															-
														
 
															-```text
														
 
															-threshold = 0.5
														
 
															-metrics = Dice, IoU
														
 
															-```
														
 
															-
														
 
															-## 18. Checkpoint 路径
														
 
															-
														
 
															-checkpoint 目录：
														
 
															-
														
 
															-```text
														
 
															-cfg.checkpoint.dir
														
 
															-```
														
 
															-
														
 
															-默认脚本会覆盖为：
														
 
															-
														
 
															-```text
														
 
															-outputs/experiments/supervised/<DATASET>
														
 
															-```
														
 
															-
														
 
															-保存文件：
														
 
															-
														
 
															-```text
														
 
															-best.pth
														
 
															-last.pth
														
 
															-```
														
 
															-
														
 
															-checkpoint 内容：
														
 
															-
														
 
															-```text
														
 
															-epoch
														
 
															-cfg
														
 
															-metrics
														
 
															-model state_dict
														
 
															-optimizer state_dict
														
 
															-scheduler state_dict
														
 
															-grad_scaler state_dict
														
 
															-best_metric
														
 
															-no_improve_epochs
														
 
															-```
														
 
															-
														
 
															-best 判断：
														
 
															-
														
 
															-```text
														
 
															-monitor = dice
														
 
															-monitor_mode = max
														
 
															-```
														
 
															-
														
 
															-即：
														
 
															-
														
 
															-```text
														
 
															-val_dice 越大越好
														
 
															-```
														
 
															-
														
 
															-## 19. 日志与性能字段
														
 
															-
														
 
															-每隔 `logging.log_interval` step 打印：
														
 
															-
														
 
															-```text
														
 
															-epoch
														
 
															-step
														
 
															-num_steps
														
 
															-data_time
														
 
															-iter_time
														
 
															-gpu_memory_mb
														
 
															-lr
														
 
															-train_total
														
 
															-train_seg
														
 
															-train_grad_norm
														
 
															-```
														
 
															-
														
 
															-含义：
														
 
															-
														
 
															-```text
														
 
															-data_time:
														
 
															-  从上一步结束到当前 batch 可用的时间。
														
 
															-  num_workers > 0 时，第一批 worker 启动开销发生在 END TRAINING SETUP 之后。
														
 
															-
														
 
															-iter_time:
														
 
															-  当前 step 的训练计算时间，包括 forward、loss、backward、optimizer step。
														
 
															-
														
 
															-gpu_memory_mb:
														
 
															-  由 torch.cuda.max_memory_allocated 得到的峰值已分配显存，单位 MB。
														
 
															-```
														
 
															-
														
 
															-当前实测参考：
														
 
															-
														
 
															-```text
														
 
															-batch_size = 8
														
 
															-image_size = 256
														
 
															-ssm_backend = auto -> oflex
														
 
															-iter_time ≈ 0.09 - 0.11 s / step
														
 
															-GPU memory ≈ 850 MB
														
 
															-```
														
 
															-
														
 
															-## 20. 从输入到 loss 的端到端流程图
														
 
															-
														
 
															-```text
														
 
															-Batch from DataLoader
														
 
															-  |
														
 
															-  +-- image [B,3,256,256]
														
 
															-  +-- mask  [B,1,256,256]
														
 
															-  |
														
 
															-  v
														
 
															-image.to(cuda), mask.to(cuda)
														
 
															-  |
														
 
															-  v
														
 
															-autocast(enabled=True)
														
 
															-  |
														
 
															-  v
														
 
															-XNet2d(image)
														
 
															-  |
														
 
															-  +-- encoder_features = [E1,E2,E3,E4]
														
 
															-  |
														
 
															-  +-- bottleneck(E4)
														
 
															-  |
														
 
															-  +-- decoder_out, decoder_features, guides
														
 
															-  |
														
 
															-  +-- segmentation_head(decoder_out)
														
 
															-  |
														
 
															-  v
														
 
															-seg_logits [B,1,256,256]
														
 
															-  |
														
 
															-  v
														
 
															-DiceCELoss(seg_logits, mask)
														
 
															-  |
														
 
															-  v
														
 
															-total_loss
														
 
															-  |
														
 
															-  v
														
 
															-GradScaler.scale(total_loss / accum_steps).backward()
														
 
															-  |
														
 
															-  v
														
 
															-clip gradients
														
 
															-  |
														
 
															-  v
														
 
															-GradScaler.step(optimizer); GradScaler.update()
														
 
															-```
														
 
															-
														
 
															-## 21. 关键运行命令
														
 
															-
														
 
															-GPU 环境检查：
														
 
															-
														
 
															-```bash
														
 
															-python -c "import sys, torch; print(sys.executable); print(torch.__version__); print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'no cuda')"
														
 
															-```
														
 
															-
														
 
															-oflex 检查：
														
 
															-
														
 
															-```bash
														
 
															-python -c "import torch; import selective_scan_cuda_oflex; print('oflex import OK')"
														
 
															-python -c "import torch; from lib.modules.lib_mamba import csms6s; print(csms6s.WITH_SELECTIVESCAN_OFLEX)"
														
 
															-```
														
 
															-
														
 
															-前向检查：
														
 
															-
														
 
															-```bash
														
 
															-python - <<'PY'
														
 
															-import torch
														
 
															-from lib.modules import XNet2d
														
 
															-
														
 
															-model = XNet2d(in_channels=3, num_classes=1, ssm_backend="auto", ssm_forward_type="v3").cuda().eval()
														
 
															-x = torch.randn(1, 3, 128, 128, device="cuda")
														
 
															-with torch.no_grad():
														
 
															-    y = model(x)
														
 
															-print(sorted(y.keys()))
														
 
															-print(tuple(y["seg_logits"].shape))
														
 
															-PY
														
 
															-```
														
 
															-
														
 
															-短训：
														
 
															-
														
 
															-```bash
														
 
															-DATASET=BUSI \
														
 
															-EXTRA_SET_ARGS="train.epochs=1 train.batch_size=8 train.val_batch_size=8 logging.use_swanlab=false checkpoint.dir=outputs/validation/xnet_oflex_b8" \
														
 
															-bash tools/run_us_experiments.sh
														
 
															-```
														
 
															-
														
 
															-关闭 frequency refine 消融：
														
 
															-
														
 
															-```bash
														
 
															-DATASET=BUSI \
														
 
															-EXTRA_SET_ARGS="train.epochs=1 train.batch_size=8 train.val_batch_size=8 model.use_frequency_refine=false logging.use_swanlab=false checkpoint.dir=outputs/validation/xnet_oflex_b8_no_freq" \
														
 
															-bash tools/run_us_experiments.sh
														
 
															-```
														
 
															-
														
 
															-汇总结果：
														
 
															-
														
 
															-```bash
														
 
															-bash tools/summarize_results.sh
														
 
															-sed -n '1,40p' results/experiment_summary.md
														
 
															-```
														
 
															-
														
 
															-## 22. 推荐实验主线
														
 
															-
														
 
															-第一阶段：训练链路稳定性
														
 
															-
														
 
															-```text
														
 
															-BUSI smoke
														
 
															-BUSI batch size 8
														
 
															-BUSI no frequency refine
														
 
															-```
														
 
															-
														
 
															-第二阶段：甲状腺主线
														
 
															-
														
 
															-```text
														
 
															-DDTI
														
 
															-TN3K
														
 
															-TG3K
														
 
															-DDTI -> TN3K / TN3K -> DDTI 跨数据集泛化
														
 
															-```
														
 
															-
														
 
															-第三阶段：乳腺扩展
														
 
															-
														
 
															-```text
														
 
															-BUSI
														
 
															-BUS_UC
														
 
															-BUS-BRA
														
 
															-BUS-UCLM
														
 
															-```
														
 
															-
														
 
															-第四阶段：核心消融
														
 
															-
														
 
															-```text
														
 
															-use_wavelet_branch=false
														
 
															-use_frequency_refine=false
														
 
															-ssm_backend=torch
														
 
															-use_global_branch_stage1=true
														
 
															-encoder_depths=[2,2,3,2]
														
 
															-```
														
 
															-
														
 
															-## 23. 当前边界与注意事项
														
 
															-
														
 
															-1. 当前文档描述的是 active XNet2d 全监督主链。
														
 
															-2. 当前训练主链只优化 `seg_logits`。
														
 
															-3. `lib/sam2` 保留但不参与训练。
														
 
															-4. `lib/SwinTransformer` 保留但不参与训练。
														
 
															-5. `ssm_backend=auto` 在 CUDA 上应走 `oflex`，这是当前速度优化后的默认路径。
														
 
															-6. `XFrequencyRefine2d` 的 FFT 计算使用 float32，避免 AMP 下 ComplexHalf warning。
														
 
															-7. `num_workers > 0` 时，第一次进入 dataloader 迭代可能在 `END TRAINING SETUP` 后产生一次性等待。