feat(us1): add dataset review orchestration automatic review slice

This commit is contained in:
2026-03-17 10:57:49 +03:00
parent e916cb1f17
commit 023bacde39
24 changed files with 4870 additions and 131 deletions

View File

@@ -1,17 +1,18 @@
# [DEF:backend.src.api.routes.__init__:Module]
# [DEF:ApiRoutesModule:Module]
# @COMPLEXITY: 3
# @SEMANTICS: routes, lazy-import, module-registry
# @PURPOSE: Provide lazy route module loading to avoid heavyweight imports during tests.
# @LAYER: API
# @RELATION: DEPENDS_ON -> importlib
# @RELATION: [CALLS] ->[ApiRoutesGetAttr]
# @INVARIANT: Only names listed in __all__ are importable via __getattr__.
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile']
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile', 'dataset_review']
# [DEF:__getattr__:Function]
# @COMPLEXITY: 1
# [DEF:ApiRoutesGetAttr:Function]
# @COMPLEXITY: 3
# @PURPOSE: Lazily import route module by attribute name.
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @PRE: name is module candidate exposed in __all__.
# @POST: Returns imported submodule or raises AttributeError.
def __getattr__(name):
@@ -19,5 +20,5 @@ def __getattr__(name):
import importlib
return importlib.import_module(f".{name}", __name__)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
# [/DEF:__getattr__:Function]
# [/DEF:backend.src.api.routes.__init__:Module]
# [/DEF:ApiRoutesGetAttr:Function]
# [/DEF:ApiRoutesModule:Module]

View File

@@ -0,0 +1,349 @@
# [DEF:DatasetReviewApiTests:Module]
# @COMPLEXITY: 3
# @SEMANTICS: dataset_review, api, tests, lifecycle, exports, orchestration
# @PURPOSE: Verify backend US1 dataset review lifecycle, export, parsing, and dictionary-resolution contracts.
# @LAYER: API
# @RELATION: [BINDS_TO] ->[DatasetReviewApi]
# @RELATION: [BINDS_TO] ->[DatasetReviewOrchestrator]
from datetime import datetime, timezone
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from src.app import app
from src.api.routes.dataset_review import _get_orchestrator, _get_repository
from src.core.config_models import Environment, GlobalSettings, AppConfig
from src.core.utils.superset_context_extractor import SupersetContextExtractor
from src.dependencies import get_config_manager, get_current_user, get_task_manager
from src.models.dataset_review import (
BusinessSummarySource,
ConfidenceState,
DatasetReviewSession,
FindingArea,
FindingSeverity,
ReadinessState,
RecommendedAction,
ResolutionState,
SessionPhase,
SessionStatus,
)
from src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
client = TestClient(app)
# [DEF:_make_user:Function]
def _make_user():
admin_role = SimpleNamespace(name="Admin", permissions=[])
return SimpleNamespace(id="user-1", username="tester", roles=[admin_role])
# [/DEF:_make_user:Function]
# [DEF:_make_config_manager:Function]
def _make_config_manager():
    """Return a MagicMock config manager that knows exactly one environment ("env-1")."""
    environment = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    app_config = AppConfig(environments=[environment], settings=GlobalSettings())
    mock_manager = MagicMock()
    # Unknown environment ids resolve to None, mirroring the real manager contract.
    mock_manager.get_environment.side_effect = (
        lambda env_id: environment if env_id == "env-1" else None
    )
    mock_manager.get_config.return_value = app_config
    return mock_manager
# [/DEF:_make_config_manager:Function]
# [DEF:_make_session:Function]
def _make_session():
    """Construct a review-ready active session fixture with one shared timestamp."""
    timestamp = datetime.now(timezone.utc)
    fields = dict(
        session_id="sess-1",
        user_id="user-1",
        environment_id="env-1",
        source_kind="superset_link",
        source_input="http://superset.local/dashboard/10",
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        readiness_state=ReadinessState.REVIEW_READY,
        recommended_action=RecommendedAction.REVIEW_DOCUMENTATION,
        status=SessionStatus.ACTIVE,
        current_phase=SessionPhase.REVIEW,
        created_at=timestamp,
        updated_at=timestamp,
        last_activity_at=timestamp,
    )
    return DatasetReviewSession(**fields)
# [/DEF:_make_session:Function]
# [DEF:dataset_review_api_dependencies:Function]
@pytest.fixture(autouse=True)
def dataset_review_api_dependencies():
    """Install auth/config/task dependency overrides for every test, then clean up."""
    user = _make_user()
    manager = _make_config_manager()
    tasks = MagicMock()
    overrides = {
        get_current_user: lambda: user,
        get_config_manager: lambda: manager,
        get_task_manager: lambda: tasks,
    }
    app.dependency_overrides.update(overrides)
    yield {"user": user, "config_manager": manager, "task_manager": tasks}
    # Always restore the app to its pristine dependency graph after each test.
    app.dependency_overrides.clear()
# [/DEF:dataset_review_api_dependencies:Function]
# [DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# @PURPOSE: Verify dashboard links recover dataset context and preserve explicit partial-recovery markers.
def test_parse_superset_link_dashboard_partial_recovery():
    env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    # Fake Superset client: the dashboard reports TWO bound datasets, which should
    # force an explicit partial-recovery marker rather than a silent guess.
    fake_client = MagicMock()
    fake_client.get_dashboard_detail.return_value = {
        "datasets": [{"id": 42}, {"id": 77}],
    }
    fake_client.get_dataset_detail.return_value = {
        "table_name": "sales",
        "schema": "public",
    }
    extractor = SupersetContextExtractor(environment=env, client=fake_client)
    # Query string carries a URL-encoded native_filters payload for country=DE.
    result = extractor.parse_superset_link(
        "http://superset.local/dashboard/10/?native_filters=%5B%7B%22name%22%3A%22country%22%2C%22value%22%3A%22DE%22%7D%5D"
    )
    assert result.dataset_id == 42
    assert result.dashboard_id == 10
    assert result.dataset_ref == "public.sales"
    # Ambiguity must be surfaced explicitly, never resolved silently.
    assert result.partial_recovery is True
    assert "multiple_dashboard_datasets" in result.unresolved_references
    assert result.imported_filters[0]["filter_name"] == "country"
# [/DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# [DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# @PURPOSE: Verify trusted dictionary exact matches outrank fuzzy candidates and unresolved fields stay explicit.
def test_resolve_from_dictionary_prefers_exact_match():
    resolver = SemanticSourceResolver()
    # The dictionary offers an exact "revenue" row plus a near-miss "revnue" row;
    # the target fields also include "margin", which has no dictionary coverage.
    result = resolver.resolve_from_dictionary(
        {
            "source_ref": "dict://finance",
            "rows": [
                {
                    "field_name": "revenue",
                    "verbose_name": "Revenue",
                    "description": "Recognized revenue amount",
                    "display_format": "$,.2f",
                },
                {
                    "field_name": "revnue",
                    "verbose_name": "Revenue typo",
                    "description": "Fuzzy variant",
                },
            ],
        },
        [
            {"field_name": "revenue", "is_locked": False},
            {"field_name": "margin", "is_locked": False},
        ],
    )
    resolved_exact = next(item for item in result.resolved_fields if item["field_name"] == "revenue")
    unresolved = next(item for item in result.resolved_fields if item["field_name"] == "margin")
    # Exact matches must win over fuzzy candidates and carry exact provenance.
    assert resolved_exact["applied_candidate"]["match_type"] == "exact"
    assert resolved_exact["provenance"] == "dictionary_exact"
    # Uncovered fields stay explicitly unresolved instead of being guessed.
    assert unresolved["status"] == "unresolved"
    assert "margin" in result.unresolved_fields
    assert result.partial_recovery is True
# [/DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# [DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# @PURPOSE: Verify session start persists usable recovery-required state when Superset intake is partial.
def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_api_dependencies):
    repository = MagicMock()
    # Repository echoes back a session pre-flagged as recovery-required.
    created_session = _make_session()
    created_session.readiness_state = ReadinessState.RECOVERY_REQUIRED
    created_session.current_phase = SessionPhase.RECOVERY
    repository.create_session.return_value = created_session
    repository.save_profile_and_findings.return_value = created_session
    repository.db = MagicMock()
    orchestrator = DatasetReviewOrchestrator(
        repository=repository,
        config_manager=dataset_review_api_dependencies["config_manager"],
        task_manager=None,
    )
    # Parsed Superset context simulates partial recovery with one unresolved reference.
    parsed_context = SimpleNamespace(
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        chart_id=None,
        partial_recovery=True,
        unresolved_references=["dashboard_dataset_binding_missing"],
    )
    with patch(
        "src.services.dataset_review.orchestrator.SupersetContextExtractor.parse_superset_link",
        return_value=parsed_context,
    ):
        result = orchestrator.start_session(
            StartSessionCommand(
                user=dataset_review_api_dependencies["user"],
                environment_id="env-1",
                source_kind="superset_link",
                source_input="http://superset.local/dashboard/10",
            )
        )
    # Partial recovery must survive persistence as a warning finding, not be dropped.
    assert result.session.readiness_state == ReadinessState.RECOVERY_REQUIRED
    assert result.findings
    assert result.findings[0].severity.value == "warning"
    repository.create_session.assert_called_once()
    repository.save_profile_and_findings.assert_called_once()
# [/DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# [DEF:test_start_session_endpoint_returns_created_summary:Function]
# @PURPOSE: Verify POST session lifecycle endpoint returns a persisted ownership-scoped summary.
def test_start_session_endpoint_returns_created_summary(dataset_review_api_dependencies):
    session = _make_session()
    # Stub the orchestrator dependency so the route only exercises HTTP wiring.
    orchestrator = MagicMock()
    orchestrator.start_session.return_value = SimpleNamespace(session=session, findings=[], parsed_context=None)
    app.dependency_overrides[_get_orchestrator] = lambda: orchestrator
    response = client.post(
        "/api/dataset-orchestration/sessions",
        json={
            "source_kind": "superset_link",
            "source_input": "http://superset.local/dashboard/10",
            "environment_id": "env-1",
        },
    )
    # 201 plus the summary fields proves the serialized session round-trips the API.
    assert response.status_code == 201
    payload = response.json()
    assert payload["session_id"] == "sess-1"
    assert payload["dataset_ref"] == "public.sales"
    assert payload["environment_id"] == "env-1"
# [/DEF:test_start_session_endpoint_returns_created_summary:Function]
# [DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
# @PURPOSE: Verify lifecycle get/patch/delete plus documentation and validation exports remain ownership-scoped and usable.
def test_get_session_detail_export_and_lifecycle_endpoints(dataset_review_api_dependencies):
    now = datetime.now(timezone.utc)
    # A spec'd MagicMock session: attribute writes (e.g. status updates from PATCH)
    # are observable while still satisfying the DatasetReviewSession interface.
    session = MagicMock(spec=DatasetReviewSession)
    session.session_id = "sess-1"
    session.user_id = "user-1"
    session.environment_id = "env-1"
    session.source_kind = "superset_link"
    session.source_input = "http://superset.local/dashboard/10"
    session.dataset_ref = "public.sales"
    session.dataset_id = 42
    session.dashboard_id = 10
    session.readiness_state = ReadinessState.REVIEW_READY
    session.recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
    session.status = SessionStatus.ACTIVE
    session.current_phase = SessionPhase.REVIEW
    session.created_at = now
    session.updated_at = now
    session.last_activity_at = now
    # Profile and findings feed both the detail serializer and the export builders.
    session.profile = SimpleNamespace(
        dataset_name="sales",
        business_summary="Summary text",
        confidence_state=ConfidenceState.MOSTLY_CONFIRMED,
        dataset_type="unknown",
        schema_name=None,
        database_name=None,
        business_summary_source=BusinessSummarySource.IMPORTED,
        description=None,
        is_sqllab_view=False,
        completeness_score=None,
        has_blocking_findings=False,
        has_warning_findings=True,
        manual_summary_locked=False,
        created_at=now,
        updated_at=now,
        profile_id="profile-1",
        session_id="sess-1",
    )
    session.findings = [
        SimpleNamespace(
            finding_id="f-1",
            session_id="sess-1",
            area=FindingArea.SOURCE_INTAKE,
            severity=FindingSeverity.WARNING,
            code="PARTIAL_SUPERSET_RECOVERY",
            title="Partial",
            message="Some filters require review",
            resolution_state=ResolutionState.OPEN,
            resolution_note=None,
            caused_by_ref=None,
            created_at=now,
            resolved_at=None,
        )
    ]
    # Empty child collections keep the detail serializer exercised without noise.
    session.collaborators = []
    session.semantic_sources = []
    session.semantic_fields = []
    session.imported_filters = []
    session.template_variables = []
    session.execution_mappings = []
    session.clarification_sessions = []
    session.previews = []
    session.run_contexts = []
    repository = MagicMock()
    repository.load_session_detail.return_value = session
    repository.list_sessions_for_user.return_value = [session]
    repository.db = MagicMock()
    app.dependency_overrides[_get_repository] = lambda: repository
    # GET detail: ownership-scoped read succeeds.
    detail_response = client.get("/api/dataset-orchestration/sessions/sess-1")
    assert detail_response.status_code == 200
    assert detail_response.json()["session_id"] == "sess-1"
    # PATCH lifecycle: owner may pause, and the new status is echoed back.
    patch_response = client.patch(
        "/api/dataset-orchestration/sessions/sess-1",
        json={"status": "paused"},
    )
    assert patch_response.status_code == 200
    assert patch_response.json()["status"] == "paused"
    # Exports: JSON documentation and Markdown validation report are both usable.
    doc_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/documentation?format=json")
    assert doc_response.status_code == 200
    assert doc_response.json()["artifact_type"] == "documentation"
    validation_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/validation?format=markdown")
    assert validation_response.status_code == 200
    assert validation_response.json()["artifact_type"] == "validation_report"
    assert "Validation Report" in validation_response.json()["content"]["markdown"]
    # DELETE: soft-delete path returns 204 with no body.
    delete_response = client.delete("/api/dataset-orchestration/sessions/sess-1")
    assert delete_response.status_code == 204
# [/DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
# [/DEF:DatasetReviewApiTests:Module]

View File

@@ -0,0 +1,533 @@
# [DEF:DatasetReviewApi:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, api, session_lifecycle, exports, rbac, feature_flags
# @PURPOSE: Expose dataset review session lifecycle and export endpoints for backend US1.
# @LAYER: API
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
# @PRE: Authenticated user and valid environment/session scope are required for all mutations and reads.
# @POST: Returns ownership-scoped session state and export payloads with feature-flag/RBAC enforcement.
# @SIDE_EFFECT: Persists session state and may enqueue recovery task.
# @DATA_CONTRACT: Input[HTTP Request] -> Output[SessionSummary | SessionDetail | ExportArtifactResponse | HTTP 204]
# @INVARIANT: No cross-user session leakage is allowed; export payloads only expose the current user's accessible session.
from __future__ import annotations
# [DEF:DatasetReviewApi.imports:Block]
import json
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from src.core.database import get_db
from src.core.logger import belief_scope, logger
from src.dependencies import get_config_manager, get_current_user, get_task_manager, has_permission
from src.models.auth import User
from src.models.dataset_review import (
ArtifactFormat,
DatasetReviewSession,
RecommendedAction,
SessionStatus,
)
from src.schemas.dataset_review import SessionDetail, SessionSummary
from src.services.dataset_review.orchestrator import (
DatasetReviewOrchestrator,
StartSessionCommand,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
# [/DEF:DatasetReviewApi.imports:Block]
router = APIRouter(prefix="/api/dataset-orchestration", tags=["Dataset Orchestration"])
# [DEF:StartSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for starting one dataset review session from a Superset link or dataset selection.
class StartSessionRequest(BaseModel):
    # Intake mode; the regex restricts it to the two supported source kinds.
    source_kind: str = Field(..., pattern="^(superset_link|dataset_selection)$")
    # Raw Superset link or dataset reference supplied by the user; must be non-empty.
    source_input: str = Field(..., min_length=1)
    # Target environment identifier; resolved against the config manager downstream.
    environment_id: str = Field(..., min_length=1)
# [/DEF:StartSessionRequest:Class]
# [DEF:UpdateSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for lifecycle state updates on an existing session.
class UpdateSessionRequest(BaseModel):
    # Requested lifecycle state; validated against the SessionStatus enum.
    status: SessionStatus
    # Optional free-text note; accepted but not applied by the current PATCH handler.
    note: Optional[str] = None
# [/DEF:UpdateSessionRequest:Class]
# [DEF:SessionCollectionResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Paginated ownership-scoped dataset review session collection response.
class SessionCollectionResponse(BaseModel):
    # Current page of session summaries.
    items: List[SessionSummary]
    # Total sessions accessible to the user across all pages.
    total: int
    # 1-based page index echoed back from the request.
    page: int
    page_size: int
    # True when more sessions exist beyond this page.
    has_next: bool
# [/DEF:SessionCollectionResponse:Class]
# [DEF:ExportArtifactResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Inline export response for documentation or validation outputs without introducing unrelated persistence changes.
class ExportArtifactResponse(BaseModel):
    # Deterministic identifier derived from session id and format (no DB row backs it).
    artifact_id: str
    session_id: str
    # Either "documentation" or "validation_report".
    artifact_type: str
    format: str
    # Pseudo-URI (inline://...) signalling the payload is embedded, not stored.
    storage_ref: str
    created_by_user_id: str
    created_at: Optional[str] = None
    # The export payload itself: {"markdown": ...} or a structured JSON dict.
    content: Dict[str, Any]
# [/DEF:ExportArtifactResponse:Class]
# [DEF:_require_auto_review_flag:Function]
# @COMPLEXITY: 3
# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag.
# @RELATION: [DEPENDS_ON] ->[ConfigManager]
def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool:
    with belief_scope("dataset_review.require_auto_review_flag"):
        flag_enabled = config_manager.get_config().settings.ff_dataset_auto_review
        if flag_enabled:
            return True
        # 404 (not 403) so a disabled feature is indistinguishable from an absent route.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Dataset auto review feature is disabled",
        )
# [/DEF:_require_auto_review_flag:Function]
# [DEF:_get_repository:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build repository dependency for dataset review session aggregate access.
def _get_repository(db: Session = Depends(get_db)) -> DatasetReviewSessionRepository:
    # One repository per request, bound to the request-scoped DB session.
    return DatasetReviewSessionRepository(db)
# [/DEF:_get_repository:Function]
# [DEF:_get_orchestrator:Function]
# @COMPLEXITY: 3
# @PURPOSE: Build orchestrator dependency for session lifecycle actions.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
def _get_orchestrator(
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    config_manager=Depends(get_config_manager),
    task_manager=Depends(get_task_manager),
) -> DatasetReviewOrchestrator:
    # Assembled per request so the orchestrator shares the request's repository/DB scope.
    return DatasetReviewOrchestrator(
        repository=repository,
        config_manager=config_manager,
        task_manager=task_manager,
    )
# [/DEF:_get_orchestrator:Function]
# [DEF:_serialize_session_summary:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API summary DTO.
# @RELATION: [DEPENDS_ON] ->[SessionSummary]
def _serialize_session_summary(session: DatasetReviewSession) -> SessionSummary:
    # from_attributes=True lets Pydantic read ORM attributes instead of dict keys.
    return SessionSummary.model_validate(session, from_attributes=True)
# [/DEF:_serialize_session_summary:Function]
# [DEF:_serialize_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API detail DTO.
# @RELATION: [DEPENDS_ON] ->[SessionDetail]
def _serialize_session_detail(session: DatasetReviewSession) -> SessionDetail:
    # from_attributes=True lets Pydantic read ORM attributes instead of dict keys.
    return SessionDetail.model_validate(session, from_attributes=True)
# [/DEF:_serialize_session_detail:Function]
# [DEF:_get_owned_session_or_404:Function]
# @COMPLEXITY: 4
# @PURPOSE: Resolve one session for current user or collaborator scope, returning 404 when inaccessible.
# @RELATION: [CALLS] ->[load_detail]
# @PRE: session_id is a non-empty identifier and current_user is authenticated.
# @POST: returns accessible session detail or raises HTTP 404 without leaking foreign-session existence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[session_id:str,current_user:User] -> Output[DatasetReviewSession|HTTPException]
def _get_owned_session_or_404(
    repository: DatasetReviewSessionRepository,
    session_id: str,
    current_user: User,
) -> DatasetReviewSession:
    with belief_scope("dataset_review.get_owned_session_or_404"):
        found = repository.load_session_detail(session_id, current_user.id)
        if found is not None:
            return found
        # Same 404 for "missing" and "not yours": no foreign-session existence leak.
        logger.explore(
            "Dataset review session not found in current ownership scope",
            extra={"session_id": session_id, "user_id": current_user.id},
        )
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
# [/DEF:_get_owned_session_or_404:Function]
# [DEF:_build_documentation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce session documentation export content from current persisted review state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _build_documentation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    profile = session.profile
    # Deterministic ordering keeps exports stable across identical review states.
    ordered_findings = sorted(session.findings, key=lambda f: (f.severity.value, f.code))
    if export_format == ArtifactFormat.MARKDOWN:
        summary_text = profile.business_summary if profile else "No profile summary available."
        md_lines = [
            f"# Dataset Review: {session.dataset_ref}",
            "",
            f"- Session ID: {session.session_id}",
            f"- Environment: {session.environment_id}",
            f"- Readiness: {session.readiness_state.value}",
            f"- Recommended action: {session.recommended_action.value}",
            "",
            "## Business Summary",
            summary_text,
            "",
            "## Findings",
        ]
        if not ordered_findings:
            md_lines.append("- No findings recorded.")
        else:
            md_lines.extend(
                f"- [{f.severity.value}] {f.title}: {f.message}" for f in ordered_findings
            )
        return {
            "storage_ref": f"inline://dataset-review/{session.session_id}/documentation.md",
            "content": {"markdown": "\n".join(md_lines)},
        }
    # JSON variant: structured summary/profile/findings payload.
    json_content = {
        "session": _serialize_session_summary(session).model_dump(mode="json"),
        "profile": profile and {
            "dataset_name": profile.dataset_name,
            "business_summary": profile.business_summary,
            "confidence_state": profile.confidence_state.value,
            "dataset_type": profile.dataset_type,
        },
        "findings": [
            {
                "code": f.code,
                "severity": f.severity.value,
                "title": f.title,
                "message": f.message,
                "resolution_state": f.resolution_state.value,
            }
            for f in ordered_findings
        ],
    }
    return {
        "storage_ref": f"inline://dataset-review/{session.session_id}/documentation.json",
        "content": json_content,
    }
# [/DEF:_build_documentation_export:Function]
# [DEF:_build_validation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce validation-focused export content from persisted findings and readiness state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _build_validation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    # Deterministic ordering keeps exports stable across identical review states.
    ordered = sorted(session.findings, key=lambda f: (f.severity.value, f.code))
    if export_format != ArtifactFormat.MARKDOWN:
        # Structured JSON variant with full finding identity for machine consumption.
        return {
            "storage_ref": f"inline://dataset-review/{session.session_id}/validation.json",
            "content": {
                "session_id": session.session_id,
                "dataset_ref": session.dataset_ref,
                "readiness_state": session.readiness_state.value,
                "findings": [
                    {
                        "finding_id": f.finding_id,
                        "area": f.area.value,
                        "severity": f.severity.value,
                        "code": f.code,
                        "title": f.title,
                        "message": f.message,
                        "resolution_state": f.resolution_state.value,
                    }
                    for f in ordered
                ],
            },
        }
    report_lines = [
        f"# Validation Report: {session.dataset_ref}",
        "",
        f"- Session ID: {session.session_id}",
        f"- Readiness: {session.readiness_state.value}",
        "",
        "## Findings",
    ]
    if ordered:
        report_lines.extend(
            f"- `{f.code}` [{f.severity.value}] {f.message}" for f in ordered
        )
    else:
        report_lines.append("- No findings recorded.")
    return {
        "storage_ref": f"inline://dataset-review/{session.session_id}/validation.md",
        "content": {"markdown": "\n".join(report_lines)},
    }
# [/DEF:_build_validation_export:Function]
# [DEF:list_sessions:Function]
# @COMPLEXITY: 3
# @PURPOSE: List resumable dataset review sessions for the current user.
# @RELATION: [CALLS] ->[list_user_sess]
@router.get(
    "/sessions",
    response_model=SessionCollectionResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def list_sessions(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.list_sessions"):
        all_sessions = repository.list_sessions_for_user(current_user.id)
        # In-memory pagination over the ownership-scoped result set.
        offset = (page - 1) * page_size
        window = all_sessions[offset:offset + page_size]
        return SessionCollectionResponse(
            items=[_serialize_session_summary(item) for item in window],
            total=len(all_sessions),
            page=page,
            page_size=page_size,
            has_next=offset + page_size < len(all_sessions),
        )
# [/DEF:list_sessions:Function]
# [DEF:start_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Start a new dataset review session from a Superset link or dataset selection.
# @RELATION: [CALLS] ->[DatasetReviewOrchestrator.start_session]
# @PRE: feature flag enabled, user authenticated, and request body valid.
# @POST: returns persisted session summary scoped to the authenticated user.
# @SIDE_EFFECT: persists session/profile/findings and may enqueue recovery task.
# @DATA_CONTRACT: Input[StartSessionRequest] -> Output[SessionSummary]
@router.post(
    "/sessions",
    response_model=SessionSummary,
    status_code=status.HTTP_201_CREATED,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def start_session(
    request: StartSessionRequest,
    orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.start_session"):
        try:
            outcome = orchestrator.start_session(
                StartSessionCommand(
                    user=current_user,
                    environment_id=request.environment_id,
                    source_kind=request.source_kind,
                    source_input=request.source_input,
                )
            )
        except ValueError as exc:
            message = str(exc)
            logger.explore(
                "Dataset review session start rejected",
                extra={"user_id": current_user.id, "error": message},
            )
            # Unknown environment maps to 404; every other domain rejection is a 400.
            http_status = (
                status.HTTP_404_NOT_FOUND
                if message == "Environment not found"
                else status.HTTP_400_BAD_REQUEST
            )
            raise HTTPException(status_code=http_status, detail=message) from exc
        return _serialize_session_summary(outcome.session)
# [/DEF:start_session:Function]
# [DEF:get_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Return the full accessible dataset review session aggregate for current user scope.
# @RELATION: [CALLS] ->[_get_owned_session_or_404]
@router.get(
    "/sessions/{session_id}",
    response_model=SessionDetail,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def get_session_detail(
    session_id: str,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.get_session_detail"):
        # Ownership/collaborator scoping (and 404 on miss) is delegated to the helper.
        session = _get_owned_session_or_404(repository, session_id, current_user)
        return _serialize_session_detail(session)
# [/DEF:get_session_detail:Function]
# [DEF:update_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Update resumable lifecycle status for an owned dataset review session.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested status is allowed by lifecycle policy.
# @POST: returns updated summary without changing ownership or unrelated aggregates.
# @SIDE_EFFECT: mutates session lifecycle fields in persistence.
# @DATA_CONTRACT: Input[UpdateSessionRequest] -> Output[SessionSummary]
@router.patch(
    "/sessions/{session_id}",
    response_model=SessionSummary,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def update_session(
    session_id: str,
    request: UpdateSessionRequest,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.update_session"):
        session = _get_owned_session_or_404(repository, session_id, current_user)
        # Collaborators may read, but lifecycle mutation stays owner-only.
        if current_user.id != session.user_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can mutate session lifecycle")
        session.status = request.status
        terminal_states = {SessionStatus.ARCHIVED, SessionStatus.CANCELLED, SessionStatus.COMPLETED}
        if request.status == SessionStatus.PAUSED:
            # Paused sessions are steered back toward resumption.
            session.recommended_action = RecommendedAction.RESUME_SESSION
        elif request.status in terminal_states:
            # Terminal states detach any in-flight background task.
            session.active_task_id = None
        repository.db.commit()
        repository.db.refresh(session)
        return _serialize_session_summary(session)
# [/DEF:update_session:Function]
# [DEF:delete_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Archive or hard-delete a session owned by the current user.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is owner-scoped to current user.
# @POST: session is archived or deleted and no foreign-session existence is disclosed.
# @SIDE_EFFECT: mutates or deletes persisted session aggregate.
# @DATA_CONTRACT: Input[session_id:str,hard_delete:bool] -> Output[HTTP 204]
@router.delete(
    "/sessions/{session_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def delete_session(
    session_id: str,
    hard_delete: bool = Query(False),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.delete_session"):
        target = _get_owned_session_or_404(repository, session_id, current_user)
        # Deletion is strictly owner-only, even for collaborators with read access.
        if current_user.id != target.user_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can delete a session")
        if not hard_delete:
            # Soft delete keeps the row for auditability while detaching any task.
            target.status = SessionStatus.ARCHIVED
            target.active_task_id = None
        else:
            repository.db.delete(target)
        repository.db.commit()
        return Response(status_code=status.HTTP_204_NO_CONTENT)
# [/DEF:delete_session:Function]
# [DEF:export_documentation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export documentation output for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns ownership-scoped export payload without fabricating unrelated artifacts.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/documentation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_documentation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.export_documentation"):
        supported_formats = {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}
        if format not in supported_formats:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
        session = _get_owned_session_or_404(repository, session_id, current_user)
        payload = _build_documentation_export(session, format)
        # Inline artifact: id is deterministic, content is embedded, nothing persisted.
        return ExportArtifactResponse(
            artifact_id=f"documentation-{session.session_id}-{format.value}",
            session_id=session.session_id,
            artifact_type="documentation",
            format=format.value,
            storage_ref=payload["storage_ref"],
            created_by_user_id=current_user.id,
            content=payload["content"],
        )
# [/DEF:export_documentation:Function]
# [DEF:export_validation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export validation findings for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns explicit validation export payload scoped to current user session access.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/validation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_validation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.export_validation"):
        supported_formats = {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}
        if format not in supported_formats:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
        session = _get_owned_session_or_404(repository, session_id, current_user)
        payload = _build_validation_export(session, format)
        # Inline artifact: id is deterministic, content is embedded, nothing persisted.
        return ExportArtifactResponse(
            artifact_id=f"validation-{session.session_id}-{format.value}",
            session_id=session.session_id,
            artifact_type="validation_report",
            format=format.value,
            storage_ref=payload["storage_ref"],
            created_by_user_id=current_user.id,
            content=payload["content"],
        )
# [/DEF:export_validation:Function]
# [/DEF:DatasetReviewApi:Module]

View File

@@ -3,8 +3,8 @@
# @SEMANTICS: app, main, entrypoint, fastapi
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
# @LAYER: UI (API)
# @RELATION: DEPENDS_ON ->[AppDependencies]
# @RELATION: DEPENDS_ON ->[backend.src.api.routes]
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @INVARIANT: Only one FastAPI app instance exists per process.
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
# @PRE: Python environment and dependencies installed; configuration database available.
@@ -28,7 +28,7 @@ from .dependencies import get_task_manager, get_scheduler_service
from .core.encryption_key import ensure_encryption_key
from .core.utils.network import NetworkError
from .core.logger import logger, belief_scope
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health, dataset_review
from .api import auth
# [DEF:App:Global]
@@ -45,6 +45,7 @@ app = FastAPI(
# [DEF:startup_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application startup tasks, such as starting the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is started.
# Startup event
@@ -59,6 +60,7 @@ async def startup_event():
# [DEF:shutdown_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application shutdown tasks, such as stopping the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is stopped.
# Shutdown event
@@ -106,6 +108,7 @@ async def network_error_handler(request: Request, exc: NetworkError):
# [DEF:log_requests:Function]
# @COMPLEXITY: 3
# @PURPOSE: Middleware to log incoming HTTP requests and their response status.
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: request is a FastAPI Request object.
# @POST: Logs request and response details.
# @PARAM: request (Request) - The incoming request object.
@@ -154,6 +157,7 @@ app.include_router(assistant.router, prefix="/api/assistant", tags=["Assistant"]
app.include_router(clean_release.router)
app.include_router(clean_release_v2.router)
app.include_router(profile.router)
app.include_router(dataset_review.router)
app.include_router(health.router)
# [/DEF:api_routes:Block]
@@ -168,10 +172,13 @@ app.include_router(health.router)
# [DEF:websocket_endpoint:Function]
# @COMPLEXITY: 5
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
# @RELATION: [CALLS] ->[TaskManagerPackage]
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: task_id must be a valid task ID.
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
# @SIDE_EFFECT: Subscribes to TaskManager log queue and broadcasts messages over network.
# @DATA_CONTRACT: [task_id: str, source: str, level: str] -> [JSON log entry objects]
# @INVARIANT: Every accepted WebSocket subscription is unsubscribed exactly once even when streaming fails or the client disconnects.
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
#
# @TEST_CONTRACT: WebSocketLogStreamApi ->
@@ -204,85 +211,121 @@ async def websocket_endpoint(
"""
with belief_scope("websocket_endpoint", f"task_id={task_id}"):
await websocket.accept()
# Normalize filter parameters
source_filter = source.lower() if source else None
level_filter = level.upper() if level else None
# Level hierarchy for filtering
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
logger.info(f"WebSocket connection accepted for task {task_id} (source={source_filter}, level={level_filter})")
task_manager = get_task_manager()
queue = await task_manager.subscribe_logs(task_id)
def matches_filters(log_entry) -> bool:
"""Check if log entry matches the filter criteria."""
# Check source filter
if source_filter and log_entry.source.lower() != source_filter:
return False
# Check level filter
if level_filter:
log_level = level_hierarchy.get(log_entry.level.upper(), 0)
if log_level < min_level:
source_filter = source.lower() if source else None
level_filter = level.upper() if level else None
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
logger.reason(
"Accepted WebSocket log stream connection",
extra={
"task_id": task_id,
"source_filter": source_filter,
"level_filter": level_filter,
"min_level": min_level,
},
)
task_manager = get_task_manager()
queue = await task_manager.subscribe_logs(task_id)
logger.reason(
"Subscribed WebSocket client to task log queue",
extra={"task_id": task_id},
)
def matches_filters(log_entry) -> bool:
"""Check if log entry matches the filter criteria."""
log_source = getattr(log_entry, "source", None)
if source_filter and str(log_source or "").lower() != source_filter:
return False
return True
try:
# Stream new logs
logger.info(f"Starting log stream for task {task_id}")
# Send initial logs first to build context (apply filters)
initial_logs = task_manager.get_task_logs(task_id)
for log_entry in initial_logs:
if matches_filters(log_entry):
if level_filter:
log_level = level_hierarchy.get(str(log_entry.level).upper(), 0)
if log_level < min_level:
return False
return True
try:
logger.reason(
"Starting task log stream replay and live forwarding",
extra={"task_id": task_id},
)
initial_logs = task_manager.get_task_logs(task_id)
initial_sent = 0
for log_entry in initial_logs:
if matches_filters(log_entry):
log_dict = log_entry.dict()
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
await websocket.send_json(log_dict)
initial_sent += 1
logger.reflect(
"Initial task log replay completed",
extra={
"task_id": task_id,
"replayed_logs": initial_sent,
"total_available_logs": len(initial_logs),
},
)
task = task_manager.get_task(task_id)
if task and task.status == "AWAITING_INPUT" and task.input_request:
synthetic_log = {
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
"level": "INFO",
"message": "Task paused for user input (Connection Re-established)",
"context": {"input_request": task.input_request},
}
await websocket.send_json(synthetic_log)
logger.reason(
"Replayed awaiting-input prompt to restored WebSocket client",
extra={"task_id": task_id, "task_status": task.status},
)
while True:
log_entry = await queue.get()
if not matches_filters(log_entry):
continue
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
await websocket.send_json(log_dict)
logger.reflect(
"Forwarded task log entry to WebSocket client",
extra={
"task_id": task_id,
"level": log_dict.get("level"),
},
)
# Force a check for AWAITING_INPUT status immediately upon connection
# This ensures that if the task is already waiting when the user connects, they get the prompt.
task = task_manager.get_task(task_id)
if task and task.status == "AWAITING_INPUT" and task.input_request:
# Construct a synthetic log entry to trigger the frontend handler
# This is a bit of a hack but avoids changing the websocket protocol significantly
synthetic_log = {
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
"level": "INFO",
"message": "Task paused for user input (Connection Re-established)",
"context": {"input_request": task.input_request}
}
await websocket.send_json(synthetic_log)
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
logger.reason(
"Observed terminal task log entry; delaying to preserve client visibility",
extra={"task_id": task_id, "message": log_entry.message},
)
await asyncio.sleep(2)
while True:
log_entry = await queue.get()
# Apply server-side filtering
if not matches_filters(log_entry):
continue
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
await websocket.send_json(log_dict)
# If task is finished, we could potentially close the connection
# but let's keep it open for a bit or until the client disconnects
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
# Wait a bit to ensure client receives the last message
await asyncio.sleep(2)
# DO NOT BREAK here - allow client to keep connection open if they want to review logs
# or until they disconnect. Breaking closes the socket immediately.
# break
except WebSocketDisconnect:
logger.info(f"WebSocket connection disconnected for task {task_id}")
except Exception as e:
logger.error(f"WebSocket error for task {task_id}: {e}")
finally:
task_manager.unsubscribe_logs(task_id, queue)
except WebSocketDisconnect:
logger.reason(
"WebSocket client disconnected from task log stream",
extra={"task_id": task_id},
)
except Exception as exc:
logger.explore(
"WebSocket log streaming encountered an unexpected failure",
extra={"task_id": task_id, "error": str(exc)},
)
raise
finally:
task_manager.unsubscribe_logs(task_id, queue)
logger.reflect(
"Released WebSocket log queue subscription",
extra={"task_id": task_id},
)
# [/DEF:websocket_endpoint:Function]
# [DEF:StaticFiles:Mount]

View File

@@ -0,0 +1,334 @@
# [DEF:SupersetContextExtractor:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, superset, link_parsing, context_recovery, partial_recovery
# @PURPOSE: Recover dataset and dashboard context from Superset links while preserving explicit partial-recovery markers.
# @LAYER: Infra
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient:Class]
# @RELATION: [DEPENDS_ON] ->[ImportedFilter]
# @RELATION: [DEPENDS_ON] ->[TemplateVariable]
# @PRE: Superset link or dataset reference must be parseable enough to resolve an environment-scoped target resource.
# @POST: Returns the best available recovered context with explicit provenance and partial-recovery markers when necessary.
# @SIDE_EFFECT: Performs upstream Superset API reads.
# @INVARIANT: Partial recovery is surfaced explicitly and never misrepresented as fully confirmed context.
from __future__ import annotations
# [DEF:SupersetContextExtractor.imports:Block]
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from urllib.parse import parse_qs, unquote, urlparse
from src.core.config_models import Environment
from src.core.logger import belief_scope, logger
from src.core.superset_client import SupersetClient
# [/DEF:SupersetContextExtractor.imports:Block]
# [DEF:SupersetParsedContext:Class]
# @COMPLEXITY: 2
# @PURPOSE: Normalized output of Superset link parsing for session intake and recovery.
@dataclass
class SupersetParsedContext:
    # Original link as received at intake (whitespace-stripped by the parser).
    source_url: str
    # Canonical reference: "schema.table" when dataset-detail lookup succeeds,
    # otherwise "dataset:<id>", "chart:<id>", or "unresolved".
    dataset_ref: str
    # Numeric Superset identifiers; None when not recoverable from the link.
    dataset_id: Optional[int] = None
    dashboard_id: Optional[int] = None
    chart_id: Optional[int] = None
    # One of "dataset", "dashboard", "chart", or "unknown".
    resource_type: str = "unknown"
    # Decoded URL query state; known structured keys are JSON-parsed when possible.
    query_state: Dict[str, Any] = field(default_factory=dict)
    # Filter entries recovered from native_filters / form_data URL state.
    imported_filters: List[Dict[str, Any]] = field(default_factory=list)
    # Marker strings naming what could not be confirmed
    # (e.g. "multiple_dashboard_datasets", "dataset_detail_lookup_failed").
    unresolved_references: List[str] = field(default_factory=list)
    # True when any part of the context is ambiguous or unresolved.
    partial_recovery: bool = False
# [/DEF:SupersetParsedContext:Class]
# [DEF:SupersetContextExtractor:Class]
# @COMPLEXITY: 4
# @PURPOSE: Parse supported Superset URLs and recover canonical dataset/dashboard references for review-session intake.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @PRE: constructor receives a configured environment with a usable Superset base URL.
# @POST: extractor instance is ready to parse links against one Superset environment.
# @SIDE_EFFECT: downstream parse operations may call Superset APIs through SupersetClient.
class SupersetContextExtractor:
    # [DEF:SupersetContextExtractor.__init__:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Bind extractor to one Superset environment and client instance.
    def __init__(self, environment: Environment, client: Optional[SupersetClient] = None) -> None:
        """Bind the extractor to one environment, creating a client when none is injected."""
        self.environment = environment
        self.client = client or SupersetClient(environment)
    # [/DEF:SupersetContextExtractor.__init__:Function]

    # [DEF:SupersetContextExtractor.parse_superset_link:Function]
    # @COMPLEXITY: 4
    # @PURPOSE: Extract candidate identifiers and query state from supported Superset URLs.
    # @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
    # @PRE: link is a non-empty Superset URL compatible with the configured environment.
    # @POST: returns resolved dataset/dashboard context, preserving explicit partial-recovery state if some identifiers cannot be confirmed.
    # @SIDE_EFFECT: may issue Superset API reads to resolve dataset references from dashboard or chart URLs.
    # @DATA_CONTRACT: Input[link:str] -> Output[SupersetParsedContext]
    def parse_superset_link(self, link: str) -> SupersetParsedContext:
        """Parse a dataset/dashboard/chart Superset URL into a SupersetParsedContext.

        Raises:
            ValueError: for empty input, non-absolute URLs, or unsupported link shapes.
        """
        with belief_scope("SupersetContextExtractor.parse_superset_link"):
            normalized_link = str(link or "").strip()
            if not normalized_link:
                logger.explore("Rejected empty Superset link during intake")
                raise ValueError("Superset link must be non-empty")
            parsed_url = urlparse(normalized_link)
            if parsed_url.scheme not in {"http", "https"} or not parsed_url.netloc:
                logger.explore(
                    "Superset link is not a parseable absolute URL",
                    extra={"link": normalized_link},
                )
                raise ValueError("Superset link must be an absolute http(s) URL")
            logger.reason(
                "Parsing Superset link for dataset review intake",
                extra={"path": parsed_url.path, "query": parsed_url.query},
            )
            path_parts = [part for part in parsed_url.path.split("/") if part]
            query_params = parse_qs(parsed_url.query, keep_blank_values=True)
            query_state = self._decode_query_state(query_params)
            dataset_id = self._extract_numeric_identifier(path_parts, "dataset")
            dashboard_id = self._extract_numeric_identifier(path_parts, "dashboard")
            chart_id = self._extract_numeric_identifier(path_parts, "chart")
            resource_type = "unknown"
            dataset_ref: Optional[str] = None
            partial_recovery = False
            unresolved_references: List[str] = []
            if dataset_id is not None:
                resource_type = "dataset"
                dataset_ref = f"dataset:{dataset_id}"
                logger.reason(
                    "Resolved direct dataset link",
                    extra={"dataset_id": dataset_id},
                )
            elif dashboard_id is not None:
                resource_type = "dashboard"
                logger.reason(
                    "Resolving dashboard-bound dataset from Superset",
                    extra={"dashboard_id": dashboard_id},
                )
                dashboard_detail = self.client.get_dashboard_detail(dashboard_id)
                datasets = dashboard_detail.get("datasets") or []
                if datasets:
                    first_dataset = datasets[0]
                    resolved_dataset_id = first_dataset.get("id")
                    if resolved_dataset_id is not None:
                        dataset_id = int(resolved_dataset_id)
                        dataset_ref = f"dataset:{dataset_id}"
                        logger.reason(
                            "Recovered dataset reference from dashboard context",
                            extra={
                                "dashboard_id": dashboard_id,
                                "dataset_id": dataset_id,
                                "dataset_count": len(datasets),
                            },
                        )
                        if len(datasets) > 1:
                            # The first bound dataset is chosen, but the ambiguity
                            # is surfaced as explicit partial recovery.
                            partial_recovery = True
                            unresolved_references.append("multiple_dashboard_datasets")
                    else:
                        partial_recovery = True
                        unresolved_references.append("dashboard_dataset_id_missing")
                else:
                    partial_recovery = True
                    unresolved_references.append("dashboard_dataset_binding_missing")
            elif chart_id is not None:
                resource_type = "chart"
                partial_recovery = True
                unresolved_references.append("chart_dataset_binding_unresolved")
                dataset_ref = f"chart:{chart_id}"
                logger.reason(
                    "Accepted chart link with explicit partial recovery",
                    extra={"chart_id": chart_id},
                )
            else:
                logger.explore(
                    "Unsupported Superset link shape encountered",
                    extra={"path": parsed_url.path},
                )
                raise ValueError("Unsupported Superset link shape")
            if dataset_id is not None:
                # Best-effort canonicalization to "schema.table"; a failed lookup
                # keeps the session usable and is recorded as partial recovery.
                try:
                    dataset_detail = self.client.get_dataset_detail(dataset_id)
                    table_name = str(dataset_detail.get("table_name") or "").strip()
                    schema_name = str(dataset_detail.get("schema") or "").strip()
                    if table_name:
                        dataset_ref = (
                            f"{schema_name}.{table_name}" if schema_name else table_name
                        )
                        logger.reason(
                            "Canonicalized dataset reference from dataset detail",
                            extra={"dataset_ref": dataset_ref, "dataset_id": dataset_id},
                        )
                except Exception as exc:
                    partial_recovery = True
                    unresolved_references.append("dataset_detail_lookup_failed")
                    logger.explore(
                        "Dataset detail lookup failed during link parsing; keeping session usable",
                        extra={"dataset_id": dataset_id, "error": str(exc)},
                    )
            imported_filters = self._extract_imported_filters(query_state)
            result = SupersetParsedContext(
                source_url=normalized_link,
                dataset_ref=dataset_ref or "unresolved",
                dataset_id=dataset_id,
                dashboard_id=dashboard_id,
                chart_id=chart_id,
                resource_type=resource_type,
                query_state=query_state,
                imported_filters=imported_filters,
                unresolved_references=unresolved_references,
                partial_recovery=partial_recovery,
            )
            logger.reflect(
                "Superset link parsing completed",
                extra={
                    "dataset_ref": result.dataset_ref,
                    "dataset_id": result.dataset_id,
                    "dashboard_id": result.dashboard_id,
                    "chart_id": result.chart_id,
                    "partial_recovery": result.partial_recovery,
                    "unresolved_references": result.unresolved_references,
                    "imported_filters": len(result.imported_filters),
                },
            )
            return result
    # [/DEF:SupersetContextExtractor.parse_superset_link:Function]

    # [DEF:SupersetContextExtractor.recover_imported_filters:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Build imported filter entries from URL state and Superset-side saved context.
    def recover_imported_filters(self, parsed_context: SupersetParsedContext) -> List[Dict[str, Any]]:
        """Return a defensive copy of the filters already recovered during parsing."""
        return list(parsed_context.imported_filters)
    # [/DEF:SupersetContextExtractor.recover_imported_filters:Function]

    # [DEF:SupersetContextExtractor.discover_template_variables:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Detect runtime variables and Jinja references from dataset query-bearing fields.
    def discover_template_variables(self, dataset_payload: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Placeholder: discovery is not implemented yet and always returns an empty list."""
        return []
    # [/DEF:SupersetContextExtractor.discover_template_variables:Function]

    # [DEF:SupersetContextExtractor.build_recovery_summary:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Summarize recovered, partial, and unresolved context for session state and UX.
    def build_recovery_summary(self, parsed_context: SupersetParsedContext) -> Dict[str, Any]:
        """Flatten a parsed context into the summary dict consumed by session state/UX."""
        return {
            "dataset_ref": parsed_context.dataset_ref,
            "dataset_id": parsed_context.dataset_id,
            "dashboard_id": parsed_context.dashboard_id,
            "chart_id": parsed_context.chart_id,
            "partial_recovery": parsed_context.partial_recovery,
            "unresolved_references": list(parsed_context.unresolved_references),
            "imported_filter_count": len(parsed_context.imported_filters),
        }
    # [/DEF:SupersetContextExtractor.build_recovery_summary:Function]

    # [DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Extract a numeric identifier from a REST-like Superset URL path.
    def _extract_numeric_identifier(self, path_parts: List[str], resource_name: str) -> Optional[int]:
        """Return the integer that directly follows resource_name in the path, else None."""
        # Single index() lookup replaces the previous `in` membership test plus
        # index() double scan; ValueError already covers the missing-name case.
        try:
            resource_index = path_parts.index(resource_name)
        except ValueError:
            return None
        if resource_index + 1 >= len(path_parts):
            return None
        candidate = str(path_parts[resource_index + 1]).strip()
        return int(candidate) if candidate.isdigit() else None
    # [/DEF:SupersetContextExtractor._extract_numeric_identifier:Function]

    # [DEF:SupersetContextExtractor._decode_query_state:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Decode query-string structures used by Superset URL state transport.
    def _decode_query_state(self, query_params: Dict[str, List[str]]) -> Dict[str, Any]:
        """Decode URL query params; known structured keys are JSON-parsed when possible."""
        query_state: Dict[str, Any] = {}
        for key, values in query_params.items():
            if not values:
                continue
            # parse_qs collects repeated keys; the last occurrence wins.
            raw_value = values[-1]
            decoded_value = unquote(raw_value)
            if key in {"native_filters", "native_filters_key", "form_data", "q"}:
                try:
                    query_state[key] = json.loads(decoded_value)
                    continue
                except (TypeError, ValueError):
                    # Narrowed from bare Exception: json.loads raises only these.
                    logger.explore(
                        "Failed to decode structured Superset query state; preserving raw value",
                        extra={"key": key},
                    )
            query_state[key] = decoded_value
        return query_state
    # [/DEF:SupersetContextExtractor._decode_query_state:Function]

    # [DEF:SupersetContextExtractor._extract_imported_filters:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Normalize imported filters from decoded query state without fabricating missing values.
    def _extract_imported_filters(self, query_state: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Build imported-filter entries from native_filters and form_data URL state."""
        imported_filters: List[Dict[str, Any]] = []
        native_filters_payload = query_state.get("native_filters")
        if isinstance(native_filters_payload, list):
            for index, item in enumerate(native_filters_payload):
                if not isinstance(item, dict):
                    continue
                filter_name = (
                    item.get("filter_name")
                    or item.get("column")
                    or item.get("name")
                    or f"native_filter_{index}"
                )
                imported_filters.append(
                    {
                        "filter_name": str(filter_name),
                        "raw_value": item.get("value"),
                        "display_name": item.get("label") or item.get("name"),
                        "source": "superset_url",
                        # A missing value downgrades the entry to partial recovery
                        # and forces user confirmation instead of fabricating data.
                        "recovery_status": "recovered"
                        if item.get("value") is not None
                        else "partial",
                        "requires_confirmation": item.get("value") is None,
                        "notes": "Recovered from Superset native filter URL state",
                    }
                )
        form_data_payload = query_state.get("form_data")
        if isinstance(form_data_payload, dict):
            extra_filters = form_data_payload.get("extra_filters") or []
            for index, item in enumerate(extra_filters):
                if not isinstance(item, dict):
                    continue
                filter_name = item.get("col") or item.get("column") or f"extra_filter_{index}"
                imported_filters.append(
                    {
                        "filter_name": str(filter_name),
                        "raw_value": item.get("val"),
                        "display_name": item.get("label"),
                        "source": "superset_url",
                        "recovery_status": "recovered"
                        if item.get("val") is not None
                        else "partial",
                        "requires_confirmation": item.get("val") is None,
                        "notes": "Recovered from Superset form_data extra_filters",
                    }
                )
        return imported_filters
    # [/DEF:SupersetContextExtractor._extract_imported_filters:Function]
# [/DEF:SupersetContextExtractor:Class]
# [/DEF:SupersetContextExtractor:Module]

View File

@@ -5,7 +5,6 @@
# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
# @LAYER: Domain
# @RELATION: INHERITS_FROM -> [Base]
# @RELATION: DEPENDS_ON -> [AuthModels]
# @RELATION: DEPENDS_ON -> [MappingModels]
#

View File

@@ -4,7 +4,7 @@
# @SEMANTICS: dataset_review, schemas, pydantic, session, profile, findings
# @PURPOSE: Defines API schemas for the dataset review orchestration flow.
# @LAYER: API
# @RELATION: DEPENDS_ON -> pydantic
# @RELATION: DEPENDS_ON -> [DatasetReviewModels]
# [SECTION: IMPORTS]
from datetime import datetime

View File

@@ -0,0 +1,386 @@
# [DEF:DatasetReviewOrchestrator:Module]
# @COMPLEXITY: 5
# @SEMANTICS: dataset_review, orchestration, session_lifecycle, intake, recovery
# @PURPOSE: Coordinate dataset review session startup and lifecycle-safe intake recovery for one authenticated user.
# @LAYER: Domain
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[SemanticSourceResolver]
# @RELATION: [DEPENDS_ON] ->[ClarificationEngine]
# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor]
# @RELATION: [DEPENDS_ON] ->[SupersetCompilationAdapter]
# @RELATION: [DEPENDS_ON] ->[TaskManager]
# @PRE: session mutations must execute inside a persisted session boundary scoped to one authenticated user.
# @POST: state transitions are persisted atomically and emit observable progress for long-running steps.
# @SIDE_EFFECT: creates task records, updates session aggregates, triggers upstream Superset calls, persists audit artifacts.
# @DATA_CONTRACT: Input[SessionCommand] -> Output[DatasetReviewSession | CompiledPreview | DatasetRunContext]
# @INVARIANT: Launch is blocked unless a current session has no open blocking findings, all launch-sensitive mappings are approved, and a non-stale Superset-generated compiled preview matches the current input fingerprint.
from __future__ import annotations
# [DEF:DatasetReviewOrchestrator.imports:Block]
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from src.core.config_manager import ConfigManager
from src.core.logger import belief_scope, logger
from src.core.task_manager import TaskManager
from src.core.utils.superset_context_extractor import (
SupersetContextExtractor,
SupersetParsedContext,
)
from src.models.auth import User
from src.models.dataset_review import (
BusinessSummarySource,
ConfidenceState,
DatasetProfile,
DatasetReviewSession,
FindingArea,
FindingSeverity,
RecommendedAction,
ReadinessState,
ResolutionState,
SessionPhase,
SessionStatus,
ValidationFinding,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
# [/DEF:DatasetReviewOrchestrator.imports:Block]
# [DEF:StartSessionCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for starting a dataset review session.
@dataclass
class StartSessionCommand:
    # Authenticated user who will own the created session.
    user: User
    # Target environment identifier; resolved via ConfigManager before use.
    environment_id: str
    # Either "superset_link" or "dataset_selection" (validated in start_session).
    source_kind: str
    # Raw Superset link or dataset reference supplied by the user.
    source_input: str
# [/DEF:StartSessionCommand:Class]
# [DEF:StartSessionResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Session-start result carrying the persisted session and intake recovery metadata.
@dataclass
class StartSessionResult:
    # Persisted session aggregate returned by the repository.
    session: DatasetReviewSession
    # Parsed Superset context; None for direct dataset selections.
    parsed_context: Optional[SupersetParsedContext] = None
    # Partial-recovery findings created during intake (empty when fully recovered).
    findings: List[ValidationFinding] = field(default_factory=list)
# [/DEF:StartSessionResult:Class]
# [DEF:DatasetReviewOrchestrator:Class]
# @COMPLEXITY: 5
# @PURPOSE: Coordinate safe session startup while preserving cross-user isolation and explicit partial recovery.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor]
# @RELATION: [DEPENDS_ON] ->[TaskManager]
# @RELATION: [DEPENDS_ON] ->[SessionRepo]
# @RELATION: [DEPENDS_ON] ->[ConfigManager]
# @PRE: constructor dependencies are valid and tied to the current request/task scope.
# @POST: orchestrator instance can execute session-scoped mutations for one authenticated user.
# @SIDE_EFFECT: downstream operations may persist session/profile/finding state and enqueue background tasks.
# @DATA_CONTRACT: Input[StartSessionCommand] -> Output[StartSessionResult]
# @INVARIANT: session ownership is preserved on every mutation and recovery remains explicit when partial.
class DatasetReviewOrchestrator:
# [DEF:DatasetReviewOrchestrator.__init__:Function]
# @COMPLEXITY: 3
# @PURPOSE: Bind repository, config, and task dependencies required by the orchestration boundary.
# @RELATION: [DEPENDS_ON] ->[SessionRepo]
# @RELATION: [DEPENDS_ON] ->[ConfigManager]
def __init__(
    self,
    repository: DatasetReviewSessionRepository,
    config_manager: ConfigManager,
    task_manager: Optional[TaskManager] = None,
    semantic_resolver: Optional[SemanticSourceResolver] = None,
) -> None:
    """Bind the collaborators used by session-scoped orchestration operations."""
    self.repository = repository
    self.config_manager = config_manager
    # NOTE(review): task_manager may be None — presumably recovery-task enqueueing
    # degrades gracefully when absent; confirm in _enqueue_recovery_task.
    self.task_manager = task_manager
    # Default resolver lets callers omit the dependency in simple contexts.
    self.semantic_resolver = semantic_resolver or SemanticSourceResolver()
# [/DEF:DatasetReviewOrchestrator.__init__:Function]
# [DEF:DatasetReviewOrchestrator.start_session:Function]
# @COMPLEXITY: 5
# @PURPOSE: Initialize a new session from a Superset link or dataset selection and trigger context recovery.
# @RELATION: [DEPENDS_ON] ->[SessionRepo]
# @RELATION: [CALLS] ->[SupersetContextExtractor.parse_superset_link]
# @RELATION: [CALLS] ->[create_task]
# @PRE: source input is non-empty and environment is accessible.
# @POST: session exists in persisted storage with intake/recovery state and task linkage when async work is required.
# @SIDE_EFFECT: persists session and may enqueue recovery task.
# @DATA_CONTRACT: Input[StartSessionCommand] -> Output[StartSessionResult]
# @INVARIANT: no cross-user session leakage occurs; session and follow-up task remain owned by the authenticated user.
def start_session(self, command: StartSessionCommand) -> StartSessionResult:
    """Validate intake, persist a new review session, and link optional recovery work.

    Raises:
        ValueError: for empty source input, an unsupported source kind, or an
            unknown environment id.
    """
    with belief_scope("DatasetReviewOrchestrator.start_session"):
        # Normalize user-provided fields first so whitespace-only input is
        # rejected the same way as empty input.
        normalized_source_kind = str(command.source_kind or "").strip()
        normalized_source_input = str(command.source_input or "").strip()
        normalized_environment_id = str(command.environment_id or "").strip()
        if not normalized_source_input:
            logger.explore("Blocked dataset review session start due to empty source input")
            raise ValueError("source_input must be non-empty")
        if normalized_source_kind not in {"superset_link", "dataset_selection"}:
            logger.explore(
                "Blocked dataset review session start due to unsupported source kind",
                extra={"source_kind": normalized_source_kind},
            )
            raise ValueError("source_kind must be 'superset_link' or 'dataset_selection'")
        environment = self.config_manager.get_environment(normalized_environment_id)
        if environment is None:
            logger.explore(
                "Blocked dataset review session start because environment was not found",
                extra={"environment_id": normalized_environment_id},
            )
            raise ValueError("Environment not found")
        logger.reason(
            "Starting dataset review session",
            extra={
                "user_id": command.user.id,
                "environment_id": normalized_environment_id,
                "source_kind": normalized_source_kind,
            },
        )
        parsed_context: Optional[SupersetParsedContext] = None
        findings: List[ValidationFinding] = []
        dataset_ref = normalized_source_input
        dataset_id: Optional[int] = None
        dashboard_id: Optional[int] = None
        readiness_state = ReadinessState.IMPORTING
        recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
        current_phase = SessionPhase.RECOVERY
        if normalized_source_kind == "superset_link":
            # Link intake: recover dataset/dashboard context from the URL.
            # Partial recovery downgrades readiness and adds explicit findings.
            extractor = SupersetContextExtractor(environment)
            parsed_context = extractor.parse_superset_link(normalized_source_input)
            dataset_ref = parsed_context.dataset_ref
            dataset_id = parsed_context.dataset_id
            dashboard_id = parsed_context.dashboard_id
            if parsed_context.partial_recovery:
                readiness_state = ReadinessState.RECOVERY_REQUIRED
                recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
                findings.extend(self._build_partial_recovery_findings(parsed_context))
            else:
                readiness_state = ReadinessState.REVIEW_READY
        else:
            # Direct dataset selection needs no upstream recovery and can enter
            # the review phase immediately.
            dataset_ref, dataset_id = self._parse_dataset_selection(normalized_source_input)
            readiness_state = ReadinessState.REVIEW_READY
            current_phase = SessionPhase.REVIEW
        session = DatasetReviewSession(
            user_id=command.user.id,
            environment_id=normalized_environment_id,
            source_kind=normalized_source_kind,
            source_input=normalized_source_input,
            dataset_ref=dataset_ref,
            dataset_id=dataset_id,
            dashboard_id=dashboard_id,
            readiness_state=readiness_state,
            recommended_action=recommended_action,
            status=SessionStatus.ACTIVE,
            current_phase=current_phase,
        )
        persisted_session = self.repository.create_session(session)
        # Persist an immediately-usable profile plus any intake findings so
        # exports and detail views work before async recovery completes.
        profile = self._build_initial_profile(
            session_id=persisted_session.session_id,
            parsed_context=parsed_context,
            dataset_ref=dataset_ref,
        )
        persisted_session = self.repository.save_profile_and_findings(
            persisted_session.session_id,
            command.user.id,
            profile,
            findings,
        )
        active_task_id = self._enqueue_recovery_task(
            command=command,
            session=persisted_session,
            parsed_context=parsed_context,
        )
        if active_task_id:
            persisted_session.active_task_id = active_task_id
            # NOTE(review): commits through repository.db directly; presumably the
            # repository exposes its SQLAlchemy session here — confirm ownership.
            self.repository.db.commit()
            self.repository.db.refresh(persisted_session)
            logger.reason(
                "Linked recovery task to started dataset review session",
                extra={"session_id": persisted_session.session_id, "task_id": active_task_id},
            )
        logger.reflect(
            "Dataset review session start completed",
            extra={
                "session_id": persisted_session.session_id,
                "dataset_ref": persisted_session.dataset_ref,
                "dataset_id": persisted_session.dataset_id,
                "dashboard_id": persisted_session.dashboard_id,
                "readiness_state": persisted_session.readiness_state.value,
                "active_task_id": persisted_session.active_task_id,
                "finding_count": len(findings),
            },
        )
        return StartSessionResult(
            session=persisted_session,
            parsed_context=parsed_context,
            findings=findings,
        )
# [/DEF:DatasetReviewOrchestrator.start_session:Function]
# [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# @COMPLEXITY: 3
# @PURPOSE: Normalize dataset-selection payload into canonical session references.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _parse_dataset_selection(self, source_input: str) -> tuple[str, Optional[int]]:
normalized = str(source_input or "").strip()
if not normalized:
raise ValueError("dataset selection input must be non-empty")
if normalized.isdigit():
dataset_id = int(normalized)
return f"dataset:{dataset_id}", dataset_id
if normalized.startswith("dataset:"):
suffix = normalized.split(":", 1)[1].strip()
if suffix.isdigit():
return normalized, int(suffix)
return normalized, None
return normalized, None
# [/DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# [DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
# @COMPLEXITY: 3
# @PURPOSE: Create the first profile snapshot so exports and detail views remain usable immediately after intake.
# @RELATION: [DEPENDS_ON] ->[DatasetProfile]
def _build_initial_profile(
self,
session_id: str,
parsed_context: Optional[SupersetParsedContext],
dataset_ref: str,
) -> DatasetProfile:
dataset_name = dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset"
business_summary = (
f"Review session initialized for {dataset_ref}."
if dataset_ref
else "Review session initialized with unresolved dataset context."
)
confidence_state = (
ConfidenceState.MIXED
if parsed_context and parsed_context.partial_recovery
else ConfidenceState.MOSTLY_CONFIRMED
)
return DatasetProfile(
session_id=session_id,
dataset_name=dataset_name or "Unresolved dataset",
schema_name=dataset_ref.split(".")[0] if "." in dataset_ref else None,
business_summary=business_summary,
business_summary_source=BusinessSummarySource.IMPORTED,
description="Initial review profile created from source intake.",
dataset_type="unknown",
is_sqllab_view=False,
completeness_score=0.25,
confidence_state=confidence_state,
has_blocking_findings=False,
has_warning_findings=bool(parsed_context and parsed_context.partial_recovery),
manual_summary_locked=False,
)
# [/DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
# [DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# @COMPLEXITY: 4
# @PURPOSE: Project partial Superset intake recovery into explicit findings without blocking session usability.
# @RELATION: [DEPENDS_ON] ->[ValidationFinding]
# @PRE: parsed_context.partial_recovery is true.
# @POST: returns warning-level findings that preserve usable but incomplete state.
# @SIDE_EFFECT: none beyond structured finding creation.
# @DATA_CONTRACT: Input[SupersetParsedContext] -> Output[List[ValidationFinding]]
def _build_partial_recovery_findings(
self,
parsed_context: SupersetParsedContext,
) -> List[ValidationFinding]:
findings: List[ValidationFinding] = []
for unresolved_ref in parsed_context.unresolved_references:
findings.append(
ValidationFinding(
area=FindingArea.SOURCE_INTAKE,
severity=FindingSeverity.WARNING,
code="PARTIAL_SUPERSET_RECOVERY",
title="Superset context recovered partially",
message=(
"Session remains usable, but some Superset context requires review: "
f"{unresolved_ref.replace('_', ' ')}."
),
resolution_state=ResolutionState.OPEN,
caused_by_ref=unresolved_ref,
)
)
return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# @COMPLEXITY: 4
# @PURPOSE: Link session start to observable async recovery when task infrastructure is available.
# @RELATION: [CALLS] ->[create_task]
# @PRE: session is already persisted.
# @POST: returns task identifier when a task could be enqueued, otherwise None.
# @SIDE_EFFECT: may create one background task for progressive recovery.
# @DATA_CONTRACT: Input[StartSessionCommand,DatasetReviewSession,SupersetParsedContext|None] -> Output[task_id:str|None]
def _enqueue_recovery_task(
self,
command: StartSessionCommand,
session: DatasetReviewSession,
parsed_context: Optional[SupersetParsedContext],
) -> Optional[str]:
if self.task_manager is None:
logger.reason(
"Dataset review session started without task manager; continuing synchronously",
extra={"session_id": session.session_id},
)
return None
task_params: Dict[str, Any] = {
"session_id": session.session_id,
"user_id": command.user.id,
"environment_id": session.environment_id,
"source_kind": session.source_kind,
"source_input": session.source_input,
"dataset_ref": session.dataset_ref,
"dataset_id": session.dataset_id,
"dashboard_id": session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery),
}
create_task = getattr(self.task_manager, "create_task", None)
if create_task is None:
logger.explore("Task manager has no create_task method; skipping recovery enqueue")
return None
try:
task_object = create_task(
plugin_id="dataset-review-recovery",
params=task_params,
)
except TypeError:
logger.explore(
"Recovery task enqueue skipped because task manager create_task contract is incompatible",
extra={"session_id": session.session_id},
)
return None
task_id = getattr(task_object, "id", None)
return str(task_id) if task_id else None
# [/DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# [/DEF:DatasetReviewOrchestrator:Class]
# [/DEF:DatasetReviewOrchestrator:Module]

View File

@@ -8,6 +8,9 @@
# @RELATION: [DEPENDS_ON] -> [CompiledPreview]
# @PRE: repository operations execute within authenticated request or task scope.
# @POST: session aggregate reads are structurally consistent and writes preserve ownership and version semantics.
# @SIDE_EFFECT: reads and writes SQLAlchemy-backed session aggregates.
# @DATA_CONTRACT: Input[SessionMutation] -> Output[PersistedSessionAggregate]
# @INVARIANT: answers, mapping approvals, preview artifacts, and launch snapshots are never attributed to the wrong user or session.
from typing import Optional, List
from sqlalchemy import or_
@@ -22,27 +25,51 @@ from src.models.dataset_review import (
)
from src.core.logger import belief_scope
# [DEF:SessionRepo:Class]
# @COMPLEXITY: 4
# @PURPOSE: Enforce ownership-scoped persistence and retrieval for dataset review session aggregates.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [DatasetProfile]
# @RELATION: [DEPENDS_ON] -> [ValidationFinding]
# @RELATION: [DEPENDS_ON] -> [CompiledPreview]
# @PRE: constructor receives a live SQLAlchemy session and callers provide authenticated user scope for guarded reads and writes.
# @POST: repository methods return ownership-scoped aggregates or persisted child records without changing domain meaning.
# @SIDE_EFFECT: mutates and queries the persistence layer through the injected database session.
# @DATA_CONTRACT: Input[OwnedSessionQuery|SessionMutation] -> Output[PersistedSessionAggregate|PersistedChildRecord]
class DatasetReviewSessionRepository:
"""
@PURPOSE: Persist and retrieve dataset review session aggregates.
@INVARIANT: ownership_scope -> All operations must respect the session owner's user_id.
"""
    # [DEF:init_repo:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Bind the repository to an externally managed SQLAlchemy session.
    def __init__(self, db: Session):
        # The caller owns the session lifecycle; the repository only issues
        # queries and commits through this handle.
        self.db = db
    # [/DEF:init_repo:Function]
# [DEF:create_sess:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist an initial dataset review session shell.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @PRE: session is a new aggregate root bound to the current ownership scope.
# @POST: session is committed, refreshed, and returned with persisted identifiers.
# @SIDE_EFFECT: inserts a session row and commits the active transaction.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[DatasetReviewSession]
    def create_session(self, session: DatasetReviewSession) -> DatasetReviewSession:
        """
        @PURPOSE: Persist initial session shell.
        @PRE: session is a transient aggregate root not yet attached to the database.
        @POST: returned session carries database-generated identifiers and defaults.
        """
        with belief_scope("DatasetReviewSessionRepository.create_session"):
            self.db.add(session)
            # Commit immediately so the generated session identifier is durable
            # and usable by follow-up writes (profile, findings) in the request.
            self.db.commit()
            # Refresh to load server-side defaults (ids, timestamps) onto the instance.
            self.db.refresh(session)
            return session
# [/DEF:create_sess:Function]
# [DEF:load_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Return the full session aggregate for API and frontend resume flows.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [SessionCollaborator]
def load_session_detail(self, session_id: str, user_id: str) -> Optional[DatasetReviewSession]:
"""
@PURPOSE: Return the full session aggregate for API/frontend use.
@PRE: user_id must match session owner or authorized collaborator.
"""
with belief_scope("DatasetReviewSessionRepository.load_session_detail"):
@@ -70,17 +97,25 @@ class DatasetReviewSessionRepository:
)
)\
.first()
# [/DEF:load_detail:Function]
# [DEF:save_prof_find:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist profile state and replace validation findings for an owned session in one transaction.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [DatasetProfile]
# @RELATION: [DEPENDS_ON] -> [ValidationFinding]
# @PRE: session_id belongs to user_id and the supplied profile/findings belong to the same aggregate scope.
# @POST: stored profile matches the current session and findings are replaced by the supplied collection.
# @SIDE_EFFECT: updates profile rows, deletes stale findings, inserts current findings, and commits the transaction.
# @DATA_CONTRACT: Input[ProfileAndFindingsMutation] -> Output[DatasetReviewSession]
def save_profile_and_findings(self, session_id: str, user_id: str, profile: DatasetProfile, findings: List[ValidationFinding]) -> DatasetReviewSession:
"""
@PURPOSE: Persist profile and validation state together.
"""
with belief_scope("DatasetReviewSessionRepository.save_profile_and_findings"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
DatasetReviewSession.user_id == user_id
).first()
if not session:
raise ValueError("Session not found or access denied")
@@ -90,24 +125,31 @@ class DatasetReviewSessionRepository:
if existing_profile:
profile.profile_id = existing_profile.profile_id
self.db.merge(profile)
# Remove old findings for this session to avoid stale data
self.db.query(ValidationFinding).filter(
ValidationFinding.session_id == session_id
).delete()
# Add new findings
for finding in findings:
finding.session_id = session_id
self.db.add(finding)
self.db.commit()
return self.load_session_detail(session_id, user_id)
# [/DEF:save_prof_find:Function]
# [DEF:save_prev:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist a preview snapshot and mark prior session previews stale.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [CompiledPreview]
# @PRE: session_id belongs to user_id and preview is prepared for the same session aggregate.
# @POST: preview is persisted and the session points to the latest preview identifier.
# @SIDE_EFFECT: updates prior preview statuses, inserts a preview row, mutates the parent session, and commits.
# @DATA_CONTRACT: Input[PreviewMutation] -> Output[CompiledPreview]
def save_preview(self, session_id: str, user_id: str, preview: CompiledPreview) -> CompiledPreview:
"""
@PURPOSE: Persist compiled preview attempt and mark older fingerprints stale.
"""
with belief_scope("DatasetReviewSessionRepository.save_preview"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
@@ -125,15 +167,22 @@ class DatasetReviewSessionRepository:
self.db.add(preview)
self.db.flush()
session.last_preview_id = preview.preview_id
self.db.commit()
self.db.refresh(preview)
return preview
# [/DEF:save_prev:Function]
# [DEF:save_run_ctx:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist an immutable launch audit snapshot for an owned session.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [DatasetRunContext]
# @PRE: session_id belongs to user_id and run_context targets the same aggregate.
# @POST: run context is persisted and linked as the latest launch snapshot for the session.
# @SIDE_EFFECT: inserts a run-context row, mutates the parent session pointer, and commits.
# @DATA_CONTRACT: Input[RunContextMutation] -> Output[DatasetRunContext]
def save_run_context(self, session_id: str, user_id: str, run_context: DatasetRunContext) -> DatasetRunContext:
"""
@PURPOSE: Persist immutable launch audit snapshot.
"""
with belief_scope("DatasetReviewSessionRepository.save_run_context"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
@@ -146,18 +195,22 @@ class DatasetReviewSessionRepository:
self.db.add(run_context)
self.db.flush()
session.last_run_context_id = run_context.run_context_id
self.db.commit()
self.db.refresh(run_context)
return run_context
# [/DEF:save_run_ctx:Function]
# [DEF:list_user_sess:Function]
# @COMPLEXITY: 3
# @PURPOSE: List review sessions owned by a specific user ordered by most recent update.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
def list_sessions_for_user(self, user_id: str) -> List[DatasetReviewSession]:
"""
@PURPOSE: List all review sessions owned by a user.
"""
with belief_scope("DatasetReviewSessionRepository.list_sessions_for_user"):
return self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.user_id == user_id
).order_by(DatasetReviewSession.updated_at.desc()).all()
# [/DEF:list_user_sess:Function]
# [/DEF:SessionRepo:Class]
# [/DEF:DatasetReviewSessionRepository:Module]

View File

@@ -0,0 +1,342 @@
# [DEF:SemanticSourceResolver:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, semantic_resolution, dictionary, trusted_sources, ranking
# @PURPOSE: Resolve and rank semantic candidates from trusted dictionary-like sources before any inferred fallback.
# @LAYER: Domain
# @RELATION: [DEPENDS_ON] ->[LLMProviderService]
# @RELATION: [DEPENDS_ON] ->[SemanticSource]
# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry]
# @RELATION: [DEPENDS_ON] ->[SemanticCandidate]
# @PRE: selected source and target field set must be known.
# @POST: candidate ranking follows the configured confidence hierarchy and unresolved fuzzy matches remain reviewable.
# @SIDE_EFFECT: may create conflict findings and semantic candidate records.
# @INVARIANT: Manual overrides are never silently replaced by imported, inferred, or AI-generated values.
from __future__ import annotations
# [DEF:SemanticSourceResolver.imports:Block]
from dataclasses import dataclass, field
from difflib import SequenceMatcher
from typing import Any, Dict, Iterable, List, Mapping, Optional
from src.core.logger import belief_scope, logger
from src.models.dataset_review import (
CandidateMatchType,
CandidateStatus,
FieldProvenance,
)
# [/DEF:SemanticSourceResolver.imports:Block]
# [DEF:DictionaryResolutionResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Carries field-level dictionary resolution output with explicit review and partial-recovery state.
@dataclass
class DictionaryResolutionResult:
    """Field-level outcome of resolving semantics against one trusted source."""

    # Identifier of the trusted source the resolution ran against.
    source_ref: str
    # Per-field resolution payloads: applied candidate, ranked alternatives, provenance flags.
    resolved_fields: List[Dict[str, Any]] = field(default_factory=list)
    # Field names with no exact or fuzzy match; kept explicit so review UIs can surface them.
    unresolved_fields: List[str] = field(default_factory=list)
    # True when at least one field stayed unresolved, i.e. the source only partially covered the dataset.
    partial_recovery: bool = False
# [/DEF:DictionaryResolutionResult:Class]
# [DEF:SemanticSourceResolver:Class]
# @COMPLEXITY: 4
# @PURPOSE: Resolve semantic candidates from trusted sources while preserving manual locks and confidence ordering.
# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry]
# @RELATION: [DEPENDS_ON] ->[SemanticCandidate]
# @PRE: source payload and target field collection are provided by the caller.
# @POST: result contains confidence-ranked candidates and does not overwrite manual locks implicitly.
# @SIDE_EFFECT: emits semantic trace logs for ranking and fallback decisions.
class SemanticSourceResolver:
# [DEF:SemanticSourceResolver.resolve_from_file:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize uploaded semantic file records into field-level candidates.
def resolve_from_file(self, source_payload: Mapping[str, Any], fields: Iterable[Mapping[str, Any]]) -> DictionaryResolutionResult:
return DictionaryResolutionResult(source_ref=str(source_payload.get("source_ref") or "uploaded_file"))
# [/DEF:SemanticSourceResolver.resolve_from_file:Function]
# [DEF:SemanticSourceResolver.resolve_from_dictionary:Function]
# @COMPLEXITY: 4
# @PURPOSE: Resolve candidates from connected tabular dictionary sources.
# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry]
# @RELATION: [DEPENDS_ON] ->[SemanticCandidate]
# @PRE: dictionary source exists and fields contain stable field_name values.
# @POST: returns confidence-ranked candidates where exact dictionary matches outrank fuzzy matches and unresolved fields stay explicit.
# @SIDE_EFFECT: emits belief-state logs describing trusted-match and partial-recovery outcomes.
# @DATA_CONTRACT: Input[source_payload:Mapping,fields:Iterable] -> Output[DictionaryResolutionResult]
def resolve_from_dictionary(
self,
source_payload: Mapping[str, Any],
fields: Iterable[Mapping[str, Any]],
) -> DictionaryResolutionResult:
with belief_scope("SemanticSourceResolver.resolve_from_dictionary"):
source_ref = str(source_payload.get("source_ref") or "").strip()
dictionary_rows = source_payload.get("rows")
if not source_ref:
logger.explore("Dictionary semantic source is missing source_ref")
raise ValueError("Dictionary semantic source must include source_ref")
if not isinstance(dictionary_rows, list) or not dictionary_rows:
logger.explore(
"Dictionary semantic source has no usable rows",
extra={"source_ref": source_ref},
)
raise ValueError("Dictionary semantic source must include non-empty rows")
logger.reason(
"Resolving semantics from trusted dictionary source",
extra={"source_ref": source_ref, "row_count": len(dictionary_rows)},
)
normalized_rows = [self._normalize_dictionary_row(row) for row in dictionary_rows if isinstance(row, Mapping)]
row_index = {
row["field_key"]: row
for row in normalized_rows
if row.get("field_key")
}
resolved_fields: List[Dict[str, Any]] = []
unresolved_fields: List[str] = []
for raw_field in fields:
field_name = str(raw_field.get("field_name") or "").strip()
if not field_name:
continue
is_locked = bool(raw_field.get("is_locked"))
if is_locked:
logger.reason(
"Preserving manual lock during dictionary resolution",
extra={"field_name": field_name},
)
resolved_fields.append(
{
"field_name": field_name,
"applied_candidate": None,
"candidates": [],
"provenance": FieldProvenance.MANUAL_OVERRIDE.value,
"needs_review": False,
"has_conflict": False,
"is_locked": True,
"status": "preserved_manual",
}
)
continue
exact_match = row_index.get(self._normalize_key(field_name))
candidates: List[Dict[str, Any]] = []
if exact_match is not None:
logger.reason(
"Resolved exact dictionary match",
extra={"field_name": field_name, "source_ref": source_ref},
)
candidates.append(
self._build_candidate_payload(
rank=1,
match_type=CandidateMatchType.EXACT,
confidence_score=1.0,
row=exact_match,
)
)
else:
fuzzy_matches = self._find_fuzzy_matches(field_name, normalized_rows)
for rank_offset, fuzzy_match in enumerate(fuzzy_matches, start=1):
candidates.append(
self._build_candidate_payload(
rank=rank_offset,
match_type=CandidateMatchType.FUZZY,
confidence_score=float(fuzzy_match["score"]),
row=fuzzy_match["row"],
)
)
if not candidates:
unresolved_fields.append(field_name)
resolved_fields.append(
{
"field_name": field_name,
"applied_candidate": None,
"candidates": [],
"provenance": FieldProvenance.UNRESOLVED.value,
"needs_review": True,
"has_conflict": False,
"is_locked": False,
"status": "unresolved",
}
)
logger.explore(
"No trusted dictionary match found for field",
extra={"field_name": field_name, "source_ref": source_ref},
)
continue
ranked_candidates = self.rank_candidates(candidates)
applied_candidate = ranked_candidates[0]
has_conflict = len(ranked_candidates) > 1
provenance = (
FieldProvenance.DICTIONARY_EXACT.value
if applied_candidate["match_type"] == CandidateMatchType.EXACT.value
else FieldProvenance.FUZZY_INFERRED.value
)
needs_review = applied_candidate["match_type"] != CandidateMatchType.EXACT.value
resolved_fields.append(
{
"field_name": field_name,
"applied_candidate": applied_candidate,
"candidates": ranked_candidates,
"provenance": provenance,
"needs_review": needs_review,
"has_conflict": has_conflict,
"is_locked": False,
"status": "resolved",
}
)
result = DictionaryResolutionResult(
source_ref=source_ref,
resolved_fields=resolved_fields,
unresolved_fields=unresolved_fields,
partial_recovery=bool(unresolved_fields),
)
logger.reflect(
"Dictionary resolution completed",
extra={
"source_ref": source_ref,
"resolved_fields": len(resolved_fields),
"unresolved_fields": len(unresolved_fields),
"partial_recovery": result.partial_recovery,
},
)
return result
# [/DEF:SemanticSourceResolver.resolve_from_dictionary:Function]
# [DEF:SemanticSourceResolver.resolve_from_reference_dataset:Function]
# @COMPLEXITY: 2
# @PURPOSE: Reuse semantic metadata from trusted Superset datasets.
def resolve_from_reference_dataset(
self,
source_payload: Mapping[str, Any],
fields: Iterable[Mapping[str, Any]],
) -> DictionaryResolutionResult:
return DictionaryResolutionResult(source_ref=str(source_payload.get("source_ref") or "reference_dataset"))
# [/DEF:SemanticSourceResolver.resolve_from_reference_dataset:Function]
# [DEF:SemanticSourceResolver.rank_candidates:Function]
# @COMPLEXITY: 3
# @PURPOSE: Apply confidence ordering and determine best candidate per field.
# @RELATION: [DEPENDS_ON] ->[SemanticCandidate]
def rank_candidates(self, candidates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
ranked = sorted(
candidates,
key=lambda candidate: (
self._match_priority(candidate.get("match_type")),
-float(candidate.get("confidence_score", 0.0)),
int(candidate.get("candidate_rank", 999)),
),
)
for index, candidate in enumerate(ranked, start=1):
candidate["candidate_rank"] = index
return ranked
# [/DEF:SemanticSourceResolver.rank_candidates:Function]
# [DEF:SemanticSourceResolver.detect_conflicts:Function]
# @COMPLEXITY: 2
# @PURPOSE: Mark competing candidate sets that require explicit user review.
def detect_conflicts(self, candidates: List[Dict[str, Any]]) -> bool:
return len(candidates) > 1
# [/DEF:SemanticSourceResolver.detect_conflicts:Function]
# [DEF:SemanticSourceResolver.apply_field_decision:Function]
# @COMPLEXITY: 2
# @PURPOSE: Accept, reject, or manually override a field-level semantic value.
def apply_field_decision(self, field_state: Mapping[str, Any], decision: Mapping[str, Any]) -> Dict[str, Any]:
merged = dict(field_state)
merged.update(decision)
return merged
# [/DEF:SemanticSourceResolver.apply_field_decision:Function]
# [DEF:SemanticSourceResolver._normalize_dictionary_row:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize one dictionary row into a consistent lookup structure.
def _normalize_dictionary_row(self, row: Mapping[str, Any]) -> Dict[str, Any]:
field_name = (
row.get("field_name")
or row.get("column_name")
or row.get("name")
or row.get("field")
)
normalized_name = str(field_name or "").strip()
return {
"field_name": normalized_name,
"field_key": self._normalize_key(normalized_name),
"verbose_name": row.get("verbose_name") or row.get("label"),
"description": row.get("description"),
"display_format": row.get("display_format") or row.get("format"),
}
# [/DEF:SemanticSourceResolver._normalize_dictionary_row:Function]
# [DEF:SemanticSourceResolver._find_fuzzy_matches:Function]
# @COMPLEXITY: 2
# @PURPOSE: Produce confidence-scored fuzzy matches while keeping them reviewable.
def _find_fuzzy_matches(self, field_name: str, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
normalized_target = self._normalize_key(field_name)
fuzzy_matches: List[Dict[str, Any]] = []
for row in rows:
candidate_key = str(row.get("field_key") or "")
if not candidate_key:
continue
score = SequenceMatcher(None, normalized_target, candidate_key).ratio()
if score < 0.72:
continue
fuzzy_matches.append({"row": row, "score": round(score, 3)})
fuzzy_matches.sort(key=lambda item: item["score"], reverse=True)
return fuzzy_matches[:3]
# [/DEF:SemanticSourceResolver._find_fuzzy_matches:Function]
# [DEF:SemanticSourceResolver._build_candidate_payload:Function]
# @COMPLEXITY: 2
# @PURPOSE: Project normalized dictionary rows into semantic candidate payloads.
def _build_candidate_payload(
self,
rank: int,
match_type: CandidateMatchType,
confidence_score: float,
row: Mapping[str, Any],
) -> Dict[str, Any]:
return {
"candidate_rank": rank,
"match_type": match_type.value,
"confidence_score": confidence_score,
"proposed_verbose_name": row.get("verbose_name"),
"proposed_description": row.get("description"),
"proposed_display_format": row.get("display_format"),
"status": CandidateStatus.PROPOSED.value,
}
# [/DEF:SemanticSourceResolver._build_candidate_payload:Function]
# [DEF:SemanticSourceResolver._match_priority:Function]
# @COMPLEXITY: 2
# @PURPOSE: Encode trusted-confidence ordering so exact dictionary reuse beats fuzzy invention.
def _match_priority(self, match_type: Optional[str]) -> int:
priority = {
CandidateMatchType.EXACT.value: 0,
CandidateMatchType.REFERENCE.value: 1,
CandidateMatchType.FUZZY.value: 2,
CandidateMatchType.GENERATED.value: 3,
}
return priority.get(str(match_type or ""), 99)
# [/DEF:SemanticSourceResolver._match_priority:Function]
# [DEF:SemanticSourceResolver._normalize_key:Function]
# @COMPLEXITY: 1
# @PURPOSE: Normalize field identifiers for stable exact/fuzzy comparisons.
def _normalize_key(self, value: str) -> str:
return "".join(ch for ch in str(value or "").strip().lower() if ch.isalnum() or ch == "_")
# [/DEF:SemanticSourceResolver._normalize_key:Function]
# [/DEF:SemanticSourceResolver:Class]
# [/DEF:SemanticSourceResolver:Module]