feat(us1): add automatic review slice for dataset review orchestration

This commit is contained in:
2026-03-17 10:57:49 +03:00
parent e916cb1f17
commit 023bacde39
24 changed files with 4870 additions and 131 deletions

View File

@@ -1,17 +1,18 @@
# [DEF:backend.src.api.routes.__init__:Module]
# [DEF:ApiRoutesModule:Module]
# @COMPLEXITY: 3
# @SEMANTICS: routes, lazy-import, module-registry
# @PURPOSE: Provide lazy route module loading to avoid heavyweight imports during tests.
# @LAYER: API
# @RELATION: DEPENDS_ON -> importlib
# @RELATION: [CALLS] ->[ApiRoutesGetAttr]
# @INVARIANT: Only names listed in __all__ are importable via __getattr__.
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile']
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile', 'dataset_review']
# [DEF:__getattr__:Function]
# @COMPLEXITY: 1
# [DEF:ApiRoutesGetAttr:Function]
# @COMPLEXITY: 3
# @PURPOSE: Lazily import route module by attribute name.
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @PRE: name is module candidate exposed in __all__.
# @POST: Returns imported submodule or raises AttributeError.
def __getattr__(name):
@@ -19,5 +20,5 @@ def __getattr__(name):
import importlib
return importlib.import_module(f".{name}", __name__)
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
# [/DEF:__getattr__:Function]
# [/DEF:backend.src.api.routes.__init__:Module]
# [/DEF:ApiRoutesGetAttr:Function]
# [/DEF:ApiRoutesModule:Module]

View File

@@ -0,0 +1,349 @@
# [DEF:DatasetReviewApiTests:Module]
# @COMPLEXITY: 3
# @SEMANTICS: dataset_review, api, tests, lifecycle, exports, orchestration
# @PURPOSE: Verify backend US1 dataset review lifecycle, export, parsing, and dictionary-resolution contracts.
# @LAYER: API
# @RELATION: [BINDS_TO] ->[DatasetReviewApi]
# @RELATION: [BINDS_TO] ->[DatasetReviewOrchestrator]
from datetime import datetime, timezone
import json
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fastapi.testclient import TestClient
from src.app import app
from src.api.routes.dataset_review import _get_orchestrator, _get_repository
from src.core.config_models import Environment, GlobalSettings, AppConfig
from src.core.utils.superset_context_extractor import SupersetContextExtractor
from src.dependencies import get_config_manager, get_current_user, get_task_manager
from src.models.dataset_review import (
BusinessSummarySource,
ConfidenceState,
DatasetReviewSession,
FindingArea,
FindingSeverity,
ReadinessState,
RecommendedAction,
ResolutionState,
SessionPhase,
SessionStatus,
)
from src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
client = TestClient(app)
# [DEF:_make_user:Function]
def _make_user():
admin_role = SimpleNamespace(name="Admin", permissions=[])
return SimpleNamespace(id="user-1", username="tester", roles=[admin_role])
# [/DEF:_make_user:Function]
# [DEF:_make_config_manager:Function]
# @PURPOSE: Build a mocked config manager exposing exactly one resolvable DEV environment.
def _make_config_manager():
    dev_env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    app_config = AppConfig(environments=[dev_env], settings=GlobalSettings())

    def _lookup(env_id):
        # Only "env-1" resolves; every other id behaves like a missing environment.
        return dev_env if env_id == "env-1" else None

    config_manager = MagicMock()
    config_manager.get_environment.side_effect = _lookup
    config_manager.get_config.return_value = app_config
    return config_manager
# [/DEF:_make_config_manager:Function]
# [DEF:_make_session:Function]
# @PURPOSE: Build a review-ready active session fixture with one shared timestamp for all audit fields.
def _make_session():
    timestamp = datetime.now(timezone.utc)
    session_fields = dict(
        session_id="sess-1",
        user_id="user-1",
        environment_id="env-1",
        source_kind="superset_link",
        source_input="http://superset.local/dashboard/10",
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        readiness_state=ReadinessState.REVIEW_READY,
        recommended_action=RecommendedAction.REVIEW_DOCUMENTATION,
        status=SessionStatus.ACTIVE,
        current_phase=SessionPhase.REVIEW,
        created_at=timestamp,
        updated_at=timestamp,
        last_activity_at=timestamp,
    )
    return DatasetReviewSession(**session_fields)
# [/DEF:_make_session:Function]
# [DEF:dataset_review_api_dependencies:Function]
# @PURPOSE: Override auth/config/task dependencies for every test in this module, then restore them.
@pytest.fixture(autouse=True)
def dataset_review_api_dependencies():
    context = {
        "user": _make_user(),
        "config_manager": _make_config_manager(),
        "task_manager": MagicMock(),
    }
    app.dependency_overrides[get_current_user] = lambda: context["user"]
    app.dependency_overrides[get_config_manager] = lambda: context["config_manager"]
    app.dependency_overrides[get_task_manager] = lambda: context["task_manager"]
    yield context
    # Teardown: drop every override so tests cannot leak state into each other.
    app.dependency_overrides.clear()
# [/DEF:dataset_review_api_dependencies:Function]
# [DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# @PURPOSE: Verify dashboard links recover dataset context and preserve explicit partial-recovery markers.
def test_parse_superset_link_dashboard_partial_recovery():
    dev_env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    # Two datasets on the dashboard force the extractor down the ambiguous path.
    superset_client = MagicMock()
    superset_client.get_dashboard_detail.return_value = {
        "datasets": [{"id": 42}, {"id": 77}],
    }
    superset_client.get_dataset_detail.return_value = {
        "table_name": "sales",
        "schema": "public",
    }
    dashboard_link = (
        "http://superset.local/dashboard/10/"
        "?native_filters=%5B%7B%22name%22%3A%22country%22%2C%22value%22%3A%22DE%22%7D%5D"
    )
    extractor = SupersetContextExtractor(environment=dev_env, client=superset_client)
    parsed = extractor.parse_superset_link(dashboard_link)
    assert parsed.dataset_id == 42
    assert parsed.dashboard_id == 10
    assert parsed.dataset_ref == "public.sales"
    assert parsed.partial_recovery is True
    assert "multiple_dashboard_datasets" in parsed.unresolved_references
    assert parsed.imported_filters[0]["filter_name"] == "country"
# [/DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# [DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# @PURPOSE: Verify trusted dictionary exact matches outrank fuzzy candidates and unresolved fields stay explicit.
def test_resolve_from_dictionary_prefers_exact_match():
    # One exact row ("revenue") and one near-miss row ("revnue") in the dictionary.
    dictionary_payload = {
        "source_ref": "dict://finance",
        "rows": [
            {
                "field_name": "revenue",
                "verbose_name": "Revenue",
                "description": "Recognized revenue amount",
                "display_format": "$,.2f",
            },
            {
                "field_name": "revnue",
                "verbose_name": "Revenue typo",
                "description": "Fuzzy variant",
            },
        ],
    }
    # "margin" has no dictionary row at all, so it must stay unresolved.
    target_fields = [
        {"field_name": "revenue", "is_locked": False},
        {"field_name": "margin", "is_locked": False},
    ]
    result = SemanticSourceResolver().resolve_from_dictionary(dictionary_payload, target_fields)
    exact_entry = next(item for item in result.resolved_fields if item["field_name"] == "revenue")
    missing_entry = next(item for item in result.resolved_fields if item["field_name"] == "margin")
    assert exact_entry["applied_candidate"]["match_type"] == "exact"
    assert exact_entry["provenance"] == "dictionary_exact"
    assert missing_entry["status"] == "unresolved"
    assert "margin" in result.unresolved_fields
    assert result.partial_recovery is True
# [/DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# [DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# @PURPOSE: Verify session start persists usable recovery-required state when Superset intake is partial.
def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_api_dependencies):
    # Repository stub: create/save both return the same pre-built session so
    # the orchestrator's persisted state can be asserted directly.
    repository = MagicMock()
    created_session = _make_session()
    created_session.readiness_state = ReadinessState.RECOVERY_REQUIRED
    created_session.current_phase = SessionPhase.RECOVERY
    repository.create_session.return_value = created_session
    repository.save_profile_and_findings.return_value = created_session
    repository.db = MagicMock()
    orchestrator = DatasetReviewOrchestrator(
        repository=repository,
        config_manager=dataset_review_api_dependencies["config_manager"],
        task_manager=None,  # no background recovery task in this unit-level test
    )
    # Simulated Superset link parse result carrying explicit partial-recovery markers.
    parsed_context = SimpleNamespace(
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        chart_id=None,
        partial_recovery=True,
        unresolved_references=["dashboard_dataset_binding_missing"],
    )
    # Patch the extractor at the orchestrator's import site so no HTTP happens.
    with patch(
        "src.services.dataset_review.orchestrator.SupersetContextExtractor.parse_superset_link",
        return_value=parsed_context,
    ):
        result = orchestrator.start_session(
            StartSessionCommand(
                user=dataset_review_api_dependencies["user"],
                environment_id="env-1",
                source_kind="superset_link",
                source_input="http://superset.local/dashboard/10",
            )
        )
    # Partial recovery must surface as a warning finding, not be silently dropped.
    assert result.session.readiness_state == ReadinessState.RECOVERY_REQUIRED
    assert result.findings
    assert result.findings[0].severity.value == "warning"
    repository.create_session.assert_called_once()
    repository.save_profile_and_findings.assert_called_once()
# [/DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# [DEF:test_start_session_endpoint_returns_created_summary:Function]
# @PURPOSE: Verify POST session lifecycle endpoint returns a persisted ownership-scoped summary.
def test_start_session_endpoint_returns_created_summary(dataset_review_api_dependencies):
    fake_orchestrator = MagicMock()
    fake_orchestrator.start_session.return_value = SimpleNamespace(
        session=_make_session(), findings=[], parsed_context=None
    )
    # Route the endpoint to the stub orchestrator; the autouse fixture clears this.
    app.dependency_overrides[_get_orchestrator] = lambda: fake_orchestrator
    request_body = {
        "source_kind": "superset_link",
        "source_input": "http://superset.local/dashboard/10",
        "environment_id": "env-1",
    }
    response = client.post("/api/dataset-orchestration/sessions", json=request_body)
    assert response.status_code == 201
    payload = response.json()
    assert payload["session_id"] == "sess-1"
    assert payload["dataset_ref"] == "public.sales"
    assert payload["environment_id"] == "env-1"
# [/DEF:test_start_session_endpoint_returns_created_summary:Function]
# [DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
# @PURPOSE: Verify lifecycle get/patch/delete plus documentation and validation exports remain ownership-scoped and usable.
def test_get_session_detail_export_and_lifecycle_endpoints(dataset_review_api_dependencies):
    now = datetime.now(timezone.utc)
    # spec'd mock so attribute access is constrained to the real model surface.
    session = MagicMock(spec=DatasetReviewSession)
    session.session_id = "sess-1"
    session.user_id = "user-1"
    session.environment_id = "env-1"
    session.source_kind = "superset_link"
    session.source_input = "http://superset.local/dashboard/10"
    session.dataset_ref = "public.sales"
    session.dataset_id = 42
    session.dashboard_id = 10
    session.readiness_state = ReadinessState.REVIEW_READY
    session.recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
    session.status = SessionStatus.ACTIVE
    session.current_phase = SessionPhase.REVIEW
    session.created_at = now
    session.updated_at = now
    session.last_activity_at = now
    # Profile stub feeds both the detail serializer and the documentation export.
    session.profile = SimpleNamespace(
        dataset_name="sales",
        business_summary="Summary text",
        confidence_state=ConfidenceState.MOSTLY_CONFIRMED,
        dataset_type="unknown",
        schema_name=None,
        database_name=None,
        business_summary_source=BusinessSummarySource.IMPORTED,
        description=None,
        is_sqllab_view=False,
        completeness_score=None,
        has_blocking_findings=False,
        has_warning_findings=True,
        manual_summary_locked=False,
        created_at=now,
        updated_at=now,
        profile_id="profile-1",
        session_id="sess-1",
    )
    # One warning finding exercises the findings section of both export formats.
    session.findings = [
        SimpleNamespace(
            finding_id="f-1",
            session_id="sess-1",
            area=FindingArea.SOURCE_INTAKE,
            severity=FindingSeverity.WARNING,
            code="PARTIAL_SUPERSET_RECOVERY",
            title="Partial",
            message="Some filters require review",
            resolution_state=ResolutionState.OPEN,
            resolution_note=None,
            caused_by_ref=None,
            created_at=now,
            resolved_at=None,
        )
    ]
    # Empty child collections keep the detail serializer on its happy path.
    session.collaborators = []
    session.semantic_sources = []
    session.semantic_fields = []
    session.imported_filters = []
    session.template_variables = []
    session.execution_mappings = []
    session.clarification_sessions = []
    session.previews = []
    session.run_contexts = []
    repository = MagicMock()
    repository.load_session_detail.return_value = session
    repository.list_sessions_for_user.return_value = [session]
    repository.db = MagicMock()
    # Route repository-backed endpoints to the stub; the autouse fixture clears this.
    app.dependency_overrides[_get_repository] = lambda: repository
    detail_response = client.get("/api/dataset-orchestration/sessions/sess-1")
    assert detail_response.status_code == 200
    assert detail_response.json()["session_id"] == "sess-1"
    patch_response = client.patch(
        "/api/dataset-orchestration/sessions/sess-1",
        json={"status": "paused"},
    )
    assert patch_response.status_code == 200
    assert patch_response.json()["status"] == "paused"
    doc_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/documentation?format=json")
    assert doc_response.status_code == 200
    assert doc_response.json()["artifact_type"] == "documentation"
    validation_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/validation?format=markdown")
    assert validation_response.status_code == 200
    assert validation_response.json()["artifact_type"] == "validation_report"
    assert "Validation Report" in validation_response.json()["content"]["markdown"]
    delete_response = client.delete("/api/dataset-orchestration/sessions/sess-1")
    assert delete_response.status_code == 204
# [/DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
# [/DEF:DatasetReviewApiTests:Module]

View File

@@ -0,0 +1,533 @@
# [DEF:DatasetReviewApi:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, api, session_lifecycle, exports, rbac, feature_flags
# @PURPOSE: Expose dataset review session lifecycle and export endpoints for backend US1.
# @LAYER: API
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
# @PRE: Authenticated user and valid environment/session scope are required for all mutations and reads.
# @POST: Returns ownership-scoped session state and export payloads with feature-flag/RBAC enforcement.
# @SIDE_EFFECT: Persists session state and may enqueue recovery task.
# @DATA_CONTRACT: Input[HTTP Request] -> Output[SessionSummary | SessionDetail | ExportArtifactResponse | HTTP 204]
# @INVARIANT: No cross-user session leakage is allowed; export payloads only expose the current user's accessible session.
from __future__ import annotations
# [DEF:DatasetReviewApi.imports:Block]
import json
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from src.core.database import get_db
from src.core.logger import belief_scope, logger
from src.dependencies import get_config_manager, get_current_user, get_task_manager, has_permission
from src.models.auth import User
from src.models.dataset_review import (
ArtifactFormat,
DatasetReviewSession,
RecommendedAction,
SessionStatus,
)
from src.schemas.dataset_review import SessionDetail, SessionSummary
from src.services.dataset_review.orchestrator import (
DatasetReviewOrchestrator,
StartSessionCommand,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
# [/DEF:DatasetReviewApi.imports:Block]
router = APIRouter(prefix="/api/dataset-orchestration", tags=["Dataset Orchestration"])
# [DEF:StartSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for starting one dataset review session from a Superset link or dataset selection.
class StartSessionRequest(BaseModel):
    """Payload for POST /sessions.

    The regex restricts intake to the two supported source kinds;
    source_input carries either the Superset link or the dataset selector,
    and environment_id names the configured Superset environment.
    """
    source_kind: str = Field(..., pattern="^(superset_link|dataset_selection)$")
    source_input: str = Field(..., min_length=1)
    environment_id: str = Field(..., min_length=1)
# [/DEF:StartSessionRequest:Class]
# [DEF:UpdateSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for lifecycle state updates on an existing session.
class UpdateSessionRequest(BaseModel):
    """Payload for PATCH /sessions/{session_id}: a target status plus an optional note."""
    status: SessionStatus
    note: Optional[str] = None
# [/DEF:UpdateSessionRequest:Class]
# [DEF:SessionCollectionResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Paginated ownership-scoped dataset review session collection response.
class SessionCollectionResponse(BaseModel):
    """One page of session summaries with total count and pagination cursors."""
    items: List[SessionSummary]
    total: int
    page: int
    page_size: int
    has_next: bool
# [/DEF:SessionCollectionResponse:Class]
# [DEF:ExportArtifactResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Inline export response for documentation or validation outputs without introducing unrelated persistence changes.
class ExportArtifactResponse(BaseModel):
    """Export payload returned inline; storage_ref is an inline:// pseudo-reference, not a persisted artifact."""
    artifact_id: str
    session_id: str
    artifact_type: str
    format: str
    storage_ref: str
    created_by_user_id: str
    created_at: Optional[str] = None
    content: Dict[str, Any]
# [/DEF:ExportArtifactResponse:Class]
# [DEF:_require_auto_review_flag:Function]
# @COMPLEXITY: 3
# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag.
# @RELATION: [DEPENDS_ON] ->[ConfigManager]
def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool:
    with belief_scope("dataset_review.require_auto_review_flag"):
        flag_enabled = config_manager.get_config().settings.ff_dataset_auto_review
        if flag_enabled:
            return True
        # 404 (not 403) so a disabled feature does not advertise its existence.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Dataset auto review feature is disabled",
        )
# [/DEF:_require_auto_review_flag:Function]
# [DEF:_get_repository:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build repository dependency for dataset review session aggregate access.
def _get_repository(db: Session = Depends(get_db)) -> DatasetReviewSessionRepository:
    repository = DatasetReviewSessionRepository(db)
    return repository
# [/DEF:_get_repository:Function]
# [DEF:_get_orchestrator:Function]
# @COMPLEXITY: 3
# @PURPOSE: Build orchestrator dependency for session lifecycle actions.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
def _get_orchestrator(
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    config_manager=Depends(get_config_manager),
    task_manager=Depends(get_task_manager),
) -> DatasetReviewOrchestrator:
    # Wire all three collaborators resolved above into one orchestrator instance.
    orchestrator = DatasetReviewOrchestrator(
        repository=repository,
        config_manager=config_manager,
        task_manager=task_manager,
    )
    return orchestrator
# [/DEF:_get_orchestrator:Function]
# [DEF:_serialize_session_summary:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API summary DTO.
# @RELATION: [DEPENDS_ON] ->[SessionSummary]
def _serialize_session_summary(session: DatasetReviewSession) -> SessionSummary:
    # from_attributes lets pydantic read the ORM object's attributes directly.
    summary = SessionSummary.model_validate(session, from_attributes=True)
    return summary
# [/DEF:_serialize_session_summary:Function]
# [DEF:_serialize_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API detail DTO.
# @RELATION: [DEPENDS_ON] ->[SessionDetail]
def _serialize_session_detail(session: DatasetReviewSession) -> SessionDetail:
    # from_attributes lets pydantic read the ORM object's attributes directly.
    detail = SessionDetail.model_validate(session, from_attributes=True)
    return detail
# [/DEF:_serialize_session_detail:Function]
# [DEF:_get_owned_session_or_404:Function]
# @COMPLEXITY: 4
# @PURPOSE: Resolve one session for current user or collaborator scope, returning 404 when inaccessible.
# @RELATION: [CALLS] ->[load_detail]
# @PRE: session_id is a non-empty identifier and current_user is authenticated.
# @POST: returns accessible session detail or raises HTTP 404 without leaking foreign-session existence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[session_id:str,current_user:User] -> Output[DatasetReviewSession|HTTPException]
def _get_owned_session_or_404(
    repository: DatasetReviewSessionRepository,
    session_id: str,
    current_user: User,
) -> DatasetReviewSession:
    with belief_scope("dataset_review.get_owned_session_or_404"):
        accessible = repository.load_session_detail(session_id, current_user.id)
        if accessible is not None:
            return accessible
        # Same 404 whether the session is missing or belongs to someone else.
        logger.explore(
            "Dataset review session not found in current ownership scope",
            extra={"session_id": session_id, "user_id": current_user.id},
        )
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
# [/DEF:_get_owned_session_or_404:Function]
# [DEF:_build_documentation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce session documentation export content from current persisted review state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session aggregate is loaded with profile and findings relationships available.
# @POST: returns {"storage_ref": str, "content": dict} matching the requested format.
def _build_documentation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    profile = session.profile
    # NOTE(review): sorting on severity.value orders findings alphabetically by
    # the enum's string value, not by criticality — confirm this is intended.
    findings = sorted(session.findings, key=lambda item: (item.severity.value, item.code))
    if export_format == ArtifactFormat.MARKDOWN:
        # Fix: coalesce a missing profile OR a None/empty business_summary to the
        # fallback text, so "\n".join below never receives a non-string element.
        summary_text = (profile.business_summary if profile else None) or "No profile summary available."
        lines = [
            f"# Dataset Review: {session.dataset_ref}",
            "",
            f"- Session ID: {session.session_id}",
            f"- Environment: {session.environment_id}",
            f"- Readiness: {session.readiness_state.value}",
            f"- Recommended action: {session.recommended_action.value}",
            "",
            "## Business Summary",
            summary_text,
            "",
            "## Findings",
        ]
        if findings:
            for finding in findings:
                lines.append(
                    f"- [{finding.severity.value}] {finding.title}: {finding.message}"
                )
        else:
            lines.append("- No findings recorded.")
        content = {"markdown": "\n".join(lines)}
        storage_ref = f"inline://dataset-review/{session.session_id}/documentation.md"
    else:
        # Explicit conditional instead of `profile and {...}` truthiness chaining:
        # the JSON payload's "profile" key is either the dict or literal None.
        profile_payload = (
            {
                "dataset_name": profile.dataset_name,
                "business_summary": profile.business_summary,
                "confidence_state": profile.confidence_state.value,
                "dataset_type": profile.dataset_type,
            }
            if profile
            else None
        )
        content = {
            "session": _serialize_session_summary(session).model_dump(mode="json"),
            "profile": profile_payload,
            "findings": [
                {
                    "code": finding.code,
                    "severity": finding.severity.value,
                    "title": finding.title,
                    "message": finding.message,
                    "resolution_state": finding.resolution_state.value,
                }
                for finding in findings
            ],
        }
        storage_ref = f"inline://dataset-review/{session.session_id}/documentation.json"
    return {"storage_ref": storage_ref, "content": content}
# [/DEF:_build_documentation_export:Function]
# [DEF:_build_validation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce validation-focused export content from persisted findings and readiness state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _build_validation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    ordered_findings = sorted(session.findings, key=lambda entry: (entry.severity.value, entry.code))
    if export_format == ArtifactFormat.MARKDOWN:
        report_lines = [
            f"# Validation Report: {session.dataset_ref}",
            "",
            f"- Session ID: {session.session_id}",
            f"- Readiness: {session.readiness_state.value}",
            "",
            "## Findings",
        ]
        if not ordered_findings:
            report_lines.append("- No findings recorded.")
        else:
            report_lines.extend(
                f"- `{finding.code}` [{finding.severity.value}] {finding.message}"
                for finding in ordered_findings
            )
        return {
            "storage_ref": f"inline://dataset-review/{session.session_id}/validation.md",
            "content": {"markdown": "\n".join(report_lines)},
        }
    # JSON branch: serialize each finding with its full identity and state.
    serialized_findings = [
        {
            "finding_id": finding.finding_id,
            "area": finding.area.value,
            "severity": finding.severity.value,
            "code": finding.code,
            "title": finding.title,
            "message": finding.message,
            "resolution_state": finding.resolution_state.value,
        }
        for finding in ordered_findings
    ]
    return {
        "storage_ref": f"inline://dataset-review/{session.session_id}/validation.json",
        "content": {
            "session_id": session.session_id,
            "dataset_ref": session.dataset_ref,
            "readiness_state": session.readiness_state.value,
            "findings": serialized_findings,
        },
    }
# [/DEF:_build_validation_export:Function]
# [DEF:list_sessions:Function]
# @COMPLEXITY: 3
# @PURPOSE: List resumable dataset review sessions for the current user.
# @RELATION: [CALLS] ->[list_user_sess]
@router.get(
    "/sessions",
    response_model=SessionCollectionResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def list_sessions(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.list_sessions"):
        # In-memory pagination over the full ownership-scoped list.
        all_sessions = repository.list_sessions_for_user(current_user.id)
        offset = (page - 1) * page_size
        window = all_sessions[offset:offset + page_size]
        return SessionCollectionResponse(
            items=[_serialize_session_summary(entry) for entry in window],
            total=len(all_sessions),
            page=page,
            page_size=page_size,
            has_next=offset + page_size < len(all_sessions),
        )
# [/DEF:list_sessions:Function]
# [DEF:start_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Start a new dataset review session from a Superset link or dataset selection.
# @RELATION: [CALLS] ->[DatasetReviewOrchestrator.start_session]
# @PRE: feature flag enabled, user authenticated, and request body valid.
# @POST: returns persisted session summary scoped to the authenticated user.
# @SIDE_EFFECT: persists session/profile/findings and may enqueue recovery task.
# @DATA_CONTRACT: Input[StartSessionRequest] -> Output[SessionSummary]
@router.post(
    "/sessions",
    response_model=SessionSummary,
    status_code=status.HTTP_201_CREATED,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def start_session(
    request: StartSessionRequest,
    orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.start_session"):
        try:
            result = orchestrator.start_session(
                StartSessionCommand(
                    user=current_user,
                    environment_id=request.environment_id,
                    source_kind=request.source_kind,
                    source_input=request.source_input,
                )
            )
        except ValueError as exc:
            # Map domain ValueErrors onto HTTP: a missing environment reads as
            # 404; every other rejection is a 400 with the domain message.
            detail = str(exc)
            logger.explore(
                "Dataset review session start rejected",
                extra={"user_id": current_user.id, "error": detail},
            )
            if detail == "Environment not found":
                status_code = status.HTTP_404_NOT_FOUND
            else:
                status_code = status.HTTP_400_BAD_REQUEST
            raise HTTPException(status_code=status_code, detail=detail) from exc
        return _serialize_session_summary(result.session)
# [/DEF:start_session:Function]
# [DEF:get_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Return the full accessible dataset review session aggregate for current user scope.
# @RELATION: [CALLS] ->[_get_owned_session_or_404]
@router.get(
    "/sessions/{session_id}",
    response_model=SessionDetail,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def get_session_detail(
    session_id: str,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.get_session_detail"):
        # Ownership/collaborator scoping is enforced by the shared 404 helper.
        accessible_session = _get_owned_session_or_404(repository, session_id, current_user)
        return _serialize_session_detail(accessible_session)
# [/DEF:get_session_detail:Function]
# [DEF:update_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Update resumable lifecycle status for an owned dataset review session.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested status is allowed by lifecycle policy.
# @POST: returns updated summary without changing ownership or unrelated aggregates.
# @SIDE_EFFECT: mutates session lifecycle fields in persistence.
# @DATA_CONTRACT: Input[UpdateSessionRequest] -> Output[SessionSummary]
@router.patch(
    "/sessions/{session_id}",
    response_model=SessionSummary,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def update_session(
    session_id: str,
    request: UpdateSessionRequest,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.update_session"):
        session = _get_owned_session_or_404(repository, session_id, current_user)
        # Collaborators can read via the helper above, but only the owner mutates.
        if current_user.id != session.user_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can mutate session lifecycle")
        session.status = request.status
        terminal_statuses = {SessionStatus.ARCHIVED, SessionStatus.CANCELLED, SessionStatus.COMPLETED}
        if request.status == SessionStatus.PAUSED:
            # Pausing flips the recommendation so the UI offers a resume action.
            session.recommended_action = RecommendedAction.RESUME_SESSION
        elif request.status in terminal_statuses:
            session.active_task_id = None
        repository.db.commit()
        repository.db.refresh(session)
        return _serialize_session_summary(session)
# [/DEF:update_session:Function]
# [DEF:delete_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Archive or hard-delete a session owned by the current user.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is owner-scoped to current user.
# @POST: session is archived or deleted and no foreign-session existence is disclosed.
# @SIDE_EFFECT: mutates or deletes persisted session aggregate.
# @DATA_CONTRACT: Input[session_id:str,hard_delete:bool] -> Output[HTTP 204]
@router.delete(
    "/sessions/{session_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def delete_session(
    session_id: str,
    hard_delete: bool = Query(False),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.delete_session"):
        session = _get_owned_session_or_404(repository, session_id, current_user)
        # Deletion is owner-only even when the helper grants collaborator access.
        if current_user.id != session.user_id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can delete a session")
        if not hard_delete:
            # Soft path (default): archive in place and detach any active task.
            session.status = SessionStatus.ARCHIVED
            session.active_task_id = None
        else:
            repository.db.delete(session)
        repository.db.commit()
        return Response(status_code=status.HTTP_204_NO_CONTENT)
# [/DEF:delete_session:Function]
# [DEF:export_documentation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export documentation output for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns ownership-scoped export payload without fabricating unrelated artifacts.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/documentation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_documentation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.export_documentation"):
        # Reject unsupported formats before touching the session aggregate.
        supported_formats = {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}
        if format not in supported_formats:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
        session = _get_owned_session_or_404(repository, session_id, current_user)
        artifact = _build_documentation_export(session, format)
        return ExportArtifactResponse(
            artifact_id=f"documentation-{session.session_id}-{format.value}",
            session_id=session.session_id,
            artifact_type="documentation",
            format=format.value,
            storage_ref=artifact["storage_ref"],
            created_by_user_id=current_user.id,
            content=artifact["content"],
        )
# [/DEF:export_documentation:Function]
# [DEF:export_validation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export validation findings for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns explicit validation export payload scoped to current user session access.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/validation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_validation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    with belief_scope("dataset_review.export_validation"):
        # Reject unsupported formats before touching the session aggregate.
        supported_formats = {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}
        if format not in supported_formats:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
        session = _get_owned_session_or_404(repository, session_id, current_user)
        artifact = _build_validation_export(session, format)
        return ExportArtifactResponse(
            artifact_id=f"validation-{session.session_id}-{format.value}",
            session_id=session.session_id,
            artifact_type="validation_report",
            format=format.value,
            storage_ref=artifact["storage_ref"],
            created_by_user_id=current_user.id,
            content=artifact["content"],
        )
# [/DEF:export_validation:Function]
# [/DEF:DatasetReviewApi:Module]