feat(us1): add dataset review orchestration automatic review slice
This commit is contained in:
@@ -1,17 +1,18 @@
|
||||
# [DEF:backend.src.api.routes.__init__:Module]
|
||||
# [DEF:ApiRoutesModule:Module]
|
||||
# @COMPLEXITY: 3
|
||||
# @SEMANTICS: routes, lazy-import, module-registry
|
||||
# @PURPOSE: Provide lazy route module loading to avoid heavyweight imports during tests.
|
||||
# @LAYER: API
|
||||
# @RELATION: DEPENDS_ON -> importlib
|
||||
# @RELATION: [CALLS] ->[ApiRoutesGetAttr]
|
||||
# @INVARIANT: Only names listed in __all__ are importable via __getattr__.
|
||||
|
||||
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile']
|
||||
__all__ = ['plugins', 'tasks', 'settings', 'connections', 'environments', 'mappings', 'migration', 'git', 'storage', 'admin', 'reports', 'assistant', 'clean_release', 'profile', 'dataset_review']
|
||||
|
||||
|
||||
# [DEF:__getattr__:Function]
|
||||
# @COMPLEXITY: 1
|
||||
# [DEF:ApiRoutesGetAttr:Function]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Lazily import route module by attribute name.
|
||||
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
|
||||
# @PRE: name is module candidate exposed in __all__.
|
||||
# @POST: Returns imported submodule or raises AttributeError.
|
||||
def __getattr__(name):
|
||||
@@ -19,5 +20,5 @@ def __getattr__(name):
|
||||
import importlib
|
||||
return importlib.import_module(f".{name}", __name__)
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
# [/DEF:__getattr__:Function]
|
||||
# [/DEF:backend.src.api.routes.__init__:Module]
|
||||
# [/DEF:ApiRoutesGetAttr:Function]
|
||||
# [/DEF:ApiRoutesModule:Module]
|
||||
|
||||
349
backend/src/api/routes/__tests__/test_dataset_review_api.py
Normal file
349
backend/src/api/routes/__tests__/test_dataset_review_api.py
Normal file
@@ -0,0 +1,349 @@
|
||||
# [DEF:DatasetReviewApiTests:Module]
|
||||
# @COMPLEXITY: 3
|
||||
# @SEMANTICS: dataset_review, api, tests, lifecycle, exports, orchestration
|
||||
# @PURPOSE: Verify backend US1 dataset review lifecycle, export, parsing, and dictionary-resolution contracts.
|
||||
# @LAYER: API
|
||||
# @RELATION: [BINDS_TO] ->[DatasetReviewApi]
|
||||
# @RELATION: [BINDS_TO] ->[DatasetReviewOrchestrator]
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from src.app import app
|
||||
from src.api.routes.dataset_review import _get_orchestrator, _get_repository
|
||||
from src.core.config_models import Environment, GlobalSettings, AppConfig
|
||||
from src.core.utils.superset_context_extractor import SupersetContextExtractor
|
||||
from src.dependencies import get_config_manager, get_current_user, get_task_manager
|
||||
from src.models.dataset_review import (
|
||||
BusinessSummarySource,
|
||||
ConfidenceState,
|
||||
DatasetReviewSession,
|
||||
FindingArea,
|
||||
FindingSeverity,
|
||||
ReadinessState,
|
||||
RecommendedAction,
|
||||
ResolutionState,
|
||||
SessionPhase,
|
||||
SessionStatus,
|
||||
)
|
||||
from src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand
|
||||
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
|
||||
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
# [DEF:_make_user:Function]
|
||||
def _make_user():
|
||||
admin_role = SimpleNamespace(name="Admin", permissions=[])
|
||||
return SimpleNamespace(id="user-1", username="tester", roles=[admin_role])
|
||||
# [/DEF:_make_user:Function]
|
||||
|
||||
|
||||
# [DEF:_make_config_manager:Function]
def _make_config_manager():
    """Build a MagicMock config manager backed by a single DEV environment."""
    environment = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    app_config = AppConfig(environments=[environment], settings=GlobalSettings())

    def _lookup(env_id):
        # Only "env-1" resolves; every other id behaves like a missing environment.
        return environment if env_id == "env-1" else None

    mock_manager = MagicMock()
    mock_manager.get_environment.side_effect = _lookup
    mock_manager.get_config.return_value = app_config
    return mock_manager
# [/DEF:_make_config_manager:Function]
|
||||
|
||||
|
||||
# [DEF:_make_session:Function]
def _make_session():
    """Build one fully-populated, review-ready DatasetReviewSession fixture."""
    timestamp = datetime.now(timezone.utc)
    fields = {
        "session_id": "sess-1",
        "user_id": "user-1",
        "environment_id": "env-1",
        "source_kind": "superset_link",
        "source_input": "http://superset.local/dashboard/10",
        "dataset_ref": "public.sales",
        "dataset_id": 42,
        "dashboard_id": 10,
        "readiness_state": ReadinessState.REVIEW_READY,
        "recommended_action": RecommendedAction.REVIEW_DOCUMENTATION,
        "status": SessionStatus.ACTIVE,
        "current_phase": SessionPhase.REVIEW,
        # All three timestamps share one instant so ordering assertions stay trivial.
        "created_at": timestamp,
        "updated_at": timestamp,
        "last_activity_at": timestamp,
    }
    return DatasetReviewSession(**fields)
# [/DEF:_make_session:Function]
|
||||
|
||||
|
||||
# [DEF:dataset_review_api_dependencies:Function]
@pytest.fixture(autouse=True)
def dataset_review_api_dependencies():
    """Override auth/config/task dependencies for every test, then restore them."""
    deps = {
        "user": _make_user(),
        "config_manager": _make_config_manager(),
        "task_manager": MagicMock(),
    }

    overrides = {
        get_current_user: deps["user"],
        get_config_manager: deps["config_manager"],
        get_task_manager: deps["task_manager"],
    }
    for dependency, value in overrides.items():
        # Bind value as a default argument so each lambda captures its own object.
        app.dependency_overrides[dependency] = lambda value=value: value

    yield deps
    # Clearing (not popping) also removes overrides individual tests added.
    app.dependency_overrides.clear()
# [/DEF:dataset_review_api_dependencies:Function]
|
||||
|
||||
|
||||
# [DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# @PURPOSE: Verify dashboard links recover dataset context and preserve explicit partial-recovery markers.
def test_parse_superset_link_dashboard_partial_recovery():
    """A dashboard link with multiple datasets yields a flagged partial-recovery parse."""
    env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    fake_client = MagicMock()
    # Two datasets on the dashboard force the extractor to pick one and flag ambiguity.
    fake_client.get_dashboard_detail.return_value = {
        "datasets": [{"id": 42}, {"id": 77}],
    }
    fake_client.get_dataset_detail.return_value = {
        "table_name": "sales",
        "schema": "public",
    }

    extractor = SupersetContextExtractor(environment=env, client=fake_client)
    # native_filters is URL-encoded JSON: [{"name": "country", "value": "DE"}].
    result = extractor.parse_superset_link(
        "http://superset.local/dashboard/10/?native_filters=%5B%7B%22name%22%3A%22country%22%2C%22value%22%3A%22DE%22%7D%5D"
    )

    # Extractor settled on the first dataset (42) but kept the ambiguity explicit.
    assert result.dataset_id == 42
    assert result.dashboard_id == 10
    assert result.dataset_ref == "public.sales"
    assert result.partial_recovery is True
    assert "multiple_dashboard_datasets" in result.unresolved_references
    assert result.imported_filters[0]["filter_name"] == "country"
# [/DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
|
||||
|
||||
|
||||
# [DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# @PURPOSE: Verify trusted dictionary exact matches outrank fuzzy candidates and unresolved fields stay explicit.
def test_resolve_from_dictionary_prefers_exact_match():
    """Exact dictionary rows win over fuzzy variants; missing fields stay unresolved."""
    resolver = SemanticSourceResolver()
    result = resolver.resolve_from_dictionary(
        {
            "source_ref": "dict://finance",
            "rows": [
                # Exact-name row for "revenue".
                {
                    "field_name": "revenue",
                    "verbose_name": "Revenue",
                    "description": "Recognized revenue amount",
                    "display_format": "$,.2f",
                },
                # Near-miss row ("revnue") that could only fuzzy-match.
                {
                    "field_name": "revnue",
                    "verbose_name": "Revenue typo",
                    "description": "Fuzzy variant",
                },
            ],
        },
        [
            # "revenue" has a dictionary row; "margin" has none at all.
            {"field_name": "revenue", "is_locked": False},
            {"field_name": "margin", "is_locked": False},
        ],
    )

    resolved_exact = next(item for item in result.resolved_fields if item["field_name"] == "revenue")
    unresolved = next(item for item in result.resolved_fields if item["field_name"] == "margin")

    assert resolved_exact["applied_candidate"]["match_type"] == "exact"
    assert resolved_exact["provenance"] == "dictionary_exact"
    assert unresolved["status"] == "unresolved"
    assert "margin" in result.unresolved_fields
    # Any unresolved field marks the whole resolution as partial.
    assert result.partial_recovery is True
# [/DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
|
||||
|
||||
|
||||
# [DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# @PURPOSE: Verify session start persists usable recovery-required state when Superset intake is partial.
def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_api_dependencies):
    """Partial Superset parse must leave the persisted session in RECOVERY state with a warning finding."""
    repository = MagicMock()
    created_session = _make_session()
    # Repository echoes back a session already downgraded to recovery state.
    created_session.readiness_state = ReadinessState.RECOVERY_REQUIRED
    created_session.current_phase = SessionPhase.RECOVERY

    repository.create_session.return_value = created_session
    repository.save_profile_and_findings.return_value = created_session
    repository.db = MagicMock()

    orchestrator = DatasetReviewOrchestrator(
        repository=repository,
        config_manager=dataset_review_api_dependencies["config_manager"],
        task_manager=None,
    )

    # Simulated extractor output: usable refs but an explicit unresolved reference.
    parsed_context = SimpleNamespace(
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        chart_id=None,
        partial_recovery=True,
        unresolved_references=["dashboard_dataset_binding_missing"],
    )

    # Patch the extractor at its orchestrator-side import path so no HTTP occurs.
    with patch(
        "src.services.dataset_review.orchestrator.SupersetContextExtractor.parse_superset_link",
        return_value=parsed_context,
    ):
        result = orchestrator.start_session(
            StartSessionCommand(
                user=dataset_review_api_dependencies["user"],
                environment_id="env-1",
                source_kind="superset_link",
                source_input="http://superset.local/dashboard/10",
            )
        )

    assert result.session.readiness_state == ReadinessState.RECOVERY_REQUIRED
    assert result.findings
    # Partial recovery surfaces as a warning, not a blocker.
    assert result.findings[0].severity.value == "warning"
    repository.create_session.assert_called_once()
    repository.save_profile_and_findings.assert_called_once()
# [/DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
|
||||
|
||||
|
||||
# [DEF:test_start_session_endpoint_returns_created_summary:Function]
# @PURPOSE: Verify POST session lifecycle endpoint returns a persisted ownership-scoped summary.
def test_start_session_endpoint_returns_created_summary(dataset_review_api_dependencies):
    """POST /sessions returns 201 with the orchestrator-created session summary."""
    session = _make_session()
    orchestrator = MagicMock()
    # Endpoint only reads .session from the result; findings/context are ignored here.
    orchestrator.start_session.return_value = SimpleNamespace(session=session, findings=[], parsed_context=None)

    # Override the route's private dependency; autouse fixture clears it afterwards.
    app.dependency_overrides[_get_orchestrator] = lambda: orchestrator

    response = client.post(
        "/api/dataset-orchestration/sessions",
        json={
            "source_kind": "superset_link",
            "source_input": "http://superset.local/dashboard/10",
            "environment_id": "env-1",
        },
    )

    assert response.status_code == 201
    payload = response.json()
    assert payload["session_id"] == "sess-1"
    assert payload["dataset_ref"] == "public.sales"
    assert payload["environment_id"] == "env-1"
# [/DEF:test_start_session_endpoint_returns_created_summary:Function]
|
||||
|
||||
|
||||
# [DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
# @PURPOSE: Verify lifecycle get/patch/delete plus documentation and validation exports remain ownership-scoped and usable.
def test_get_session_detail_export_and_lifecycle_endpoints(dataset_review_api_dependencies):
    """Exercise GET/PATCH/DELETE plus both export endpoints against one mocked session aggregate."""
    now = datetime.now(timezone.utc)
    # spec= keeps the mock honest: accessing an attribute the model lacks raises.
    session = MagicMock(spec=DatasetReviewSession)
    session.session_id = "sess-1"
    session.user_id = "user-1"
    session.environment_id = "env-1"
    session.source_kind = "superset_link"
    session.source_input = "http://superset.local/dashboard/10"
    session.dataset_ref = "public.sales"
    session.dataset_id = 42
    session.dashboard_id = 10
    session.readiness_state = ReadinessState.REVIEW_READY
    session.recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
    session.status = SessionStatus.ACTIVE
    session.current_phase = SessionPhase.REVIEW
    session.created_at = now
    session.updated_at = now
    session.last_activity_at = now
    # Profile mirrors every field the detail serializer and exports read.
    session.profile = SimpleNamespace(
        dataset_name="sales",
        business_summary="Summary text",
        confidence_state=ConfidenceState.MOSTLY_CONFIRMED,
        dataset_type="unknown",
        schema_name=None,
        database_name=None,
        business_summary_source=BusinessSummarySource.IMPORTED,
        description=None,
        is_sqllab_view=False,
        completeness_score=None,
        has_blocking_findings=False,
        has_warning_findings=True,
        manual_summary_locked=False,
        created_at=now,
        updated_at=now,
        profile_id="profile-1",
        session_id="sess-1",
    )
    # One open warning finding so export bodies have real content to render.
    session.findings = [
        SimpleNamespace(
            finding_id="f-1",
            session_id="sess-1",
            area=FindingArea.SOURCE_INTAKE,
            severity=FindingSeverity.WARNING,
            code="PARTIAL_SUPERSET_RECOVERY",
            title="Partial",
            message="Some filters require review",
            resolution_state=ResolutionState.OPEN,
            resolution_note=None,
            caused_by_ref=None,
            created_at=now,
            resolved_at=None,
        )
    ]
    # Remaining child collections are empty — detail serialization must tolerate that.
    session.collaborators = []
    session.semantic_sources = []
    session.semantic_fields = []
    session.imported_filters = []
    session.template_variables = []
    session.execution_mappings = []
    session.clarification_sessions = []
    session.previews = []
    session.run_contexts = []

    repository = MagicMock()
    repository.load_session_detail.return_value = session
    repository.list_sessions_for_user.return_value = [session]
    repository.db = MagicMock()

    # Route-level repository override; autouse fixture clears it after the test.
    app.dependency_overrides[_get_repository] = lambda: repository

    # GET detail.
    detail_response = client.get("/api/dataset-orchestration/sessions/sess-1")
    assert detail_response.status_code == 200
    assert detail_response.json()["session_id"] == "sess-1"

    # PATCH lifecycle status.
    patch_response = client.patch(
        "/api/dataset-orchestration/sessions/sess-1",
        json={"status": "paused"},
    )
    assert patch_response.status_code == 200
    assert patch_response.json()["status"] == "paused"

    # Documentation export (JSON format).
    doc_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/documentation?format=json")
    assert doc_response.status_code == 200
    assert doc_response.json()["artifact_type"] == "documentation"

    # Validation export (markdown rendered inside the JSON envelope).
    validation_response = client.get("/api/dataset-orchestration/sessions/sess-1/exports/validation?format=markdown")
    assert validation_response.status_code == 200
    assert validation_response.json()["artifact_type"] == "validation_report"
    assert "Validation Report" in validation_response.json()["content"]["markdown"]

    # DELETE lifecycle endpoint.
    delete_response = client.delete("/api/dataset-orchestration/sessions/sess-1")
    assert delete_response.status_code == 204
# [/DEF:test_get_session_detail_export_and_lifecycle_endpoints:Function]
|
||||
|
||||
# [/DEF:DatasetReviewApiTests:Module]
|
||||
533
backend/src/api/routes/dataset_review.py
Normal file
533
backend/src/api/routes/dataset_review.py
Normal file
@@ -0,0 +1,533 @@
|
||||
# [DEF:DatasetReviewApi:Module]
|
||||
# @COMPLEXITY: 4
|
||||
# @SEMANTICS: dataset_review, api, session_lifecycle, exports, rbac, feature_flags
|
||||
# @PURPOSE: Expose dataset review session lifecycle and export endpoints for backend US1.
|
||||
# @LAYER: API
|
||||
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
|
||||
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
|
||||
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
|
||||
# @PRE: Authenticated user and valid environment/session scope are required for all mutations and reads.
|
||||
# @POST: Returns ownership-scoped session state and export payloads with feature-flag/RBAC enforcement.
|
||||
# @SIDE_EFFECT: Persists session state and may enqueue recovery task.
|
||||
# @DATA_CONTRACT: Input[HTTP Request] -> Output[SessionSummary | SessionDetail | ExportArtifactResponse | HTTP 204]
|
||||
# @INVARIANT: No cross-user session leakage is allowed; export payloads only expose the current user's accessible session.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# [DEF:DatasetReviewApi.imports:Block]
|
||||
import json
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from src.core.database import get_db
|
||||
from src.core.logger import belief_scope, logger
|
||||
from src.dependencies import get_config_manager, get_current_user, get_task_manager, has_permission
|
||||
from src.models.auth import User
|
||||
from src.models.dataset_review import (
|
||||
ArtifactFormat,
|
||||
DatasetReviewSession,
|
||||
RecommendedAction,
|
||||
SessionStatus,
|
||||
)
|
||||
from src.schemas.dataset_review import SessionDetail, SessionSummary
|
||||
from src.services.dataset_review.orchestrator import (
|
||||
DatasetReviewOrchestrator,
|
||||
StartSessionCommand,
|
||||
)
|
||||
from src.services.dataset_review.repositories.session_repository import (
|
||||
DatasetReviewSessionRepository,
|
||||
)
|
||||
# [/DEF:DatasetReviewApi.imports:Block]
|
||||
|
||||
router = APIRouter(prefix="/api/dataset-orchestration", tags=["Dataset Orchestration"])
|
||||
|
||||
|
||||
# [DEF:StartSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for starting one dataset review session from a Superset link or dataset selection.
class StartSessionRequest(BaseModel):
    """Payload for POST /sessions: intake kind, raw input, and target environment."""

    # Restricted to the two supported intake kinds via a regex pattern constraint.
    source_kind: str = Field(..., pattern="^(superset_link|dataset_selection)$")
    # Raw Superset URL or dataset reference; must be non-empty.
    source_input: str = Field(..., min_length=1)
    # Identifier of the configured Superset environment to run against.
    environment_id: str = Field(..., min_length=1)
# [/DEF:StartSessionRequest:Class]
|
||||
|
||||
|
||||
# [DEF:UpdateSessionRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for lifecycle state updates on an existing session.
class UpdateSessionRequest(BaseModel):
    """Payload for PATCH /sessions/{session_id}: new lifecycle status plus optional note."""

    # Target lifecycle state; validated against the SessionStatus enum.
    status: SessionStatus
    # Free-text note accompanying the transition; optional.
    note: Optional[str] = None
# [/DEF:UpdateSessionRequest:Class]
|
||||
|
||||
|
||||
# [DEF:SessionCollectionResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Paginated ownership-scoped dataset review session collection response.
class SessionCollectionResponse(BaseModel):
    """Page of session summaries with simple offset-pagination metadata."""

    # Summaries for the current page only.
    items: List[SessionSummary]
    # Total accessible sessions across all pages.
    total: int
    # 1-based page index echoed back from the request.
    page: int
    page_size: int
    # True when further pages exist beyond this one.
    has_next: bool
# [/DEF:SessionCollectionResponse:Class]
|
||||
|
||||
|
||||
# [DEF:ExportArtifactResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Inline export response for documentation or validation outputs without introducing unrelated persistence changes.
class ExportArtifactResponse(BaseModel):
    """Envelope returned by the export endpoints; content is delivered inline."""

    artifact_id: str
    session_id: str
    # "documentation" or "validation_report".
    artifact_type: str
    # Requested output format (e.g. markdown / json).
    format: str
    # Inline pseudo-URI (inline://...) — no artifact is persisted server-side.
    storage_ref: str
    created_by_user_id: str
    created_at: Optional[str] = None
    # Rendered payload keyed by format (e.g. {"markdown": ...} or structured JSON).
    content: Dict[str, Any]
# [/DEF:ExportArtifactResponse:Class]
|
||||
|
||||
|
||||
# [DEF:_require_auto_review_flag:Function]
# @COMPLEXITY: 3
# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag.
# @RELATION: [DEPENDS_ON] ->[ConfigManager]
def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool:
    """Dependency that rejects requests with 404 while the feature flag is off."""
    with belief_scope("dataset_review.require_auto_review_flag"):
        flag_enabled = config_manager.get_config().settings.ff_dataset_auto_review
        if flag_enabled:
            return True
        # 404 (not 403) so a disabled feature is indistinguishable from a missing route.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Dataset auto review feature is disabled",
        )
# [/DEF:_require_auto_review_flag:Function]
|
||||
|
||||
|
||||
# [DEF:_get_repository:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build repository dependency for dataset review session aggregate access.
def _get_repository(db: Session = Depends(get_db)) -> DatasetReviewSessionRepository:
    """Provide a request-scoped repository bound to the injected DB session."""
    repository = DatasetReviewSessionRepository(db)
    return repository
# [/DEF:_get_repository:Function]
|
||||
|
||||
|
||||
# [DEF:_get_orchestrator:Function]
# @COMPLEXITY: 3
# @PURPOSE: Build orchestrator dependency for session lifecycle actions.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator]
def _get_orchestrator(
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    config_manager=Depends(get_config_manager),
    task_manager=Depends(get_task_manager),
) -> DatasetReviewOrchestrator:
    """Assemble a request-scoped orchestrator from its injected collaborators."""
    collaborators = {
        "repository": repository,
        "config_manager": config_manager,
        "task_manager": task_manager,
    }
    return DatasetReviewOrchestrator(**collaborators)
# [/DEF:_get_orchestrator:Function]
|
||||
|
||||
|
||||
# [DEF:_serialize_session_summary:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API summary DTO.
# @RELATION: [DEPENDS_ON] ->[SessionSummary]
def _serialize_session_summary(session: DatasetReviewSession) -> SessionSummary:
    """Project the ORM aggregate onto the public summary schema via attribute access."""
    summary = SessionSummary.model_validate(session, from_attributes=True)
    return summary
# [/DEF:_serialize_session_summary:Function]
|
||||
|
||||
|
||||
# [DEF:_serialize_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Map SQLAlchemy session aggregate root into stable API detail DTO.
# @RELATION: [DEPENDS_ON] ->[SessionDetail]
def _serialize_session_detail(session: DatasetReviewSession) -> SessionDetail:
    """Project the ORM aggregate onto the public detail schema via attribute access."""
    detail = SessionDetail.model_validate(session, from_attributes=True)
    return detail
# [/DEF:_serialize_session_detail:Function]
|
||||
|
||||
|
||||
# [DEF:_get_owned_session_or_404:Function]
# @COMPLEXITY: 4
# @PURPOSE: Resolve one session for current user or collaborator scope, returning 404 when inaccessible.
# @RELATION: [CALLS] ->[load_detail]
# @PRE: session_id is a non-empty identifier and current_user is authenticated.
# @POST: returns accessible session detail or raises HTTP 404 without leaking foreign-session existence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[session_id:str,current_user:User] -> Output[DatasetReviewSession|HTTPException]
def _get_owned_session_or_404(
    repository: DatasetReviewSessionRepository,
    session_id: str,
    current_user: User,
) -> DatasetReviewSession:
    """Load a session in the caller's access scope, or raise an undisclosing 404."""
    with belief_scope("dataset_review.get_owned_session_or_404"):
        accessible = repository.load_session_detail(session_id, current_user.id)
        if accessible is not None:
            return accessible
        # Same 404 for "missing" and "not yours": foreign sessions are never disclosed.
        logger.explore(
            "Dataset review session not found in current ownership scope",
            extra={"session_id": session_id, "user_id": current_user.id},
        )
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found")
# [/DEF:_get_owned_session_or_404:Function]
|
||||
|
||||
|
||||
# [DEF:_build_documentation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce session documentation export content from current persisted review state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _build_documentation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    """Render the session's documentation export as markdown or structured JSON.

    Returns a dict with ``storage_ref`` (inline pseudo-URI) and ``content``
    (the rendered payload keyed by format).
    """
    profile = session.profile
    # NOTE(review): severity.value sorts lexicographically, not by semantic rank —
    # confirm whether blocker-first ordering is required here.
    findings = sorted(session.findings, key=lambda item: (item.severity.value, item.code))
    if export_format == ArtifactFormat.MARKDOWN:
        lines = [
            f"# Dataset Review: {session.dataset_ref}",
            "",
            f"- Session ID: {session.session_id}",
            f"- Environment: {session.environment_id}",
            f"- Readiness: {session.readiness_state.value}",
            f"- Recommended action: {session.recommended_action.value}",
            "",
            "## Business Summary",
            profile.business_summary if profile else "No profile summary available.",
            "",
            "## Findings",
        ]
        if findings:
            for finding in findings:
                lines.append(
                    f"- [{finding.severity.value}] {finding.title}: {finding.message}"
                )
        else:
            lines.append("- No findings recorded.")
        content = {"markdown": "\n".join(lines)}
        storage_ref = f"inline://dataset-review/{session.session_id}/documentation.md"
    else:
        content = {
            "session": _serialize_session_summary(session).model_dump(mode="json"),
            # Explicit conditional instead of `profile and {...}`: the old form
            # leaked the raw profile object into the payload when profile was
            # falsy-but-not-None; now an absent profile is always serialized as None.
            "profile": {
                "dataset_name": profile.dataset_name,
                "business_summary": profile.business_summary,
                "confidence_state": profile.confidence_state.value,
                "dataset_type": profile.dataset_type,
            } if profile else None,
            "findings": [
                {
                    "code": finding.code,
                    "severity": finding.severity.value,
                    "title": finding.title,
                    "message": finding.message,
                    "resolution_state": finding.resolution_state.value,
                }
                for finding in findings
            ],
        }
        storage_ref = f"inline://dataset-review/{session.session_id}/documentation.json"
    return {"storage_ref": storage_ref, "content": content}
# [/DEF:_build_documentation_export:Function]
|
||||
|
||||
|
||||
# [DEF:_build_validation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce validation-focused export content from persisted findings and readiness state.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _build_validation_export(session: DatasetReviewSession, export_format: ArtifactFormat) -> Dict[str, Any]:
    """Render the session's validation report as markdown or structured JSON."""
    ordered_findings = sorted(
        session.findings, key=lambda entry: (entry.severity.value, entry.code)
    )
    if export_format == ArtifactFormat.MARKDOWN:
        report_lines = [
            f"# Validation Report: {session.dataset_ref}",
            "",
            f"- Session ID: {session.session_id}",
            f"- Readiness: {session.readiness_state.value}",
            "",
            "## Findings",
        ]
        if not ordered_findings:
            report_lines.append("- No findings recorded.")
        else:
            report_lines.extend(
                f"- `{entry.code}` [{entry.severity.value}] {entry.message}"
                for entry in ordered_findings
            )
        payload = {"markdown": "\n".join(report_lines)}
        ref = f"inline://dataset-review/{session.session_id}/validation.md"
    else:
        payload = {
            "session_id": session.session_id,
            "dataset_ref": session.dataset_ref,
            "readiness_state": session.readiness_state.value,
            "findings": [
                {
                    "finding_id": entry.finding_id,
                    "area": entry.area.value,
                    "severity": entry.severity.value,
                    "code": entry.code,
                    "title": entry.title,
                    "message": entry.message,
                    "resolution_state": entry.resolution_state.value,
                }
                for entry in ordered_findings
            ],
        }
        ref = f"inline://dataset-review/{session.session_id}/validation.json"
    return {"storage_ref": ref, "content": payload}
# [/DEF:_build_validation_export:Function]
|
||||
|
||||
|
||||
# [DEF:list_sessions:Function]
# @COMPLEXITY: 3
# @PURPOSE: List resumable dataset review sessions for the current user.
# @RELATION: [CALLS] ->[list_user_sess]
@router.get(
    "/sessions",
    response_model=SessionCollectionResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def list_sessions(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Return one offset-paginated page of the caller's sessions."""
    with belief_scope("dataset_review.list_sessions"):
        accessible_sessions = repository.list_sessions_for_user(current_user.id)
        total = len(accessible_sessions)
        # In-memory offset pagination over the already-scoped list.
        offset = (page - 1) * page_size
        window = accessible_sessions[offset:offset + page_size]
        return SessionCollectionResponse(
            items=[_serialize_session_summary(entry) for entry in window],
            total=total,
            page=page,
            page_size=page_size,
            has_next=offset + page_size < total,
        )
# [/DEF:list_sessions:Function]
|
||||
|
||||
|
||||
# [DEF:start_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Start a new dataset review session from a Superset link or dataset selection.
# @RELATION: [CALLS] ->[DatasetReviewOrchestrator.start_session]
# @PRE: feature flag enabled, user authenticated, and request body valid.
# @POST: returns persisted session summary scoped to the authenticated user.
# @SIDE_EFFECT: persists session/profile/findings and may enqueue recovery task.
# @DATA_CONTRACT: Input[StartSessionRequest] -> Output[SessionSummary]
@router.post(
    "/sessions",
    response_model=SessionSummary,
    status_code=status.HTTP_201_CREATED,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def start_session(
    request: StartSessionRequest,
    orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator),
    current_user: User = Depends(get_current_user),
):
    """Create a session via the orchestrator and return its 201 summary."""
    with belief_scope("dataset_review.start_session"):
        try:
            result = orchestrator.start_session(
                StartSessionCommand(
                    user=current_user,
                    environment_id=request.environment_id,
                    source_kind=request.source_kind,
                    source_input=request.source_input,
                )
            )
        except ValueError as exc:
            logger.explore(
                "Dataset review session start rejected",
                extra={"user_id": current_user.id, "error": str(exc)},
            )
            detail = str(exc)
            # Exact-message match maps the missing-environment case to 404;
            # every other orchestrator ValueError becomes a 400.
            status_code = status.HTTP_404_NOT_FOUND if detail == "Environment not found" else status.HTTP_400_BAD_REQUEST
            raise HTTPException(status_code=status_code, detail=detail) from exc

        return _serialize_session_summary(result.session)
# [/DEF:start_session:Function]
|
||||
|
||||
|
||||
# [DEF:get_session_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Return the full accessible dataset review session aggregate for current user scope.
# @RELATION: [CALLS] ->[_get_owned_session_or_404]
@router.get(
    "/sessions/{session_id}",
    response_model=SessionDetail,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def get_session_detail(
    session_id: str,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Return the full session detail, or 404 when outside the caller's scope."""
    with belief_scope("dataset_review.get_session_detail"):
        accessible_session = _get_owned_session_or_404(repository, session_id, current_user)
        return _serialize_session_detail(accessible_session)
# [/DEF:get_session_detail:Function]
|
||||
|
||||
|
||||
# [DEF:update_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Update resumable lifecycle status for an owned dataset review session.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested status is allowed by lifecycle policy.
# @POST: returns updated summary without changing ownership or unrelated aggregates.
# @SIDE_EFFECT: mutates session lifecycle fields in persistence.
# @DATA_CONTRACT: Input[UpdateSessionRequest] -> Output[SessionSummary]
@router.patch(
    "/sessions/{session_id}",
    response_model=SessionSummary,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def update_session(
    session_id: str,
    request: UpdateSessionRequest,
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Apply a lifecycle status change to a session the caller owns."""
    with belief_scope("dataset_review.update_session"):
        session = _get_owned_session_or_404(repository, session_id, current_user)
        # Accessibility (above) includes collaborators; mutation is owner-only,
        # hence the second, stricter check here.
        if session.user_id != current_user.id:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can mutate session lifecycle")

        session.status = request.status
        if request.status == SessionStatus.PAUSED:
            # Pausing flips the suggested next step to resuming.
            session.recommended_action = RecommendedAction.RESUME_SESSION
        elif request.status in {SessionStatus.ARCHIVED, SessionStatus.CANCELLED, SessionStatus.COMPLETED}:
            # Terminal states detach any still-referenced background task.
            session.active_task_id = None

        repository.db.commit()
        repository.db.refresh(session)
        return _serialize_session_summary(session)
# [/DEF:update_session:Function]
|
||||
|
||||
|
||||
# [DEF:delete_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Archive or hard-delete a session owned by the current user.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is owner-scoped to current user.
# @POST: session is archived or deleted and no foreign-session existence is disclosed.
# @SIDE_EFFECT: mutates or deletes persisted session aggregate.
# @DATA_CONTRACT: Input[session_id:str,hard_delete:bool] -> Output[HTTP 204]
@router.delete(
    "/sessions/{session_id}",
    status_code=status.HTTP_204_NO_CONTENT,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "MANAGE")),
    ],
)
async def delete_session(
    session_id: str,
    hard_delete: bool = Query(False),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Remove a session: soft-archive by default, physical delete on request.

    The owner-scoped lookup yields 404 for foreign sessions, so existence is
    never disclosed; a non-owner who can still see the session gets 403.
    """
    with belief_scope("dataset_review.delete_session"):
        target = _get_owned_session_or_404(repository, session_id, current_user)
        if target.user_id != current_user.id:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Only the owner can delete a session",
            )

        db = repository.db
        if hard_delete:
            db.delete(target)
        else:
            # Soft path: archive and drop the active-task link instead of
            # removing the row.
            target.status = SessionStatus.ARCHIVED
            target.active_task_id = None
        db.commit()
        return Response(status_code=status.HTTP_204_NO_CONTENT)
# [/DEF:delete_session:Function]

# [DEF:export_documentation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export documentation output for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns ownership-scoped export payload without fabricating unrelated artifacts.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/documentation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_documentation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Build the documentation export artifact for an accessible session.

    Rejects unsupported formats up front with 400, then assembles the
    export payload and wraps it in the artifact response envelope.
    ``format`` intentionally mirrors the public query-parameter name.
    """
    with belief_scope("dataset_review.export_documentation"):
        # Validate format before touching persistence.
        if format not in (ArtifactFormat.JSON, ArtifactFormat.MARKDOWN):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Only json and markdown exports are supported",
            )
        owned = _get_owned_session_or_404(repository, session_id, current_user)
        payload = _build_documentation_export(owned, format)
        return ExportArtifactResponse(
            artifact_id=f"documentation-{owned.session_id}-{format.value}",
            session_id=owned.session_id,
            artifact_type="documentation",
            format=format.value,
            storage_ref=payload["storage_ref"],
            created_by_user_id=current_user.id,
            content=payload["content"],
        )
# [/DEF:export_documentation:Function]

# [DEF:export_validation:Function]
# @COMPLEXITY: 4
# @PURPOSE: Export validation findings for the current session in JSON or Markdown form.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: session is accessible to current user and requested format is supported.
# @POST: returns explicit validation export payload scoped to current user session access.
# @SIDE_EFFECT: none beyond response construction.
# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse]
@router.get(
    "/sessions/{session_id}/exports/validation",
    response_model=ExportArtifactResponse,
    dependencies=[
        Depends(_require_auto_review_flag),
        Depends(has_permission("dataset:session", "READ")),
    ],
)
async def export_validation(
    session_id: str,
    format: ArtifactFormat = Query(ArtifactFormat.JSON),
    repository: DatasetReviewSessionRepository = Depends(_get_repository),
    current_user: User = Depends(get_current_user),
):
    """Build the validation-report export artifact for an accessible session.

    Mirrors the documentation export flow: 400 on an unsupported format,
    owner-scoped lookup (404 on foreign sessions), then artifact assembly.
    """
    with belief_scope("dataset_review.export_validation"):
        if format not in (ArtifactFormat.JSON, ArtifactFormat.MARKDOWN):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Only json and markdown exports are supported",
            )
        owned = _get_owned_session_or_404(repository, session_id, current_user)
        payload = _build_validation_export(owned, format)
        return ExportArtifactResponse(
            artifact_id=f"validation-{owned.session_id}-{format.value}",
            session_id=owned.session_id,
            artifact_type="validation_report",
            format=format.value,
            storage_ref=payload["storage_ref"],
            created_by_user_id=current_user.id,
            content=payload["content"],
        )
# [/DEF:export_validation:Function]

# [/DEF:DatasetReviewApi:Module]