fix(us3): align dataset review contracts and acceptance gates

This commit is contained in:
2026-03-17 18:20:36 +03:00
parent 5f44435a4b
commit 8728756a3f
8 changed files with 163 additions and 33 deletions

View File

@@ -72,8 +72,13 @@ client = TestClient(app)
# [DEF:_make_user:Function]
def _make_user():
admin_role = SimpleNamespace(name="Admin", permissions=[])
return SimpleNamespace(id="user-1", username="tester", roles=[admin_role])
permissions = [
SimpleNamespace(resource="dataset:session", action="READ"),
SimpleNamespace(resource="dataset:session", action="MANAGE"),
SimpleNamespace(resource="dataset:execution:launch", action="EXECUTE"),
]
dataset_review_role = SimpleNamespace(name="DatasetReviewOperator", permissions=permissions)
return SimpleNamespace(id="user-1", username="tester", roles=[dataset_review_role])
# [/DEF:_make_user:Function]
@@ -670,7 +675,7 @@ def test_us2_field_semantic_override_lock_unlock_and_feedback(dataset_review_api
# [DEF:test_us3_mapping_patch_approval_preview_and_launch_endpoints:Function]
# @PURPOSE: US3 execution endpoints should persist manual overrides, preserve explicit approval semantics, return Superset preview truth, and expose audited launch handoff.
# @PURPOSE: US3 execution endpoints should persist manual overrides, preserve explicit approval semantics, return contract-shaped preview truth, and expose audited launch handoff.
def test_us3_mapping_patch_approval_preview_and_launch_endpoints(dataset_review_api_dependencies):
session = _make_us3_session()
latest_preview = CompiledPreview(
@@ -777,23 +782,99 @@ def test_us3_mapping_patch_approval_preview_and_launch_endpoints(dataset_review_
assert batch_response.status_code == 200
assert batch_response.json()[0]["mapping_id"] == "map-1"
list_response = client.get("/api/dataset-orchestration/sessions/sess-1/mappings")
assert list_response.status_code == 200
assert list_response.json()["items"][0]["mapping_id"] == "map-1"
preview_response = client.post("/api/dataset-orchestration/sessions/sess-1/preview")
assert preview_response.status_code == 200
preview_payload = preview_response.json()
assert preview_payload["session_id"] == "sess-1"
assert preview_payload["preview_id"] == "preview-1"
assert preview_payload["preview_status"] == "ready"
assert preview_payload["preview"]["compiled_by"] == "superset"
assert "SELECT * FROM sales" in preview_payload["preview"]["compiled_sql"]
assert preview_payload["compiled_by"] == "superset"
assert "SELECT * FROM sales" in preview_payload["compiled_sql"]
orchestrator.prepare_launch_preview.return_value = PreparePreviewResult(
session=session,
preview=SimpleNamespace(
preview_id="preview-pending",
session_id="sess-1",
preview_status=PreviewStatus.PENDING,
compiled_sql=None,
preview_fingerprint="fingerprint-pending",
compiled_by="superset",
error_code=None,
error_details=None,
compiled_at=None,
created_at=datetime.now(timezone.utc),
),
blocked_reasons=[],
)
preview_enqueue_response = client.post("/api/dataset-orchestration/sessions/sess-1/preview")
assert preview_enqueue_response.status_code == 202
assert preview_enqueue_response.json() == {
"session_id": "sess-1",
"preview_status": "pending",
"task_id": None,
}
launch_response = client.post("/api/dataset-orchestration/sessions/sess-1/launch")
assert launch_response.status_code == 200
assert launch_response.status_code == 201
launch_payload = launch_response.json()
assert launch_payload["session"]["session_id"] == "sess-1"
assert launch_payload["run_context"]["run_context_id"] == "run-1"
assert launch_payload["run_context"]["sql_lab_session_ref"] == "sql-lab-77"
assert launch_payload["run_context"]["launch_status"] == "started"
assert launch_payload["redirect_url"] == "http://superset.local/superset/sqllab?queryId=sql-lab-77"
# [/DEF:test_us3_mapping_patch_approval_preview_and_launch_endpoints:Function]
# [DEF:test_us3_launch_endpoint_requires_launch_permission:Function]
# @PURPOSE: Launch endpoint should enforce the contract RBAC permission instead of the generic session-manage permission.
def test_us3_launch_endpoint_requires_launch_permission(dataset_review_api_dependencies):
session = _make_us3_session()
repository = MagicMock()
repository.load_session_detail.return_value = session
repository.db = MagicMock()
repository.event_logger = MagicMock(spec=SessionEventLogger)
run_context = SimpleNamespace(
run_context_id="run-1",
session_id="sess-1",
dataset_ref="public.sales",
environment_id="env-1",
preview_id="preview-1",
sql_lab_session_ref="sql-lab-77",
effective_filters=[],
template_params={},
approved_mapping_ids=["map-1"],
semantic_decision_refs=[],
open_warning_refs=[],
launch_status=LaunchStatus.STARTED,
launch_error=None,
created_at=datetime.now(timezone.utc),
)
orchestrator = MagicMock()
orchestrator.launch_dataset.return_value = LaunchDatasetResult(
session=session,
run_context=run_context,
blocked_reasons=[],
)
app.dependency_overrides[_get_repository] = lambda: repository
app.dependency_overrides[_get_orchestrator] = lambda: orchestrator
dataset_review_api_dependencies["user"].roles = [
SimpleNamespace(
name="SessionManager",
permissions=[SimpleNamespace(resource="dataset:session", action="MANAGE")],
)
]
response = client.post("/api/dataset-orchestration/sessions/sess-1/launch")
assert response.status_code == 403
assert response.json()["detail"] == "Permission denied for dataset:execution:launch:EXECUTE"
# [/DEF:test_us3_launch_endpoint_requires_launch_permission:Function]
# [DEF:test_semantic_source_version_propagation_preserves_locked_fields:Function]
# @PURPOSE: Updated semantic source versions should mark unlocked fields reviewable while preserving locked manual values.
def test_semantic_source_version_propagation_preserves_locked_fields():

View File

@@ -17,7 +17,7 @@ from __future__ import annotations
# [DEF:DatasetReviewApi.imports:Block]
import json
from datetime import datetime
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Union
from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
from pydantic import BaseModel, Field
@@ -233,15 +233,22 @@ class BatchApproveMappingRequest(BaseModel):
# [DEF:PreviewEnqueueResultResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Synchronous preview trigger response aligned with preview-status contract.
# @PURPOSE: Contract-compliant async preview trigger response exposing only enqueue state.
class PreviewEnqueueResultResponse(BaseModel):
session_id: str
preview_status: str
task_id: Optional[str] = None
preview: Optional[CompiledPreviewDto] = None
# [/DEF:PreviewEnqueueResultResponse:Class]
# [DEF:MappingCollectionResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Contract-compliant wrapper for execution mapping list responses.
class MappingCollectionResponse(BaseModel):
items: List[ExecutionMappingDto]
# [/DEF:MappingCollectionResponse:Class]
# [DEF:UpdateExecutionMappingRequest:Class]
# @COMPLEXITY: 2
# @PURPOSE: Request DTO for one manual execution-mapping override update without introducing unrelated bulk mutation semantics.
@@ -254,10 +261,10 @@ class UpdateExecutionMappingRequest(BaseModel):
# [DEF:LaunchDatasetResponse:Class]
# @COMPLEXITY: 2
# @PURPOSE: Response DTO for one completed launch submission handoff with updated session and persisted run context.
# @PURPOSE: Contract-compliant launch result exposing audited run context and SQL Lab redirect target.
class LaunchDatasetResponse(BaseModel):
session: SessionSummary
run_context: DatasetRunContextDto
redirect_url: str
# [/DEF:LaunchDatasetResponse:Class]
@@ -560,6 +567,8 @@ def _map_candidate_provenance(candidate: SemanticCandidate) -> FieldProvenance:
# [DEF:_resolve_candidate_source_version:Function]
# @COMPLEXITY: 3
# @PURPOSE: Resolve the semantic source version for one accepted candidate from the loaded session aggregate.
# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry]
# @RELATION: [DEPENDS_ON] ->[SemanticSource]
def _resolve_candidate_source_version(field: SemanticFieldEntry, source_id: Optional[str]) -> Optional[str]:
if not source_id:
return None
@@ -661,6 +670,27 @@ def _serialize_run_context(run_context) -> DatasetRunContextDto:
# [/DEF:_serialize_run_context:Function]
# [DEF:_build_sql_lab_redirect_url:Function]
# @COMPLEXITY: 3
# @PURPOSE: Build a stable SQL Lab redirect URL from the configured Superset environment and persisted run context reference.
# @RELATION: [DEPENDS_ON] ->[DatasetRunContextDto]
def _build_sql_lab_redirect_url(environment_url: str, sql_lab_session_ref: str) -> str:
base_url = str(environment_url or "").rstrip("/")
session_ref = str(sql_lab_session_ref or "").strip()
if not base_url:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Superset environment URL is not configured",
)
if not session_ref:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="SQL Lab session reference is missing",
)
return f"{base_url}/superset/sqllab?queryId={session_ref}"
# [/DEF:_build_sql_lab_redirect_url:Function]
# [DEF:_build_documentation_export:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce session documentation export content from current persisted review state.
@@ -1365,12 +1395,12 @@ async def approve_batch_semantic_fields(
# @PURPOSE: Return the current mapping-review set for one accessible session.
# @RELATION: [CALLS] ->[_get_owned_session_or_404]
# @PRE: Session is ownership-accessible to the authenticated user and execution feature is enabled.
# @POST: Returns the persisted mapping review set for the requested session without mutating approval state.
# @POST: Returns the persisted mapping review set for the requested session wrapped in the contract collection shape without mutating approval state.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[session_id:str] -> Output[List[ExecutionMappingDto]]
# @DATA_CONTRACT: Input[session_id:str] -> Output[MappingCollectionResponse]
@router.get(
"/sessions/{session_id}/mappings",
response_model=List[ExecutionMappingDto],
response_model=MappingCollectionResponse,
dependencies=[
Depends(_require_auto_review_flag),
Depends(_require_execution_flag),
@@ -1384,7 +1414,9 @@ async def list_execution_mappings(
):
with belief_scope("dataset_review.list_execution_mappings"):
session = _get_owned_session_or_404(repository, session_id, current_user)
return [_serialize_execution_mapping(item) for item in session.execution_mappings]
return MappingCollectionResponse(
items=[_serialize_execution_mapping(item) for item in session.execution_mappings]
)
# [/DEF:list_execution_mappings:Function]
@@ -1579,12 +1611,12 @@ async def approve_batch_execution_mappings(
# @PURPOSE: Trigger Superset-side preview compilation for the current owned execution context.
# @RELATION: [CALLS] ->[DatasetReviewOrchestrator.prepare_launch_preview]
# @PRE: Session belongs to the current owner and required mapping inputs are available.
# @POST: Returns the persisted preview artifact produced from Superset or a deterministic conflict error.
# @POST: Returns the compiled preview directly for synchronous success or enqueue-state shape when preview generation remains pending.
# @SIDE_EFFECT: Persists preview attempt and updates readiness state.
# @DATA_CONTRACT: Input[session_id:str] -> Output[PreviewEnqueueResultResponse]
# @DATA_CONTRACT: Input[session_id:str] -> Output[CompiledPreviewDto | PreviewEnqueueResultResponse]
@router.post(
"/sessions/{session_id}/preview",
response_model=PreviewEnqueueResultResponse,
response_model=Union[CompiledPreviewDto, PreviewEnqueueResultResponse],
dependencies=[
Depends(_require_auto_review_flag),
Depends(_require_execution_flag),
@@ -1593,6 +1625,7 @@ async def approve_batch_execution_mappings(
)
async def trigger_preview_generation(
session_id: str,
response: Response,
orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator),
current_user: User = Depends(get_current_user),
):
@@ -1613,12 +1646,16 @@ async def trigger_preview_generation(
)
raise HTTPException(status_code=status_code, detail=detail) from exc
return PreviewEnqueueResultResponse(
session_id=result.session.session_id,
preview_status=result.preview.preview_status.value,
task_id=None,
preview=CompiledPreviewDto.model_validate(result.preview, from_attributes=True),
)
if result.preview.preview_status == PreviewStatus.PENDING:
response.status_code = status.HTTP_202_ACCEPTED
return PreviewEnqueueResultResponse(
session_id=result.session.session_id,
preview_status=result.preview.preview_status.value,
task_id=None,
)
response.status_code = status.HTTP_200_OK
return CompiledPreviewDto.model_validate(result.preview, from_attributes=True)
# [/DEF:trigger_preview_generation:Function]
@@ -1627,21 +1664,23 @@ async def trigger_preview_generation(
# @PURPOSE: Execute the current owned session launch handoff through the orchestrator and return audited SQL Lab run context.
# @RELATION: [CALLS] ->[DatasetReviewOrchestrator.launch_dataset]
# @PRE: Session belongs to the current owner, execution feature is enabled, and launch gates are satisfied or a deterministic conflict is returned.
# @POST: Returns updated session summary plus persisted run context when launch handoff is accepted.
# @POST: Returns persisted run context plus redirect URL when launch handoff is accepted.
# @SIDE_EFFECT: Persists launch audit snapshot and may trigger SQL Lab session creation.
# @DATA_CONTRACT: Input[session_id:str] -> Output[LaunchDatasetResponse]
@router.post(
"/sessions/{session_id}/launch",
response_model=LaunchDatasetResponse,
status_code=status.HTTP_201_CREATED,
dependencies=[
Depends(_require_auto_review_flag),
Depends(_require_execution_flag),
Depends(has_permission("dataset:session", "MANAGE")),
Depends(has_permission("dataset:execution:launch", "EXECUTE")),
],
)
async def launch_dataset(
session_id: str,
orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator),
config_manager=Depends(get_config_manager),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.launch_dataset"):
@@ -1663,9 +1702,14 @@ async def launch_dataset(
)
raise HTTPException(status_code=status_code, detail=detail) from exc
environment = config_manager.get_environment(result.session.environment_id)
environment_url = getattr(environment, "url", "") if environment is not None else ""
return LaunchDatasetResponse(
session=_serialize_session_summary(result.session),
run_context=_serialize_run_context(result.run_context),
redirect_url=_build_sql_lab_redirect_url(
environment_url=environment_url,
sql_lab_session_ref=result.run_context.sql_lab_session_ref,
),
)
# [/DEF:launch_dataset:Function]