feat(027): Final Phase T038-T043 implementation

- T038: SessionEvent logger and persistence logic
  - Added SessionEventLogger service with explicit audit event persistence
  - Added SessionEvent model with events relationship on DatasetReviewSession
  - Integrated event logging into orchestrator flows and API mutation endpoints

- T039: Semantic source version propagation
  - Added source_version column to SemanticFieldEntry
  - Added propagate_source_version_update() to SemanticResolver
  - Preserves locked/manual field invariants during propagation

- T040: Batch approval API and UI actions
  - Added batch semantic approval endpoint (/fields/semantic/approve-batch)
  - Added batch mapping approval endpoint (/mappings/approve-batch)
  - Added batch approval actions to SemanticLayerReview and ExecutionMappingReview components
  - Aligned batch semantics with single-item approval contracts

- T041: Superset compatibility matrix tests
  - Added test_superset_matrix.py with preview and SQL Lab fallback coverage
  - Tests verify client method preference and matrix fallback behavior

- T042: RBAC audit sweep on session-mutation endpoints
  - Added _require_owner_mutation_scope() helper
  - Applied owner guards to update_session, delete_session, and all mutation endpoints
  - Ensured no bypass of existing permission checks

- T043: i18n coverage for dataset-review UI
  - Added workspace state labels (empty/importing/review) to en.json and ru.json
  - Added batch action labels for semantics and mappings
  - Fixed workspace state comparison to lowercase strings
  - Removed hardcoded workspace state display strings

Signed-off-by: Implementation Specialist <impl@ss-tools>
This commit is contained in:
2026-03-17 14:29:33 +03:00
parent 38bda6a714
commit ed3d5f3039
33 changed files with 99234 additions and 93415 deletions

View File

@@ -19,23 +19,36 @@ from __future__ import annotations
# [DEF:DatasetReviewOrchestrator.imports:Block]
from dataclasses import dataclass, field
from datetime import datetime
import hashlib
import json
from typing import Any, Dict, List, Optional
from src.core.config_manager import ConfigManager
from src.core.logger import belief_scope, logger
from src.core.task_manager import TaskManager
from src.core.utils.superset_compilation_adapter import (
PreviewCompilationPayload,
SqlLabLaunchPayload,
SupersetCompilationAdapter,
)
from src.core.utils.superset_context_extractor import (
SupersetContextExtractor,
SupersetParsedContext,
)
from src.models.auth import User
from src.models.dataset_review import (
ApprovalState,
BusinessSummarySource,
CompiledPreview,
ConfidenceState,
DatasetProfile,
DatasetReviewSession,
DatasetRunContext,
FindingArea,
FindingSeverity,
LaunchStatus,
PreviewStatus,
RecommendedAction,
ReadinessState,
ResolutionState,
@@ -47,6 +60,7 @@ from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
from src.services.dataset_review.event_logger import SessionEventPayload
# [/DEF:DatasetReviewOrchestrator.imports:Block]
@@ -73,6 +87,48 @@ class StartSessionResult:
# [/DEF:StartSessionResult:Class]
# [DEF:PreparePreviewCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for compiling one Superset-backed session preview.
@dataclass
class PreparePreviewCommand:
user: User
session_id: str
# [/DEF:PreparePreviewCommand:Class]
# [DEF:PreparePreviewResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Result contract for one persisted compiled preview attempt.
@dataclass
class PreparePreviewResult:
session: DatasetReviewSession
preview: CompiledPreview
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:PreparePreviewResult:Class]
# [DEF:LaunchDatasetCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for launching one dataset-review session into SQL Lab.
@dataclass
class LaunchDatasetCommand:
user: User
session_id: str
# [/DEF:LaunchDatasetCommand:Class]
# [DEF:LaunchDatasetResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Launch result carrying immutable run context and any gate blockers surfaced before launch.
@dataclass
class LaunchDatasetResult:
session: DatasetReviewSession
run_context: DatasetRunContext
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:LaunchDatasetResult:Class]
# [DEF:DatasetReviewOrchestrator:Class]
# @COMPLEXITY: 5
# @PURPOSE: Coordinate safe session startup while preserving cross-user isolation and explicit partial recovery.
@@ -197,6 +253,23 @@ class DatasetReviewOrchestrator:
parsed_context=parsed_context,
dataset_ref=dataset_ref,
)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=persisted_session.session_id,
actor_user_id=command.user.id,
event_type="session_started",
event_summary="Dataset review session shell created",
current_phase=persisted_session.current_phase.value,
readiness_state=persisted_session.readiness_state.value,
event_details={
"source_kind": persisted_session.source_kind,
"dataset_ref": persisted_session.dataset_ref,
"dataset_id": persisted_session.dataset_id,
"dashboard_id": persisted_session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery),
},
)
)
persisted_session = self.repository.save_profile_and_findings(
persisted_session.session_id,
command.user.id,
@@ -213,6 +286,17 @@ class DatasetReviewOrchestrator:
persisted_session.active_task_id = active_task_id
self.repository.db.commit()
self.repository.db.refresh(persisted_session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=persisted_session.session_id,
actor_user_id=command.user.id,
event_type="recovery_task_linked",
event_summary="Recovery task linked to dataset review session",
current_phase=persisted_session.current_phase.value,
readiness_state=persisted_session.readiness_state.value,
event_details={"task_id": active_task_id},
)
)
logger.reason(
"Linked recovery task to started dataset review session",
extra={"session_id": persisted_session.session_id, "task_id": active_task_id},
@@ -237,6 +321,238 @@ class DatasetReviewOrchestrator:
)
# [/DEF:DatasetReviewOrchestrator.start_session:Function]
# [DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# @COMPLEXITY: 4
# @PURPOSE: Assemble effective execution inputs and trigger Superset-side preview compilation.
# @RELATION: [CALLS] ->[SupersetCompilationAdapter.compile_preview]
# @PRE: all required variables have candidate values or explicitly accepted defaults.
# @POST: returns preview artifact in pending, ready, failed, or stale state.
# @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics.
# @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult]
def prepare_launch_preview(self, command: PreparePreviewCommand) -> PreparePreviewResult:
with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
if session is None or session.user_id != command.user.id:
logger.explore(
"Preview preparation rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
)
raise ValueError("Session not found")
if session.dataset_id is None:
raise ValueError("Preview requires a resolved dataset_id")
environment = self.config_manager.get_environment(session.environment_id)
if environment is None:
raise ValueError("Environment not found")
execution_snapshot = self._build_execution_snapshot(session)
preview_blockers = execution_snapshot["preview_blockers"]
if preview_blockers:
logger.explore(
"Preview preparation blocked by incomplete execution context",
extra={
"session_id": session.session_id,
"blocked_reasons": preview_blockers,
},
)
raise ValueError("Preview blocked: " + "; ".join(preview_blockers))
adapter = SupersetCompilationAdapter(environment)
preview = adapter.compile_preview(
PreviewCompilationPayload(
session_id=session.session_id,
dataset_id=session.dataset_id,
preview_fingerprint=execution_snapshot["preview_fingerprint"],
template_params=execution_snapshot["template_params"],
effective_filters=execution_snapshot["effective_filters"],
)
)
persisted_preview = self.repository.save_preview(
session.session_id,
command.user.id,
preview,
)
session.current_phase = SessionPhase.PREVIEW
session.last_activity_at = datetime.utcnow()
if persisted_preview.preview_status == PreviewStatus.READY:
launch_blockers = self._build_launch_blockers(
session=session,
execution_snapshot=execution_snapshot,
preview=persisted_preview,
)
if launch_blockers:
session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY
session.recommended_action = RecommendedAction.APPROVE_MAPPING
else:
session.readiness_state = ReadinessState.RUN_READY
session.recommended_action = RecommendedAction.LAUNCH_DATASET
else:
session.readiness_state = ReadinessState.PARTIALLY_READY
session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
self.repository.db.commit()
self.repository.db.refresh(session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=session.session_id,
actor_user_id=command.user.id,
event_type="preview_generated",
event_summary="Superset preview generation persisted",
current_phase=session.current_phase.value,
readiness_state=session.readiness_state.value,
event_details={
"preview_id": persisted_preview.preview_id,
"preview_status": persisted_preview.preview_status.value,
"preview_fingerprint": persisted_preview.preview_fingerprint,
},
)
)
logger.reflect(
"Superset preview preparation completed",
extra={
"session_id": session.session_id,
"preview_id": persisted_preview.preview_id,
"preview_status": persisted_preview.preview_status.value,
"preview_fingerprint": persisted_preview.preview_fingerprint,
},
)
return PreparePreviewResult(
session=session,
preview=persisted_preview,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# [DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# @COMPLEXITY: 5
# @PURPOSE: Start the approved dataset execution through SQL Lab and persist run context for audit/replay.
# @RELATION: [CALLS] ->[SupersetCompilationAdapter.create_sql_lab_session]
# @PRE: session is run-ready and compiled preview is current.
# @POST: returns persisted run context with SQL Lab session reference and launch outcome.
# @SIDE_EFFECT: creates SQL Lab execution session and audit snapshot.
# @DATA_CONTRACT: Input[LaunchDatasetCommand] -> Output[LaunchDatasetResult]
# @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs.
def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult:
with belief_scope("DatasetReviewOrchestrator.launch_dataset"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
if session is None or session.user_id != command.user.id:
logger.explore(
"Launch rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
)
raise ValueError("Session not found")
if session.dataset_id is None:
raise ValueError("Launch requires a resolved dataset_id")
environment = self.config_manager.get_environment(session.environment_id)
if environment is None:
raise ValueError("Environment not found")
execution_snapshot = self._build_execution_snapshot(session)
current_preview = self._get_latest_preview(session)
launch_blockers = self._build_launch_blockers(
session=session,
execution_snapshot=execution_snapshot,
preview=current_preview,
)
if launch_blockers:
logger.explore(
"Launch gate blocked dataset execution",
extra={
"session_id": session.session_id,
"blocked_reasons": launch_blockers,
},
)
raise ValueError("Launch blocked: " + "; ".join(launch_blockers))
adapter = SupersetCompilationAdapter(environment)
try:
sql_lab_session_ref = adapter.create_sql_lab_session(
SqlLabLaunchPayload(
session_id=session.session_id,
dataset_id=session.dataset_id,
preview_id=current_preview.preview_id,
compiled_sql=str(current_preview.compiled_sql or ""),
template_params=execution_snapshot["template_params"],
)
)
launch_status = LaunchStatus.STARTED
launch_error = None
except Exception as exc:
logger.explore(
"SQL Lab launch failed after passing gates",
extra={"session_id": session.session_id, "error": str(exc)},
)
sql_lab_session_ref = "unavailable"
launch_status = LaunchStatus.FAILED
launch_error = str(exc)
run_context = DatasetRunContext(
session_id=session.session_id,
dataset_ref=session.dataset_ref,
environment_id=session.environment_id,
preview_id=current_preview.preview_id,
sql_lab_session_ref=sql_lab_session_ref,
effective_filters=execution_snapshot["effective_filters"],
template_params=execution_snapshot["template_params"],
approved_mapping_ids=execution_snapshot["approved_mapping_ids"],
semantic_decision_refs=execution_snapshot["semantic_decision_refs"],
open_warning_refs=execution_snapshot["open_warning_refs"],
launch_status=launch_status,
launch_error=launch_error,
)
persisted_run_context = self.repository.save_run_context(
session.session_id,
command.user.id,
run_context,
)
session.current_phase = SessionPhase.LAUNCH
session.last_activity_at = datetime.utcnow()
if launch_status == LaunchStatus.FAILED:
session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY
session.recommended_action = RecommendedAction.LAUNCH_DATASET
else:
session.readiness_state = ReadinessState.RUN_IN_PROGRESS
session.recommended_action = RecommendedAction.EXPORT_OUTPUTS
self.repository.db.commit()
self.repository.db.refresh(session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=session.session_id,
actor_user_id=command.user.id,
event_type="dataset_launch_requested",
event_summary="Dataset launch handoff persisted",
current_phase=session.current_phase.value,
readiness_state=session.readiness_state.value,
event_details={
"run_context_id": persisted_run_context.run_context_id,
"launch_status": persisted_run_context.launch_status.value,
"preview_id": persisted_run_context.preview_id,
"sql_lab_session_ref": persisted_run_context.sql_lab_session_ref,
},
)
)
logger.reflect(
"Dataset launch orchestration completed with audited run context",
extra={
"session_id": session.session_id,
"run_context_id": persisted_run_context.run_context_id,
"launch_status": persisted_run_context.launch_status.value,
"preview_id": persisted_run_context.preview_id,
},
)
return LaunchDatasetResult(
session=session,
run_context=persisted_run_context,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# @COMPLEXITY: 3
# @PURPOSE: Normalize dataset-selection payload into canonical session references.
@@ -328,6 +644,158 @@ class DatasetReviewOrchestrator:
return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build effective filters, template params, approvals, and fingerprint for preview and launch gating.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: Session aggregate includes imported filters, template variables, and current execution mappings.
# @POST: returns deterministic execution snapshot for current session state without mutating persistence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]]
def _build_execution_snapshot(self, session: DatasetReviewSession) -> Dict[str, Any]:
filter_lookup = {item.filter_id: item for item in session.imported_filters}
variable_lookup = {item.variable_id: item for item in session.template_variables}
effective_filters: List[Dict[str, Any]] = []
template_params: Dict[str, Any] = {}
approved_mapping_ids: List[str] = []
open_warning_refs: List[str] = []
preview_blockers: List[str] = []
for mapping in session.execution_mappings:
imported_filter = filter_lookup.get(mapping.filter_id)
template_variable = variable_lookup.get(mapping.variable_id)
if imported_filter is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter")
continue
if template_variable is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_variable")
continue
effective_value = mapping.effective_value
if effective_value is None:
effective_value = imported_filter.normalized_value
if effective_value is None:
effective_value = imported_filter.raw_value
if effective_value is None:
effective_value = template_variable.default_value
if effective_value is None and template_variable.is_required:
preview_blockers.append(f"variable:{template_variable.variable_name}:missing_required_value")
continue
effective_filters.append(
{
"mapping_id": mapping.mapping_id,
"filter_id": imported_filter.filter_id,
"filter_name": imported_filter.filter_name,
"variable_id": template_variable.variable_id,
"variable_name": template_variable.variable_name,
"effective_value": effective_value,
"raw_input_value": mapping.raw_input_value,
}
)
template_params[template_variable.variable_name] = effective_value
if mapping.approval_state == ApprovalState.APPROVED:
approved_mapping_ids.append(mapping.mapping_id)
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
open_warning_refs.append(mapping.mapping_id)
mapped_variable_ids = {mapping.variable_id for mapping in session.execution_mappings}
for variable in session.template_variables:
if variable.variable_id in mapped_variable_ids:
continue
if variable.default_value is not None:
template_params[variable.variable_name] = variable.default_value
continue
if variable.is_required:
preview_blockers.append(f"variable:{variable.variable_name}:unmapped")
semantic_decision_refs = [
field.field_id
for field in session.semantic_fields
if field.is_locked or not field.needs_review or field.provenance.value != "unresolved"
]
preview_fingerprint = self._compute_preview_fingerprint(
{
"dataset_id": session.dataset_id,
"template_params": template_params,
"effective_filters": effective_filters,
}
)
return {
"effective_filters": effective_filters,
"template_params": template_params,
"approved_mapping_ids": sorted(approved_mapping_ids),
"semantic_decision_refs": sorted(semantic_decision_refs),
"open_warning_refs": sorted(open_warning_refs),
"preview_blockers": sorted(set(preview_blockers)),
"preview_fingerprint": preview_fingerprint,
}
# [/DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# [DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# @COMPLEXITY: 4
# @PURPOSE: Enforce launch gates from findings, approvals, and current preview truth.
# @RELATION: [DEPENDS_ON] ->[CompiledPreview]
# @PRE: execution_snapshot was computed from current session state and preview is the latest persisted preview or None.
# @POST: returns explicit blocker codes for every unmet launch invariant.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession,Dict[str,Any],CompiledPreview|None] -> Output[List[str]]
def _build_launch_blockers(
self,
session: DatasetReviewSession,
execution_snapshot: Dict[str, Any],
preview: Optional[CompiledPreview],
) -> List[str]:
blockers = list(execution_snapshot["preview_blockers"])
for finding in session.findings:
if (
finding.severity == FindingSeverity.BLOCKING
and finding.resolution_state not in {ResolutionState.RESOLVED, ResolutionState.APPROVED}
):
blockers.append(f"finding:{finding.code}:blocking")
for mapping in session.execution_mappings:
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
blockers.append(f"mapping:{mapping.mapping_id}:approval_required")
if preview is None:
blockers.append("preview:missing")
else:
if preview.preview_status != PreviewStatus.READY:
blockers.append(f"preview:{preview.preview_status.value}")
if preview.preview_fingerprint != execution_snapshot["preview_fingerprint"]:
blockers.append("preview:fingerprint_mismatch")
return sorted(set(blockers))
# [/DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# [DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve the current latest preview snapshot for one session aggregate.
def _get_latest_preview(self, session: DatasetReviewSession) -> Optional[CompiledPreview]:
if not session.previews:
return None
if session.last_preview_id:
for preview in session.previews:
if preview.preview_id == session.last_preview_id:
return preview
return sorted(
session.previews,
key=lambda item: (item.created_at or datetime.min, item.preview_id),
reverse=True,
)[0]
# [/DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# [DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# @COMPLEXITY: 2
# @PURPOSE: Produce deterministic execution fingerprint for preview truth and staleness checks.
def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str:
serialized = json.dumps(payload, sort_keys=True, default=str)
return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
# [/DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# @COMPLEXITY: 4
# @PURPOSE: Link session start to observable async recovery when task infrastructure is available.