feat(027): Final Phase T038-T043 implementation

- T038: SessionEvent logger and persistence logic
  - Added SessionEventLogger service with explicit audit event persistence
  - Added SessionEvent model with events relationship on DatasetReviewSession
  - Integrated event logging into orchestrator flows and API mutation endpoints

- T039: Semantic source version propagation
  - Added source_version column to SemanticFieldEntry
  - Added propagate_source_version_update() to SemanticResolver
  - Preserves locked/manual field invariants during propagation

- T040: Batch approval API and UI actions
  - Added batch semantic approval endpoint (/fields/semantic/approve-batch)
  - Added batch mapping approval endpoint (/mappings/approve-batch)
  - Added batch approval actions to SemanticLayerReview and ExecutionMappingReview components
  - Aligned batch semantics with single-item approval contracts

- T041: Superset compatibility matrix tests
  - Added test_superset_matrix.py with preview and SQL Lab fallback coverage
  - Tests verify client method preference and matrix fallback behavior

- T042: RBAC audit sweep on session-mutation endpoints
  - Added _require_owner_mutation_scope() helper
  - Applied owner guards to update_session, delete_session, and all mutation endpoints
  - Ensured no bypass of existing permission checks

- T043: i18n coverage for dataset-review UI
  - Added workspace state labels (empty/importing/review) to en.json and ru.json
  - Added batch action labels for semantics and mappings
  - Fixed workspace state comparison to lowercase strings
  - Removed hardcoded workspace state display strings

Signed-off-by: Implementation Specialist <impl@ss-tools>
This commit is contained in:
2026-03-17 14:29:33 +03:00
parent 38bda6a714
commit ed3d5f3039
33 changed files with 99234 additions and 93415 deletions

View File

@@ -0,0 +1,552 @@
# [DEF:ClarificationEngine:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, clarification, question_payload, answer_persistence, readiness, findings
# @PURPOSE: Manage one-question-at-a-time clarification state, deterministic answer persistence, and readiness/finding updates.
# @LAYER: Domain
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[ClarificationSession]
# @RELATION: [DEPENDS_ON] ->[ClarificationQuestion]
# @RELATION: [DEPENDS_ON] ->[ClarificationAnswer]
# @RELATION: [DEPENDS_ON] ->[ValidationFinding]
# @PRE: Target session contains a persisted clarification aggregate in the current ownership scope.
# @POST: Active clarification payload exposes one highest-priority unresolved question, and each recorded answer is persisted before pointer/readiness mutation.
# @SIDE_EFFECT: Persists clarification answers, question/session states, and related readiness/finding changes.
# @DATA_CONTRACT: Input[DatasetReviewSession|ClarificationAnswerCommand] -> Output[ClarificationStateResult]
# @INVARIANT: Only one active clarification question may exist at a time; skipped and expert-review items remain unresolved and visible.
from __future__ import annotations
# [DEF:ClarificationEngine.imports:Block]
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Optional
from src.core.logger import belief_scope, logger
from src.models.auth import User
from src.models.dataset_review import (
AnswerKind,
ClarificationAnswer,
ClarificationQuestion,
ClarificationSession,
ClarificationStatus,
DatasetReviewSession,
FindingArea,
FindingSeverity,
QuestionState,
ReadinessState,
RecommendedAction,
ResolutionState,
SessionPhase,
ValidationFinding,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
# [/DEF:ClarificationEngine.imports:Block]
# [DEF:ClarificationQuestionPayload:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed active-question payload returned to the API layer.
@dataclass
class ClarificationQuestionPayload:
question_id: str
clarification_session_id: str
topic_ref: str
question_text: str
why_it_matters: str
current_guess: Optional[str]
priority: int
state: QuestionState
options: list[dict[str, object]] = field(default_factory=list)
# [/DEF:ClarificationQuestionPayload:Class]
# [DEF:ClarificationStateResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Clarification state result carrying the current session, active payload, and changed findings.
@dataclass
class ClarificationStateResult:
clarification_session: ClarificationSession
current_question: Optional[ClarificationQuestionPayload]
session: DatasetReviewSession
changed_findings: List[ValidationFinding] = field(default_factory=list)
# [/DEF:ClarificationStateResult:Class]
# [DEF:ClarificationAnswerCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed answer command for clarification state mutation.
@dataclass
class ClarificationAnswerCommand:
session: DatasetReviewSession
question_id: str
answer_kind: AnswerKind
answer_value: Optional[str]
user: User
# [/DEF:ClarificationAnswerCommand:Class]
# [DEF:ClarificationEngine:Class]
# @COMPLEXITY: 4
# @PURPOSE: Provide deterministic one-question-at-a-time clarification selection and answer persistence.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository]
# @RELATION: [DEPENDS_ON] ->[ClarificationSession]
# @RELATION: [DEPENDS_ON] ->[ValidationFinding]
# @PRE: Repository is bound to the current request transaction scope.
# @POST: Returned clarification state is persistence-backed and aligned with session readiness/recommended action.
# @SIDE_EFFECT: Mutates clarification answers, session flags, and related clarification findings.
class ClarificationEngine:
# [DEF:ClarificationEngine.__init__:Function]
# @COMPLEXITY: 2
# @PURPOSE: Bind repository dependency for clarification persistence operations.
def __init__(self, repository: DatasetReviewSessionRepository) -> None:
self.repository = repository
# [/DEF:ClarificationEngine.__init__:Function]
# [DEF:ClarificationEngine.build_question_payload:Function]
# @COMPLEXITY: 4
# @PURPOSE: Return the one active highest-priority clarification question payload with why-it-matters, current guess, and options.
# @RELATION: [DEPENDS_ON] ->[ClarificationQuestion]
# @RELATION: [DEPENDS_ON] ->[ClarificationOption]
# @PRE: Session contains unresolved clarification state or a resumable clarification session.
# @POST: Returns exactly one active/open question payload or None when no unresolved question remains.
# @SIDE_EFFECT: Normalizes the active-question pointer and clarification status in persistence.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[ClarificationQuestionPayload|None]
def build_question_payload(
self,
session: DatasetReviewSession,
) -> Optional[ClarificationQuestionPayload]:
with belief_scope("ClarificationEngine.build_question_payload"):
clarification_session = self._get_latest_clarification_session(session)
if clarification_session is None:
logger.reason(
"Clarification payload requested without clarification session",
extra={"session_id": session.session_id},
)
return None
active_questions = [
question for question in clarification_session.questions
if question.state == QuestionState.OPEN
]
active_questions.sort(key=lambda item: (-int(item.priority), item.created_at, item.question_id))
if not active_questions:
clarification_session.current_question_id = None
clarification_session.status = ClarificationStatus.COMPLETED
session.readiness_state = self._derive_readiness_state(session)
session.recommended_action = self._derive_recommended_action(session)
if session.current_phase == SessionPhase.CLARIFICATION:
session.current_phase = SessionPhase.REVIEW
self.repository.db.commit()
logger.reflect(
"No unresolved clarification question remains",
extra={"session_id": session.session_id},
)
return None
selected_question = active_questions[0]
clarification_session.current_question_id = selected_question.question_id
clarification_session.status = ClarificationStatus.ACTIVE
session.readiness_state = ReadinessState.CLARIFICATION_ACTIVE
session.recommended_action = RecommendedAction.ANSWER_NEXT_QUESTION
session.current_phase = SessionPhase.CLARIFICATION
logger.reason(
"Selected active clarification question",
extra={
"session_id": session.session_id,
"clarification_session_id": clarification_session.clarification_session_id,
"question_id": selected_question.question_id,
"priority": selected_question.priority,
},
)
self.repository.db.commit()
payload = ClarificationQuestionPayload(
question_id=selected_question.question_id,
clarification_session_id=selected_question.clarification_session_id,
topic_ref=selected_question.topic_ref,
question_text=selected_question.question_text,
why_it_matters=selected_question.why_it_matters,
current_guess=selected_question.current_guess,
priority=selected_question.priority,
state=selected_question.state,
options=[
{
"option_id": option.option_id,
"question_id": option.question_id,
"label": option.label,
"value": option.value,
"is_recommended": option.is_recommended,
"display_order": option.display_order,
}
for option in sorted(
selected_question.options,
key=lambda item: (item.display_order, item.label, item.option_id),
)
],
)
logger.reflect(
"Clarification payload built",
extra={
"session_id": session.session_id,
"question_id": payload.question_id,
"option_count": len(payload.options),
},
)
return payload
# [/DEF:ClarificationEngine.build_question_payload:Function]
# [DEF:ClarificationEngine.record_answer:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist one clarification answer before any pointer/readiness mutation and compute deterministic state impact.
# @RELATION: [DEPENDS_ON] ->[ClarificationAnswer]
# @RELATION: [DEPENDS_ON] ->[ValidationFinding]
# @PRE: Target question belongs to the session's active clarification session and is still open.
# @POST: Answer row is persisted before current-question pointer advances; skipped/expert-review items remain unresolved and visible.
# @SIDE_EFFECT: Inserts answer row, mutates question/session states, updates clarification findings, and commits.
# @DATA_CONTRACT: Input[ClarificationAnswerCommand] -> Output[ClarificationStateResult]
def record_answer(self, command: ClarificationAnswerCommand) -> ClarificationStateResult:
with belief_scope("ClarificationEngine.record_answer"):
session = command.session
clarification_session = self._get_latest_clarification_session(session)
if clarification_session is None:
logger.explore(
"Cannot record clarification answer because no clarification session exists",
extra={"session_id": session.session_id},
)
raise ValueError("Clarification session not found")
question = self._find_question(clarification_session, command.question_id)
if question is None:
logger.explore(
"Cannot record clarification answer for foreign or missing question",
extra={"session_id": session.session_id, "question_id": command.question_id},
)
raise ValueError("Clarification question not found")
if question.answer is not None:
logger.explore(
"Rejected duplicate clarification answer submission",
extra={"session_id": session.session_id, "question_id": command.question_id},
)
raise ValueError("Clarification question already answered")
if clarification_session.current_question_id and clarification_session.current_question_id != question.question_id:
logger.explore(
"Rejected answer for non-active clarification question",
extra={
"session_id": session.session_id,
"question_id": question.question_id,
"current_question_id": clarification_session.current_question_id,
},
)
raise ValueError("Only the active clarification question can be answered")
normalized_answer_value = self._normalize_answer_value(command.answer_kind, command.answer_value, question)
logger.reason(
"Persisting clarification answer before state advancement",
extra={
"session_id": session.session_id,
"question_id": question.question_id,
"answer_kind": command.answer_kind.value,
},
)
persisted_answer = ClarificationAnswer(
question_id=question.question_id,
answer_kind=command.answer_kind,
answer_value=normalized_answer_value,
answered_by_user_id=command.user.id,
impact_summary=self._build_impact_summary(question, command.answer_kind, normalized_answer_value),
)
self.repository.db.add(persisted_answer)
self.repository.db.flush()
changed_finding = self._upsert_clarification_finding(
session=session,
question=question,
answer_kind=command.answer_kind,
answer_value=normalized_answer_value,
)
if command.answer_kind == AnswerKind.SELECTED:
question.state = QuestionState.ANSWERED
elif command.answer_kind == AnswerKind.CUSTOM:
question.state = QuestionState.ANSWERED
elif command.answer_kind == AnswerKind.SKIPPED:
question.state = QuestionState.SKIPPED
elif command.answer_kind == AnswerKind.EXPERT_REVIEW:
question.state = QuestionState.EXPERT_REVIEW
question.updated_at = datetime.utcnow()
self.repository.db.flush()
clarification_session.resolved_count = self._count_resolved_questions(clarification_session)
clarification_session.remaining_count = self._count_remaining_questions(clarification_session)
clarification_session.summary_delta = self.summarize_progress(clarification_session)
clarification_session.updated_at = datetime.utcnow()
next_question = self._select_next_open_question(clarification_session)
clarification_session.current_question_id = next_question.question_id if next_question else None
clarification_session.status = (
ClarificationStatus.ACTIVE if next_question else ClarificationStatus.COMPLETED
)
if clarification_session.status == ClarificationStatus.COMPLETED:
clarification_session.completed_at = datetime.utcnow()
session.readiness_state = self._derive_readiness_state(session)
session.recommended_action = self._derive_recommended_action(session)
session.current_phase = (
SessionPhase.CLARIFICATION
if clarification_session.current_question_id
else SessionPhase.REVIEW
)
session.last_activity_at = datetime.utcnow()
self.repository.db.commit()
self.repository.db.refresh(session)
logger.reflect(
"Clarification answer recorded and session advanced",
extra={
"session_id": session.session_id,
"question_id": question.question_id,
"next_question_id": clarification_session.current_question_id,
"readiness_state": session.readiness_state.value,
"remaining_count": clarification_session.remaining_count,
},
)
return ClarificationStateResult(
clarification_session=clarification_session,
current_question=self.build_question_payload(session),
session=session,
changed_findings=[changed_finding] if changed_finding else [],
)
# [/DEF:ClarificationEngine.record_answer:Function]
# [DEF:ClarificationEngine.summarize_progress:Function]
# @COMPLEXITY: 3
# @PURPOSE: Produce a compact progress summary for pause/resume and completion UX.
# @RELATION: [DEPENDS_ON] ->[ClarificationSession]
def summarize_progress(self, clarification_session: ClarificationSession) -> str:
resolved = self._count_resolved_questions(clarification_session)
remaining = self._count_remaining_questions(clarification_session)
return f"{resolved} resolved, {remaining} unresolved"
# [/DEF:ClarificationEngine.summarize_progress:Function]
# [DEF:ClarificationEngine._get_latest_clarification_session:Function]
# @COMPLEXITY: 2
# @PURPOSE: Select the latest clarification session for the current dataset review aggregate.
def _get_latest_clarification_session(
self,
session: DatasetReviewSession,
) -> Optional[ClarificationSession]:
if not session.clarification_sessions:
return None
ordered_sessions = sorted(
session.clarification_sessions,
key=lambda item: (item.started_at, item.clarification_session_id),
reverse=True,
)
return ordered_sessions[0]
# [/DEF:ClarificationEngine._get_latest_clarification_session:Function]
# [DEF:ClarificationEngine._find_question:Function]
# @COMPLEXITY: 1
# @PURPOSE: Resolve a clarification question from the active clarification aggregate.
def _find_question(
self,
clarification_session: ClarificationSession,
question_id: str,
) -> Optional[ClarificationQuestion]:
for question in clarification_session.questions:
if question.question_id == question_id:
return question
return None
# [/DEF:ClarificationEngine._find_question:Function]
# [DEF:ClarificationEngine._select_next_open_question:Function]
# @COMPLEXITY: 2
# @PURPOSE: Select the next unresolved question in deterministic priority order.
def _select_next_open_question(
self,
clarification_session: ClarificationSession,
) -> Optional[ClarificationQuestion]:
open_questions = [
question for question in clarification_session.questions
if question.state == QuestionState.OPEN
]
if not open_questions:
return None
open_questions.sort(key=lambda item: (-int(item.priority), item.created_at, item.question_id))
return open_questions[0]
# [/DEF:ClarificationEngine._select_next_open_question:Function]
# [DEF:ClarificationEngine._count_resolved_questions:Function]
# @COMPLEXITY: 1
# @PURPOSE: Count questions whose answers fully resolved the ambiguity.
def _count_resolved_questions(self, clarification_session: ClarificationSession) -> int:
return sum(
1
for question in clarification_session.questions
if question.state == QuestionState.ANSWERED
)
# [/DEF:ClarificationEngine._count_resolved_questions:Function]
# [DEF:ClarificationEngine._count_remaining_questions:Function]
# @COMPLEXITY: 1
# @PURPOSE: Count questions still unresolved or deferred after clarification interaction.
def _count_remaining_questions(self, clarification_session: ClarificationSession) -> int:
return sum(
1
for question in clarification_session.questions
if question.state in {QuestionState.OPEN, QuestionState.SKIPPED, QuestionState.EXPERT_REVIEW}
)
# [/DEF:ClarificationEngine._count_remaining_questions:Function]
# [DEF:ClarificationEngine._normalize_answer_value:Function]
# @COMPLEXITY: 2
# @PURPOSE: Validate and normalize answer payload based on answer kind and active question options.
def _normalize_answer_value(
self,
answer_kind: AnswerKind,
answer_value: Optional[str],
question: ClarificationQuestion,
) -> Optional[str]:
normalized_answer_value = str(answer_value).strip() if answer_value is not None else None
if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM} and not normalized_answer_value:
raise ValueError("answer_value is required for selected or custom clarification answers")
if answer_kind == AnswerKind.SELECTED:
allowed_values = {option.value for option in question.options}
if normalized_answer_value not in allowed_values:
raise ValueError("answer_value must match one of the current clarification options")
if answer_kind == AnswerKind.SKIPPED:
return normalized_answer_value or "skipped"
if answer_kind == AnswerKind.EXPERT_REVIEW:
return normalized_answer_value or "expert_review"
return normalized_answer_value
# [/DEF:ClarificationEngine._normalize_answer_value:Function]
# [DEF:ClarificationEngine._build_impact_summary:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build a compact audit note describing how the clarification answer affects session state.
def _build_impact_summary(
self,
question: ClarificationQuestion,
answer_kind: AnswerKind,
answer_value: Optional[str],
) -> str:
if answer_kind == AnswerKind.SKIPPED:
return f"Clarification for {question.topic_ref} was skipped and remains unresolved."
if answer_kind == AnswerKind.EXPERT_REVIEW:
return f"Clarification for {question.topic_ref} was deferred for expert review."
return f"Clarification for {question.topic_ref} recorded as '{answer_value}'."
# [/DEF:ClarificationEngine._build_impact_summary:Function]
# [DEF:ClarificationEngine._upsert_clarification_finding:Function]
# @COMPLEXITY: 3
# @PURPOSE: Keep one finding per clarification topic aligned with answer outcome and unresolved visibility rules.
# @RELATION: [DEPENDS_ON] ->[ValidationFinding]
def _upsert_clarification_finding(
self,
session: DatasetReviewSession,
question: ClarificationQuestion,
answer_kind: AnswerKind,
answer_value: Optional[str],
) -> ValidationFinding:
caused_by_ref = f"clarification:{question.question_id}"
existing = next(
(
finding for finding in session.findings
if finding.area == FindingArea.CLARIFICATION and finding.caused_by_ref == caused_by_ref
),
None,
)
if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}:
resolution_state = ResolutionState.RESOLVED
resolved_at = datetime.utcnow()
message = f"Clarified '{question.topic_ref}' with answer '{answer_value}'."
elif answer_kind == AnswerKind.SKIPPED:
resolution_state = ResolutionState.SKIPPED
resolved_at = None
message = f"Clarification for '{question.topic_ref}' was skipped and still needs review."
else:
resolution_state = ResolutionState.EXPERT_REVIEW
resolved_at = None
message = f"Clarification for '{question.topic_ref}' requires expert review."
if existing is None:
existing = ValidationFinding(
finding_id=str(uuid.uuid4()),
session_id=session.session_id,
area=FindingArea.CLARIFICATION,
severity=FindingSeverity.WARNING,
code="CLARIFICATION_PENDING",
title="Clarification pending",
message=message,
resolution_state=resolution_state,
resolution_note=None,
caused_by_ref=caused_by_ref,
created_at=datetime.utcnow(),
resolved_at=resolved_at,
)
self.repository.db.add(existing)
session.findings.append(existing)
else:
existing.message = message
existing.resolution_state = resolution_state
existing.resolved_at = resolved_at
if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}:
existing.code = "CLARIFICATION_RESOLVED"
existing.title = "Clarification resolved"
elif answer_kind == AnswerKind.SKIPPED:
existing.code = "CLARIFICATION_SKIPPED"
existing.title = "Clarification skipped"
else:
existing.code = "CLARIFICATION_EXPERT_REVIEW"
existing.title = "Clarification requires expert review"
return existing
# [/DEF:ClarificationEngine._upsert_clarification_finding:Function]
# [DEF:ClarificationEngine._derive_readiness_state:Function]
# @COMPLEXITY: 3
# @PURPOSE: Recompute readiness after clarification mutation while preserving unresolved visibility semantics.
# @RELATION: [DEPENDS_ON] ->[ClarificationSession]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
def _derive_readiness_state(self, session: DatasetReviewSession) -> ReadinessState:
clarification_session = self._get_latest_clarification_session(session)
if clarification_session is None:
return session.readiness_state
if clarification_session.current_question_id:
return ReadinessState.CLARIFICATION_ACTIVE
if clarification_session.remaining_count > 0:
return ReadinessState.CLARIFICATION_NEEDED
return ReadinessState.REVIEW_READY
# [/DEF:ClarificationEngine._derive_readiness_state:Function]
# [DEF:ClarificationEngine._derive_recommended_action:Function]
# @COMPLEXITY: 2
# @PURPOSE: Recompute next-action guidance after clarification mutations.
def _derive_recommended_action(self, session: DatasetReviewSession) -> RecommendedAction:
clarification_session = self._get_latest_clarification_session(session)
if clarification_session is None:
return session.recommended_action
if clarification_session.current_question_id:
return RecommendedAction.ANSWER_NEXT_QUESTION
if clarification_session.remaining_count > 0:
return RecommendedAction.START_CLARIFICATION
return RecommendedAction.REVIEW_DOCUMENTATION
# [/DEF:ClarificationEngine._derive_recommended_action:Function]
# [/DEF:ClarificationEngine:Class]
# [/DEF:ClarificationEngine:Module]

View File

@@ -0,0 +1,156 @@
# [DEF:SessionEventLogger:Module]
# @COMPLEXITY: 4
# @SEMANTICS: dataset_review, audit, session_events, persistence, observability
# @PURPOSE: Persist explicit session mutation events for dataset-review audit trails without weakening ownership or approval invariants.
# @LAYER: Domain
# @RELATION: [DEPENDS_ON] ->[SessionEvent]
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: Caller provides an owned session scope and an authenticated actor identifier for each persisted mutation event.
# @POST: Every logged event is committed as an explicit, queryable audit record with deterministic event metadata.
# @SIDE_EFFECT: Inserts persisted session event rows and emits runtime belief-state logs for audit-sensitive mutations.
from __future__ import annotations
# [DEF:SessionEventLogger.imports:Block]
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.models.dataset_review import DatasetReviewSession, SessionEvent
# [/DEF:SessionEventLogger.imports:Block]
# [DEF:SessionEventPayload:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for one persisted dataset-review session audit event.
@dataclass(frozen=True)
class SessionEventPayload:
session_id: str
actor_user_id: str
event_type: str
event_summary: str
current_phase: Optional[str] = None
readiness_state: Optional[str] = None
event_details: Dict[str, Any] = field(default_factory=dict)
# [/DEF:SessionEventPayload:Class]
# [DEF:SessionEventLogger:Class]
# @COMPLEXITY: 4
# @PURPOSE: Persist explicit dataset-review session audit events with meaningful runtime reasoning logs.
# @RELATION: [DEPENDS_ON] ->[SessionEvent]
# @RELATION: [DEPENDS_ON] ->[sqlalchemy.orm.Session]
# @PRE: The database session is live and payload identifiers are non-empty.
# @POST: Returns the committed session event row with a stable identifier and stored detail payload.
# @SIDE_EFFECT: Writes one audit row to persistence and emits logger.reason/logger.reflect traces.
class SessionEventLogger:
# [DEF:SessionEventLogger.__init__:Function]
# @COMPLEXITY: 2
# @PURPOSE: Bind a live SQLAlchemy session to the session-event logger.
def __init__(self, db: Session) -> None:
self.db = db
# [/DEF:SessionEventLogger.__init__:Function]
# [DEF:SessionEventLogger.log_event:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist one explicit session event row for an owned dataset-review mutation.
# @RELATION: [DEPENDS_ON] ->[SessionEvent]
# @PRE: session_id, actor_user_id, event_type, and event_summary are non-empty.
# @POST: Returns the committed SessionEvent record with normalized detail payload.
# @SIDE_EFFECT: Inserts and commits one session_events row.
# @DATA_CONTRACT: Input[SessionEventPayload] -> Output[SessionEvent]
def log_event(self, payload: SessionEventPayload) -> SessionEvent:
with belief_scope("SessionEventLogger.log_event"):
session_id = str(payload.session_id or "").strip()
actor_user_id = str(payload.actor_user_id or "").strip()
event_type = str(payload.event_type or "").strip()
event_summary = str(payload.event_summary or "").strip()
if not session_id:
logger.explore("Session event logging rejected because session_id is empty")
raise ValueError("session_id must be non-empty")
if not actor_user_id:
logger.explore(
"Session event logging rejected because actor_user_id is empty",
extra={"session_id": session_id},
)
raise ValueError("actor_user_id must be non-empty")
if not event_type:
logger.explore(
"Session event logging rejected because event_type is empty",
extra={"session_id": session_id, "actor_user_id": actor_user_id},
)
raise ValueError("event_type must be non-empty")
if not event_summary:
logger.explore(
"Session event logging rejected because event_summary is empty",
extra={"session_id": session_id, "event_type": event_type},
)
raise ValueError("event_summary must be non-empty")
normalized_details = dict(payload.event_details or {})
logger.reason(
"Persisting explicit dataset-review session audit event",
extra={
"session_id": session_id,
"actor_user_id": actor_user_id,
"event_type": event_type,
"current_phase": payload.current_phase,
"readiness_state": payload.readiness_state,
},
)
event = SessionEvent(
session_id=session_id,
actor_user_id=actor_user_id,
event_type=event_type,
event_summary=event_summary,
current_phase=payload.current_phase,
readiness_state=payload.readiness_state,
event_details=normalized_details,
)
self.db.add(event)
self.db.commit()
self.db.refresh(event)
logger.reflect(
"Dataset-review session audit event persisted",
extra={
"session_id": session_id,
"session_event_id": event.session_event_id,
"event_type": event.event_type,
},
)
return event
# [/DEF:SessionEventLogger.log_event:Function]
# [DEF:SessionEventLogger.log_for_session:Function]
# @COMPLEXITY: 3
# @PURPOSE: Convenience wrapper for logging an event directly from a session aggregate root.
# @RELATION: [CALLS] ->[SessionEventLogger.log_event]
def log_for_session(
self,
session: DatasetReviewSession,
*,
actor_user_id: str,
event_type: str,
event_summary: str,
event_details: Optional[Dict[str, Any]] = None,
) -> SessionEvent:
return self.log_event(
SessionEventPayload(
session_id=session.session_id,
actor_user_id=actor_user_id,
event_type=event_type,
event_summary=event_summary,
current_phase=session.current_phase.value if session.current_phase else None,
readiness_state=session.readiness_state.value if session.readiness_state else None,
event_details=dict(event_details or {}),
)
)
# [/DEF:SessionEventLogger.log_for_session:Function]
# [/DEF:SessionEventLogger:Class]
# [/DEF:SessionEventLogger:Module]

View File

@@ -19,23 +19,36 @@ from __future__ import annotations
# [DEF:DatasetReviewOrchestrator.imports:Block]
from dataclasses import dataclass, field
from datetime import datetime
import hashlib
import json
from typing import Any, Dict, List, Optional
from src.core.config_manager import ConfigManager
from src.core.logger import belief_scope, logger
from src.core.task_manager import TaskManager
from src.core.utils.superset_compilation_adapter import (
PreviewCompilationPayload,
SqlLabLaunchPayload,
SupersetCompilationAdapter,
)
from src.core.utils.superset_context_extractor import (
SupersetContextExtractor,
SupersetParsedContext,
)
from src.models.auth import User
from src.models.dataset_review import (
ApprovalState,
BusinessSummarySource,
CompiledPreview,
ConfidenceState,
DatasetProfile,
DatasetReviewSession,
DatasetRunContext,
FindingArea,
FindingSeverity,
LaunchStatus,
PreviewStatus,
RecommendedAction,
ReadinessState,
ResolutionState,
@@ -47,6 +60,7 @@ from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
)
from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
from src.services.dataset_review.event_logger import SessionEventPayload
# [/DEF:DatasetReviewOrchestrator.imports:Block]
@@ -73,6 +87,48 @@ class StartSessionResult:
# [/DEF:StartSessionResult:Class]
# [DEF:PreparePreviewCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for compiling one Superset-backed session preview.
@dataclass
class PreparePreviewCommand:
user: User
session_id: str
# [/DEF:PreparePreviewCommand:Class]
# [DEF:PreparePreviewResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Result contract for one persisted compiled preview attempt.
@dataclass
class PreparePreviewResult:
session: DatasetReviewSession
preview: CompiledPreview
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:PreparePreviewResult:Class]
# [DEF:LaunchDatasetCommand:Class]
# @COMPLEXITY: 2
# @PURPOSE: Typed input contract for launching one dataset-review session into SQL Lab.
@dataclass
class LaunchDatasetCommand:
user: User
session_id: str
# [/DEF:LaunchDatasetCommand:Class]
# [DEF:LaunchDatasetResult:Class]
# @COMPLEXITY: 2
# @PURPOSE: Launch result carrying immutable run context and any gate blockers surfaced before launch.
@dataclass
class LaunchDatasetResult:
session: DatasetReviewSession
run_context: DatasetRunContext
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:LaunchDatasetResult:Class]
# [DEF:DatasetReviewOrchestrator:Class]
# @COMPLEXITY: 5
# @PURPOSE: Coordinate safe session startup while preserving cross-user isolation and explicit partial recovery.
@@ -197,6 +253,23 @@ class DatasetReviewOrchestrator:
parsed_context=parsed_context,
dataset_ref=dataset_ref,
)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=persisted_session.session_id,
actor_user_id=command.user.id,
event_type="session_started",
event_summary="Dataset review session shell created",
current_phase=persisted_session.current_phase.value,
readiness_state=persisted_session.readiness_state.value,
event_details={
"source_kind": persisted_session.source_kind,
"dataset_ref": persisted_session.dataset_ref,
"dataset_id": persisted_session.dataset_id,
"dashboard_id": persisted_session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery),
},
)
)
persisted_session = self.repository.save_profile_and_findings(
persisted_session.session_id,
command.user.id,
@@ -213,6 +286,17 @@ class DatasetReviewOrchestrator:
persisted_session.active_task_id = active_task_id
self.repository.db.commit()
self.repository.db.refresh(persisted_session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=persisted_session.session_id,
actor_user_id=command.user.id,
event_type="recovery_task_linked",
event_summary="Recovery task linked to dataset review session",
current_phase=persisted_session.current_phase.value,
readiness_state=persisted_session.readiness_state.value,
event_details={"task_id": active_task_id},
)
)
logger.reason(
"Linked recovery task to started dataset review session",
extra={"session_id": persisted_session.session_id, "task_id": active_task_id},
@@ -237,6 +321,238 @@ class DatasetReviewOrchestrator:
)
# [/DEF:DatasetReviewOrchestrator.start_session:Function]
# [DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# @COMPLEXITY: 4
# @PURPOSE: Assemble effective execution inputs and trigger Superset-side preview compilation.
# @RELATION: [CALLS] ->[SupersetCompilationAdapter.compile_preview]
# @PRE: all required variables have candidate values or explicitly accepted defaults.
# @POST: returns preview artifact in pending, ready, failed, or stale state.
# @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics.
# @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult]
def prepare_launch_preview(self, command: PreparePreviewCommand) -> PreparePreviewResult:
with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
if session is None or session.user_id != command.user.id:
logger.explore(
"Preview preparation rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
)
raise ValueError("Session not found")
if session.dataset_id is None:
raise ValueError("Preview requires a resolved dataset_id")
environment = self.config_manager.get_environment(session.environment_id)
if environment is None:
raise ValueError("Environment not found")
execution_snapshot = self._build_execution_snapshot(session)
preview_blockers = execution_snapshot["preview_blockers"]
if preview_blockers:
logger.explore(
"Preview preparation blocked by incomplete execution context",
extra={
"session_id": session.session_id,
"blocked_reasons": preview_blockers,
},
)
raise ValueError("Preview blocked: " + "; ".join(preview_blockers))
adapter = SupersetCompilationAdapter(environment)
preview = adapter.compile_preview(
PreviewCompilationPayload(
session_id=session.session_id,
dataset_id=session.dataset_id,
preview_fingerprint=execution_snapshot["preview_fingerprint"],
template_params=execution_snapshot["template_params"],
effective_filters=execution_snapshot["effective_filters"],
)
)
persisted_preview = self.repository.save_preview(
session.session_id,
command.user.id,
preview,
)
session.current_phase = SessionPhase.PREVIEW
session.last_activity_at = datetime.utcnow()
if persisted_preview.preview_status == PreviewStatus.READY:
launch_blockers = self._build_launch_blockers(
session=session,
execution_snapshot=execution_snapshot,
preview=persisted_preview,
)
if launch_blockers:
session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY
session.recommended_action = RecommendedAction.APPROVE_MAPPING
else:
session.readiness_state = ReadinessState.RUN_READY
session.recommended_action = RecommendedAction.LAUNCH_DATASET
else:
session.readiness_state = ReadinessState.PARTIALLY_READY
session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
self.repository.db.commit()
self.repository.db.refresh(session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=session.session_id,
actor_user_id=command.user.id,
event_type="preview_generated",
event_summary="Superset preview generation persisted",
current_phase=session.current_phase.value,
readiness_state=session.readiness_state.value,
event_details={
"preview_id": persisted_preview.preview_id,
"preview_status": persisted_preview.preview_status.value,
"preview_fingerprint": persisted_preview.preview_fingerprint,
},
)
)
logger.reflect(
"Superset preview preparation completed",
extra={
"session_id": session.session_id,
"preview_id": persisted_preview.preview_id,
"preview_status": persisted_preview.preview_status.value,
"preview_fingerprint": persisted_preview.preview_fingerprint,
},
)
return PreparePreviewResult(
session=session,
preview=persisted_preview,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# [DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# @COMPLEXITY: 5
# @PURPOSE: Start the approved dataset execution through SQL Lab and persist run context for audit/replay.
# @RELATION: [CALLS] ->[SupersetCompilationAdapter.create_sql_lab_session]
# @PRE: session is run-ready and compiled preview is current.
# @POST: returns persisted run context with SQL Lab session reference and launch outcome.
# @SIDE_EFFECT: creates SQL Lab execution session and audit snapshot.
# @DATA_CONTRACT: Input[LaunchDatasetCommand] -> Output[LaunchDatasetResult]
# @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs.
def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult:
with belief_scope("DatasetReviewOrchestrator.launch_dataset"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
if session is None or session.user_id != command.user.id:
logger.explore(
"Launch rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
)
raise ValueError("Session not found")
if session.dataset_id is None:
raise ValueError("Launch requires a resolved dataset_id")
environment = self.config_manager.get_environment(session.environment_id)
if environment is None:
raise ValueError("Environment not found")
execution_snapshot = self._build_execution_snapshot(session)
current_preview = self._get_latest_preview(session)
launch_blockers = self._build_launch_blockers(
session=session,
execution_snapshot=execution_snapshot,
preview=current_preview,
)
if launch_blockers:
logger.explore(
"Launch gate blocked dataset execution",
extra={
"session_id": session.session_id,
"blocked_reasons": launch_blockers,
},
)
raise ValueError("Launch blocked: " + "; ".join(launch_blockers))
adapter = SupersetCompilationAdapter(environment)
try:
sql_lab_session_ref = adapter.create_sql_lab_session(
SqlLabLaunchPayload(
session_id=session.session_id,
dataset_id=session.dataset_id,
preview_id=current_preview.preview_id,
compiled_sql=str(current_preview.compiled_sql or ""),
template_params=execution_snapshot["template_params"],
)
)
launch_status = LaunchStatus.STARTED
launch_error = None
except Exception as exc:
logger.explore(
"SQL Lab launch failed after passing gates",
extra={"session_id": session.session_id, "error": str(exc)},
)
sql_lab_session_ref = "unavailable"
launch_status = LaunchStatus.FAILED
launch_error = str(exc)
run_context = DatasetRunContext(
session_id=session.session_id,
dataset_ref=session.dataset_ref,
environment_id=session.environment_id,
preview_id=current_preview.preview_id,
sql_lab_session_ref=sql_lab_session_ref,
effective_filters=execution_snapshot["effective_filters"],
template_params=execution_snapshot["template_params"],
approved_mapping_ids=execution_snapshot["approved_mapping_ids"],
semantic_decision_refs=execution_snapshot["semantic_decision_refs"],
open_warning_refs=execution_snapshot["open_warning_refs"],
launch_status=launch_status,
launch_error=launch_error,
)
persisted_run_context = self.repository.save_run_context(
session.session_id,
command.user.id,
run_context,
)
session.current_phase = SessionPhase.LAUNCH
session.last_activity_at = datetime.utcnow()
if launch_status == LaunchStatus.FAILED:
session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY
session.recommended_action = RecommendedAction.LAUNCH_DATASET
else:
session.readiness_state = ReadinessState.RUN_IN_PROGRESS
session.recommended_action = RecommendedAction.EXPORT_OUTPUTS
self.repository.db.commit()
self.repository.db.refresh(session)
self.repository.event_logger.log_event(
SessionEventPayload(
session_id=session.session_id,
actor_user_id=command.user.id,
event_type="dataset_launch_requested",
event_summary="Dataset launch handoff persisted",
current_phase=session.current_phase.value,
readiness_state=session.readiness_state.value,
event_details={
"run_context_id": persisted_run_context.run_context_id,
"launch_status": persisted_run_context.launch_status.value,
"preview_id": persisted_run_context.preview_id,
"sql_lab_session_ref": persisted_run_context.sql_lab_session_ref,
},
)
)
logger.reflect(
"Dataset launch orchestration completed with audited run context",
extra={
"session_id": session.session_id,
"run_context_id": persisted_run_context.run_context_id,
"launch_status": persisted_run_context.launch_status.value,
"preview_id": persisted_run_context.preview_id,
},
)
return LaunchDatasetResult(
session=session,
run_context=persisted_run_context,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# @COMPLEXITY: 3
# @PURPOSE: Normalize dataset-selection payload into canonical session references.
@@ -328,6 +644,158 @@ class DatasetReviewOrchestrator:
return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build effective filters, template params, approvals, and fingerprint for preview and launch gating.
# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession]
# @PRE: Session aggregate includes imported filters, template variables, and current execution mappings.
# @POST: returns deterministic execution snapshot for current session state without mutating persistence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]]
def _build_execution_snapshot(self, session: DatasetReviewSession) -> Dict[str, Any]:
filter_lookup = {item.filter_id: item for item in session.imported_filters}
variable_lookup = {item.variable_id: item for item in session.template_variables}
effective_filters: List[Dict[str, Any]] = []
template_params: Dict[str, Any] = {}
approved_mapping_ids: List[str] = []
open_warning_refs: List[str] = []
preview_blockers: List[str] = []
for mapping in session.execution_mappings:
imported_filter = filter_lookup.get(mapping.filter_id)
template_variable = variable_lookup.get(mapping.variable_id)
if imported_filter is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter")
continue
if template_variable is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_variable")
continue
effective_value = mapping.effective_value
if effective_value is None:
effective_value = imported_filter.normalized_value
if effective_value is None:
effective_value = imported_filter.raw_value
if effective_value is None:
effective_value = template_variable.default_value
if effective_value is None and template_variable.is_required:
preview_blockers.append(f"variable:{template_variable.variable_name}:missing_required_value")
continue
effective_filters.append(
{
"mapping_id": mapping.mapping_id,
"filter_id": imported_filter.filter_id,
"filter_name": imported_filter.filter_name,
"variable_id": template_variable.variable_id,
"variable_name": template_variable.variable_name,
"effective_value": effective_value,
"raw_input_value": mapping.raw_input_value,
}
)
template_params[template_variable.variable_name] = effective_value
if mapping.approval_state == ApprovalState.APPROVED:
approved_mapping_ids.append(mapping.mapping_id)
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
open_warning_refs.append(mapping.mapping_id)
mapped_variable_ids = {mapping.variable_id for mapping in session.execution_mappings}
for variable in session.template_variables:
if variable.variable_id in mapped_variable_ids:
continue
if variable.default_value is not None:
template_params[variable.variable_name] = variable.default_value
continue
if variable.is_required:
preview_blockers.append(f"variable:{variable.variable_name}:unmapped")
semantic_decision_refs = [
field.field_id
for field in session.semantic_fields
if field.is_locked or not field.needs_review or field.provenance.value != "unresolved"
]
preview_fingerprint = self._compute_preview_fingerprint(
{
"dataset_id": session.dataset_id,
"template_params": template_params,
"effective_filters": effective_filters,
}
)
return {
"effective_filters": effective_filters,
"template_params": template_params,
"approved_mapping_ids": sorted(approved_mapping_ids),
"semantic_decision_refs": sorted(semantic_decision_refs),
"open_warning_refs": sorted(open_warning_refs),
"preview_blockers": sorted(set(preview_blockers)),
"preview_fingerprint": preview_fingerprint,
}
# [/DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# [DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# @COMPLEXITY: 4
# @PURPOSE: Enforce launch gates from findings, approvals, and current preview truth.
# @RELATION: [DEPENDS_ON] ->[CompiledPreview]
# @PRE: execution_snapshot was computed from current session state and preview is the latest persisted preview or None.
# @POST: returns explicit blocker codes for every unmet launch invariant.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession,Dict[str,Any],CompiledPreview|None] -> Output[List[str]]
def _build_launch_blockers(
self,
session: DatasetReviewSession,
execution_snapshot: Dict[str, Any],
preview: Optional[CompiledPreview],
) -> List[str]:
blockers = list(execution_snapshot["preview_blockers"])
for finding in session.findings:
if (
finding.severity == FindingSeverity.BLOCKING
and finding.resolution_state not in {ResolutionState.RESOLVED, ResolutionState.APPROVED}
):
blockers.append(f"finding:{finding.code}:blocking")
for mapping in session.execution_mappings:
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
blockers.append(f"mapping:{mapping.mapping_id}:approval_required")
if preview is None:
blockers.append("preview:missing")
else:
if preview.preview_status != PreviewStatus.READY:
blockers.append(f"preview:{preview.preview_status.value}")
if preview.preview_fingerprint != execution_snapshot["preview_fingerprint"]:
blockers.append("preview:fingerprint_mismatch")
return sorted(set(blockers))
# [/DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# [DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve the current latest preview snapshot for one session aggregate.
def _get_latest_preview(self, session: DatasetReviewSession) -> Optional[CompiledPreview]:
if not session.previews:
return None
if session.last_preview_id:
for preview in session.previews:
if preview.preview_id == session.last_preview_id:
return preview
return sorted(
session.previews,
key=lambda item: (item.created_at or datetime.min, item.preview_id),
reverse=True,
)[0]
# [/DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# [DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# @COMPLEXITY: 2
# @PURPOSE: Produce deterministic execution fingerprint for preview truth and staleness checks.
def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str:
serialized = json.dumps(payload, sort_keys=True, default=str)
return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
# [/DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# @COMPLEXITY: 4
# @PURPOSE: Link session start to observable async recovery when task infrastructure is available.

View File

@@ -16,14 +16,19 @@ from typing import Optional, List
from sqlalchemy import or_
from sqlalchemy.orm import Session, joinedload
from src.models.dataset_review import (
ClarificationQuestion,
ClarificationSession,
DatasetReviewSession,
DatasetProfile,
ValidationFinding,
CompiledPreview,
DatasetRunContext,
SessionCollaborator
SemanticFieldEntry,
SessionCollaborator,
SessionEvent,
)
from src.core.logger import belief_scope
from src.core.logger import belief_scope, logger
from src.services.dataset_review.event_logger import SessionEventLogger
# [DEF:SessionRepo:Class]
# @COMPLEXITY: 4
@@ -37,16 +42,46 @@ from src.core.logger import belief_scope
# @SIDE_EFFECT: mutates and queries the persistence layer through the injected database session.
# @DATA_CONTRACT: Input[OwnedSessionQuery|SessionMutation] -> Output[PersistedSessionAggregate|PersistedChildRecord]
class DatasetReviewSessionRepository:
"""
@PURPOSE: Persist and retrieve dataset review session aggregates.
@INVARIANT: ownership_scope -> All operations must respect the session owner's user_id.
"""
# [DEF:init_repo:Function]
# @COMPLEXITY: 2
# @PURPOSE: Bind one live SQLAlchemy session to the repository instance.
def __init__(self, db: Session):
self.db = db
self.event_logger = SessionEventLogger(db)
# [/DEF:init_repo:Function]
# [DEF:get_owned_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Resolve one owner-scoped dataset review session for mutation paths without leaking foreign-session state.
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @PRE: session_id and user_id are non-empty identifiers from the authenticated ownership scope.
# @POST: returns the owned session or raises a deterministic access error.
# @SIDE_EFFECT: reads one session row from the current database transaction.
# @DATA_CONTRACT: Input[OwnedSessionQuery] -> Output[DatasetReviewSession|ValueError]
def _get_owned_session(self, session_id: str, user_id: str) -> DatasetReviewSession:
with belief_scope("DatasetReviewSessionRepository.get_owned_session"):
logger.reason(
"Resolving owner-scoped dataset review session for mutation path",
extra={"session_id": session_id, "user_id": user_id},
)
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
DatasetReviewSession.user_id == user_id,
).first()
if not session:
logger.explore(
"Owner-scoped dataset review session lookup failed",
extra={"session_id": session_id, "user_id": user_id},
)
raise ValueError("Session not found or access denied")
logger.reflect(
"Owner-scoped dataset review session resolved",
extra={"session_id": session.session_id, "user_id": session.user_id},
)
return session
# [/DEF:get_owned_session:Function]
# [DEF:create_sess:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist an initial dataset review session shell.
@@ -57,9 +92,17 @@ class DatasetReviewSessionRepository:
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[DatasetReviewSession]
def create_session(self, session: DatasetReviewSession) -> DatasetReviewSession:
with belief_scope("DatasetReviewSessionRepository.create_session"):
logger.reason(
"Persisting dataset review session shell",
extra={"user_id": session.user_id, "environment_id": session.environment_id},
)
self.db.add(session)
self.db.commit()
self.db.refresh(session)
logger.reflect(
"Dataset review session shell persisted with stable identifier",
extra={"session_id": session.session_id, "user_id": session.user_id},
)
return session
# [/DEF:create_sess:Function]
@@ -69,25 +112,27 @@ class DatasetReviewSessionRepository:
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
# @RELATION: [DEPENDS_ON] -> [SessionCollaborator]
def load_session_detail(self, session_id: str, user_id: str) -> Optional[DatasetReviewSession]:
"""
@PRE: user_id must match session owner or authorized collaborator.
"""
with belief_scope("DatasetReviewSessionRepository.load_session_detail"):
# Check if user is owner or collaborator
return self.db.query(DatasetReviewSession)\
logger.reason(
"Loading dataset review session detail for owner-or-collaborator scope",
extra={"session_id": session_id, "user_id": user_id},
)
session = self.db.query(DatasetReviewSession)\
.outerjoin(SessionCollaborator, DatasetReviewSession.session_id == SessionCollaborator.session_id)\
.options(
joinedload(DatasetReviewSession.profile),
joinedload(DatasetReviewSession.findings),
joinedload(DatasetReviewSession.collaborators),
joinedload(DatasetReviewSession.semantic_sources),
joinedload(DatasetReviewSession.semantic_fields),
joinedload(DatasetReviewSession.semantic_fields).joinedload(SemanticFieldEntry.candidates),
joinedload(DatasetReviewSession.imported_filters),
joinedload(DatasetReviewSession.template_variables),
joinedload(DatasetReviewSession.execution_mappings),
joinedload(DatasetReviewSession.clarification_sessions),
joinedload(DatasetReviewSession.clarification_sessions).joinedload(ClarificationSession.questions).joinedload(ClarificationQuestion.options),
joinedload(DatasetReviewSession.clarification_sessions).joinedload(ClarificationSession.questions).joinedload(ClarificationQuestion.answer),
joinedload(DatasetReviewSession.previews),
joinedload(DatasetReviewSession.run_contexts)
joinedload(DatasetReviewSession.run_contexts),
joinedload(DatasetReviewSession.events)
)\
.filter(DatasetReviewSession.session_id == session_id)\
.filter(
@@ -97,6 +142,15 @@ class DatasetReviewSessionRepository:
)
)\
.first()
logger.reflect(
"Dataset review session detail lookup completed",
extra={
"session_id": session_id,
"user_id": user_id,
"found": bool(session),
},
)
return session
# [/DEF:load_detail:Function]
# [DEF:save_prof_find:Function]
@@ -111,32 +165,40 @@ class DatasetReviewSessionRepository:
# @DATA_CONTRACT: Input[ProfileAndFindingsMutation] -> Output[DatasetReviewSession]
def save_profile_and_findings(self, session_id: str, user_id: str, profile: DatasetProfile, findings: List[ValidationFinding]) -> DatasetReviewSession:
with belief_scope("DatasetReviewSessionRepository.save_profile_and_findings"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
DatasetReviewSession.user_id == user_id
).first()
if not session:
raise ValueError("Session not found or access denied")
session = self._get_owned_session(session_id, user_id)
logger.reason(
"Persisting dataset profile and replacing validation findings",
extra={
"session_id": session_id,
"user_id": user_id,
"has_profile": bool(profile),
"findings_count": len(findings),
},
)
if profile:
# Ensure we update existing profile by session_id if it exists
existing_profile = self.db.query(DatasetProfile).filter_by(session_id=session_id).first()
if existing_profile:
profile.profile_id = existing_profile.profile_id
self.db.merge(profile)
# Remove old findings for this session to avoid stale data
self.db.query(ValidationFinding).filter(
ValidationFinding.session_id == session_id
).delete()
# Add new findings
for finding in findings:
finding.session_id = session_id
self.db.add(finding)
self.db.commit()
logger.reflect(
"Dataset profile and validation findings committed",
extra={
"session_id": session.session_id,
"user_id": user_id,
"findings_count": len(findings),
},
)
return self.load_session_detail(session_id, user_id)
# [/DEF:save_prof_find:Function]
@@ -151,15 +213,12 @@ class DatasetReviewSessionRepository:
# @DATA_CONTRACT: Input[PreviewMutation] -> Output[CompiledPreview]
def save_preview(self, session_id: str, user_id: str, preview: CompiledPreview) -> CompiledPreview:
with belief_scope("DatasetReviewSessionRepository.save_preview"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
DatasetReviewSession.user_id == user_id
).first()
session = self._get_owned_session(session_id, user_id)
logger.reason(
"Persisting compiled preview and staling previous preview snapshots",
extra={"session_id": session_id, "user_id": user_id},
)
if not session:
raise ValueError("Session not found or access denied")
# Mark existing previews for this session as stale if they are not the new one
self.db.query(CompiledPreview).filter(
CompiledPreview.session_id == session_id
).update({"preview_status": "stale"})
@@ -170,6 +229,14 @@ class DatasetReviewSessionRepository:
self.db.commit()
self.db.refresh(preview)
logger.reflect(
"Compiled preview committed as latest session preview",
extra={
"session_id": session.session_id,
"preview_id": preview.preview_id,
"user_id": user_id,
},
)
return preview
# [/DEF:save_prev:Function]
@@ -184,13 +251,11 @@ class DatasetReviewSessionRepository:
# @DATA_CONTRACT: Input[RunContextMutation] -> Output[DatasetRunContext]
def save_run_context(self, session_id: str, user_id: str, run_context: DatasetRunContext) -> DatasetRunContext:
with belief_scope("DatasetReviewSessionRepository.save_run_context"):
session = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.session_id == session_id,
DatasetReviewSession.user_id == user_id
).first()
if not session:
raise ValueError("Session not found or access denied")
session = self._get_owned_session(session_id, user_id)
logger.reason(
"Persisting dataset run context audit snapshot",
extra={"session_id": session_id, "user_id": user_id},
)
self.db.add(run_context)
self.db.flush()
@@ -198,6 +263,14 @@ class DatasetReviewSessionRepository:
self.db.commit()
self.db.refresh(run_context)
logger.reflect(
"Dataset run context committed as latest launch snapshot",
extra={
"session_id": session.session_id,
"run_context_id": run_context.run_context_id,
"user_id": user_id,
},
)
return run_context
# [/DEF:save_run_ctx:Function]
@@ -207,9 +280,18 @@ class DatasetReviewSessionRepository:
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
def list_sessions_for_user(self, user_id: str) -> List[DatasetReviewSession]:
with belief_scope("DatasetReviewSessionRepository.list_sessions_for_user"):
return self.db.query(DatasetReviewSession).filter(
logger.reason(
"Listing dataset review sessions for owner scope",
extra={"user_id": user_id},
)
sessions = self.db.query(DatasetReviewSession).filter(
DatasetReviewSession.user_id == user_id
).order_by(DatasetReviewSession.updated_at.desc()).all()
logger.reflect(
"Dataset review session list assembled",
extra={"user_id": user_id, "session_count": len(sessions)},
)
return sessions
# [/DEF:list_user_sess:Function]
# [/DEF:SessionRepo:Class]

View File

@@ -24,6 +24,7 @@ from src.models.dataset_review import (
CandidateMatchType,
CandidateStatus,
FieldProvenance,
SemanticSource,
)
# [/DEF:SemanticSourceResolver.imports:Block]
@@ -259,6 +260,63 @@ class SemanticSourceResolver:
return merged
# [/DEF:SemanticSourceResolver.apply_field_decision:Function]
# [DEF:SemanticSourceResolver.propagate_source_version_update:Function]
# @COMPLEXITY: 4
# @PURPOSE: Propagate a semantic source version change to unlocked field entries without silently overwriting manual or locked values.
# @RELATION: [DEPENDS_ON] ->[SemanticSource]
# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry]
# @PRE: source is persisted and fields belong to the same session aggregate.
# @POST: unlocked fields linked to the source carry the new source version and are marked reviewable; manual or locked fields keep their active values untouched.
# @SIDE_EFFECT: mutates in-memory field state for the caller to persist.
# @DATA_CONTRACT: Input[SemanticSource,List[SemanticFieldEntry]] -> Output[Dict[str,int]]
def propagate_source_version_update(
self,
source: SemanticSource,
fields: Iterable[Any],
) -> Dict[str, int]:
with belief_scope("SemanticSourceResolver.propagate_source_version_update"):
source_id = str(source.source_id or "").strip()
source_version = str(source.source_version or "").strip()
if not source_id or not source_version:
logger.explore(
"Semantic source version propagation rejected due to incomplete source metadata",
extra={"source_id": source_id, "source_version": source_version},
)
raise ValueError("Semantic source must provide source_id and source_version")
propagated = 0
preserved_locked = 0
untouched = 0
for field in fields:
if str(getattr(field, "source_id", "") or "").strip() != source_id:
untouched += 1
continue
if bool(getattr(field, "is_locked", False)) or getattr(field, "provenance", None) == FieldProvenance.MANUAL_OVERRIDE:
preserved_locked += 1
continue
field.source_version = source_version
field.needs_review = True
field.has_conflict = bool(getattr(field, "has_conflict", False))
propagated += 1
logger.reflect(
"Semantic source version propagation completed",
extra={
"source_id": source_id,
"source_version": source_version,
"propagated": propagated,
"preserved_locked": preserved_locked,
"untouched": untouched,
},
)
return {
"propagated": propagated,
"preserved_locked": preserved_locked,
"untouched": untouched,
}
# [/DEF:SemanticSourceResolver.propagate_source_version_update:Function]
# [DEF:SemanticSourceResolver._normalize_dictionary_row:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize one dictionary row into a consistent lookup structure.