# Files: ss-tools/backend/src/models/dataset_review.py
# 2026-03-20 17:20:24 +03:00 | 683 lines | 28 KiB | Python

# [DEF:DatasetReviewModels:Module]
#
# @TIER: STANDARD
# @COMPLEXITY: 3
# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> [AuthModels]
# @RELATION: DEPENDS_ON -> [MappingModels]
#
# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user.
# [SECTION: IMPORTS]
import uuid
import enum
from datetime import datetime
from typing import List, Optional
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey, Text, JSON, Float, Enum as SQLEnum, Table
from sqlalchemy.orm import relationship
from .mapping import Base
# [/SECTION]
# [DEF:SessionStatus:Class]
class SessionStatus(str, enum.Enum):
    """Status values stored in ``DatasetReviewSession.status``."""

    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    ARCHIVED = "archived"
    CANCELLED = "cancelled"
# [/DEF:SessionStatus:Class]
# [DEF:SessionPhase:Class]
class SessionPhase(str, enum.Enum):
    """Phase values stored in ``DatasetReviewSession.current_phase``."""

    INTAKE = "intake"
    RECOVERY = "recovery"
    REVIEW = "review"
    SEMANTIC_REVIEW = "semantic_review"
    CLARIFICATION = "clarification"
    MAPPING_REVIEW = "mapping_review"
    PREVIEW = "preview"
    LAUNCH = "launch"
    POST_RUN = "post_run"
# [/DEF:SessionPhase:Class]
# [DEF:ReadinessState:Class]
class ReadinessState(str, enum.Enum):
    """Readiness values stored in ``DatasetReviewSession.readiness_state``."""

    EMPTY = "empty"
    IMPORTING = "importing"
    REVIEW_READY = "review_ready"
    SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed"
    CLARIFICATION_NEEDED = "clarification_needed"
    CLARIFICATION_ACTIVE = "clarification_active"
    MAPPING_REVIEW_NEEDED = "mapping_review_needed"
    COMPILED_PREVIEW_READY = "compiled_preview_ready"
    PARTIALLY_READY = "partially_ready"
    RUN_READY = "run_ready"
    RUN_IN_PROGRESS = "run_in_progress"
    COMPLETED = "completed"
    RECOVERY_REQUIRED = "recovery_required"
# [/DEF:ReadinessState:Class]
# [DEF:RecommendedAction:Class]
class RecommendedAction(str, enum.Enum):
    """Action values stored in ``DatasetReviewSession.recommended_action``."""

    IMPORT_FROM_SUPERSET = "import_from_superset"
    REVIEW_DOCUMENTATION = "review_documentation"
    APPLY_SEMANTIC_SOURCE = "apply_semantic_source"
    START_CLARIFICATION = "start_clarification"
    ANSWER_NEXT_QUESTION = "answer_next_question"
    APPROVE_MAPPING = "approve_mapping"
    GENERATE_SQL_PREVIEW = "generate_sql_preview"
    COMPLETE_REQUIRED_VALUES = "complete_required_values"
    LAUNCH_DATASET = "launch_dataset"
    RESUME_SESSION = "resume_session"
    EXPORT_OUTPUTS = "export_outputs"
# [/DEF:RecommendedAction:Class]
# [DEF:SessionCollaboratorRole:Class]
class SessionCollaboratorRole(str, enum.Enum):
    """Role values stored in ``SessionCollaborator.role``."""

    VIEWER = "viewer"
    REVIEWER = "reviewer"
    APPROVER = "approver"
# [/DEF:SessionCollaboratorRole:Class]
# [DEF:SessionCollaborator:Class]
class SessionCollaborator(Base):
    """Row that attaches a user to a dataset review session with a role."""

    __tablename__ = "session_collaborators"

    # Identity and the two foreign keys this row joins.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
    user_id = Column(String, ForeignKey("users.id"), nullable=False)

    role = Column(SQLEnum(SessionCollaboratorRole), nullable=False)
    added_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # ``session`` mirrors DatasetReviewSession.collaborators; ``user`` is one-directional.
    session = relationship("DatasetReviewSession", back_populates="collaborators")
    user = relationship("User")
# [/DEF:SessionCollaborator:Class]
# [DEF:DatasetReviewSession:Class]
class DatasetReviewSession(Base):
    """Root record of the dataset review flow.

    Stores the owning user, the target environment, the source reference and
    the current workflow state. Every relationship below except ``owner`` is
    declared with ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "dataset_review_sessions"

    # Identity and scoping.
    session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    environment_id = Column(String, ForeignKey("environments.id"), nullable=False)

    # Source reference.
    source_kind = Column(String, nullable=False)  # superset_link, dataset_selection
    source_input = Column(String, nullable=False)
    dataset_ref = Column(String, nullable=False)
    dataset_id = Column(Integer, nullable=True)
    dashboard_id = Column(Integer, nullable=True)

    # Workflow state, each with an initial default.
    readiness_state = Column(SQLEnum(ReadinessState), nullable=False, default=ReadinessState.EMPTY)
    recommended_action = Column(
        SQLEnum(RecommendedAction),
        nullable=False,
        default=RecommendedAction.IMPORT_FROM_SUPERSET,
    )
    status = Column(SQLEnum(SessionStatus), nullable=False, default=SessionStatus.ACTIVE)
    current_phase = Column(SQLEnum(SessionPhase), nullable=False, default=SessionPhase.INTAKE)

    # Plain string references; no ForeignKey is declared for these.
    active_task_id = Column(String, nullable=True)
    last_preview_id = Column(String, nullable=True)
    last_run_context_id = Column(String, nullable=True)

    # Timestamps. Only ``updated_at`` has an ``onupdate`` hook.
    # NOTE(review): datetime.utcnow returns naive datetimes and is deprecated
    # since Python 3.12 - consider a timezone-aware replacement.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
    last_activity_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    closed_at = Column(DateTime, nullable=True)

    owner = relationship("User")

    # Children owned by the session.
    collaborators = relationship(
        "SessionCollaborator", back_populates="session", cascade="all, delete-orphan"
    )
    # One-to-one: ``uselist=False``.
    profile = relationship(
        "DatasetProfile", back_populates="session", uselist=False, cascade="all, delete-orphan"
    )
    findings = relationship(
        "ValidationFinding", back_populates="session", cascade="all, delete-orphan"
    )
    semantic_sources = relationship(
        "SemanticSource", back_populates="session", cascade="all, delete-orphan"
    )
    semantic_fields = relationship(
        "SemanticFieldEntry", back_populates="session", cascade="all, delete-orphan"
    )
    imported_filters = relationship(
        "ImportedFilter", back_populates="session", cascade="all, delete-orphan"
    )
    template_variables = relationship(
        "TemplateVariable", back_populates="session", cascade="all, delete-orphan"
    )
    execution_mappings = relationship(
        "ExecutionMapping", back_populates="session", cascade="all, delete-orphan"
    )
    clarification_sessions = relationship(
        "ClarificationSession", back_populates="session", cascade="all, delete-orphan"
    )
    previews = relationship(
        "CompiledPreview", back_populates="session", cascade="all, delete-orphan"
    )
    run_contexts = relationship(
        "DatasetRunContext", back_populates="session", cascade="all, delete-orphan"
    )
    export_artifacts = relationship(
        "ExportArtifact", back_populates="session", cascade="all, delete-orphan"
    )
    events = relationship(
        "SessionEvent", back_populates="session", cascade="all, delete-orphan"
    )
# [/DEF:DatasetReviewSession:Class]
# [DEF:BusinessSummarySource:Class]
class BusinessSummarySource(str, enum.Enum):
    """Origin values stored in ``DatasetProfile.business_summary_source``."""

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    MANUAL_OVERRIDE = "manual_override"
# [/DEF:BusinessSummarySource:Class]
# [DEF:ConfidenceState:Class]
class ConfidenceState(str, enum.Enum):
    """Confidence values stored in ``DatasetProfile.confidence_state``."""

    CONFIRMED = "confirmed"
    MOSTLY_CONFIRMED = "mostly_confirmed"
    MIXED = "mixed"
    LOW_CONFIDENCE = "low_confidence"
    UNRESOLVED = "unresolved"
# [/DEF:ConfidenceState:Class]
# [DEF:DatasetProfile:Class]
class DatasetProfile(Base):
    """Descriptive profile of the dataset under review.

    ``session_id`` is unique, so a session has at most one profile row.
    """

    __tablename__ = "dataset_profiles"

    profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
        unique=True,
    )

    # Dataset naming.
    dataset_name = Column(String, nullable=False)
    schema_name = Column(String, nullable=True)
    database_name = Column(String, nullable=True)

    # Summary text and where it came from.
    business_summary = Column(Text, nullable=False)
    business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False)
    description = Column(Text, nullable=True)
    dataset_type = Column(String, nullable=True)  # table, virtual, sqllab_view, unknown
    is_sqllab_view = Column(Boolean, nullable=False, default=False)

    # Quality indicators and flags.
    completeness_score = Column(Float, nullable=True)
    confidence_state = Column(SQLEnum(ConfidenceState), nullable=False)
    has_blocking_findings = Column(Boolean, nullable=False, default=False)
    has_warning_findings = Column(Boolean, nullable=False, default=False)
    manual_summary_locked = Column(Boolean, nullable=False, default=False)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="profile")
# [/DEF:DatasetProfile:Class]
# [DEF:FindingArea:Class]
class FindingArea(str, enum.Enum):
    """Area values stored in ``ValidationFinding.area``."""

    SOURCE_INTAKE = "source_intake"
    DATASET_PROFILE = "dataset_profile"
    SEMANTIC_ENRICHMENT = "semantic_enrichment"
    CLARIFICATION = "clarification"
    FILTER_RECOVERY = "filter_recovery"
    TEMPLATE_MAPPING = "template_mapping"
    COMPILED_PREVIEW = "compiled_preview"
    LAUNCH = "launch"
    AUDIT = "audit"
# [/DEF:FindingArea:Class]
# [DEF:FindingSeverity:Class]
class FindingSeverity(str, enum.Enum):
    """Severity values stored in ``ValidationFinding.severity``."""

    BLOCKING = "blocking"
    WARNING = "warning"
    INFORMATIONAL = "informational"
# [/DEF:FindingSeverity:Class]
# [DEF:ResolutionState:Class]
class ResolutionState(str, enum.Enum):
    """Resolution values stored in ``ValidationFinding.resolution_state``."""

    OPEN = "open"
    RESOLVED = "resolved"
    APPROVED = "approved"
    SKIPPED = "skipped"
    DEFERRED = "deferred"
    EXPERT_REVIEW = "expert_review"
# [/DEF:ResolutionState:Class]
# [DEF:ValidationFinding:Class]
class ValidationFinding(Base):
    """A finding recorded against a dataset review session."""

    __tablename__ = "validation_findings"

    finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    # Classification.
    area = Column(SQLEnum(FindingArea), nullable=False)
    severity = Column(SQLEnum(FindingSeverity), nullable=False)
    code = Column(String, nullable=False)

    # Text content.
    title = Column(String, nullable=False)
    message = Column(Text, nullable=False)

    # Resolution tracking; new rows default to OPEN.
    resolution_state = Column(SQLEnum(ResolutionState), nullable=False, default=ResolutionState.OPEN)
    resolution_note = Column(Text, nullable=True)
    caused_by_ref = Column(String, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    resolved_at = Column(DateTime, nullable=True)

    session = relationship("DatasetReviewSession", back_populates="findings")
# [/DEF:ValidationFinding:Class]
# [DEF:SemanticSourceType:Class]
class SemanticSourceType(str, enum.Enum):
    """Type values stored in ``SemanticSource.source_type``."""

    UPLOADED_FILE = "uploaded_file"
    CONNECTED_DICTIONARY = "connected_dictionary"
    REFERENCE_DATASET = "reference_dataset"
    NEIGHBOR_DATASET = "neighbor_dataset"
    AI_GENERATED = "ai_generated"
# [/DEF:SemanticSourceType:Class]
# [DEF:TrustLevel:Class]
class TrustLevel(str, enum.Enum):
    """Trust values stored in ``SemanticSource.trust_level``."""

    TRUSTED = "trusted"
    RECOMMENDED = "recommended"
    CANDIDATE = "candidate"
    GENERATED = "generated"
# [/DEF:TrustLevel:Class]
# [DEF:SemanticSourceStatus:Class]
class SemanticSourceStatus(str, enum.Enum):
    """Status values stored in ``SemanticSource.status``."""

    AVAILABLE = "available"
    SELECTED = "selected"
    APPLIED = "applied"
    REJECTED = "rejected"
    PARTIAL = "partial"
    FAILED = "failed"
# [/DEF:SemanticSourceStatus:Class]
# [DEF:SemanticSource:Class]
class SemanticSource(Base):
    """A semantic source attached to a dataset review session."""

    __tablename__ = "semantic_sources"

    source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    # What the source is and how it is labelled.
    source_type = Column(SQLEnum(SemanticSourceType), nullable=False)
    source_ref = Column(String, nullable=False)
    source_version = Column(String, nullable=False)
    display_name = Column(String, nullable=False)

    # Ranking inputs and status; new rows default to AVAILABLE.
    trust_level = Column(SQLEnum(TrustLevel), nullable=False)
    schema_overlap_score = Column(Float, nullable=True)
    status = Column(
        SQLEnum(SemanticSourceStatus),
        nullable=False,
        default=SemanticSourceStatus.AVAILABLE,
    )

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="semantic_sources")
# [/DEF:SemanticSource:Class]
# [DEF:FieldKind:Class]
class FieldKind(str, enum.Enum):
    """Kind values stored in ``SemanticFieldEntry.field_kind``."""

    COLUMN = "column"
    METRIC = "metric"
    FILTER_DIMENSION = "filter_dimension"
    PARAMETER = "parameter"
# [/DEF:FieldKind:Class]
# [DEF:FieldProvenance:Class]
class FieldProvenance(str, enum.Enum):
    """Provenance values stored in ``SemanticFieldEntry.provenance``."""

    DICTIONARY_EXACT = "dictionary_exact"
    REFERENCE_IMPORTED = "reference_imported"
    FUZZY_INFERRED = "fuzzy_inferred"
    AI_GENERATED = "ai_generated"
    MANUAL_OVERRIDE = "manual_override"
    UNRESOLVED = "unresolved"
# [/DEF:FieldProvenance:Class]
# [DEF:SemanticFieldEntry:Class]
class SemanticFieldEntry(Base):
    """Semantic metadata for one named field of the session's dataset.

    Owns its ``SemanticCandidate`` rows via ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "semantic_field_entries"

    field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    # Field identity.
    field_name = Column(String, nullable=False)
    field_kind = Column(SQLEnum(FieldKind), nullable=False)

    # Current semantic values.
    verbose_name = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    display_format = Column(String, nullable=True)

    # Provenance; ``source_id`` is a plain string with no ForeignKey declared.
    provenance = Column(SQLEnum(FieldProvenance), nullable=False, default=FieldProvenance.UNRESOLVED)
    source_id = Column(String, nullable=True)
    source_version = Column(String, nullable=True)
    confidence_rank = Column(Integer, nullable=True)

    # Review flags; note ``needs_review`` defaults to True.
    is_locked = Column(Boolean, nullable=False, default=False)
    has_conflict = Column(Boolean, nullable=False, default=False)
    needs_review = Column(Boolean, nullable=False, default=True)
    last_changed_by = Column(String, nullable=False)  # system, user, agent
    user_feedback = Column(String, nullable=True)  # up, down, null

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="semantic_fields")
    candidates = relationship(
        "SemanticCandidate", back_populates="field", cascade="all, delete-orphan"
    )
# [/DEF:SemanticFieldEntry:Class]
# [DEF:CandidateMatchType:Class]
class CandidateMatchType(str, enum.Enum):
    """Match-type values stored in ``SemanticCandidate.match_type``."""

    EXACT = "exact"
    REFERENCE = "reference"
    FUZZY = "fuzzy"
    GENERATED = "generated"
# [/DEF:CandidateMatchType:Class]
# [DEF:CandidateStatus:Class]
class CandidateStatus(str, enum.Enum):
    """Status values stored in ``SemanticCandidate.status``."""

    PROPOSED = "proposed"
    ACCEPTED = "accepted"
    REJECTED = "rejected"
    SUPERSEDED = "superseded"
# [/DEF:CandidateStatus:Class]
# [DEF:SemanticCandidate:Class]
class SemanticCandidate(Base):
    """A proposed set of semantic values for one ``SemanticFieldEntry``."""

    __tablename__ = "semantic_candidates"

    candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    field_id = Column(String, ForeignKey("semantic_field_entries.field_id"), nullable=False)
    # Plain string reference; no ForeignKey is declared.
    source_id = Column(String, nullable=True)

    # Ranking.
    candidate_rank = Column(Integer, nullable=False)
    match_type = Column(SQLEnum(CandidateMatchType), nullable=False)
    confidence_score = Column(Float, nullable=False)

    # Proposed values; each one is optional.
    proposed_verbose_name = Column(String, nullable=True)
    proposed_description = Column(Text, nullable=True)
    proposed_display_format = Column(String, nullable=True)

    status = Column(SQLEnum(CandidateStatus), nullable=False, default=CandidateStatus.PROPOSED)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    field = relationship("SemanticFieldEntry", back_populates="candidates")
# [/DEF:SemanticCandidate:Class]
# [DEF:FilterSource:Class]
class FilterSource(str, enum.Enum):
    """Source values stored in ``ImportedFilter.source``."""

    SUPERSET_NATIVE = "superset_native"
    SUPERSET_URL = "superset_url"
    SUPERSET_PERMALINK = "superset_permalink"
    SUPERSET_NATIVE_FILTERS_KEY = "superset_native_filters_key"
    MANUAL = "manual"
    INFERRED = "inferred"
# [/DEF:FilterSource:Class]
# [DEF:FilterConfidenceState:Class]
class FilterConfidenceState(str, enum.Enum):
    """Confidence values stored in ``ImportedFilter.confidence_state``."""

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    UNRESOLVED = "unresolved"
# [/DEF:FilterConfidenceState:Class]
# [DEF:FilterRecoveryStatus:Class]
class FilterRecoveryStatus(str, enum.Enum):
    """Recovery values stored in ``ImportedFilter.recovery_status``."""

    RECOVERED = "recovered"
    PARTIAL = "partial"
    MISSING = "missing"
    CONFLICTED = "conflicted"
# [/DEF:FilterRecoveryStatus:Class]
# [DEF:ImportedFilter:Class]
class ImportedFilter(Base):
    """A filter recorded for a dataset review session, with raw and normalized values."""

    __tablename__ = "imported_filters"

    filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    filter_name = Column(String, nullable=False)
    display_name = Column(String, nullable=True)

    # JSON payloads: the raw value is required, the normalized form is optional.
    raw_value = Column(JSON, nullable=False)
    normalized_value = Column(JSON, nullable=True)

    # Origin and confidence.
    source = Column(SQLEnum(FilterSource), nullable=False)
    confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False)
    requires_confirmation = Column(Boolean, nullable=False, default=False)
    recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False)
    notes = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="imported_filters")
# [/DEF:ImportedFilter:Class]
# [DEF:VariableKind:Class]
class VariableKind(str, enum.Enum):
    """Kind values stored in ``TemplateVariable.variable_kind``."""

    NATIVE_FILTER = "native_filter"
    PARAMETER = "parameter"
    DERIVED = "derived"
    UNKNOWN = "unknown"
# [/DEF:VariableKind:Class]
# [DEF:MappingStatus:Class]
class MappingStatus(str, enum.Enum):
    """Status values stored in ``TemplateVariable.mapping_status``."""

    UNMAPPED = "unmapped"
    PROPOSED = "proposed"
    APPROVED = "approved"
    OVERRIDDEN = "overridden"
    INVALID = "invalid"
# [/DEF:MappingStatus:Class]
# [DEF:TemplateVariable:Class]
class TemplateVariable(Base):
    """A template variable recorded for a dataset review session."""

    __tablename__ = "template_variables"

    variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    variable_name = Column(String, nullable=False)
    expression_source = Column(Text, nullable=False)
    variable_kind = Column(SQLEnum(VariableKind), nullable=False)

    # Variables are required by default; the default value is an optional JSON payload.
    is_required = Column(Boolean, nullable=False, default=True)
    default_value = Column(JSON, nullable=True)
    mapping_status = Column(SQLEnum(MappingStatus), nullable=False, default=MappingStatus.UNMAPPED)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="template_variables")
# [/DEF:TemplateVariable:Class]
# [DEF:MappingMethod:Class]
class MappingMethod(str, enum.Enum):
    """Method values stored in ``ExecutionMapping.mapping_method``."""

    DIRECT_MATCH = "direct_match"
    HEURISTIC_MATCH = "heuristic_match"
    SEMANTIC_MATCH = "semantic_match"
    MANUAL_OVERRIDE = "manual_override"
# [/DEF:MappingMethod:Class]
# [DEF:MappingWarningLevel:Class]
class MappingWarningLevel(str, enum.Enum):
    """Warning-level values stored in ``ExecutionMapping.warning_level``."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
# [/DEF:MappingWarningLevel:Class]
# [DEF:ApprovalState:Class]
class ApprovalState(str, enum.Enum):
    """Approval values stored in ``ExecutionMapping.approval_state``."""

    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    NOT_REQUIRED = "not_required"
# [/DEF:ApprovalState:Class]
# [DEF:ExecutionMapping:Class]
class ExecutionMapping(Base):
    """A mapping row that pairs a filter id with a variable id for a session."""

    __tablename__ = "execution_mappings"

    mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    # Plain string references; no ForeignKey is declared for either id.
    filter_id = Column(String, nullable=False)
    variable_id = Column(String, nullable=False)

    # Mapping method and the values involved.
    mapping_method = Column(SQLEnum(MappingMethod), nullable=False)
    raw_input_value = Column(JSON, nullable=False)
    effective_value = Column(JSON, nullable=True)
    transformation_note = Column(Text, nullable=True)
    warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True)

    # Approval tracking; new rows default to NOT_REQUIRED.
    requires_explicit_approval = Column(Boolean, nullable=False, default=False)
    approval_state = Column(SQLEnum(ApprovalState), nullable=False, default=ApprovalState.NOT_REQUIRED)
    approved_by_user_id = Column(String, nullable=True)
    approved_at = Column(DateTime, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="execution_mappings")
# [/DEF:ExecutionMapping:Class]
# [DEF:ClarificationStatus:Class]
class ClarificationStatus(str, enum.Enum):
    """Status values stored in ``ClarificationSession.status``."""

    PENDING = "pending"
    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    CANCELLED = "cancelled"
# [/DEF:ClarificationStatus:Class]
# [DEF:ClarificationSession:Class]
class ClarificationSession(Base):
    """A clarification run attached to a dataset review session.

    Owns its ``ClarificationQuestion`` rows via ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "clarification_sessions"

    clarification_session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    status = Column(SQLEnum(ClarificationStatus), nullable=False, default=ClarificationStatus.PENDING)
    # Plain string reference; no ForeignKey is declared.
    current_question_id = Column(String, nullable=True)

    # Counters, both starting at 0.
    resolved_count = Column(Integer, nullable=False, default=0)
    remaining_count = Column(Integer, nullable=False, default=0)
    summary_delta = Column(Text, nullable=True)

    started_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
    completed_at = Column(DateTime, nullable=True)

    session = relationship("DatasetReviewSession", back_populates="clarification_sessions")
    questions = relationship(
        "ClarificationQuestion",
        back_populates="clarification_session",
        cascade="all, delete-orphan",
    )
# [/DEF:ClarificationSession:Class]
# [DEF:QuestionState:Class]
class QuestionState(str, enum.Enum):
    """State values stored in ``ClarificationQuestion.state``."""

    OPEN = "open"
    ANSWERED = "answered"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
    SUPERSEDED = "superseded"
# [/DEF:QuestionState:Class]
# [DEF:ClarificationQuestion:Class]
class ClarificationQuestion(Base):
    """One question inside a ``ClarificationSession``.

    Owns its options (one-to-many) and its answer (one-to-one); both
    relationships use ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "clarification_questions"

    question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    clarification_session_id = Column(
        String,
        ForeignKey("clarification_sessions.clarification_session_id"),
        nullable=False,
    )

    # Question content.
    topic_ref = Column(String, nullable=False)
    question_text = Column(Text, nullable=False)
    why_it_matters = Column(Text, nullable=False)
    current_guess = Column(Text, nullable=True)

    priority = Column(Integer, nullable=False, default=0)
    state = Column(SQLEnum(QuestionState), nullable=False, default=QuestionState.OPEN)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    clarification_session = relationship("ClarificationSession", back_populates="questions")
    options = relationship(
        "ClarificationOption", back_populates="question", cascade="all, delete-orphan"
    )
    answer = relationship(
        "ClarificationAnswer",
        back_populates="question",
        uselist=False,
        cascade="all, delete-orphan",
    )
# [/DEF:ClarificationQuestion:Class]
# [DEF:ClarificationOption:Class]
class ClarificationOption(Base):
    """One selectable option of a ``ClarificationQuestion``."""

    __tablename__ = "clarification_options"

    option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    question_id = Column(String, ForeignKey("clarification_questions.question_id"), nullable=False)

    label = Column(String, nullable=False)
    value = Column(String, nullable=False)
    is_recommended = Column(Boolean, nullable=False, default=False)
    display_order = Column(Integer, nullable=False, default=0)

    question = relationship("ClarificationQuestion", back_populates="options")
# [/DEF:ClarificationOption:Class]
# [DEF:AnswerKind:Class]
class AnswerKind(str, enum.Enum):
    """Kind values stored in ``ClarificationAnswer.answer_kind``."""

    SELECTED = "selected"
    CUSTOM = "custom"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
# [/DEF:AnswerKind:Class]
# [DEF:ClarificationAnswer:Class]
class ClarificationAnswer(Base):
    """The answer recorded for a ``ClarificationQuestion``.

    ``question_id`` is unique, so a question has at most one answer row.
    """

    __tablename__ = "clarification_answers"

    answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    question_id = Column(
        String,
        ForeignKey("clarification_questions.question_id"),
        nullable=False,
        unique=True,
    )

    answer_kind = Column(SQLEnum(AnswerKind), nullable=False)
    answer_value = Column(Text, nullable=True)
    # Plain string reference; no ForeignKey is declared.
    answered_by_user_id = Column(String, nullable=False)
    impact_summary = Column(Text, nullable=True)
    user_feedback = Column(String, nullable=True)  # up, down, null

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    question = relationship("ClarificationQuestion", back_populates="answer")
# [/DEF:ClarificationAnswer:Class]
# [DEF:PreviewStatus:Class]
class PreviewStatus(str, enum.Enum):
    """Status values stored in ``CompiledPreview.preview_status``."""

    PENDING = "pending"
    READY = "ready"
    FAILED = "failed"
    STALE = "stale"
# [/DEF:PreviewStatus:Class]
# [DEF:CompiledPreview:Class]
class CompiledPreview(Base):
    """A compiled SQL preview recorded for a dataset review session."""

    __tablename__ = "compiled_previews"

    preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    preview_status = Column(SQLEnum(PreviewStatus), nullable=False, default=PreviewStatus.PENDING)
    compiled_sql = Column(Text, nullable=True)
    preview_fingerprint = Column(String, nullable=False)
    compiled_by = Column(String, nullable=False, default="superset")

    # Error fields are optional.
    error_code = Column(String, nullable=True)
    error_details = Column(Text, nullable=True)

    # ``compiled_at`` has no default; only ``created_at`` is filled automatically.
    compiled_at = Column(DateTime, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="previews")
# [/DEF:CompiledPreview:Class]
# [DEF:LaunchStatus:Class]
class LaunchStatus(str, enum.Enum):
    """Status values stored in ``DatasetRunContext.launch_status``."""

    STARTED = "started"
    SUCCESS = "success"
    FAILED = "failed"
# [/DEF:LaunchStatus:Class]
# [DEF:DatasetRunContext:Class]
class DatasetRunContext(Base):
    """Record of one dataset launch for a session, with the inputs used for it."""

    __tablename__ = "dataset_run_contexts"

    run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    # Plain string references; only ``session_id`` above declares a ForeignKey.
    dataset_ref = Column(String, nullable=False)
    environment_id = Column(String, nullable=False)
    preview_id = Column(String, nullable=False)
    sql_lab_session_ref = Column(String, nullable=False)

    # JSON payloads, all required.
    effective_filters = Column(JSON, nullable=False)
    template_params = Column(JSON, nullable=False)
    approved_mapping_ids = Column(JSON, nullable=False)
    semantic_decision_refs = Column(JSON, nullable=False)
    open_warning_refs = Column(JSON, nullable=False)

    # ``launch_status`` has no default and must be set explicitly.
    launch_status = Column(SQLEnum(LaunchStatus), nullable=False)
    launch_error = Column(Text, nullable=True)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="run_contexts")
# [/DEF:DatasetRunContext:Class]
# [DEF:SessionEvent:Class]
class SessionEvent(Base):
    """An event recorded against a dataset review session and an acting user."""

    __tablename__ = "session_events"

    session_event_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
    actor_user_id = Column(String, ForeignKey("users.id"), nullable=False)

    event_type = Column(String, nullable=False)
    event_summary = Column(Text, nullable=False)

    # Stored as plain strings here, not as SQLEnum columns.
    current_phase = Column(String, nullable=True)
    readiness_state = Column(String, nullable=True)

    # ``default=dict`` passes the callable, so each row gets its own new dict.
    event_details = Column(JSON, nullable=False, default=dict)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="events")
    actor = relationship("User")
# [/DEF:SessionEvent:Class]
# [DEF:ArtifactType:Class]
class ArtifactType(str, enum.Enum):
    """Type values stored in ``ExportArtifact.artifact_type``."""

    DOCUMENTATION = "documentation"
    VALIDATION_REPORT = "validation_report"
    RUN_SUMMARY = "run_summary"
# [/DEF:ArtifactType:Class]
# [DEF:ArtifactFormat:Class]
class ArtifactFormat(str, enum.Enum):
    """Format values stored in ``ExportArtifact.format``."""

    # ``JSON`` here is an enum member scoped to this class body; it does not
    # rebind the module-level ``JSON`` column type imported from SQLAlchemy.
    JSON = "json"
    MARKDOWN = "markdown"
    CSV = "csv"
    PDF = "pdf"
# [/DEF:ArtifactFormat:Class]
# [DEF:ExportArtifact:Class]
class ExportArtifact(Base):
    """An export artifact recorded for a dataset review session."""

    __tablename__ = "export_artifacts"

    artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)

    artifact_type = Column(SQLEnum(ArtifactType), nullable=False)
    format = Column(SQLEnum(ArtifactFormat), nullable=False)
    storage_ref = Column(String, nullable=False)
    # Plain string reference; no ForeignKey is declared.
    created_by_user_id = Column(String, nullable=False)

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="export_artifacts")
# [/DEF:ExportArtifact:Class]
# [/DEF:DatasetReviewModels:Module]