# Listing metadata: 683 lines, 28 KiB, Python
# [DEF:DatasetReviewModels:Module]
#
# @TIER: STANDARD
# @COMPLEXITY: 3
# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> [AuthModels]
# @RELATION: DEPENDS_ON -> [MappingModels]
#
# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user.
|
|
|
|
# [SECTION: IMPORTS]
|
|
import uuid
|
|
import enum
|
|
from datetime import datetime
|
|
from typing import List, Optional
|
|
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey, Text, JSON, Float, Enum as SQLEnum, Table
|
|
from sqlalchemy.orm import relationship
|
|
from .mapping import Base
|
|
# [/SECTION]
|
|
|
|
# [DEF:SessionStatus:Class]
|
|
class SessionStatus(str, enum.Enum):
    """String-valued status stored in ``DatasetReviewSession.status``."""

    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    ARCHIVED = "archived"
    CANCELLED = "cancelled"
|
|
# [/DEF:SessionStatus:Class]
|
|
|
|
# [DEF:SessionPhase:Class]
|
|
class SessionPhase(str, enum.Enum):
    """String-valued phase stored in ``DatasetReviewSession.current_phase``."""

    INTAKE = "intake"
    RECOVERY = "recovery"
    REVIEW = "review"
    SEMANTIC_REVIEW = "semantic_review"
    CLARIFICATION = "clarification"
    MAPPING_REVIEW = "mapping_review"
    PREVIEW = "preview"
    LAUNCH = "launch"
    POST_RUN = "post_run"
|
|
# [/DEF:SessionPhase:Class]
|
|
|
|
# [DEF:ReadinessState:Class]
|
|
class ReadinessState(str, enum.Enum):
    """String-valued readiness stored in ``DatasetReviewSession.readiness_state``."""

    EMPTY = "empty"
    IMPORTING = "importing"
    REVIEW_READY = "review_ready"
    SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed"
    CLARIFICATION_NEEDED = "clarification_needed"
    CLARIFICATION_ACTIVE = "clarification_active"
    MAPPING_REVIEW_NEEDED = "mapping_review_needed"
    COMPILED_PREVIEW_READY = "compiled_preview_ready"
    PARTIALLY_READY = "partially_ready"
    RUN_READY = "run_ready"
    RUN_IN_PROGRESS = "run_in_progress"
    COMPLETED = "completed"
    RECOVERY_REQUIRED = "recovery_required"
|
|
# [/DEF:ReadinessState:Class]
|
|
|
|
# [DEF:RecommendedAction:Class]
|
|
class RecommendedAction(str, enum.Enum):
    """String-valued action stored in ``DatasetReviewSession.recommended_action``."""

    IMPORT_FROM_SUPERSET = "import_from_superset"
    REVIEW_DOCUMENTATION = "review_documentation"
    APPLY_SEMANTIC_SOURCE = "apply_semantic_source"
    START_CLARIFICATION = "start_clarification"
    ANSWER_NEXT_QUESTION = "answer_next_question"
    APPROVE_MAPPING = "approve_mapping"
    GENERATE_SQL_PREVIEW = "generate_sql_preview"
    COMPLETE_REQUIRED_VALUES = "complete_required_values"
    LAUNCH_DATASET = "launch_dataset"
    RESUME_SESSION = "resume_session"
    EXPORT_OUTPUTS = "export_outputs"
|
|
# [/DEF:RecommendedAction:Class]
|
|
|
|
# [DEF:SessionCollaboratorRole:Class]
|
|
class SessionCollaboratorRole(str, enum.Enum):
    """String-valued role stored in ``SessionCollaborator.role``."""

    VIEWER = "viewer"
    REVIEWER = "reviewer"
    APPROVER = "approver"
|
|
# [/DEF:SessionCollaboratorRole:Class]
|
|
|
|
# [DEF:SessionCollaborator:Class]
|
|
class SessionCollaborator(Base):
    """Row in ``session_collaborators`` tying a user to a review session with a role."""

    __tablename__ = "session_collaborators"

    # Primary key is a generated UUID4 string.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )
    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    role = Column(SQLEnum(SessionCollaboratorRole), nullable=False)
    # datetime.utcnow produces a naive UTC timestamp at insert time.
    added_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="collaborators")
    user = relationship("User")
|
|
# [/DEF:SessionCollaborator:Class]
|
|
|
|
# [DEF:DatasetReviewSession:Class]
|
|
class DatasetReviewSession(Base):
    """Root row of a dataset review (``dataset_review_sessions``).

    Holds the owning user, the source reference, the workflow state columns
    and the parent side of every per-session child collection. All child
    relationships use ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "dataset_review_sessions"

    # --- identity and source -------------------------------------------
    session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String, ForeignKey("users.id"), nullable=False)
    environment_id = Column(String, ForeignKey("environments.id"), nullable=False)
    source_kind = Column(String, nullable=False)  # superset_link, dataset_selection
    source_input = Column(String, nullable=False)
    dataset_ref = Column(String, nullable=False)
    dataset_id = Column(Integer, nullable=True)
    dashboard_id = Column(Integer, nullable=True)

    # --- workflow state ------------------------------------------------
    readiness_state = Column(
        SQLEnum(ReadinessState),
        nullable=False,
        default=ReadinessState.EMPTY,
    )
    recommended_action = Column(
        SQLEnum(RecommendedAction),
        nullable=False,
        default=RecommendedAction.IMPORT_FROM_SUPERSET,
    )
    status = Column(SQLEnum(SessionStatus), nullable=False, default=SessionStatus.ACTIVE)
    current_phase = Column(SQLEnum(SessionPhase), nullable=False, default=SessionPhase.INTAKE)

    # --- plain string references (no ForeignKey declared) --------------
    active_task_id = Column(String, nullable=True)
    last_preview_id = Column(String, nullable=True)
    last_run_context_id = Column(String, nullable=True)

    # --- timestamps (datetime.utcnow yields naive UTC values) ----------
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )
    last_activity_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    closed_at = Column(DateTime, nullable=True)

    # --- relationships -------------------------------------------------
    owner = relationship("User")
    collaborators = relationship(
        "SessionCollaborator",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    # One-to-one: uselist=False.
    profile = relationship(
        "DatasetProfile",
        back_populates="session",
        uselist=False,
        cascade="all, delete-orphan",
    )
    findings = relationship(
        "ValidationFinding",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    semantic_sources = relationship(
        "SemanticSource",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    semantic_fields = relationship(
        "SemanticFieldEntry",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    imported_filters = relationship(
        "ImportedFilter",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    template_variables = relationship(
        "TemplateVariable",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    execution_mappings = relationship(
        "ExecutionMapping",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    clarification_sessions = relationship(
        "ClarificationSession",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    previews = relationship(
        "CompiledPreview",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    run_contexts = relationship(
        "DatasetRunContext",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    export_artifacts = relationship(
        "ExportArtifact",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    events = relationship(
        "SessionEvent",
        back_populates="session",
        cascade="all, delete-orphan",
    )
|
|
# [/DEF:DatasetReviewSession:Class]
|
|
|
|
# [DEF:BusinessSummarySource:Class]
|
|
class BusinessSummarySource(str, enum.Enum):
    """String-valued origin stored in ``DatasetProfile.business_summary_source``."""

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    MANUAL_OVERRIDE = "manual_override"
|
|
# [/DEF:BusinessSummarySource:Class]
|
|
|
|
# [DEF:ConfidenceState:Class]
|
|
class ConfidenceState(str, enum.Enum):
    """String-valued confidence stored in ``DatasetProfile.confidence_state``."""

    CONFIRMED = "confirmed"
    MOSTLY_CONFIRMED = "mostly_confirmed"
    MIXED = "mixed"
    LOW_CONFIDENCE = "low_confidence"
    UNRESOLVED = "unresolved"
|
|
# [/DEF:ConfidenceState:Class]
|
|
|
|
# [DEF:DatasetProfile:Class]
|
|
class DatasetProfile(Base):
    """Profile row for a review session (``dataset_profiles``).

    ``session_id`` is declared ``unique=True``, so at most one profile row
    can reference a given session.
    """

    __tablename__ = "dataset_profiles"

    profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
        unique=True,
    )

    # Dataset identification.
    dataset_name = Column(String, nullable=False)
    schema_name = Column(String, nullable=True)
    database_name = Column(String, nullable=True)

    # Summary text and where it came from.
    business_summary = Column(Text, nullable=False)
    business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False)
    description = Column(Text, nullable=True)
    dataset_type = Column(String, nullable=True)  # table, virtual, sqllab_view, unknown
    is_sqllab_view = Column(Boolean, nullable=False, default=False)

    # Quality indicators and flags.
    completeness_score = Column(Float, nullable=True)
    confidence_state = Column(SQLEnum(ConfidenceState), nullable=False)
    has_blocking_findings = Column(Boolean, nullable=False, default=False)
    has_warning_findings = Column(Boolean, nullable=False, default=False)
    manual_summary_locked = Column(Boolean, nullable=False, default=False)

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    session = relationship("DatasetReviewSession", back_populates="profile")
|
|
# [/DEF:DatasetProfile:Class]
|
|
|
|
# [DEF:FindingArea:Class]
|
|
class FindingArea(str, enum.Enum):
    """String-valued area stored in ``ValidationFinding.area``."""

    SOURCE_INTAKE = "source_intake"
    DATASET_PROFILE = "dataset_profile"
    SEMANTIC_ENRICHMENT = "semantic_enrichment"
    CLARIFICATION = "clarification"
    FILTER_RECOVERY = "filter_recovery"
    TEMPLATE_MAPPING = "template_mapping"
    COMPILED_PREVIEW = "compiled_preview"
    LAUNCH = "launch"
    AUDIT = "audit"
|
|
# [/DEF:FindingArea:Class]
|
|
|
|
# [DEF:FindingSeverity:Class]
|
|
class FindingSeverity(str, enum.Enum):
    """String-valued severity stored in ``ValidationFinding.severity``."""

    BLOCKING = "blocking"
    WARNING = "warning"
    INFORMATIONAL = "informational"
|
|
# [/DEF:FindingSeverity:Class]
|
|
|
|
# [DEF:ResolutionState:Class]
|
|
class ResolutionState(str, enum.Enum):
    """String-valued state stored in ``ValidationFinding.resolution_state``."""

    OPEN = "open"
    RESOLVED = "resolved"
    APPROVED = "approved"
    SKIPPED = "skipped"
    DEFERRED = "deferred"
    EXPERT_REVIEW = "expert_review"
|
|
# [/DEF:ResolutionState:Class]
|
|
|
|
# [DEF:ValidationFinding:Class]
|
|
class ValidationFinding(Base):
    """Finding row attached to a review session (``validation_findings``)."""

    __tablename__ = "validation_findings"

    finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Classification of the finding.
    area = Column(SQLEnum(FindingArea), nullable=False)
    severity = Column(SQLEnum(FindingSeverity), nullable=False)
    code = Column(String, nullable=False)
    title = Column(String, nullable=False)
    message = Column(Text, nullable=False)

    # Resolution tracking; new rows default to OPEN.
    resolution_state = Column(
        SQLEnum(ResolutionState),
        nullable=False,
        default=ResolutionState.OPEN,
    )
    resolution_note = Column(Text, nullable=True)
    caused_by_ref = Column(String, nullable=True)

    # datetime.utcnow yields a naive UTC value; resolved_at has no default.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    resolved_at = Column(DateTime, nullable=True)

    session = relationship("DatasetReviewSession", back_populates="findings")
|
|
# [/DEF:ValidationFinding:Class]
|
|
|
|
# [DEF:SemanticSourceType:Class]
|
|
class SemanticSourceType(str, enum.Enum):
    """String-valued type stored in ``SemanticSource.source_type``."""

    UPLOADED_FILE = "uploaded_file"
    CONNECTED_DICTIONARY = "connected_dictionary"
    REFERENCE_DATASET = "reference_dataset"
    NEIGHBOR_DATASET = "neighbor_dataset"
    AI_GENERATED = "ai_generated"
|
|
# [/DEF:SemanticSourceType:Class]
|
|
|
|
# [DEF:TrustLevel:Class]
|
|
class TrustLevel(str, enum.Enum):
    """String-valued trust level stored in ``SemanticSource.trust_level``."""

    TRUSTED = "trusted"
    RECOMMENDED = "recommended"
    CANDIDATE = "candidate"
    GENERATED = "generated"
|
|
# [/DEF:TrustLevel:Class]
|
|
|
|
# [DEF:SemanticSourceStatus:Class]
|
|
class SemanticSourceStatus(str, enum.Enum):
    """String-valued status stored in ``SemanticSource.status``."""

    AVAILABLE = "available"
    SELECTED = "selected"
    APPLIED = "applied"
    REJECTED = "rejected"
    PARTIAL = "partial"
    FAILED = "failed"
|
|
# [/DEF:SemanticSourceStatus:Class]
|
|
|
|
# [DEF:SemanticSource:Class]
|
|
class SemanticSource(Base):
    """Semantic source row attached to a review session (``semantic_sources``)."""

    __tablename__ = "semantic_sources"

    source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Source identification.
    source_type = Column(SQLEnum(SemanticSourceType), nullable=False)
    source_ref = Column(String, nullable=False)
    source_version = Column(String, nullable=False)
    display_name = Column(String, nullable=False)

    # Ranking inputs and lifecycle; new rows default to AVAILABLE.
    trust_level = Column(SQLEnum(TrustLevel), nullable=False)
    schema_overlap_score = Column(Float, nullable=True)
    status = Column(
        SQLEnum(SemanticSourceStatus),
        nullable=False,
        default=SemanticSourceStatus.AVAILABLE,
    )

    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="semantic_sources")
|
|
# [/DEF:SemanticSource:Class]
|
|
|
|
# [DEF:FieldKind:Class]
|
|
class FieldKind(str, enum.Enum):
    """String-valued kind stored in ``SemanticFieldEntry.field_kind``."""

    COLUMN = "column"
    METRIC = "metric"
    FILTER_DIMENSION = "filter_dimension"
    PARAMETER = "parameter"
|
|
# [/DEF:FieldKind:Class]
|
|
|
|
# [DEF:FieldProvenance:Class]
|
|
class FieldProvenance(str, enum.Enum):
    """String-valued provenance stored in ``SemanticFieldEntry.provenance``."""

    DICTIONARY_EXACT = "dictionary_exact"
    REFERENCE_IMPORTED = "reference_imported"
    FUZZY_INFERRED = "fuzzy_inferred"
    AI_GENERATED = "ai_generated"
    MANUAL_OVERRIDE = "manual_override"
    UNRESOLVED = "unresolved"
|
|
# [/DEF:FieldProvenance:Class]
|
|
|
|
# [DEF:SemanticFieldEntry:Class]
|
|
class SemanticFieldEntry(Base):
    """Per-field semantic entry of a review session (``semantic_field_entries``).

    Parent of ``SemanticCandidate`` rows via ``candidates``
    (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "semantic_field_entries"

    field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Field identity and presentation values.
    field_name = Column(String, nullable=False)
    field_kind = Column(SQLEnum(FieldKind), nullable=False)
    verbose_name = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    display_format = Column(String, nullable=True)

    # Origin of the current values; source_id is a plain string (no ForeignKey).
    provenance = Column(
        SQLEnum(FieldProvenance),
        nullable=False,
        default=FieldProvenance.UNRESOLVED,
    )
    source_id = Column(String, nullable=True)
    source_version = Column(String, nullable=True)
    confidence_rank = Column(Integer, nullable=True)

    # Review flags; needs_review defaults to True for new rows.
    is_locked = Column(Boolean, nullable=False, default=False)
    has_conflict = Column(Boolean, nullable=False, default=False)
    needs_review = Column(Boolean, nullable=False, default=True)
    last_changed_by = Column(String, nullable=False)  # system, user, agent
    user_feedback = Column(String, nullable=True)  # up, down, null

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    session = relationship("DatasetReviewSession", back_populates="semantic_fields")
    candidates = relationship(
        "SemanticCandidate",
        back_populates="field",
        cascade="all, delete-orphan",
    )
|
|
# [/DEF:SemanticFieldEntry:Class]
|
|
|
|
# [DEF:CandidateMatchType:Class]
|
|
class CandidateMatchType(str, enum.Enum):
    """String-valued match type stored in ``SemanticCandidate.match_type``."""

    EXACT = "exact"
    REFERENCE = "reference"
    FUZZY = "fuzzy"
    GENERATED = "generated"
|
|
# [/DEF:CandidateMatchType:Class]
|
|
|
|
# [DEF:CandidateStatus:Class]
|
|
class CandidateStatus(str, enum.Enum):
    """String-valued status stored in ``SemanticCandidate.status``."""

    PROPOSED = "proposed"
    ACCEPTED = "accepted"
    REJECTED = "rejected"
    SUPERSEDED = "superseded"
|
|
# [/DEF:CandidateStatus:Class]
|
|
|
|
# [DEF:SemanticCandidate:Class]
|
|
class SemanticCandidate(Base):
    """Candidate proposal for a ``SemanticFieldEntry`` (``semantic_candidates``)."""

    __tablename__ = "semantic_candidates"

    candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    field_id = Column(
        String,
        ForeignKey("semantic_field_entries.field_id"),
        nullable=False,
    )
    # Plain string reference (no ForeignKey declared).
    source_id = Column(String, nullable=True)

    # Ranking and match information.
    candidate_rank = Column(Integer, nullable=False)
    match_type = Column(SQLEnum(CandidateMatchType), nullable=False)
    confidence_score = Column(Float, nullable=False)

    # Proposed values for the field.
    proposed_verbose_name = Column(String, nullable=True)
    proposed_description = Column(Text, nullable=True)
    proposed_display_format = Column(String, nullable=True)

    # New rows default to PROPOSED.
    status = Column(
        SQLEnum(CandidateStatus),
        nullable=False,
        default=CandidateStatus.PROPOSED,
    )
    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    field = relationship("SemanticFieldEntry", back_populates="candidates")
|
|
# [/DEF:SemanticCandidate:Class]
|
|
|
|
# [DEF:FilterSource:Class]
|
|
class FilterSource(str, enum.Enum):
    """String-valued source stored in ``ImportedFilter.source``."""

    SUPERSET_NATIVE = "superset_native"
    SUPERSET_URL = "superset_url"
    SUPERSET_PERMALINK = "superset_permalink"
    SUPERSET_NATIVE_FILTERS_KEY = "superset_native_filters_key"
    MANUAL = "manual"
    INFERRED = "inferred"
|
|
# [/DEF:FilterSource:Class]
|
|
|
|
# [DEF:FilterConfidenceState:Class]
|
|
class FilterConfidenceState(str, enum.Enum):
    """String-valued confidence stored in ``ImportedFilter.confidence_state``."""

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    UNRESOLVED = "unresolved"
|
|
# [/DEF:FilterConfidenceState:Class]
|
|
|
|
# [DEF:FilterRecoveryStatus:Class]
|
|
class FilterRecoveryStatus(str, enum.Enum):
    """String-valued status stored in ``ImportedFilter.recovery_status``."""

    RECOVERED = "recovered"
    PARTIAL = "partial"
    MISSING = "missing"
    CONFLICTED = "conflicted"
|
|
# [/DEF:FilterRecoveryStatus:Class]
|
|
|
|
# [DEF:ImportedFilter:Class]
|
|
class ImportedFilter(Base):
    """Filter row attached to a review session (``imported_filters``)."""

    __tablename__ = "imported_filters"

    filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Naming.
    filter_name = Column(String, nullable=False)
    display_name = Column(String, nullable=True)

    # Values are JSON columns; raw_value is required, normalized_value is optional.
    raw_value = Column(JSON, nullable=False)
    normalized_value = Column(JSON, nullable=True)

    # Origin and confidence classification.
    source = Column(SQLEnum(FilterSource), nullable=False)
    confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False)
    requires_confirmation = Column(Boolean, nullable=False, default=False)
    recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False)
    notes = Column(Text, nullable=True)

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    session = relationship("DatasetReviewSession", back_populates="imported_filters")
|
|
# [/DEF:ImportedFilter:Class]
|
|
|
|
# [DEF:VariableKind:Class]
|
|
class VariableKind(str, enum.Enum):
    """String-valued kind stored in ``TemplateVariable.variable_kind``."""

    NATIVE_FILTER = "native_filter"
    PARAMETER = "parameter"
    DERIVED = "derived"
    UNKNOWN = "unknown"
|
|
# [/DEF:VariableKind:Class]
|
|
|
|
# [DEF:MappingStatus:Class]
|
|
class MappingStatus(str, enum.Enum):
    """String-valued status stored in ``TemplateVariable.mapping_status``."""

    UNMAPPED = "unmapped"
    PROPOSED = "proposed"
    APPROVED = "approved"
    OVERRIDDEN = "overridden"
    INVALID = "invalid"
|
|
# [/DEF:MappingStatus:Class]
|
|
|
|
# [DEF:TemplateVariable:Class]
|
|
class TemplateVariable(Base):
    """Template variable row attached to a review session (``template_variables``)."""

    __tablename__ = "template_variables"

    variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Variable definition.
    variable_name = Column(String, nullable=False)
    expression_source = Column(Text, nullable=False)
    variable_kind = Column(SQLEnum(VariableKind), nullable=False)
    # is_required defaults to True for new rows.
    is_required = Column(Boolean, nullable=False, default=True)
    default_value = Column(JSON, nullable=True)

    # New rows default to UNMAPPED.
    mapping_status = Column(
        SQLEnum(MappingStatus),
        nullable=False,
        default=MappingStatus.UNMAPPED,
    )

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    session = relationship("DatasetReviewSession", back_populates="template_variables")
|
|
# [/DEF:TemplateVariable:Class]
|
|
|
|
# [DEF:MappingMethod:Class]
|
|
class MappingMethod(str, enum.Enum):
    """String-valued method stored in ``ExecutionMapping.mapping_method``."""

    DIRECT_MATCH = "direct_match"
    HEURISTIC_MATCH = "heuristic_match"
    SEMANTIC_MATCH = "semantic_match"
    MANUAL_OVERRIDE = "manual_override"
|
|
# [/DEF:MappingMethod:Class]
|
|
|
|
# [DEF:MappingWarningLevel:Class]
|
|
class MappingWarningLevel(str, enum.Enum):
    """String-valued level stored in ``ExecutionMapping.warning_level``."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
|
|
# [/DEF:MappingWarningLevel:Class]
|
|
|
|
# [DEF:ApprovalState:Class]
|
|
class ApprovalState(str, enum.Enum):
    """String-valued state stored in ``ExecutionMapping.approval_state``."""

    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    NOT_REQUIRED = "not_required"
|
|
# [/DEF:ApprovalState:Class]
|
|
|
|
# [DEF:ExecutionMapping:Class]
|
|
class ExecutionMapping(Base):
    """Mapping row attached to a review session (``execution_mappings``).

    ``filter_id``, ``variable_id`` and ``approved_by_user_id`` are plain
    string columns; no ``ForeignKey`` is declared for them here.
    """

    __tablename__ = "execution_mappings"

    mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )
    filter_id = Column(String, nullable=False)
    variable_id = Column(String, nullable=False)

    # How the mapping was produced and the values involved.
    mapping_method = Column(SQLEnum(MappingMethod), nullable=False)
    raw_input_value = Column(JSON, nullable=False)
    effective_value = Column(JSON, nullable=True)
    transformation_note = Column(Text, nullable=True)
    warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True)

    # Approval tracking; new rows default to NOT_REQUIRED.
    requires_explicit_approval = Column(Boolean, nullable=False, default=False)
    approval_state = Column(
        SQLEnum(ApprovalState),
        nullable=False,
        default=ApprovalState.NOT_REQUIRED,
    )
    approved_by_user_id = Column(String, nullable=True)
    approved_at = Column(DateTime, nullable=True)

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    session = relationship("DatasetReviewSession", back_populates="execution_mappings")
|
|
# [/DEF:ExecutionMapping:Class]
|
|
|
|
# [DEF:ClarificationStatus:Class]
|
|
class ClarificationStatus(str, enum.Enum):
    """String-valued status stored in ``ClarificationSession.status``."""

    PENDING = "pending"
    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    CANCELLED = "cancelled"
|
|
# [/DEF:ClarificationStatus:Class]
|
|
|
|
# [DEF:ClarificationSession:Class]
|
|
class ClarificationSession(Base):
    """Clarification run attached to a review session (``clarification_sessions``).

    Parent of ``ClarificationQuestion`` rows via ``questions``
    (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "clarification_sessions"

    clarification_session_id = Column(
        String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # New rows default to PENDING.
    status = Column(
        SQLEnum(ClarificationStatus),
        nullable=False,
        default=ClarificationStatus.PENDING,
    )
    # Plain string reference (no ForeignKey declared).
    current_question_id = Column(String, nullable=True)

    # Counters start at zero.
    resolved_count = Column(Integer, nullable=False, default=0)
    remaining_count = Column(Integer, nullable=False, default=0)
    summary_delta = Column(Text, nullable=True)

    # datetime.utcnow yields naive UTC values; completed_at has no default.
    started_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )
    completed_at = Column(DateTime, nullable=True)

    session = relationship("DatasetReviewSession", back_populates="clarification_sessions")
    questions = relationship(
        "ClarificationQuestion",
        back_populates="clarification_session",
        cascade="all, delete-orphan",
    )
|
|
# [/DEF:ClarificationSession:Class]
|
|
|
|
# [DEF:QuestionState:Class]
|
|
class QuestionState(str, enum.Enum):
    """String-valued state stored in ``ClarificationQuestion.state``."""

    OPEN = "open"
    ANSWERED = "answered"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
    SUPERSEDED = "superseded"
|
|
# [/DEF:QuestionState:Class]
|
|
|
|
# [DEF:ClarificationQuestion:Class]
|
|
class ClarificationQuestion(Base):
    """Question row of a clarification session (``clarification_questions``).

    Owns its ``options`` (one-to-many) and a single ``answer``
    (``uselist=False``); both cascade with ``"all, delete-orphan"``.
    """

    __tablename__ = "clarification_questions"

    question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    clarification_session_id = Column(
        String,
        ForeignKey("clarification_sessions.clarification_session_id"),
        nullable=False,
    )

    # Question content.
    topic_ref = Column(String, nullable=False)
    question_text = Column(Text, nullable=False)
    why_it_matters = Column(Text, nullable=False)
    current_guess = Column(Text, nullable=True)

    # Ordering and lifecycle; new rows default to priority 0 and OPEN.
    priority = Column(Integer, nullable=False, default=0)
    state = Column(SQLEnum(QuestionState), nullable=False, default=QuestionState.OPEN)

    # datetime.utcnow yields naive UTC values.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
        nullable=False,
    )

    clarification_session = relationship("ClarificationSession", back_populates="questions")
    options = relationship(
        "ClarificationOption",
        back_populates="question",
        cascade="all, delete-orphan",
    )
    answer = relationship(
        "ClarificationAnswer",
        back_populates="question",
        uselist=False,
        cascade="all, delete-orphan",
    )
|
|
# [/DEF:ClarificationQuestion:Class]
|
|
|
|
# [DEF:ClarificationOption:Class]
|
|
class ClarificationOption(Base):
    """Selectable option of a clarification question (``clarification_options``)."""

    __tablename__ = "clarification_options"

    option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    question_id = Column(
        String,
        ForeignKey("clarification_questions.question_id"),
        nullable=False,
    )

    label = Column(String, nullable=False)
    value = Column(String, nullable=False)
    is_recommended = Column(Boolean, nullable=False, default=False)
    display_order = Column(Integer, nullable=False, default=0)

    question = relationship("ClarificationQuestion", back_populates="options")
|
|
# [/DEF:ClarificationOption:Class]
|
|
|
|
# [DEF:AnswerKind:Class]
|
|
class AnswerKind(str, enum.Enum):
    """String-valued kind stored in ``ClarificationAnswer.answer_kind``."""

    SELECTED = "selected"
    CUSTOM = "custom"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
|
|
# [/DEF:AnswerKind:Class]
|
|
|
|
# [DEF:ClarificationAnswer:Class]
|
|
class ClarificationAnswer(Base):
    """Answer row for a clarification question (``clarification_answers``).

    ``question_id`` is declared ``unique=True``, so at most one answer row
    can reference a given question.
    """

    __tablename__ = "clarification_answers"

    answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    question_id = Column(
        String,
        ForeignKey("clarification_questions.question_id"),
        nullable=False,
        unique=True,
    )

    answer_kind = Column(SQLEnum(AnswerKind), nullable=False)
    answer_value = Column(Text, nullable=True)
    # Plain string reference (no ForeignKey declared).
    answered_by_user_id = Column(String, nullable=False)
    impact_summary = Column(Text, nullable=True)
    user_feedback = Column(String, nullable=True)  # up, down, null

    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    question = relationship("ClarificationQuestion", back_populates="answer")
|
|
# [/DEF:ClarificationAnswer:Class]
|
|
|
|
# [DEF:PreviewStatus:Class]
|
|
class PreviewStatus(str, enum.Enum):
    """String-valued status stored in ``CompiledPreview.preview_status``."""

    PENDING = "pending"
    READY = "ready"
    FAILED = "failed"
    STALE = "stale"
|
|
# [/DEF:PreviewStatus:Class]
|
|
|
|
# [DEF:CompiledPreview:Class]
|
|
class CompiledPreview(Base):
    """Compiled preview row attached to a review session (``compiled_previews``)."""

    __tablename__ = "compiled_previews"

    preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # New rows default to PENDING.
    preview_status = Column(
        SQLEnum(PreviewStatus),
        nullable=False,
        default=PreviewStatus.PENDING,
    )
    compiled_sql = Column(Text, nullable=True)
    preview_fingerprint = Column(String, nullable=False)
    # Defaults to the literal "superset".
    compiled_by = Column(String, nullable=False, default="superset")

    # Error reporting.
    error_code = Column(String, nullable=True)
    error_details = Column(Text, nullable=True)

    # compiled_at has no default; created_at uses datetime.utcnow (naive UTC).
    compiled_at = Column(DateTime, nullable=True)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="previews")
|
|
# [/DEF:CompiledPreview:Class]
|
|
|
|
# [DEF:LaunchStatus:Class]
|
|
class LaunchStatus(str, enum.Enum):
    """String-valued status stored in ``DatasetRunContext.launch_status``."""

    STARTED = "started"
    SUCCESS = "success"
    FAILED = "failed"
|
|
# [/DEF:LaunchStatus:Class]
|
|
|
|
# [DEF:DatasetRunContext:Class]
|
|
class DatasetRunContext(Base):
    """Run context row attached to a review session (``dataset_run_contexts``).

    ``environment_id`` and ``preview_id`` are plain string columns here; no
    ``ForeignKey`` is declared for them.
    """

    __tablename__ = "dataset_run_contexts"

    run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # References captured for the run.
    dataset_ref = Column(String, nullable=False)
    environment_id = Column(String, nullable=False)
    preview_id = Column(String, nullable=False)
    sql_lab_session_ref = Column(String, nullable=False)

    # Required JSON payloads (no defaults, so callers must supply them).
    effective_filters = Column(JSON, nullable=False)
    template_params = Column(JSON, nullable=False)
    approved_mapping_ids = Column(JSON, nullable=False)
    semantic_decision_refs = Column(JSON, nullable=False)
    open_warning_refs = Column(JSON, nullable=False)

    # Launch outcome.
    launch_status = Column(SQLEnum(LaunchStatus), nullable=False)
    launch_error = Column(Text, nullable=True)

    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="run_contexts")
|
|
# [/DEF:DatasetRunContext:Class]
|
|
|
|
# [DEF:SessionEvent:Class]
|
|
class SessionEvent(Base):
    """Event row attached to a review session (``session_events``).

    ``current_phase`` and ``readiness_state`` are plain nullable strings
    here, not ``SQLEnum`` columns.
    """

    __tablename__ = "session_events"

    session_event_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )
    actor_user_id = Column(String, ForeignKey("users.id"), nullable=False)

    # Event description.
    event_type = Column(String, nullable=False)
    event_summary = Column(Text, nullable=False)
    current_phase = Column(String, nullable=True)
    readiness_state = Column(String, nullable=True)
    # default=dict is a callable, so each row gets its own empty dict.
    event_details = Column(JSON, nullable=False, default=dict)

    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="events")
    actor = relationship("User")
|
|
# [/DEF:SessionEvent:Class]
|
|
|
|
# [DEF:ArtifactType:Class]
|
|
class ArtifactType(str, enum.Enum):
    """String-valued type stored in ``ExportArtifact.artifact_type``."""

    DOCUMENTATION = "documentation"
    VALIDATION_REPORT = "validation_report"
    RUN_SUMMARY = "run_summary"
|
|
# [/DEF:ArtifactType:Class]
|
|
|
|
# [DEF:ArtifactFormat:Class]
|
|
class ArtifactFormat(str, enum.Enum):
    """String-valued format stored in ``ExportArtifact.format``."""

    # The member name JSON lives in the enum's own namespace; it does not
    # rebind the module-level sqlalchemy JSON import.
    JSON = "json"
    MARKDOWN = "markdown"
    CSV = "csv"
    PDF = "pdf"
|
|
# [/DEF:ArtifactFormat:Class]
|
|
|
|
# [DEF:ExportArtifact:Class]
|
|
class ExportArtifact(Base):
    """Export artifact row attached to a review session (``export_artifacts``)."""

    __tablename__ = "export_artifacts"

    artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    artifact_type = Column(SQLEnum(ArtifactType), nullable=False)
    # Attribute name shadows the builtin ``format`` inside the class body only;
    # kept as-is because it is the mapped column name.
    format = Column(SQLEnum(ArtifactFormat), nullable=False)
    storage_ref = Column(String, nullable=False)
    # Plain string reference (no ForeignKey declared).
    created_by_user_id = Column(String, nullable=False)

    # datetime.utcnow yields a naive UTC value.
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    session = relationship("DatasetReviewSession", back_populates="export_artifacts")
|
|
# [/DEF:ExportArtifact:Class]
|
|
|
|
# [/DEF:DatasetReviewModels:Module] |