feat: initial dataset review orchestration flow implementation
This commit is contained in:
661
backend/src/models/dataset_review.py
Normal file
661
backend/src/models/dataset_review.py
Normal file
@@ -0,0 +1,661 @@
|
||||
# [DEF:DatasetReviewModels:Module]
#
# @TIER: STANDARD
# @COMPLEXITY: 3
# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
# @LAYER: Domain
# @RELATION: INHERITS_FROM -> [Base]
# @RELATION: DEPENDS_ON -> [AuthModels]
# @RELATION: DEPENDS_ON -> [MappingModels]
#
# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user.
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
import uuid
|
||||
import enum
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey, Text, JSON, Float, Enum as SQLEnum, Table
|
||||
from sqlalchemy.orm import relationship
|
||||
from .mapping import Base
|
||||
# [/SECTION]
|
||||
|
||||
# [DEF:SessionStatus:Class]
|
||||
class SessionStatus(str, enum.Enum):
    """Allowed values for ``DatasetReviewSession.status``.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    ARCHIVED = "archived"
    CANCELLED = "cancelled"
|
||||
# [/DEF:SessionStatus:Class]
|
||||
|
||||
# [DEF:SessionPhase:Class]
|
||||
class SessionPhase(str, enum.Enum):
    """Allowed values for ``DatasetReviewSession.current_phase``.

    Members are also ``str`` instances equal to their string values.
    """

    INTAKE = "intake"
    RECOVERY = "recovery"
    REVIEW = "review"
    SEMANTIC_REVIEW = "semantic_review"
    CLARIFICATION = "clarification"
    MAPPING_REVIEW = "mapping_review"
    PREVIEW = "preview"
    LAUNCH = "launch"
    POST_RUN = "post_run"
|
||||
# [/DEF:SessionPhase:Class]
|
||||
|
||||
# [DEF:ReadinessState:Class]
|
||||
class ReadinessState(str, enum.Enum):
    """Allowed values for ``DatasetReviewSession.readiness_state``.

    Members are also ``str`` instances equal to their string values.
    """

    EMPTY = "empty"
    IMPORTING = "importing"
    REVIEW_READY = "review_ready"
    SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed"
    CLARIFICATION_NEEDED = "clarification_needed"
    CLARIFICATION_ACTIVE = "clarification_active"
    MAPPING_REVIEW_NEEDED = "mapping_review_needed"
    COMPILED_PREVIEW_READY = "compiled_preview_ready"
    PARTIALLY_READY = "partially_ready"
    RUN_READY = "run_ready"
    RUN_IN_PROGRESS = "run_in_progress"
    COMPLETED = "completed"
    RECOVERY_REQUIRED = "recovery_required"
|
||||
# [/DEF:ReadinessState:Class]
|
||||
|
||||
# [DEF:RecommendedAction:Class]
|
||||
class RecommendedAction(str, enum.Enum):
    """Allowed values for ``DatasetReviewSession.recommended_action``.

    Members are also ``str`` instances equal to their string values.
    """

    IMPORT_FROM_SUPERSET = "import_from_superset"
    REVIEW_DOCUMENTATION = "review_documentation"
    APPLY_SEMANTIC_SOURCE = "apply_semantic_source"
    START_CLARIFICATION = "start_clarification"
    ANSWER_NEXT_QUESTION = "answer_next_question"
    APPROVE_MAPPING = "approve_mapping"
    GENERATE_SQL_PREVIEW = "generate_sql_preview"
    COMPLETE_REQUIRED_VALUES = "complete_required_values"
    LAUNCH_DATASET = "launch_dataset"
    RESUME_SESSION = "resume_session"
    EXPORT_OUTPUTS = "export_outputs"
|
||||
# [/DEF:RecommendedAction:Class]
|
||||
|
||||
# [DEF:SessionCollaboratorRole:Class]
|
||||
class SessionCollaboratorRole(str, enum.Enum):
    """Allowed values for ``SessionCollaborator.role``.

    Members are also ``str`` instances equal to their string values.
    """

    VIEWER = "viewer"
    REVIEWER = "reviewer"
    APPROVER = "approver"
|
||||
# [/DEF:SessionCollaboratorRole:Class]
|
||||
|
||||
# [DEF:SessionCollaborator:Class]
|
||||
class SessionCollaborator(Base):
    """Row linking a user to a dataset review session under a given role.

    Maps to the ``session_collaborators`` table.
    """

    __tablename__ = "session_collaborators"

    # Primary key: a random UUID4 rendered as a string at insert time.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))

    # Both sides of the link are mandatory.
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
    )

    role = Column(SQLEnum(SessionCollaboratorRole), nullable=False)

    # Naive UTC timestamp (datetime.utcnow) captured when the row is inserted.
    added_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors DatasetReviewSession.collaborators.
    session = relationship(
        "DatasetReviewSession",
        back_populates="collaborators",
    )
    # One-directional link to the user model (no back_populates declared).
    user = relationship("User")
|
||||
# [/DEF:SessionCollaborator:Class]
|
||||
|
||||
# [DEF:DatasetReviewSession:Class]
|
||||
class DatasetReviewSession(Base):
    """Root record of one dataset review flow.

    Maps to the ``dataset_review_sessions`` table. Every child collection
    below is declared with ``cascade="all, delete-orphan"``, so the ORM
    deletes the children together with the session.

    NOTE(review): ``SQLEnum(<Enum class>)`` persists member *names* by default
    unless ``values_callable`` is supplied - confirm that is the intended
    storage format for the enum columns.
    NOTE(review): ``datetime.utcnow`` returns naive datetimes and is
    deprecated since Python 3.12.
    """

    __tablename__ = "dataset_review_sessions"

    # --- Identity and ownership -------------------------------------------
    # Primary key: a random UUID4 rendered as a string at insert time.
    session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
    )
    environment_id = Column(
        String,
        ForeignKey("environments.id"),
        nullable=False,
    )

    # --- Source of the review ---------------------------------------------
    # source_kind values: superset_link, dataset_selection
    source_kind = Column(String, nullable=False)
    source_input = Column(String, nullable=False)
    dataset_ref = Column(String, nullable=False)
    dataset_id = Column(Integer, nullable=True)
    dashboard_id = Column(Integer, nullable=True)

    # --- Workflow state (all mandatory, each with an initial default) -----
    readiness_state = Column(
        SQLEnum(ReadinessState),
        default=ReadinessState.EMPTY,
        nullable=False,
    )
    recommended_action = Column(
        SQLEnum(RecommendedAction),
        default=RecommendedAction.IMPORT_FROM_SUPERSET,
        nullable=False,
    )
    status = Column(
        SQLEnum(SessionStatus),
        default=SessionStatus.ACTIVE,
        nullable=False,
    )
    current_phase = Column(
        SQLEnum(SessionPhase),
        default=SessionPhase.INTAKE,
        nullable=False,
    )

    # --- Optional pointers; plain strings, no ForeignKey declared ---------
    active_task_id = Column(String, nullable=True)
    last_preview_id = Column(String, nullable=True)
    last_run_context_id = Column(String, nullable=True)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
    # Only has an insert default; no ``onupdate`` is declared for it.
    last_activity_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    closed_at = Column(DateTime, nullable=True)

    # --- Relationships ----------------------------------------------------
    owner = relationship("User")
    collaborators = relationship(
        "SessionCollaborator",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    # Scalar relationship (uselist=False): at most one profile per session.
    profile = relationship(
        "DatasetProfile",
        back_populates="session",
        uselist=False,
        cascade="all, delete-orphan",
    )
    findings = relationship(
        "ValidationFinding",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    semantic_sources = relationship(
        "SemanticSource",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    semantic_fields = relationship(
        "SemanticFieldEntry",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    imported_filters = relationship(
        "ImportedFilter",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    template_variables = relationship(
        "TemplateVariable",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    execution_mappings = relationship(
        "ExecutionMapping",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    clarification_sessions = relationship(
        "ClarificationSession",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    previews = relationship(
        "CompiledPreview",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    run_contexts = relationship(
        "DatasetRunContext",
        back_populates="session",
        cascade="all, delete-orphan",
    )
    export_artifacts = relationship(
        "ExportArtifact",
        back_populates="session",
        cascade="all, delete-orphan",
    )
|
||||
# [/DEF:DatasetReviewSession:Class]
|
||||
|
||||
# [DEF:BusinessSummarySource:Class]
|
||||
class BusinessSummarySource(str, enum.Enum):
    """Allowed values for ``DatasetProfile.business_summary_source``.

    Members are also ``str`` instances equal to their string values.
    """

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    MANUAL_OVERRIDE = "manual_override"
|
||||
# [/DEF:BusinessSummarySource:Class]
|
||||
|
||||
# [DEF:ConfidenceState:Class]
|
||||
class ConfidenceState(str, enum.Enum):
    """Allowed values for ``DatasetProfile.confidence_state``.

    Members are also ``str`` instances equal to their string values.
    """

    CONFIRMED = "confirmed"
    MOSTLY_CONFIRMED = "mostly_confirmed"
    MIXED = "mixed"
    LOW_CONFIDENCE = "low_confidence"
    UNRESOLVED = "unresolved"
|
||||
# [/DEF:ConfidenceState:Class]
|
||||
|
||||
# [DEF:DatasetProfile:Class]
|
||||
class DatasetProfile(Base):
    """Profile record attached to a single dataset review session.

    Maps to the ``dataset_profiles`` table. ``session_id`` is unique, so a
    session has at most one profile row.
    """

    __tablename__ = "dataset_profiles"

    # Primary key: a random UUID4 rendered as a string at insert time.
    profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    # ``unique=True`` enforces the one-profile-per-session rule in the schema.
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
        unique=True,
    )

    # --- Dataset identification -------------------------------------------
    dataset_name = Column(String, nullable=False)
    schema_name = Column(String, nullable=True)
    database_name = Column(String, nullable=True)

    # --- Descriptive content ----------------------------------------------
    business_summary = Column(Text, nullable=False)
    business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False)
    description = Column(Text, nullable=True)
    # dataset_type values: table, virtual, sqllab_view, unknown
    dataset_type = Column(String, nullable=True)
    is_sqllab_view = Column(Boolean, default=False, nullable=False)

    # --- Quality indicators -----------------------------------------------
    completeness_score = Column(Float, nullable=True)
    confidence_state = Column(SQLEnum(ConfidenceState), nullable=False)
    has_blocking_findings = Column(Boolean, default=False, nullable=False)
    has_warning_findings = Column(Boolean, default=False, nullable=False)
    manual_summary_locked = Column(Boolean, default=False, nullable=False)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors DatasetReviewSession.profile.
    session = relationship(
        "DatasetReviewSession",
        back_populates="profile",
    )
|
||||
# [/DEF:DatasetProfile:Class]
|
||||
|
||||
# [DEF:FindingArea:Class]
|
||||
class FindingArea(str, enum.Enum):
    """Allowed values for ``ValidationFinding.area``.

    Members are also ``str`` instances equal to their string values.
    """

    SOURCE_INTAKE = "source_intake"
    DATASET_PROFILE = "dataset_profile"
    SEMANTIC_ENRICHMENT = "semantic_enrichment"
    CLARIFICATION = "clarification"
    FILTER_RECOVERY = "filter_recovery"
    TEMPLATE_MAPPING = "template_mapping"
    COMPILED_PREVIEW = "compiled_preview"
    LAUNCH = "launch"
    AUDIT = "audit"
|
||||
# [/DEF:FindingArea:Class]
|
||||
|
||||
# [DEF:FindingSeverity:Class]
|
||||
class FindingSeverity(str, enum.Enum):
    """Allowed values for ``ValidationFinding.severity``.

    Members are also ``str`` instances equal to their string values.
    """

    BLOCKING = "blocking"
    WARNING = "warning"
    INFORMATIONAL = "informational"
|
||||
# [/DEF:FindingSeverity:Class]
|
||||
|
||||
# [DEF:ResolutionState:Class]
|
||||
class ResolutionState(str, enum.Enum):
    """Allowed values for ``ValidationFinding.resolution_state``.

    Members are also ``str`` instances equal to their string values.
    """

    OPEN = "open"
    RESOLVED = "resolved"
    APPROVED = "approved"
    SKIPPED = "skipped"
    DEFERRED = "deferred"
    EXPERT_REVIEW = "expert_review"
|
||||
# [/DEF:ResolutionState:Class]
|
||||
|
||||
# [DEF:ValidationFinding:Class]
|
||||
class ValidationFinding(Base):
    """A single finding recorded against a dataset review session.

    Maps to the ``validation_findings`` table.
    """

    __tablename__ = "validation_findings"

    # Primary key: a random UUID4 rendered as a string at insert time.
    finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Classification ---------------------------------------------------
    area = Column(SQLEnum(FindingArea), nullable=False)
    severity = Column(SQLEnum(FindingSeverity), nullable=False)
    code = Column(String, nullable=False)

    # --- Text content -----------------------------------------------------
    title = Column(String, nullable=False)
    message = Column(Text, nullable=False)

    # --- Resolution tracking; new findings default to OPEN ----------------
    resolution_state = Column(
        SQLEnum(ResolutionState),
        default=ResolutionState.OPEN,
        nullable=False,
    )
    resolution_note = Column(Text, nullable=True)
    # Free-form string reference; no ForeignKey is declared for it.
    caused_by_ref = Column(String, nullable=True)

    # --- Timestamps -------------------------------------------------------
    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # No default: stays NULL until set by application code.
    resolved_at = Column(DateTime, nullable=True)

    # Mirrors DatasetReviewSession.findings.
    session = relationship(
        "DatasetReviewSession",
        back_populates="findings",
    )
|
||||
# [/DEF:ValidationFinding:Class]
|
||||
|
||||
# [DEF:SemanticSourceType:Class]
|
||||
class SemanticSourceType(str, enum.Enum):
    """Allowed values for ``SemanticSource.source_type``.

    Members are also ``str`` instances equal to their string values.
    """

    UPLOADED_FILE = "uploaded_file"
    CONNECTED_DICTIONARY = "connected_dictionary"
    REFERENCE_DATASET = "reference_dataset"
    NEIGHBOR_DATASET = "neighbor_dataset"
    AI_GENERATED = "ai_generated"
|
||||
# [/DEF:SemanticSourceType:Class]
|
||||
|
||||
# [DEF:TrustLevel:Class]
|
||||
class TrustLevel(str, enum.Enum):
    """Allowed values for ``SemanticSource.trust_level``.

    Members are also ``str`` instances equal to their string values.
    """

    TRUSTED = "trusted"
    RECOMMENDED = "recommended"
    CANDIDATE = "candidate"
    GENERATED = "generated"
|
||||
# [/DEF:TrustLevel:Class]
|
||||
|
||||
# [DEF:SemanticSourceStatus:Class]
|
||||
class SemanticSourceStatus(str, enum.Enum):
    """Allowed values for ``SemanticSource.status``.

    Members are also ``str`` instances equal to their string values.
    """

    AVAILABLE = "available"
    SELECTED = "selected"
    APPLIED = "applied"
    REJECTED = "rejected"
    PARTIAL = "partial"
    FAILED = "failed"
|
||||
# [/DEF:SemanticSourceStatus:Class]
|
||||
|
||||
# [DEF:SemanticSource:Class]
|
||||
class SemanticSource(Base):
    """A semantic source registered for a dataset review session.

    Maps to the ``semantic_sources`` table.
    """

    __tablename__ = "semantic_sources"

    # Primary key: a random UUID4 rendered as a string at insert time.
    source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Source description (all mandatory) -------------------------------
    source_type = Column(SQLEnum(SemanticSourceType), nullable=False)
    source_ref = Column(String, nullable=False)
    source_version = Column(String, nullable=False)
    display_name = Column(String, nullable=False)
    trust_level = Column(SQLEnum(TrustLevel), nullable=False)

    # Optional score; NULL is allowed.
    schema_overlap_score = Column(Float, nullable=True)

    # New rows start as AVAILABLE.
    status = Column(
        SQLEnum(SemanticSourceStatus),
        default=SemanticSourceStatus.AVAILABLE,
        nullable=False,
    )
    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors DatasetReviewSession.semantic_sources.
    session = relationship(
        "DatasetReviewSession",
        back_populates="semantic_sources",
    )
|
||||
# [/DEF:SemanticSource:Class]
|
||||
|
||||
# [DEF:FieldKind:Class]
|
||||
class FieldKind(str, enum.Enum):
    """Allowed values for ``SemanticFieldEntry.field_kind``.

    Members are also ``str`` instances equal to their string values.
    """

    COLUMN = "column"
    METRIC = "metric"
    FILTER_DIMENSION = "filter_dimension"
    PARAMETER = "parameter"
|
||||
# [/DEF:FieldKind:Class]
|
||||
|
||||
# [DEF:FieldProvenance:Class]
|
||||
class FieldProvenance(str, enum.Enum):
    """Allowed values for ``SemanticFieldEntry.provenance``.

    Members are also ``str`` instances equal to their string values.
    """

    DICTIONARY_EXACT = "dictionary_exact"
    REFERENCE_IMPORTED = "reference_imported"
    FUZZY_INFERRED = "fuzzy_inferred"
    AI_GENERATED = "ai_generated"
    MANUAL_OVERRIDE = "manual_override"
    UNRESOLVED = "unresolved"
|
||||
# [/DEF:FieldProvenance:Class]
|
||||
|
||||
# [DEF:SemanticFieldEntry:Class]
|
||||
class SemanticFieldEntry(Base):
    """Semantic metadata for one named field within a review session.

    Maps to the ``semantic_field_entries`` table and owns its
    ``SemanticCandidate`` rows (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "semantic_field_entries"

    # Primary key: a random UUID4 rendered as a string at insert time.
    field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Field identity ---------------------------------------------------
    field_name = Column(String, nullable=False)
    field_kind = Column(SQLEnum(FieldKind), nullable=False)

    # --- Semantic attributes (all optional) -------------------------------
    verbose_name = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    display_format = Column(String, nullable=True)

    # --- Origin of the current values -------------------------------------
    provenance = Column(
        SQLEnum(FieldProvenance),
        default=FieldProvenance.UNRESOLVED,
        nullable=False,
    )
    # Plain string; no ForeignKey is declared for it.
    source_id = Column(String, nullable=True)
    confidence_rank = Column(Integer, nullable=True)

    # --- Review flags; note that needs_review defaults to True ------------
    is_locked = Column(Boolean, default=False, nullable=False)
    has_conflict = Column(Boolean, default=False, nullable=False)
    needs_review = Column(Boolean, default=True, nullable=False)

    # last_changed_by values: system, user, agent
    last_changed_by = Column(String, nullable=False)
    # user_feedback values: up, down, null
    user_feedback = Column(String, nullable=True)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors DatasetReviewSession.semantic_fields.
    session = relationship(
        "DatasetReviewSession",
        back_populates="semantic_fields",
    )
    candidates = relationship(
        "SemanticCandidate",
        back_populates="field",
        cascade="all, delete-orphan",
    )
|
||||
# [/DEF:SemanticFieldEntry:Class]
|
||||
|
||||
# [DEF:CandidateMatchType:Class]
|
||||
class CandidateMatchType(str, enum.Enum):
    """Allowed values for ``SemanticCandidate.match_type``.

    Members are also ``str`` instances equal to their string values.
    """

    EXACT = "exact"
    REFERENCE = "reference"
    FUZZY = "fuzzy"
    GENERATED = "generated"
|
||||
# [/DEF:CandidateMatchType:Class]
|
||||
|
||||
# [DEF:CandidateStatus:Class]
|
||||
class CandidateStatus(str, enum.Enum):
    """Allowed values for ``SemanticCandidate.status``.

    Members are also ``str`` instances equal to their string values.
    """

    PROPOSED = "proposed"
    ACCEPTED = "accepted"
    REJECTED = "rejected"
    SUPERSEDED = "superseded"
|
||||
# [/DEF:CandidateStatus:Class]
|
||||
|
||||
# [DEF:SemanticCandidate:Class]
|
||||
class SemanticCandidate(Base):
    """One proposed set of semantic values for a ``SemanticFieldEntry``.

    Maps to the ``semantic_candidates`` table.
    """

    __tablename__ = "semantic_candidates"

    # Primary key: a random UUID4 rendered as a string at insert time.
    candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    field_id = Column(
        String,
        ForeignKey("semantic_field_entries.field_id"),
        nullable=False,
    )
    # Plain string; no ForeignKey is declared for it.
    source_id = Column(String, nullable=True)

    # --- Ranking and match quality (all mandatory) ------------------------
    candidate_rank = Column(Integer, nullable=False)
    match_type = Column(SQLEnum(CandidateMatchType), nullable=False)
    confidence_score = Column(Float, nullable=False)

    # --- Proposed values (all optional) -----------------------------------
    proposed_verbose_name = Column(String, nullable=True)
    proposed_description = Column(Text, nullable=True)
    proposed_display_format = Column(String, nullable=True)

    # New candidates start as PROPOSED.
    status = Column(
        SQLEnum(CandidateStatus),
        default=CandidateStatus.PROPOSED,
        nullable=False,
    )
    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors SemanticFieldEntry.candidates.
    field = relationship(
        "SemanticFieldEntry",
        back_populates="candidates",
    )
|
||||
# [/DEF:SemanticCandidate:Class]
|
||||
|
||||
# [DEF:FilterSource:Class]
|
||||
class FilterSource(str, enum.Enum):
    """Allowed values for ``ImportedFilter.source``.

    Members are also ``str`` instances equal to their string values.
    """

    SUPERSET_NATIVE = "superset_native"
    SUPERSET_URL = "superset_url"
    MANUAL = "manual"
    INFERRED = "inferred"
|
||||
# [/DEF:FilterSource:Class]
|
||||
|
||||
# [DEF:FilterConfidenceState:Class]
|
||||
class FilterConfidenceState(str, enum.Enum):
    """Allowed values for ``ImportedFilter.confidence_state``.

    Members are also ``str`` instances equal to their string values.
    """

    CONFIRMED = "confirmed"
    IMPORTED = "imported"
    INFERRED = "inferred"
    AI_DRAFT = "ai_draft"
    UNRESOLVED = "unresolved"
|
||||
# [/DEF:FilterConfidenceState:Class]
|
||||
|
||||
# [DEF:FilterRecoveryStatus:Class]
|
||||
class FilterRecoveryStatus(str, enum.Enum):
    """Allowed values for ``ImportedFilter.recovery_status``.

    Members are also ``str`` instances equal to their string values.
    """

    RECOVERED = "recovered"
    PARTIAL = "partial"
    MISSING = "missing"
    CONFLICTED = "conflicted"
|
||||
# [/DEF:FilterRecoveryStatus:Class]
|
||||
|
||||
# [DEF:ImportedFilter:Class]
|
||||
class ImportedFilter(Base):
    """A filter recorded for a dataset review session.

    Maps to the ``imported_filters`` table.
    """

    __tablename__ = "imported_filters"

    # Primary key: a random UUID4 rendered as a string at insert time.
    filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Naming -----------------------------------------------------------
    filter_name = Column(String, nullable=False)
    display_name = Column(String, nullable=True)

    # --- Values, stored as JSON -------------------------------------------
    # raw_value is mandatory; normalized_value may be NULL.
    raw_value = Column(JSON, nullable=False)
    normalized_value = Column(JSON, nullable=True)

    # --- Origin and confidence --------------------------------------------
    source = Column(SQLEnum(FilterSource), nullable=False)
    confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False)
    requires_confirmation = Column(Boolean, default=False, nullable=False)
    recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False)
    notes = Column(Text, nullable=True)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors DatasetReviewSession.imported_filters.
    session = relationship(
        "DatasetReviewSession",
        back_populates="imported_filters",
    )
|
||||
# [/DEF:ImportedFilter:Class]
|
||||
|
||||
# [DEF:VariableKind:Class]
|
||||
class VariableKind(str, enum.Enum):
    """Allowed values for ``TemplateVariable.variable_kind``.

    Members are also ``str`` instances equal to their string values.
    """

    NATIVE_FILTER = "native_filter"
    PARAMETER = "parameter"
    DERIVED = "derived"
    UNKNOWN = "unknown"
|
||||
# [/DEF:VariableKind:Class]
|
||||
|
||||
# [DEF:MappingStatus:Class]
|
||||
class MappingStatus(str, enum.Enum):
    """Allowed values for ``TemplateVariable.mapping_status``.

    Members are also ``str`` instances equal to their string values.
    """

    UNMAPPED = "unmapped"
    PROPOSED = "proposed"
    APPROVED = "approved"
    OVERRIDDEN = "overridden"
    INVALID = "invalid"
|
||||
# [/DEF:MappingStatus:Class]
|
||||
|
||||
# [DEF:TemplateVariable:Class]
|
||||
class TemplateVariable(Base):
    """A template variable recorded for a dataset review session.

    Maps to the ``template_variables`` table.
    """

    __tablename__ = "template_variables"

    # Primary key: a random UUID4 rendered as a string at insert time.
    variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Definition -------------------------------------------------------
    variable_name = Column(String, nullable=False)
    expression_source = Column(Text, nullable=False)
    variable_kind = Column(SQLEnum(VariableKind), nullable=False)

    # Variables are required unless explicitly flagged otherwise.
    is_required = Column(Boolean, default=True, nullable=False)
    # Optional JSON-encoded default.
    default_value = Column(JSON, nullable=True)

    # New variables start as UNMAPPED.
    mapping_status = Column(
        SQLEnum(MappingStatus),
        default=MappingStatus.UNMAPPED,
        nullable=False,
    )

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors DatasetReviewSession.template_variables.
    session = relationship(
        "DatasetReviewSession",
        back_populates="template_variables",
    )
|
||||
# [/DEF:TemplateVariable:Class]
|
||||
|
||||
# [DEF:MappingMethod:Class]
|
||||
class MappingMethod(str, enum.Enum):
    """Allowed values for ``ExecutionMapping.mapping_method``.

    Members are also ``str`` instances equal to their string values.
    """

    DIRECT_MATCH = "direct_match"
    HEURISTIC_MATCH = "heuristic_match"
    SEMANTIC_MATCH = "semantic_match"
    MANUAL_OVERRIDE = "manual_override"
|
||||
# [/DEF:MappingMethod:Class]
|
||||
|
||||
# [DEF:MappingWarningLevel:Class]
|
||||
class MappingWarningLevel(str, enum.Enum):
    """Allowed values for ``ExecutionMapping.warning_level``.

    Members are also ``str`` instances equal to their string values.
    """

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
|
||||
# [/DEF:MappingWarningLevel:Class]
|
||||
|
||||
# [DEF:ApprovalState:Class]
|
||||
class ApprovalState(str, enum.Enum):
    """Allowed values for ``ExecutionMapping.approval_state``.

    Members are also ``str`` instances equal to their string values.
    """

    PENDING = "pending"
    APPROVED = "approved"
    REJECTED = "rejected"
    NOT_REQUIRED = "not_required"
|
||||
# [/DEF:ApprovalState:Class]
|
||||
|
||||
# [DEF:ExecutionMapping:Class]
|
||||
class ExecutionMapping(Base):
    """A mapping row pairing a filter id with a variable id for a session.

    Maps to the ``execution_mappings`` table.
    """

    __tablename__ = "execution_mappings"

    # Primary key: a random UUID4 rendered as a string at insert time.
    mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # Mandatory plain-string ids; no ForeignKey is declared for either.
    filter_id = Column(String, nullable=False)
    variable_id = Column(String, nullable=False)

    # --- How the value was mapped -----------------------------------------
    mapping_method = Column(SQLEnum(MappingMethod), nullable=False)
    raw_input_value = Column(JSON, nullable=False)
    effective_value = Column(JSON, nullable=True)
    transformation_note = Column(Text, nullable=True)
    # Nullable: a mapping may carry no warning level at all.
    warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True)

    # --- Approval ---------------------------------------------------------
    requires_explicit_approval = Column(Boolean, default=False, nullable=False)
    approval_state = Column(
        SQLEnum(ApprovalState),
        default=ApprovalState.NOT_REQUIRED,
        nullable=False,
    )
    # Plain string; no ForeignKey is declared for it.
    approved_by_user_id = Column(String, nullable=True)
    approved_at = Column(DateTime, nullable=True)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors DatasetReviewSession.execution_mappings.
    session = relationship(
        "DatasetReviewSession",
        back_populates="execution_mappings",
    )
|
||||
# [/DEF:ExecutionMapping:Class]
|
||||
|
||||
# [DEF:ClarificationStatus:Class]
|
||||
class ClarificationStatus(str, enum.Enum):
    """Allowed values for ``ClarificationSession.status``.

    Members are also ``str`` instances equal to their string values.
    """

    PENDING = "pending"
    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    CANCELLED = "cancelled"
|
||||
# [/DEF:ClarificationStatus:Class]
|
||||
|
||||
# [DEF:ClarificationSession:Class]
|
||||
class ClarificationSession(Base):
    """A clarification run belonging to a dataset review session.

    Maps to the ``clarification_sessions`` table and owns its
    ``ClarificationQuestion`` rows (``cascade="all, delete-orphan"``).
    """

    __tablename__ = "clarification_sessions"

    # Primary key: a random UUID4 rendered as a string at insert time.
    clarification_session_id = Column(
        String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # New clarification sessions start as PENDING.
    status = Column(
        SQLEnum(ClarificationStatus),
        default=ClarificationStatus.PENDING,
        nullable=False,
    )
    # Plain string; no ForeignKey is declared for it.
    current_question_id = Column(String, nullable=True)

    # --- Progress counters, both starting at zero -------------------------
    resolved_count = Column(Integer, default=0, nullable=False)
    remaining_count = Column(Integer, default=0, nullable=False)
    summary_delta = Column(Text, nullable=True)

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    started_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
    completed_at = Column(DateTime, nullable=True)

    # Mirrors DatasetReviewSession.clarification_sessions.
    session = relationship(
        "DatasetReviewSession",
        back_populates="clarification_sessions",
    )
    questions = relationship(
        "ClarificationQuestion",
        back_populates="clarification_session",
        cascade="all, delete-orphan",
    )
|
||||
# [/DEF:ClarificationSession:Class]
|
||||
|
||||
# [DEF:QuestionState:Class]
|
||||
class QuestionState(str, enum.Enum):
    """Allowed values for ``ClarificationQuestion.state``.

    Members are also ``str`` instances equal to their string values.
    """

    OPEN = "open"
    ANSWERED = "answered"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
    SUPERSEDED = "superseded"
|
||||
# [/DEF:QuestionState:Class]
|
||||
|
||||
# [DEF:ClarificationQuestion:Class]
|
||||
class ClarificationQuestion(Base):
    """One question inside a ``ClarificationSession``.

    Maps to the ``clarification_questions`` table. Owns its options and its
    single answer; both relationships use ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "clarification_questions"

    # Primary key: a random UUID4 rendered as a string at insert time.
    question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    clarification_session_id = Column(
        String,
        ForeignKey("clarification_sessions.clarification_session_id"),
        nullable=False,
    )

    # --- Question content -------------------------------------------------
    topic_ref = Column(String, nullable=False)
    question_text = Column(Text, nullable=False)
    why_it_matters = Column(Text, nullable=False)
    current_guess = Column(Text, nullable=True)

    # --- Ordering and state -----------------------------------------------
    priority = Column(Integer, default=0, nullable=False)
    # New questions start as OPEN.
    state = Column(
        SQLEnum(QuestionState),
        default=QuestionState.OPEN,
        nullable=False,
    )

    # --- Timestamps (naive UTC via datetime.utcnow) -----------------------
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    # Refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    # Mirrors ClarificationSession.questions.
    clarification_session = relationship(
        "ClarificationSession",
        back_populates="questions",
    )
    options = relationship(
        "ClarificationOption",
        back_populates="question",
        cascade="all, delete-orphan",
    )
    # Scalar relationship (uselist=False): at most one answer per question.
    answer = relationship(
        "ClarificationAnswer",
        back_populates="question",
        uselist=False,
        cascade="all, delete-orphan",
    )
|
||||
# [/DEF:ClarificationQuestion:Class]
|
||||
|
||||
# [DEF:ClarificationOption:Class]
|
||||
class ClarificationOption(Base):
    """One selectable option attached to a ``ClarificationQuestion``.

    Maps to the ``clarification_options`` table.
    """

    __tablename__ = "clarification_options"

    # Primary key: a random UUID4 rendered as a string at insert time.
    option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    question_id = Column(
        String,
        ForeignKey("clarification_questions.question_id"),
        nullable=False,
    )

    # Label and value are both mandatory strings.
    label = Column(String, nullable=False)
    value = Column(String, nullable=False)

    is_recommended = Column(Boolean, default=False, nullable=False)
    display_order = Column(Integer, default=0, nullable=False)

    # Mirrors ClarificationQuestion.options.
    question = relationship(
        "ClarificationQuestion",
        back_populates="options",
    )
|
||||
# [/DEF:ClarificationOption:Class]
|
||||
|
||||
# [DEF:AnswerKind:Class]
|
||||
class AnswerKind(str, enum.Enum):
    """Allowed values for ``ClarificationAnswer.answer_kind``.

    Members are also ``str`` instances equal to their string values.
    """

    SELECTED = "selected"
    CUSTOM = "custom"
    SKIPPED = "skipped"
    EXPERT_REVIEW = "expert_review"
|
||||
# [/DEF:AnswerKind:Class]
|
||||
|
||||
# [DEF:ClarificationAnswer:Class]
|
||||
class ClarificationAnswer(Base):
    """The answer recorded for a ``ClarificationQuestion``.

    Maps to the ``clarification_answers`` table. ``question_id`` is unique,
    so a question has at most one answer row.
    """

    __tablename__ = "clarification_answers"

    # Primary key: a random UUID4 rendered as a string at insert time.
    answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    # ``unique=True`` enforces the one-answer-per-question rule in the schema.
    question_id = Column(
        String,
        ForeignKey("clarification_questions.question_id"),
        nullable=False,
        unique=True,
    )

    answer_kind = Column(SQLEnum(AnswerKind), nullable=False)
    answer_value = Column(Text, nullable=True)
    # Mandatory plain string; no ForeignKey is declared for it.
    answered_by_user_id = Column(String, nullable=False)
    impact_summary = Column(Text, nullable=True)

    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors ClarificationQuestion.answer.
    question = relationship(
        "ClarificationQuestion",
        back_populates="answer",
    )
|
||||
# [/DEF:ClarificationAnswer:Class]
|
||||
|
||||
# [DEF:PreviewStatus:Class]
|
||||
class PreviewStatus(str, enum.Enum):
    """Allowed values for ``CompiledPreview.preview_status``.

    Members are also ``str`` instances equal to their string values.
    """

    PENDING = "pending"
    READY = "ready"
    FAILED = "failed"
    STALE = "stale"
|
||||
# [/DEF:PreviewStatus:Class]
|
||||
|
||||
# [DEF:CompiledPreview:Class]
|
||||
class CompiledPreview(Base):
    """A compiled SQL preview recorded for a dataset review session.

    Maps to the ``compiled_previews`` table.
    """

    __tablename__ = "compiled_previews"

    # Primary key: a random UUID4 rendered as a string at insert time.
    preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # New previews start as PENDING.
    preview_status = Column(
        SQLEnum(PreviewStatus),
        default=PreviewStatus.PENDING,
        nullable=False,
    )
    # Nullable, so a row can exist before any SQL text is stored.
    compiled_sql = Column(Text, nullable=True)
    preview_fingerprint = Column(String, nullable=False)
    # Defaults to the literal "superset".
    compiled_by = Column(String, default="superset", nullable=False)

    # --- Error details (optional) -----------------------------------------
    error_code = Column(String, nullable=True)
    error_details = Column(Text, nullable=True)

    # --- Timestamps -------------------------------------------------------
    # No default: stays NULL until set by application code.
    compiled_at = Column(DateTime, nullable=True)
    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors DatasetReviewSession.previews.
    session = relationship(
        "DatasetReviewSession",
        back_populates="previews",
    )
|
||||
# [/DEF:CompiledPreview:Class]
|
||||
|
||||
# [DEF:LaunchStatus:Class]
|
||||
class LaunchStatus(str, enum.Enum):
    """Allowed values for ``DatasetRunContext.launch_status``.

    Members are also ``str`` instances equal to their string values.
    """

    STARTED = "started"
    SUCCESS = "success"
    FAILED = "failed"
|
||||
# [/DEF:LaunchStatus:Class]
|
||||
|
||||
# [DEF:DatasetRunContext:Class]
|
||||
class DatasetRunContext(Base):
    """Context recorded for one dataset launch within a review session.

    Maps to the ``dataset_run_contexts`` table.
    """

    __tablename__ = "dataset_run_contexts"

    # Primary key: a random UUID4 rendered as a string at insert time.
    run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- Mandatory plain-string references (no ForeignKey declared) -------
    dataset_ref = Column(String, nullable=False)
    environment_id = Column(String, nullable=False)
    preview_id = Column(String, nullable=False)
    sql_lab_session_ref = Column(String, nullable=False)

    # --- Mandatory JSON payloads ------------------------------------------
    effective_filters = Column(JSON, nullable=False)
    template_params = Column(JSON, nullable=False)
    approved_mapping_ids = Column(JSON, nullable=False)
    semantic_decision_refs = Column(JSON, nullable=False)
    open_warning_refs = Column(JSON, nullable=False)

    # --- Launch outcome ---------------------------------------------------
    # No default is declared, so the caller must supply launch_status.
    launch_status = Column(SQLEnum(LaunchStatus), nullable=False)
    launch_error = Column(Text, nullable=True)

    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors DatasetReviewSession.run_contexts.
    session = relationship(
        "DatasetReviewSession",
        back_populates="run_contexts",
    )
|
||||
# [/DEF:DatasetRunContext:Class]
|
||||
|
||||
# [DEF:ArtifactType:Class]
|
||||
class ArtifactType(str, enum.Enum):
    """Allowed values for ``ExportArtifact.artifact_type``.

    Members are also ``str`` instances equal to their string values.
    """

    DOCUMENTATION = "documentation"
    VALIDATION_REPORT = "validation_report"
    RUN_SUMMARY = "run_summary"
|
||||
# [/DEF:ArtifactType:Class]
|
||||
|
||||
# [DEF:ArtifactFormat:Class]
|
||||
class ArtifactFormat(str, enum.Enum):
    """Allowed values for ``ExportArtifact.format``.

    Members are also ``str`` instances equal to their string values.
    """

    JSON = "json"
    MARKDOWN = "markdown"
    CSV = "csv"
    PDF = "pdf"
|
||||
# [/DEF:ArtifactFormat:Class]
|
||||
|
||||
# [DEF:ExportArtifact:Class]
|
||||
class ExportArtifact(Base):
    """An export artifact recorded for a dataset review session.

    Maps to the ``export_artifacts`` table.
    """

    __tablename__ = "export_artifacts"

    # Primary key: a random UUID4 rendered as a string at insert time.
    artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    session_id = Column(
        String,
        ForeignKey("dataset_review_sessions.session_id"),
        nullable=False,
    )

    # --- What was exported and in which format ----------------------------
    artifact_type = Column(SQLEnum(ArtifactType), nullable=False)
    # The attribute name is also the column name, so it is kept as-is even
    # though it shadows the ``format`` builtin inside this class body.
    format = Column(SQLEnum(ArtifactFormat), nullable=False)
    storage_ref = Column(String, nullable=False)

    # Mandatory plain string; no ForeignKey is declared for it.
    created_by_user_id = Column(String, nullable=False)
    # Naive UTC (datetime.utcnow) captured at insert.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)

    # Mirrors DatasetReviewSession.export_artifacts.
    session = relationship(
        "DatasetReviewSession",
        back_populates="export_artifacts",
    )
|
||||
# [/DEF:ExportArtifact:Class]
|
||||
|
||||
# [/DEF:DatasetReviewModels:Module]
|
||||
Reference in New Issue
Block a user