feat: initial dataset review orchestration flow implementation
This commit is contained in:
@@ -81,6 +81,11 @@ class GlobalSettings(BaseModel):
|
||||
|
||||
# Migration sync settings
|
||||
migration_sync_cron: str = "0 2 * * *"
|
||||
|
||||
# Dataset Review Feature Flags
|
||||
ff_dataset_auto_review: bool = True
|
||||
ff_dataset_clarification: bool = True
|
||||
ff_dataset_execution: bool = True
|
||||
# [/DEF:GlobalSettings:DataClass]
|
||||
|
||||
# [DEF:AppConfig:DataClass]
|
||||
|
||||
661
backend/src/models/dataset_review.py
Normal file
661
backend/src/models/dataset_review.py
Normal file
@@ -0,0 +1,661 @@
|
||||
# [DEF:DatasetReviewModels:Module]
|
||||
#
|
||||
# @TIER: STANDARD
|
||||
# @COMPLEXITY: 3
|
||||
# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
|
||||
# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
|
||||
# @LAYER: Domain
|
||||
# @RELATION: INHERITS_FROM -> [Base]
|
||||
# @RELATION: DEPENDS_ON -> [AuthModels]
|
||||
# @RELATION: DEPENDS_ON -> [MappingModels]
|
||||
#
|
||||
# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user.
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
import uuid
|
||||
import enum
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
from sqlalchemy import Column, String, Integer, Boolean, DateTime, ForeignKey, Text, JSON, Float, Enum as SQLEnum, Table
|
||||
from sqlalchemy.orm import relationship
|
||||
from .mapping import Base
|
||||
# [/SECTION]
|
||||
|
||||
# [DEF:SessionStatus:Class]
|
||||
class SessionStatus(str, enum.Enum):
|
||||
ACTIVE = "active"
|
||||
PAUSED = "paused"
|
||||
COMPLETED = "completed"
|
||||
ARCHIVED = "archived"
|
||||
CANCELLED = "cancelled"
|
||||
# [/DEF:SessionStatus:Class]
|
||||
|
||||
# [DEF:SessionPhase:Class]
|
||||
class SessionPhase(str, enum.Enum):
|
||||
INTAKE = "intake"
|
||||
RECOVERY = "recovery"
|
||||
REVIEW = "review"
|
||||
SEMANTIC_REVIEW = "semantic_review"
|
||||
CLARIFICATION = "clarification"
|
||||
MAPPING_REVIEW = "mapping_review"
|
||||
PREVIEW = "preview"
|
||||
LAUNCH = "launch"
|
||||
POST_RUN = "post_run"
|
||||
# [/DEF:SessionPhase:Class]
|
||||
|
||||
# [DEF:ReadinessState:Class]
|
||||
class ReadinessState(str, enum.Enum):
|
||||
EMPTY = "empty"
|
||||
IMPORTING = "importing"
|
||||
REVIEW_READY = "review_ready"
|
||||
SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed"
|
||||
CLARIFICATION_NEEDED = "clarification_needed"
|
||||
CLARIFICATION_ACTIVE = "clarification_active"
|
||||
MAPPING_REVIEW_NEEDED = "mapping_review_needed"
|
||||
COMPILED_PREVIEW_READY = "compiled_preview_ready"
|
||||
PARTIALLY_READY = "partially_ready"
|
||||
RUN_READY = "run_ready"
|
||||
RUN_IN_PROGRESS = "run_in_progress"
|
||||
COMPLETED = "completed"
|
||||
RECOVERY_REQUIRED = "recovery_required"
|
||||
# [/DEF:ReadinessState:Class]
|
||||
|
||||
# [DEF:RecommendedAction:Class]
|
||||
class RecommendedAction(str, enum.Enum):
|
||||
IMPORT_FROM_SUPERSET = "import_from_superset"
|
||||
REVIEW_DOCUMENTATION = "review_documentation"
|
||||
APPLY_SEMANTIC_SOURCE = "apply_semantic_source"
|
||||
START_CLARIFICATION = "start_clarification"
|
||||
ANSWER_NEXT_QUESTION = "answer_next_question"
|
||||
APPROVE_MAPPING = "approve_mapping"
|
||||
GENERATE_SQL_PREVIEW = "generate_sql_preview"
|
||||
COMPLETE_REQUIRED_VALUES = "complete_required_values"
|
||||
LAUNCH_DATASET = "launch_dataset"
|
||||
RESUME_SESSION = "resume_session"
|
||||
EXPORT_OUTPUTS = "export_outputs"
|
||||
# [/DEF:RecommendedAction:Class]
|
||||
|
||||
# [DEF:SessionCollaboratorRole:Class]
|
||||
class SessionCollaboratorRole(str, enum.Enum):
|
||||
VIEWER = "viewer"
|
||||
REVIEWER = "reviewer"
|
||||
APPROVER = "approver"
|
||||
# [/DEF:SessionCollaboratorRole:Class]
|
||||
|
||||
# [DEF:SessionCollaborator:Class]
|
||||
class SessionCollaborator(Base):
|
||||
__tablename__ = "session_collaborators"
|
||||
|
||||
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
user_id = Column(String, ForeignKey("users.id"), nullable=False)
|
||||
role = Column(SQLEnum(SessionCollaboratorRole), nullable=False)
|
||||
added_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="collaborators")
|
||||
user = relationship("User")
|
||||
# [/DEF:SessionCollaborator:Class]
|
||||
|
||||
# [DEF:DatasetReviewSession:Class]
|
||||
class DatasetReviewSession(Base):
|
||||
__tablename__ = "dataset_review_sessions"
|
||||
|
||||
session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
user_id = Column(String, ForeignKey("users.id"), nullable=False)
|
||||
environment_id = Column(String, ForeignKey("environments.id"), nullable=False)
|
||||
source_kind = Column(String, nullable=False) # superset_link, dataset_selection
|
||||
source_input = Column(String, nullable=False)
|
||||
dataset_ref = Column(String, nullable=False)
|
||||
dataset_id = Column(Integer, nullable=True)
|
||||
dashboard_id = Column(Integer, nullable=True)
|
||||
readiness_state = Column(SQLEnum(ReadinessState), nullable=False, default=ReadinessState.EMPTY)
|
||||
recommended_action = Column(SQLEnum(RecommendedAction), nullable=False, default=RecommendedAction.IMPORT_FROM_SUPERSET)
|
||||
status = Column(SQLEnum(SessionStatus), nullable=False, default=SessionStatus.ACTIVE)
|
||||
current_phase = Column(SQLEnum(SessionPhase), nullable=False, default=SessionPhase.INTAKE)
|
||||
active_task_id = Column(String, nullable=True)
|
||||
last_preview_id = Column(String, nullable=True)
|
||||
last_run_context_id = Column(String, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
last_activity_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
closed_at = Column(DateTime, nullable=True)
|
||||
|
||||
owner = relationship("User")
|
||||
collaborators = relationship("SessionCollaborator", back_populates="session", cascade="all, delete-orphan")
|
||||
profile = relationship("DatasetProfile", back_populates="session", uselist=False, cascade="all, delete-orphan")
|
||||
findings = relationship("ValidationFinding", back_populates="session", cascade="all, delete-orphan")
|
||||
semantic_sources = relationship("SemanticSource", back_populates="session", cascade="all, delete-orphan")
|
||||
semantic_fields = relationship("SemanticFieldEntry", back_populates="session", cascade="all, delete-orphan")
|
||||
imported_filters = relationship("ImportedFilter", back_populates="session", cascade="all, delete-orphan")
|
||||
template_variables = relationship("TemplateVariable", back_populates="session", cascade="all, delete-orphan")
|
||||
execution_mappings = relationship("ExecutionMapping", back_populates="session", cascade="all, delete-orphan")
|
||||
clarification_sessions = relationship("ClarificationSession", back_populates="session", cascade="all, delete-orphan")
|
||||
previews = relationship("CompiledPreview", back_populates="session", cascade="all, delete-orphan")
|
||||
run_contexts = relationship("DatasetRunContext", back_populates="session", cascade="all, delete-orphan")
|
||||
export_artifacts = relationship("ExportArtifact", back_populates="session", cascade="all, delete-orphan")
|
||||
# [/DEF:DatasetReviewSession:Class]
|
||||
|
||||
# [DEF:BusinessSummarySource:Class]
|
||||
class BusinessSummarySource(str, enum.Enum):
|
||||
CONFIRMED = "confirmed"
|
||||
IMPORTED = "imported"
|
||||
INFERRED = "inferred"
|
||||
AI_DRAFT = "ai_draft"
|
||||
MANUAL_OVERRIDE = "manual_override"
|
||||
# [/DEF:BusinessSummarySource:Class]
|
||||
|
||||
# [DEF:ConfidenceState:Class]
|
||||
class ConfidenceState(str, enum.Enum):
|
||||
CONFIRMED = "confirmed"
|
||||
MOSTLY_CONFIRMED = "mostly_confirmed"
|
||||
MIXED = "mixed"
|
||||
LOW_CONFIDENCE = "low_confidence"
|
||||
UNRESOLVED = "unresolved"
|
||||
# [/DEF:ConfidenceState:Class]
|
||||
|
||||
# [DEF:DatasetProfile:Class]
|
||||
class DatasetProfile(Base):
|
||||
__tablename__ = "dataset_profiles"
|
||||
|
||||
profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False, unique=True)
|
||||
dataset_name = Column(String, nullable=False)
|
||||
schema_name = Column(String, nullable=True)
|
||||
database_name = Column(String, nullable=True)
|
||||
business_summary = Column(Text, nullable=False)
|
||||
business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False)
|
||||
description = Column(Text, nullable=True)
|
||||
dataset_type = Column(String, nullable=True) # table, virtual, sqllab_view, unknown
|
||||
is_sqllab_view = Column(Boolean, nullable=False, default=False)
|
||||
completeness_score = Column(Float, nullable=True)
|
||||
confidence_state = Column(SQLEnum(ConfidenceState), nullable=False)
|
||||
has_blocking_findings = Column(Boolean, nullable=False, default=False)
|
||||
has_warning_findings = Column(Boolean, nullable=False, default=False)
|
||||
manual_summary_locked = Column(Boolean, nullable=False, default=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="profile")
|
||||
# [/DEF:DatasetProfile:Class]
|
||||
|
||||
# [DEF:FindingArea:Class]
|
||||
class FindingArea(str, enum.Enum):
|
||||
SOURCE_INTAKE = "source_intake"
|
||||
DATASET_PROFILE = "dataset_profile"
|
||||
SEMANTIC_ENRICHMENT = "semantic_enrichment"
|
||||
CLARIFICATION = "clarification"
|
||||
FILTER_RECOVERY = "filter_recovery"
|
||||
TEMPLATE_MAPPING = "template_mapping"
|
||||
COMPILED_PREVIEW = "compiled_preview"
|
||||
LAUNCH = "launch"
|
||||
AUDIT = "audit"
|
||||
# [/DEF:FindingArea:Class]
|
||||
|
||||
# [DEF:FindingSeverity:Class]
|
||||
class FindingSeverity(str, enum.Enum):
|
||||
BLOCKING = "blocking"
|
||||
WARNING = "warning"
|
||||
INFORMATIONAL = "informational"
|
||||
# [/DEF:FindingSeverity:Class]
|
||||
|
||||
# [DEF:ResolutionState:Class]
|
||||
class ResolutionState(str, enum.Enum):
|
||||
OPEN = "open"
|
||||
RESOLVED = "resolved"
|
||||
APPROVED = "approved"
|
||||
SKIPPED = "skipped"
|
||||
DEFERRED = "deferred"
|
||||
EXPERT_REVIEW = "expert_review"
|
||||
# [/DEF:ResolutionState:Class]
|
||||
|
||||
# [DEF:ValidationFinding:Class]
|
||||
class ValidationFinding(Base):
|
||||
__tablename__ = "validation_findings"
|
||||
|
||||
finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
area = Column(SQLEnum(FindingArea), nullable=False)
|
||||
severity = Column(SQLEnum(FindingSeverity), nullable=False)
|
||||
code = Column(String, nullable=False)
|
||||
title = Column(String, nullable=False)
|
||||
message = Column(Text, nullable=False)
|
||||
resolution_state = Column(SQLEnum(ResolutionState), nullable=False, default=ResolutionState.OPEN)
|
||||
resolution_note = Column(Text, nullable=True)
|
||||
caused_by_ref = Column(String, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
resolved_at = Column(DateTime, nullable=True)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="findings")
|
||||
# [/DEF:ValidationFinding:Class]
|
||||
|
||||
# [DEF:SemanticSourceType:Class]
|
||||
class SemanticSourceType(str, enum.Enum):
|
||||
UPLOADED_FILE = "uploaded_file"
|
||||
CONNECTED_DICTIONARY = "connected_dictionary"
|
||||
REFERENCE_DATASET = "reference_dataset"
|
||||
NEIGHBOR_DATASET = "neighbor_dataset"
|
||||
AI_GENERATED = "ai_generated"
|
||||
# [/DEF:SemanticSourceType:Class]
|
||||
|
||||
# [DEF:TrustLevel:Class]
|
||||
class TrustLevel(str, enum.Enum):
|
||||
TRUSTED = "trusted"
|
||||
RECOMMENDED = "recommended"
|
||||
CANDIDATE = "candidate"
|
||||
GENERATED = "generated"
|
||||
# [/DEF:TrustLevel:Class]
|
||||
|
||||
# [DEF:SemanticSourceStatus:Class]
|
||||
class SemanticSourceStatus(str, enum.Enum):
|
||||
AVAILABLE = "available"
|
||||
SELECTED = "selected"
|
||||
APPLIED = "applied"
|
||||
REJECTED = "rejected"
|
||||
PARTIAL = "partial"
|
||||
FAILED = "failed"
|
||||
# [/DEF:SemanticSourceStatus:Class]
|
||||
|
||||
# [DEF:SemanticSource:Class]
|
||||
class SemanticSource(Base):
|
||||
__tablename__ = "semantic_sources"
|
||||
|
||||
source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
source_type = Column(SQLEnum(SemanticSourceType), nullable=False)
|
||||
source_ref = Column(String, nullable=False)
|
||||
source_version = Column(String, nullable=False)
|
||||
display_name = Column(String, nullable=False)
|
||||
trust_level = Column(SQLEnum(TrustLevel), nullable=False)
|
||||
schema_overlap_score = Column(Float, nullable=True)
|
||||
status = Column(SQLEnum(SemanticSourceStatus), nullable=False, default=SemanticSourceStatus.AVAILABLE)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="semantic_sources")
|
||||
# [/DEF:SemanticSource:Class]
|
||||
|
||||
# [DEF:FieldKind:Class]
|
||||
class FieldKind(str, enum.Enum):
|
||||
COLUMN = "column"
|
||||
METRIC = "metric"
|
||||
FILTER_DIMENSION = "filter_dimension"
|
||||
PARAMETER = "parameter"
|
||||
# [/DEF:FieldKind:Class]
|
||||
|
||||
# [DEF:FieldProvenance:Class]
|
||||
class FieldProvenance(str, enum.Enum):
|
||||
DICTIONARY_EXACT = "dictionary_exact"
|
||||
REFERENCE_IMPORTED = "reference_imported"
|
||||
FUZZY_INFERRED = "fuzzy_inferred"
|
||||
AI_GENERATED = "ai_generated"
|
||||
MANUAL_OVERRIDE = "manual_override"
|
||||
UNRESOLVED = "unresolved"
|
||||
# [/DEF:FieldProvenance:Class]
|
||||
|
||||
# [DEF:SemanticFieldEntry:Class]
|
||||
class SemanticFieldEntry(Base):
|
||||
__tablename__ = "semantic_field_entries"
|
||||
|
||||
field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
field_name = Column(String, nullable=False)
|
||||
field_kind = Column(SQLEnum(FieldKind), nullable=False)
|
||||
verbose_name = Column(String, nullable=True)
|
||||
description = Column(Text, nullable=True)
|
||||
display_format = Column(String, nullable=True)
|
||||
provenance = Column(SQLEnum(FieldProvenance), nullable=False, default=FieldProvenance.UNRESOLVED)
|
||||
source_id = Column(String, nullable=True)
|
||||
confidence_rank = Column(Integer, nullable=True)
|
||||
is_locked = Column(Boolean, nullable=False, default=False)
|
||||
has_conflict = Column(Boolean, nullable=False, default=False)
|
||||
needs_review = Column(Boolean, nullable=False, default=True)
|
||||
last_changed_by = Column(String, nullable=False) # system, user, agent
|
||||
user_feedback = Column(String, nullable=True) # up, down, null
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="semantic_fields")
|
||||
candidates = relationship("SemanticCandidate", back_populates="field", cascade="all, delete-orphan")
|
||||
# [/DEF:SemanticFieldEntry:Class]
|
||||
|
||||
# [DEF:CandidateMatchType:Class]
|
||||
class CandidateMatchType(str, enum.Enum):
|
||||
EXACT = "exact"
|
||||
REFERENCE = "reference"
|
||||
FUZZY = "fuzzy"
|
||||
GENERATED = "generated"
|
||||
# [/DEF:CandidateMatchType:Class]
|
||||
|
||||
# [DEF:CandidateStatus:Class]
|
||||
class CandidateStatus(str, enum.Enum):
|
||||
PROPOSED = "proposed"
|
||||
ACCEPTED = "accepted"
|
||||
REJECTED = "rejected"
|
||||
SUPERSEDED = "superseded"
|
||||
# [/DEF:CandidateStatus:Class]
|
||||
|
||||
# [DEF:SemanticCandidate:Class]
|
||||
class SemanticCandidate(Base):
|
||||
__tablename__ = "semantic_candidates"
|
||||
|
||||
candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
field_id = Column(String, ForeignKey("semantic_field_entries.field_id"), nullable=False)
|
||||
source_id = Column(String, nullable=True)
|
||||
candidate_rank = Column(Integer, nullable=False)
|
||||
match_type = Column(SQLEnum(CandidateMatchType), nullable=False)
|
||||
confidence_score = Column(Float, nullable=False)
|
||||
proposed_verbose_name = Column(String, nullable=True)
|
||||
proposed_description = Column(Text, nullable=True)
|
||||
proposed_display_format = Column(String, nullable=True)
|
||||
status = Column(SQLEnum(CandidateStatus), nullable=False, default=CandidateStatus.PROPOSED)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
field = relationship("SemanticFieldEntry", back_populates="candidates")
|
||||
# [/DEF:SemanticCandidate:Class]
|
||||
|
||||
# [DEF:FilterSource:Class]
|
||||
class FilterSource(str, enum.Enum):
|
||||
SUPERSET_NATIVE = "superset_native"
|
||||
SUPERSET_URL = "superset_url"
|
||||
MANUAL = "manual"
|
||||
INFERRED = "inferred"
|
||||
# [/DEF:FilterSource:Class]
|
||||
|
||||
# [DEF:FilterConfidenceState:Class]
|
||||
class FilterConfidenceState(str, enum.Enum):
|
||||
CONFIRMED = "confirmed"
|
||||
IMPORTED = "imported"
|
||||
INFERRED = "inferred"
|
||||
AI_DRAFT = "ai_draft"
|
||||
UNRESOLVED = "unresolved"
|
||||
# [/DEF:FilterConfidenceState:Class]
|
||||
|
||||
# [DEF:FilterRecoveryStatus:Class]
|
||||
class FilterRecoveryStatus(str, enum.Enum):
|
||||
RECOVERED = "recovered"
|
||||
PARTIAL = "partial"
|
||||
MISSING = "missing"
|
||||
CONFLICTED = "conflicted"
|
||||
# [/DEF:FilterRecoveryStatus:Class]
|
||||
|
||||
# [DEF:ImportedFilter:Class]
|
||||
class ImportedFilter(Base):
|
||||
__tablename__ = "imported_filters"
|
||||
|
||||
filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
filter_name = Column(String, nullable=False)
|
||||
display_name = Column(String, nullable=True)
|
||||
raw_value = Column(JSON, nullable=False)
|
||||
normalized_value = Column(JSON, nullable=True)
|
||||
source = Column(SQLEnum(FilterSource), nullable=False)
|
||||
confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False)
|
||||
requires_confirmation = Column(Boolean, nullable=False, default=False)
|
||||
recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False)
|
||||
notes = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="imported_filters")
|
||||
# [/DEF:ImportedFilter:Class]
|
||||
|
||||
# [DEF:VariableKind:Class]
|
||||
class VariableKind(str, enum.Enum):
|
||||
NATIVE_FILTER = "native_filter"
|
||||
PARAMETER = "parameter"
|
||||
DERIVED = "derived"
|
||||
UNKNOWN = "unknown"
|
||||
# [/DEF:VariableKind:Class]
|
||||
|
||||
# [DEF:MappingStatus:Class]
|
||||
class MappingStatus(str, enum.Enum):
|
||||
UNMAPPED = "unmapped"
|
||||
PROPOSED = "proposed"
|
||||
APPROVED = "approved"
|
||||
OVERRIDDEN = "overridden"
|
||||
INVALID = "invalid"
|
||||
# [/DEF:MappingStatus:Class]
|
||||
|
||||
# [DEF:TemplateVariable:Class]
|
||||
class TemplateVariable(Base):
|
||||
__tablename__ = "template_variables"
|
||||
|
||||
variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
variable_name = Column(String, nullable=False)
|
||||
expression_source = Column(Text, nullable=False)
|
||||
variable_kind = Column(SQLEnum(VariableKind), nullable=False)
|
||||
is_required = Column(Boolean, nullable=False, default=True)
|
||||
default_value = Column(JSON, nullable=True)
|
||||
mapping_status = Column(SQLEnum(MappingStatus), nullable=False, default=MappingStatus.UNMAPPED)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="template_variables")
|
||||
# [/DEF:TemplateVariable:Class]
|
||||
|
||||
# [DEF:MappingMethod:Class]
|
||||
class MappingMethod(str, enum.Enum):
|
||||
DIRECT_MATCH = "direct_match"
|
||||
HEURISTIC_MATCH = "heuristic_match"
|
||||
SEMANTIC_MATCH = "semantic_match"
|
||||
MANUAL_OVERRIDE = "manual_override"
|
||||
# [/DEF:MappingMethod:Class]
|
||||
|
||||
# [DEF:MappingWarningLevel:Class]
|
||||
class MappingWarningLevel(str, enum.Enum):
|
||||
LOW = "low"
|
||||
MEDIUM = "medium"
|
||||
HIGH = "high"
|
||||
# [/DEF:MappingWarningLevel:Class]
|
||||
|
||||
# [DEF:ApprovalState:Class]
|
||||
class ApprovalState(str, enum.Enum):
|
||||
PENDING = "pending"
|
||||
APPROVED = "approved"
|
||||
REJECTED = "rejected"
|
||||
NOT_REQUIRED = "not_required"
|
||||
# [/DEF:ApprovalState:Class]
|
||||
|
||||
# [DEF:ExecutionMapping:Class]
|
||||
class ExecutionMapping(Base):
|
||||
__tablename__ = "execution_mappings"
|
||||
|
||||
mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
filter_id = Column(String, nullable=False)
|
||||
variable_id = Column(String, nullable=False)
|
||||
mapping_method = Column(SQLEnum(MappingMethod), nullable=False)
|
||||
raw_input_value = Column(JSON, nullable=False)
|
||||
effective_value = Column(JSON, nullable=True)
|
||||
transformation_note = Column(Text, nullable=True)
|
||||
warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True)
|
||||
requires_explicit_approval = Column(Boolean, nullable=False, default=False)
|
||||
approval_state = Column(SQLEnum(ApprovalState), nullable=False, default=ApprovalState.NOT_REQUIRED)
|
||||
approved_by_user_id = Column(String, nullable=True)
|
||||
approved_at = Column(DateTime, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="execution_mappings")
|
||||
# [/DEF:ExecutionMapping:Class]
|
||||
|
||||
# [DEF:ClarificationStatus:Class]
|
||||
class ClarificationStatus(str, enum.Enum):
|
||||
PENDING = "pending"
|
||||
ACTIVE = "active"
|
||||
PAUSED = "paused"
|
||||
COMPLETED = "completed"
|
||||
CANCELLED = "cancelled"
|
||||
# [/DEF:ClarificationStatus:Class]
|
||||
|
||||
# [DEF:ClarificationSession:Class]
|
||||
class ClarificationSession(Base):
|
||||
__tablename__ = "clarification_sessions"
|
||||
|
||||
clarification_session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
status = Column(SQLEnum(ClarificationStatus), nullable=False, default=ClarificationStatus.PENDING)
|
||||
current_question_id = Column(String, nullable=True)
|
||||
resolved_count = Column(Integer, nullable=False, default=0)
|
||||
remaining_count = Column(Integer, nullable=False, default=0)
|
||||
summary_delta = Column(Text, nullable=True)
|
||||
started_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
completed_at = Column(DateTime, nullable=True)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="clarification_sessions")
|
||||
questions = relationship("ClarificationQuestion", back_populates="clarification_session", cascade="all, delete-orphan")
|
||||
# [/DEF:ClarificationSession:Class]
|
||||
|
||||
# [DEF:QuestionState:Class]
|
||||
class QuestionState(str, enum.Enum):
|
||||
OPEN = "open"
|
||||
ANSWERED = "answered"
|
||||
SKIPPED = "skipped"
|
||||
EXPERT_REVIEW = "expert_review"
|
||||
SUPERSEDED = "superseded"
|
||||
# [/DEF:QuestionState:Class]
|
||||
|
||||
# [DEF:ClarificationQuestion:Class]
|
||||
class ClarificationQuestion(Base):
|
||||
__tablename__ = "clarification_questions"
|
||||
|
||||
question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
clarification_session_id = Column(String, ForeignKey("clarification_sessions.clarification_session_id"), nullable=False)
|
||||
topic_ref = Column(String, nullable=False)
|
||||
question_text = Column(Text, nullable=False)
|
||||
why_it_matters = Column(Text, nullable=False)
|
||||
current_guess = Column(Text, nullable=True)
|
||||
priority = Column(Integer, nullable=False, default=0)
|
||||
state = Column(SQLEnum(QuestionState), nullable=False, default=QuestionState.OPEN)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
||||
|
||||
clarification_session = relationship("ClarificationSession", back_populates="questions")
|
||||
options = relationship("ClarificationOption", back_populates="question", cascade="all, delete-orphan")
|
||||
answer = relationship("ClarificationAnswer", back_populates="question", uselist=False, cascade="all, delete-orphan")
|
||||
# [/DEF:ClarificationQuestion:Class]
|
||||
|
||||
# [DEF:ClarificationOption:Class]
|
||||
class ClarificationOption(Base):
|
||||
__tablename__ = "clarification_options"
|
||||
|
||||
option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
question_id = Column(String, ForeignKey("clarification_questions.question_id"), nullable=False)
|
||||
label = Column(String, nullable=False)
|
||||
value = Column(String, nullable=False)
|
||||
is_recommended = Column(Boolean, nullable=False, default=False)
|
||||
display_order = Column(Integer, nullable=False, default=0)
|
||||
|
||||
question = relationship("ClarificationQuestion", back_populates="options")
|
||||
# [/DEF:ClarificationOption:Class]
|
||||
|
||||
# [DEF:AnswerKind:Class]
|
||||
class AnswerKind(str, enum.Enum):
|
||||
SELECTED = "selected"
|
||||
CUSTOM = "custom"
|
||||
SKIPPED = "skipped"
|
||||
EXPERT_REVIEW = "expert_review"
|
||||
# [/DEF:AnswerKind:Class]
|
||||
|
||||
# [DEF:ClarificationAnswer:Class]
|
||||
class ClarificationAnswer(Base):
|
||||
__tablename__ = "clarification_answers"
|
||||
|
||||
answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
question_id = Column(String, ForeignKey("clarification_questions.question_id"), nullable=False, unique=True)
|
||||
answer_kind = Column(SQLEnum(AnswerKind), nullable=False)
|
||||
answer_value = Column(Text, nullable=True)
|
||||
answered_by_user_id = Column(String, nullable=False)
|
||||
impact_summary = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
question = relationship("ClarificationQuestion", back_populates="answer")
|
||||
# [/DEF:ClarificationAnswer:Class]
|
||||
|
||||
# [DEF:PreviewStatus:Class]
|
||||
class PreviewStatus(str, enum.Enum):
|
||||
PENDING = "pending"
|
||||
READY = "ready"
|
||||
FAILED = "failed"
|
||||
STALE = "stale"
|
||||
# [/DEF:PreviewStatus:Class]
|
||||
|
||||
# [DEF:CompiledPreview:Class]
|
||||
class CompiledPreview(Base):
|
||||
__tablename__ = "compiled_previews"
|
||||
|
||||
preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
preview_status = Column(SQLEnum(PreviewStatus), nullable=False, default=PreviewStatus.PENDING)
|
||||
compiled_sql = Column(Text, nullable=True)
|
||||
preview_fingerprint = Column(String, nullable=False)
|
||||
compiled_by = Column(String, nullable=False, default="superset")
|
||||
error_code = Column(String, nullable=True)
|
||||
error_details = Column(Text, nullable=True)
|
||||
compiled_at = Column(DateTime, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="previews")
|
||||
# [/DEF:CompiledPreview:Class]
|
||||
|
||||
# [DEF:LaunchStatus:Class]
|
||||
class LaunchStatus(str, enum.Enum):
|
||||
STARTED = "started"
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
# [/DEF:LaunchStatus:Class]
|
||||
|
||||
# [DEF:DatasetRunContext:Class]
|
||||
class DatasetRunContext(Base):
|
||||
__tablename__ = "dataset_run_contexts"
|
||||
|
||||
run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
dataset_ref = Column(String, nullable=False)
|
||||
environment_id = Column(String, nullable=False)
|
||||
preview_id = Column(String, nullable=False)
|
||||
sql_lab_session_ref = Column(String, nullable=False)
|
||||
effective_filters = Column(JSON, nullable=False)
|
||||
template_params = Column(JSON, nullable=False)
|
||||
approved_mapping_ids = Column(JSON, nullable=False)
|
||||
semantic_decision_refs = Column(JSON, nullable=False)
|
||||
open_warning_refs = Column(JSON, nullable=False)
|
||||
launch_status = Column(SQLEnum(LaunchStatus), nullable=False)
|
||||
launch_error = Column(Text, nullable=True)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="run_contexts")
|
||||
# [/DEF:DatasetRunContext:Class]
|
||||
|
||||
# [DEF:ArtifactType:Class]
|
||||
class ArtifactType(str, enum.Enum):
|
||||
DOCUMENTATION = "documentation"
|
||||
VALIDATION_REPORT = "validation_report"
|
||||
RUN_SUMMARY = "run_summary"
|
||||
# [/DEF:ArtifactType:Class]
|
||||
|
||||
# [DEF:ArtifactFormat:Class]
|
||||
class ArtifactFormat(str, enum.Enum):
|
||||
JSON = "json"
|
||||
MARKDOWN = "markdown"
|
||||
CSV = "csv"
|
||||
PDF = "pdf"
|
||||
# [/DEF:ArtifactFormat:Class]
|
||||
|
||||
# [DEF:ExportArtifact:Class]
|
||||
class ExportArtifact(Base):
|
||||
__tablename__ = "export_artifacts"
|
||||
|
||||
artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
|
||||
session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False)
|
||||
artifact_type = Column(SQLEnum(ArtifactType), nullable=False)
|
||||
format = Column(SQLEnum(ArtifactFormat), nullable=False)
|
||||
storage_ref = Column(String, nullable=False)
|
||||
created_by_user_id = Column(String, nullable=False)
|
||||
created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
|
||||
|
||||
session = relationship("DatasetReviewSession", back_populates="export_artifacts")
|
||||
# [/DEF:ExportArtifact:Class]
|
||||
|
||||
# [/DEF:DatasetReviewModels:Module]
|
||||
362
backend/src/schemas/dataset_review.py
Normal file
362
backend/src/schemas/dataset_review.py
Normal file
@@ -0,0 +1,362 @@
|
||||
# [DEF:DatasetReviewSchemas:Module]
|
||||
#
|
||||
# @COMPLEXITY: 3
|
||||
# @SEMANTICS: dataset_review, schemas, pydantic, session, profile, findings
|
||||
# @PURPOSE: Defines API schemas for the dataset review orchestration flow.
|
||||
# @LAYER: API
|
||||
# @RELATION: DEPENDS_ON -> pydantic
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
from datetime import datetime
|
||||
from typing import List, Optional, Any
|
||||
from pydantic import BaseModel, Field
|
||||
from src.models.dataset_review import (
|
||||
SessionStatus,
|
||||
SessionPhase,
|
||||
ReadinessState,
|
||||
RecommendedAction,
|
||||
SessionCollaboratorRole,
|
||||
BusinessSummarySource,
|
||||
ConfidenceState,
|
||||
FindingArea,
|
||||
FindingSeverity,
|
||||
ResolutionState,
|
||||
SemanticSourceType,
|
||||
TrustLevel,
|
||||
SemanticSourceStatus,
|
||||
FieldKind,
|
||||
FieldProvenance,
|
||||
CandidateMatchType,
|
||||
CandidateStatus,
|
||||
FilterSource,
|
||||
FilterConfidenceState,
|
||||
FilterRecoveryStatus,
|
||||
VariableKind,
|
||||
MappingStatus,
|
||||
MappingMethod,
|
||||
MappingWarningLevel,
|
||||
ApprovalState,
|
||||
ClarificationStatus,
|
||||
QuestionState,
|
||||
AnswerKind,
|
||||
PreviewStatus,
|
||||
LaunchStatus,
|
||||
ArtifactType,
|
||||
ArtifactFormat
|
||||
)
|
||||
# [/SECTION]
|
||||
|
||||
# [DEF:SessionCollaboratorDto:Class]
|
||||
class SessionCollaboratorDto(BaseModel):
|
||||
user_id: str
|
||||
role: SessionCollaboratorRole
|
||||
added_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SessionCollaboratorDto:Class]
|
||||
|
||||
# [DEF:DatasetProfileDto:Class]
|
||||
class DatasetProfileDto(BaseModel):
|
||||
profile_id: str
|
||||
session_id: str
|
||||
dataset_name: str
|
||||
schema_name: Optional[str] = None
|
||||
database_name: Optional[str] = None
|
||||
business_summary: str
|
||||
business_summary_source: BusinessSummarySource
|
||||
description: Optional[str] = None
|
||||
dataset_type: Optional[str] = None
|
||||
is_sqllab_view: bool
|
||||
completeness_score: Optional[float] = None
|
||||
confidence_state: ConfidenceState
|
||||
has_blocking_findings: bool
|
||||
has_warning_findings: bool
|
||||
manual_summary_locked: bool
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:DatasetProfileDto:Class]
|
||||
|
||||
# [DEF:ValidationFindingDto:Class]
|
||||
class ValidationFindingDto(BaseModel):
|
||||
finding_id: str
|
||||
session_id: str
|
||||
area: FindingArea
|
||||
severity: FindingSeverity
|
||||
code: str
|
||||
title: str
|
||||
message: str
|
||||
resolution_state: ResolutionState
|
||||
resolution_note: Optional[str] = None
|
||||
caused_by_ref: Optional[str] = None
|
||||
created_at: datetime
|
||||
resolved_at: Optional[datetime] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ValidationFindingDto:Class]
|
||||
|
||||
# [DEF:SemanticSourceDto:Class]
|
||||
class SemanticSourceDto(BaseModel):
|
||||
source_id: str
|
||||
session_id: str
|
||||
source_type: SemanticSourceType
|
||||
source_ref: str
|
||||
source_version: str
|
||||
display_name: str
|
||||
trust_level: TrustLevel
|
||||
schema_overlap_score: Optional[float] = None
|
||||
status: SemanticSourceStatus
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SemanticSourceDto:Class]
|
||||
|
||||
# [DEF:SemanticCandidateDto:Class]
|
||||
class SemanticCandidateDto(BaseModel):
|
||||
candidate_id: str
|
||||
field_id: str
|
||||
source_id: Optional[str] = None
|
||||
candidate_rank: int
|
||||
match_type: CandidateMatchType
|
||||
confidence_score: float
|
||||
proposed_verbose_name: Optional[str] = None
|
||||
proposed_description: Optional[str] = None
|
||||
proposed_display_format: Optional[str] = None
|
||||
status: CandidateStatus
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SemanticCandidateDto:Class]
|
||||
|
||||
# [DEF:SemanticFieldEntryDto:Class]
|
||||
class SemanticFieldEntryDto(BaseModel):
|
||||
field_id: str
|
||||
session_id: str
|
||||
field_name: str
|
||||
field_kind: FieldKind
|
||||
verbose_name: Optional[str] = None
|
||||
description: Optional[str] = None
|
||||
display_format: Optional[str] = None
|
||||
provenance: FieldProvenance
|
||||
source_id: Optional[str] = None
|
||||
confidence_rank: Optional[int] = None
|
||||
is_locked: bool
|
||||
has_conflict: bool
|
||||
needs_review: bool
|
||||
last_changed_by: str
|
||||
user_feedback: Optional[str] = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
candidates: List[SemanticCandidateDto] = []
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SemanticFieldEntryDto:Class]
|
||||
|
||||
# [DEF:ImportedFilterDto:Class]
|
||||
class ImportedFilterDto(BaseModel):
|
||||
filter_id: str
|
||||
session_id: str
|
||||
filter_name: str
|
||||
display_name: Optional[str] = None
|
||||
raw_value: Any
|
||||
normalized_value: Optional[Any] = None
|
||||
source: FilterSource
|
||||
confidence_state: FilterConfidenceState
|
||||
requires_confirmation: bool
|
||||
recovery_status: FilterRecoveryStatus
|
||||
notes: Optional[str] = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ImportedFilterDto:Class]
|
||||
|
||||
# [DEF:TemplateVariableDto:Class]
|
||||
class TemplateVariableDto(BaseModel):
|
||||
variable_id: str
|
||||
session_id: str
|
||||
variable_name: str
|
||||
expression_source: str
|
||||
variable_kind: VariableKind
|
||||
is_required: bool
|
||||
default_value: Optional[Any] = None
|
||||
mapping_status: MappingStatus
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:TemplateVariableDto:Class]
|
||||
|
||||
# [DEF:ExecutionMappingDto:Class]
|
||||
class ExecutionMappingDto(BaseModel):
|
||||
mapping_id: str
|
||||
session_id: str
|
||||
filter_id: str
|
||||
variable_id: str
|
||||
mapping_method: MappingMethod
|
||||
raw_input_value: Any
|
||||
effective_value: Optional[Any] = None
|
||||
transformation_note: Optional[str] = None
|
||||
warning_level: Optional[MappingWarningLevel] = None
|
||||
requires_explicit_approval: bool
|
||||
approval_state: ApprovalState
|
||||
approved_by_user_id: Optional[str] = None
|
||||
approved_at: Optional[datetime] = None
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ExecutionMappingDto:Class]
|
||||
|
||||
# [DEF:ClarificationOptionDto:Class]
|
||||
class ClarificationOptionDto(BaseModel):
|
||||
option_id: str
|
||||
question_id: str
|
||||
label: str
|
||||
value: str
|
||||
is_recommended: bool
|
||||
display_order: int
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ClarificationOptionDto:Class]
|
||||
|
||||
# [DEF:ClarificationAnswerDto:Class]
|
||||
class ClarificationAnswerDto(BaseModel):
|
||||
answer_id: str
|
||||
question_id: str
|
||||
answer_kind: AnswerKind
|
||||
answer_value: Optional[str] = None
|
||||
answered_by_user_id: str
|
||||
impact_summary: Optional[str] = None
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ClarificationAnswerDto:Class]
|
||||
|
||||
# [DEF:ClarificationQuestionDto:Class]
|
||||
class ClarificationQuestionDto(BaseModel):
|
||||
question_id: str
|
||||
clarification_session_id: str
|
||||
topic_ref: str
|
||||
question_text: str
|
||||
why_it_matters: str
|
||||
current_guess: Optional[str] = None
|
||||
priority: int
|
||||
state: QuestionState
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
options: List[ClarificationOptionDto] = []
|
||||
answer: Optional[ClarificationAnswerDto] = None
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ClarificationQuestionDto:Class]
|
||||
|
||||
# [DEF:ClarificationSessionDto:Class]
|
||||
class ClarificationSessionDto(BaseModel):
|
||||
clarification_session_id: str
|
||||
session_id: str
|
||||
status: ClarificationStatus
|
||||
current_question_id: Optional[str] = None
|
||||
resolved_count: int
|
||||
remaining_count: int
|
||||
summary_delta: Optional[str] = None
|
||||
started_at: datetime
|
||||
updated_at: datetime
|
||||
completed_at: Optional[datetime] = None
|
||||
questions: List[ClarificationQuestionDto] = []
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:ClarificationSessionDto:Class]
|
||||
|
||||
# [DEF:CompiledPreviewDto:Class]
|
||||
class CompiledPreviewDto(BaseModel):
|
||||
preview_id: str
|
||||
session_id: str
|
||||
preview_status: PreviewStatus
|
||||
compiled_sql: Optional[str] = None
|
||||
preview_fingerprint: str
|
||||
compiled_by: str
|
||||
error_code: Optional[str] = None
|
||||
error_details: Optional[str] = None
|
||||
compiled_at: Optional[datetime] = None
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:CompiledPreviewDto:Class]
|
||||
|
||||
# [DEF:DatasetRunContextDto:Class]
|
||||
class DatasetRunContextDto(BaseModel):
|
||||
run_context_id: str
|
||||
session_id: str
|
||||
dataset_ref: str
|
||||
environment_id: str
|
||||
preview_id: str
|
||||
sql_lab_session_ref: str
|
||||
effective_filters: Any
|
||||
template_params: Any
|
||||
approved_mapping_ids: List[str]
|
||||
semantic_decision_refs: List[str]
|
||||
open_warning_refs: List[str]
|
||||
launch_status: LaunchStatus
|
||||
launch_error: Optional[str] = None
|
||||
created_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:DatasetRunContextDto:Class]
|
||||
|
||||
# [DEF:SessionSummary:Class]
|
||||
class SessionSummary(BaseModel):
|
||||
session_id: str
|
||||
user_id: str
|
||||
environment_id: str
|
||||
source_kind: str
|
||||
source_input: str
|
||||
dataset_ref: str
|
||||
dataset_id: Optional[int] = None
|
||||
readiness_state: ReadinessState
|
||||
recommended_action: RecommendedAction
|
||||
status: SessionStatus
|
||||
current_phase: SessionPhase
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
last_activity_at: datetime
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SessionSummary:Class]
|
||||
|
||||
# [DEF:SessionDetail:Class]
|
||||
class SessionDetail(SessionSummary):
|
||||
collaborators: List[SessionCollaboratorDto] = []
|
||||
profile: Optional[DatasetProfileDto] = None
|
||||
findings: List[ValidationFindingDto] = []
|
||||
semantic_sources: List[SemanticSourceDto] = []
|
||||
semantic_fields: List[SemanticFieldEntryDto] = []
|
||||
imported_filters: List[ImportedFilterDto] = []
|
||||
template_variables: List[TemplateVariableDto] = []
|
||||
execution_mappings: List[ExecutionMappingDto] = []
|
||||
clarification_sessions: List[ClarificationSessionDto] = []
|
||||
previews: List[CompiledPreviewDto] = []
|
||||
run_contexts: List[DatasetRunContextDto] = []
|
||||
|
||||
class Config:
|
||||
from_attributes = True
|
||||
# [/DEF:SessionDetail:Class]
|
||||
|
||||
# [/DEF:DatasetReviewSchemas:Module]
|
||||
@@ -46,6 +46,14 @@ INITIAL_PERMISSIONS = [
|
||||
{"resource": "plugin:storage", "action": "WRITE"},
|
||||
{"resource": "plugin:debug", "action": "EXECUTE"},
|
||||
{"resource": "git_config", "action": "READ"},
|
||||
|
||||
# Dataset Review Permissions
|
||||
{"resource": "dataset:session", "action": "READ"},
|
||||
{"resource": "dataset:session", "action": "MANAGE"},
|
||||
{"resource": "dataset:session", "action": "APPROVE"},
|
||||
{"resource": "dataset:execution", "action": "PREVIEW"},
|
||||
{"resource": "dataset:execution", "action": "LAUNCH"},
|
||||
{"resource": "dataset:execution", "action": "LAUNCH_PROD"},
|
||||
]
|
||||
# [/DEF:INITIAL_PERMISSIONS:Constant]
|
||||
|
||||
@@ -95,6 +103,10 @@ def seed_permissions():
|
||||
("tasks", "READ"),
|
||||
("tasks", "WRITE"),
|
||||
("git_config", "READ"),
|
||||
("dataset:session", "READ"),
|
||||
("dataset:session", "MANAGE"),
|
||||
("dataset:execution", "PREVIEW"),
|
||||
("dataset:execution", "LAUNCH"),
|
||||
]
|
||||
|
||||
for res, act in user_permissions:
|
||||
|
||||
7
backend/src/services/dataset_review/__init__.py
Normal file
7
backend/src/services/dataset_review/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
# [DEF:backend.src.services.dataset_review:Module]
|
||||
#
|
||||
# @SEMANTICS: dataset, review, orchestration
|
||||
# @PURPOSE: Provides services for dataset-centered orchestration flow.
|
||||
# @LAYER: Services
|
||||
#
|
||||
# [/DEF:backend.src.services.dataset_review:Module]
|
||||
@@ -0,0 +1,171 @@
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from src.models.mapping import Base, Environment
|
||||
from src.models.auth import User
|
||||
from src.models.dataset_review import (
|
||||
DatasetReviewSession,
|
||||
DatasetProfile,
|
||||
ValidationFinding,
|
||||
CompiledPreview,
|
||||
DatasetRunContext,
|
||||
BusinessSummarySource,
|
||||
ConfidenceState,
|
||||
FindingArea,
|
||||
FindingSeverity,
|
||||
ReadinessState,
|
||||
RecommendedAction
|
||||
)
|
||||
from src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository
|
||||
|
||||
# [DEF:SessionRepositoryTests:Module]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Unit tests for DatasetReviewSessionRepository.
|
||||
# @RELATION: TESTS -> [DatasetReviewSessionRepository]
|
||||
|
||||
@pytest.fixture
|
||||
def db_session():
|
||||
# [DEF:db_session:Function]
|
||||
# @COMPLEXITY: 1
|
||||
# @RELATION: BINDS_TO -> [SessionRepositoryTests]
|
||||
engine = create_engine("sqlite:///:memory:")
|
||||
Base.metadata.create_all(engine)
|
||||
Session = sessionmaker(bind=engine)
|
||||
session = Session()
|
||||
|
||||
# Create test data
|
||||
user = User(id="user1", username="testuser", email="test@example.com", password_hash="pw")
|
||||
env = Environment(id="env1", name="Prod", url="http://superset", credentials_id="cred1")
|
||||
session.add_all([user, env])
|
||||
session.commit()
|
||||
|
||||
yield session
|
||||
session.close()
|
||||
|
||||
def test_create_session(db_session):
|
||||
# @PURPOSE: Verify session creation and persistence.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1",
|
||||
environment_id="env1",
|
||||
source_kind="superset_link",
|
||||
source_input="http://link",
|
||||
dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
assert session.session_id is not None
|
||||
loaded = db_session.query(DatasetReviewSession).filter_by(session_id=session.session_id).first()
|
||||
assert loaded.user_id == "user1"
|
||||
|
||||
def test_load_session_detail_ownership(db_session):
|
||||
# @PURPOSE: Verify ownership enforcement in detail loading.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
# Correct user
|
||||
loaded = repo.load_session_detail(session.session_id, "user1")
|
||||
assert loaded is not None
|
||||
|
||||
# Wrong user
|
||||
loaded_wrong = repo.load_session_detail(session.session_id, "wrong_user")
|
||||
assert loaded_wrong is None
|
||||
|
||||
def test_save_preview_marks_stale(db_session):
|
||||
# @PURPOSE: Verify that saving a new preview marks old ones as stale.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
p1 = CompiledPreview(session_id=session.session_id, preview_status="ready", preview_fingerprint="f1")
|
||||
repo.save_preview(session.session_id, "user1", p1)
|
||||
|
||||
p2 = CompiledPreview(session_id=session.session_id, preview_status="ready", preview_fingerprint="f2")
|
||||
repo.save_preview(session.session_id, "user1", p2)
|
||||
|
||||
db_session.refresh(p1)
|
||||
assert p1.preview_status == "stale"
|
||||
assert p2.preview_status == "ready"
|
||||
assert session.last_preview_id == p2.preview_id
|
||||
|
||||
def test_save_profile_and_findings(db_session):
|
||||
# @PURPOSE: Verify persistence of profile and findings.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
profile = DatasetProfile(
|
||||
session_id=session.session_id,
|
||||
dataset_name="Test DS",
|
||||
business_summary="Summary",
|
||||
business_summary_source=BusinessSummarySource.INFERRED,
|
||||
confidence_state=ConfidenceState.UNRESOLVED
|
||||
)
|
||||
|
||||
finding = ValidationFinding(
|
||||
session_id=session.session_id,
|
||||
area=FindingArea.SOURCE_INTAKE,
|
||||
severity=FindingSeverity.BLOCKING,
|
||||
code="ERR1",
|
||||
title="Error",
|
||||
message="Failure"
|
||||
)
|
||||
|
||||
repo.save_profile_and_findings(session.session_id, "user1", profile, [finding])
|
||||
|
||||
updated_session = repo.load_session_detail(session.session_id, "user1")
|
||||
assert updated_session.profile.dataset_name == "Test DS"
|
||||
assert len(updated_session.findings) == 1
|
||||
assert updated_session.findings[0].code == "ERR1"
|
||||
|
||||
def test_save_run_context(db_session):
|
||||
# @PURPOSE: Verify saving of run context.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
rc = DatasetRunContext(
|
||||
session_id=session.session_id,
|
||||
dataset_ref="ds1",
|
||||
environment_id="env1",
|
||||
preview_id="p1",
|
||||
sql_lab_session_ref="s1",
|
||||
effective_filters={},
|
||||
template_params={},
|
||||
approved_mapping_ids=[],
|
||||
semantic_decision_refs=[],
|
||||
open_warning_refs=[],
|
||||
launch_status="success"
|
||||
)
|
||||
repo.save_run_context(session.session_id, "user1", rc)
|
||||
|
||||
assert session.last_run_context_id == rc.run_context_id
|
||||
|
||||
def test_list_sessions_for_user(db_session):
|
||||
# @PURPOSE: Verify listing of sessions by user.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
s1 = DatasetReviewSession(user_id="user1", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r1")
|
||||
s2 = DatasetReviewSession(user_id="user1", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r2")
|
||||
s3 = DatasetReviewSession(user_id="other", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r3")
|
||||
|
||||
db_session.add_all([s1, s2, s3])
|
||||
db_session.commit()
|
||||
|
||||
sessions = repo.list_sessions_for_user("user1")
|
||||
assert len(sessions) == 2
|
||||
assert all(s.user_id == "user1" for s in sessions)
|
||||
|
||||
# [/DEF:SessionRepositoryTests:Module]
|
||||
@@ -0,0 +1,146 @@
|
||||
# [DEF:DatasetReviewSessionRepository:Module]
|
||||
# @COMPLEXITY: 5
|
||||
# @PURPOSE: Persist and retrieve dataset review session aggregates, including readiness, findings, semantic decisions, clarification state, previews, and run contexts.
|
||||
# @LAYER: Domain
|
||||
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
|
||||
# @RELATION: [DEPENDS_ON] -> [DatasetProfile]
|
||||
# @RELATION: [DEPENDS_ON] -> [ValidationFinding]
|
||||
# @RELATION: [DEPENDS_ON] -> [CompiledPreview]
|
||||
# @PRE: repository operations execute within authenticated request or task scope.
|
||||
# @POST: session aggregate reads are structurally consistent and writes preserve ownership and version semantics.
|
||||
|
||||
from typing import Optional, List
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from src.models.dataset_review import (
|
||||
DatasetReviewSession,
|
||||
DatasetProfile,
|
||||
ValidationFinding,
|
||||
CompiledPreview,
|
||||
DatasetRunContext
|
||||
)
|
||||
from src.core.logger import belief_scope
|
||||
|
||||
class DatasetReviewSessionRepository:
|
||||
"""
|
||||
@PURPOSE: Persist and retrieve dataset review session aggregates.
|
||||
@INVARIANT: ownership_scope -> All operations must respect the session owner's user_id.
|
||||
"""
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
def create_session(self, session: DatasetReviewSession) -> DatasetReviewSession:
|
||||
"""
|
||||
@PURPOSE: Persist initial session shell.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.create_session"):
|
||||
self.db.add(session)
|
||||
self.db.commit()
|
||||
self.db.refresh(session)
|
||||
return session
|
||||
|
||||
def load_session_detail(self, session_id: str, user_id: str) -> Optional[DatasetReviewSession]:
|
||||
"""
|
||||
@PURPOSE: Return the full session aggregate for API/frontend use.
|
||||
@PRE: user_id must match session owner or authorized collaborator.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.load_session_detail"):
|
||||
# Note: We check user_id to enforce the ownership_scope invariant.
|
||||
return self.db.query(DatasetReviewSession)\
|
||||
.options(
|
||||
joinedload(DatasetReviewSession.profile),
|
||||
joinedload(DatasetReviewSession.findings),
|
||||
joinedload(DatasetReviewSession.collaborators),
|
||||
joinedload(DatasetReviewSession.semantic_sources),
|
||||
joinedload(DatasetReviewSession.semantic_fields),
|
||||
joinedload(DatasetReviewSession.imported_filters),
|
||||
joinedload(DatasetReviewSession.template_variables),
|
||||
joinedload(DatasetReviewSession.execution_mappings),
|
||||
joinedload(DatasetReviewSession.clarification_sessions),
|
||||
joinedload(DatasetReviewSession.previews),
|
||||
joinedload(DatasetReviewSession.run_contexts)
|
||||
)\
|
||||
.filter(DatasetReviewSession.session_id == session_id)\
|
||||
.filter(DatasetReviewSession.user_id == user_id)\
|
||||
.first()
|
||||
|
||||
def save_profile_and_findings(self, session_id: str, user_id: str, profile: DatasetProfile, findings: List[ValidationFinding]) -> DatasetReviewSession:
|
||||
"""
|
||||
@PURPOSE: Persist profile and validation state together.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_profile_and_findings"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
if profile:
|
||||
self.db.merge(profile)
|
||||
|
||||
# For findings, we might want to sync them (remove old ones if not in new list, or update)
|
||||
# Simplest for now: add/merge findings
|
||||
for finding in findings:
|
||||
self.db.merge(finding)
|
||||
|
||||
self.db.commit()
|
||||
return self.load_session_detail(session_id, user_id)
|
||||
|
||||
def save_preview(self, session_id: str, user_id: str, preview: CompiledPreview) -> CompiledPreview:
|
||||
"""
|
||||
@PURPOSE: Persist compiled preview attempt and mark older fingerprints stale.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_preview"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
# Mark existing previews for this session as stale if they are not the new one
|
||||
self.db.query(CompiledPreview).filter(
|
||||
CompiledPreview.session_id == session_id
|
||||
).update({"preview_status": "stale"})
|
||||
|
||||
self.db.add(preview)
|
||||
self.db.flush()
|
||||
session.last_preview_id = preview.preview_id
|
||||
|
||||
self.db.commit()
|
||||
self.db.refresh(preview)
|
||||
return preview
|
||||
|
||||
def save_run_context(self, session_id: str, user_id: str, run_context: DatasetRunContext) -> DatasetRunContext:
|
||||
"""
|
||||
@PURPOSE: Persist immutable launch audit snapshot.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_run_context"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
self.db.add(run_context)
|
||||
self.db.flush()
|
||||
session.last_run_context_id = run_context.run_context_id
|
||||
|
||||
self.db.commit()
|
||||
self.db.refresh(run_context)
|
||||
return run_context
|
||||
|
||||
def list_sessions_for_user(self, user_id: str) -> List[DatasetReviewSession]:
|
||||
"""
|
||||
@PURPOSE: List all review sessions owned by a user.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.list_sessions_for_user"):
|
||||
return self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).order_by(DatasetReviewSession.updated_at.desc()).all()
|
||||
|
||||
# [/DEF:DatasetReviewSessionRepository:Module]
|
||||
Reference in New Issue
Block a user