feat: initial dataset review orchestration flow implementation
This commit is contained in:
7
backend/src/services/dataset_review/__init__.py
Normal file
7
backend/src/services/dataset_review/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
# [DEF:backend.src.services.dataset_review:Module]
|
||||
#
|
||||
# @SEMANTICS: dataset, review, orchestration
|
||||
# @PURPOSE: Provides services for dataset-centered orchestration flow.
|
||||
# @LAYER: Services
|
||||
#
|
||||
# [/DEF:backend.src.services.dataset_review:Module]
|
||||
@@ -0,0 +1,171 @@
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from src.models.mapping import Base, Environment
|
||||
from src.models.auth import User
|
||||
from src.models.dataset_review import (
|
||||
DatasetReviewSession,
|
||||
DatasetProfile,
|
||||
ValidationFinding,
|
||||
CompiledPreview,
|
||||
DatasetRunContext,
|
||||
BusinessSummarySource,
|
||||
ConfidenceState,
|
||||
FindingArea,
|
||||
FindingSeverity,
|
||||
ReadinessState,
|
||||
RecommendedAction
|
||||
)
|
||||
from src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository
|
||||
|
||||
# [DEF:SessionRepositoryTests:Module]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Unit tests for DatasetReviewSessionRepository.
|
||||
# @RELATION: TESTS -> [DatasetReviewSessionRepository]
|
||||
|
||||
@pytest.fixture
|
||||
def db_session():
|
||||
# [DEF:db_session:Function]
|
||||
# @COMPLEXITY: 1
|
||||
# @RELATION: BINDS_TO -> [SessionRepositoryTests]
|
||||
engine = create_engine("sqlite:///:memory:")
|
||||
Base.metadata.create_all(engine)
|
||||
Session = sessionmaker(bind=engine)
|
||||
session = Session()
|
||||
|
||||
# Create test data
|
||||
user = User(id="user1", username="testuser", email="test@example.com", password_hash="pw")
|
||||
env = Environment(id="env1", name="Prod", url="http://superset", credentials_id="cred1")
|
||||
session.add_all([user, env])
|
||||
session.commit()
|
||||
|
||||
yield session
|
||||
session.close()
|
||||
|
||||
def test_create_session(db_session):
|
||||
# @PURPOSE: Verify session creation and persistence.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1",
|
||||
environment_id="env1",
|
||||
source_kind="superset_link",
|
||||
source_input="http://link",
|
||||
dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
assert session.session_id is not None
|
||||
loaded = db_session.query(DatasetReviewSession).filter_by(session_id=session.session_id).first()
|
||||
assert loaded.user_id == "user1"
|
||||
|
||||
def test_load_session_detail_ownership(db_session):
|
||||
# @PURPOSE: Verify ownership enforcement in detail loading.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
# Correct user
|
||||
loaded = repo.load_session_detail(session.session_id, "user1")
|
||||
assert loaded is not None
|
||||
|
||||
# Wrong user
|
||||
loaded_wrong = repo.load_session_detail(session.session_id, "wrong_user")
|
||||
assert loaded_wrong is None
|
||||
|
||||
def test_save_preview_marks_stale(db_session):
|
||||
# @PURPOSE: Verify that saving a new preview marks old ones as stale.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
p1 = CompiledPreview(session_id=session.session_id, preview_status="ready", preview_fingerprint="f1")
|
||||
repo.save_preview(session.session_id, "user1", p1)
|
||||
|
||||
p2 = CompiledPreview(session_id=session.session_id, preview_status="ready", preview_fingerprint="f2")
|
||||
repo.save_preview(session.session_id, "user1", p2)
|
||||
|
||||
db_session.refresh(p1)
|
||||
assert p1.preview_status == "stale"
|
||||
assert p2.preview_status == "ready"
|
||||
assert session.last_preview_id == p2.preview_id
|
||||
|
||||
def test_save_profile_and_findings(db_session):
|
||||
# @PURPOSE: Verify persistence of profile and findings.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
profile = DatasetProfile(
|
||||
session_id=session.session_id,
|
||||
dataset_name="Test DS",
|
||||
business_summary="Summary",
|
||||
business_summary_source=BusinessSummarySource.INFERRED,
|
||||
confidence_state=ConfidenceState.UNRESOLVED
|
||||
)
|
||||
|
||||
finding = ValidationFinding(
|
||||
session_id=session.session_id,
|
||||
area=FindingArea.SOURCE_INTAKE,
|
||||
severity=FindingSeverity.BLOCKING,
|
||||
code="ERR1",
|
||||
title="Error",
|
||||
message="Failure"
|
||||
)
|
||||
|
||||
repo.save_profile_and_findings(session.session_id, "user1", profile, [finding])
|
||||
|
||||
updated_session = repo.load_session_detail(session.session_id, "user1")
|
||||
assert updated_session.profile.dataset_name == "Test DS"
|
||||
assert len(updated_session.findings) == 1
|
||||
assert updated_session.findings[0].code == "ERR1"
|
||||
|
||||
def test_save_run_context(db_session):
|
||||
# @PURPOSE: Verify saving of run context.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
session = DatasetReviewSession(
|
||||
user_id="user1", environment_id="env1", source_kind="superset_link",
|
||||
source_input="http://link", dataset_ref="dataset1"
|
||||
)
|
||||
repo.create_session(session)
|
||||
|
||||
rc = DatasetRunContext(
|
||||
session_id=session.session_id,
|
||||
dataset_ref="ds1",
|
||||
environment_id="env1",
|
||||
preview_id="p1",
|
||||
sql_lab_session_ref="s1",
|
||||
effective_filters={},
|
||||
template_params={},
|
||||
approved_mapping_ids=[],
|
||||
semantic_decision_refs=[],
|
||||
open_warning_refs=[],
|
||||
launch_status="success"
|
||||
)
|
||||
repo.save_run_context(session.session_id, "user1", rc)
|
||||
|
||||
assert session.last_run_context_id == rc.run_context_id
|
||||
|
||||
def test_list_sessions_for_user(db_session):
|
||||
# @PURPOSE: Verify listing of sessions by user.
|
||||
repo = DatasetReviewSessionRepository(db_session)
|
||||
s1 = DatasetReviewSession(user_id="user1", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r1")
|
||||
s2 = DatasetReviewSession(user_id="user1", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r2")
|
||||
s3 = DatasetReviewSession(user_id="other", environment_id="env1", source_kind="k", source_input="i", dataset_ref="r3")
|
||||
|
||||
db_session.add_all([s1, s2, s3])
|
||||
db_session.commit()
|
||||
|
||||
sessions = repo.list_sessions_for_user("user1")
|
||||
assert len(sessions) == 2
|
||||
assert all(s.user_id == "user1" for s in sessions)
|
||||
|
||||
# [/DEF:SessionRepositoryTests:Module]
|
||||
@@ -0,0 +1,146 @@
|
||||
# [DEF:DatasetReviewSessionRepository:Module]
|
||||
# @COMPLEXITY: 5
|
||||
# @PURPOSE: Persist and retrieve dataset review session aggregates, including readiness, findings, semantic decisions, clarification state, previews, and run contexts.
|
||||
# @LAYER: Domain
|
||||
# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession]
|
||||
# @RELATION: [DEPENDS_ON] -> [DatasetProfile]
|
||||
# @RELATION: [DEPENDS_ON] -> [ValidationFinding]
|
||||
# @RELATION: [DEPENDS_ON] -> [CompiledPreview]
|
||||
# @PRE: repository operations execute within authenticated request or task scope.
|
||||
# @POST: session aggregate reads are structurally consistent and writes preserve ownership and version semantics.
|
||||
|
||||
from typing import Optional, List
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from src.models.dataset_review import (
|
||||
DatasetReviewSession,
|
||||
DatasetProfile,
|
||||
ValidationFinding,
|
||||
CompiledPreview,
|
||||
DatasetRunContext
|
||||
)
|
||||
from src.core.logger import belief_scope
|
||||
|
||||
class DatasetReviewSessionRepository:
|
||||
"""
|
||||
@PURPOSE: Persist and retrieve dataset review session aggregates.
|
||||
@INVARIANT: ownership_scope -> All operations must respect the session owner's user_id.
|
||||
"""
|
||||
def __init__(self, db: Session):
|
||||
self.db = db
|
||||
|
||||
def create_session(self, session: DatasetReviewSession) -> DatasetReviewSession:
|
||||
"""
|
||||
@PURPOSE: Persist initial session shell.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.create_session"):
|
||||
self.db.add(session)
|
||||
self.db.commit()
|
||||
self.db.refresh(session)
|
||||
return session
|
||||
|
||||
def load_session_detail(self, session_id: str, user_id: str) -> Optional[DatasetReviewSession]:
|
||||
"""
|
||||
@PURPOSE: Return the full session aggregate for API/frontend use.
|
||||
@PRE: user_id must match session owner or authorized collaborator.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.load_session_detail"):
|
||||
# Note: We check user_id to enforce the ownership_scope invariant.
|
||||
return self.db.query(DatasetReviewSession)\
|
||||
.options(
|
||||
joinedload(DatasetReviewSession.profile),
|
||||
joinedload(DatasetReviewSession.findings),
|
||||
joinedload(DatasetReviewSession.collaborators),
|
||||
joinedload(DatasetReviewSession.semantic_sources),
|
||||
joinedload(DatasetReviewSession.semantic_fields),
|
||||
joinedload(DatasetReviewSession.imported_filters),
|
||||
joinedload(DatasetReviewSession.template_variables),
|
||||
joinedload(DatasetReviewSession.execution_mappings),
|
||||
joinedload(DatasetReviewSession.clarification_sessions),
|
||||
joinedload(DatasetReviewSession.previews),
|
||||
joinedload(DatasetReviewSession.run_contexts)
|
||||
)\
|
||||
.filter(DatasetReviewSession.session_id == session_id)\
|
||||
.filter(DatasetReviewSession.user_id == user_id)\
|
||||
.first()
|
||||
|
||||
def save_profile_and_findings(self, session_id: str, user_id: str, profile: DatasetProfile, findings: List[ValidationFinding]) -> DatasetReviewSession:
|
||||
"""
|
||||
@PURPOSE: Persist profile and validation state together.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_profile_and_findings"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
if profile:
|
||||
self.db.merge(profile)
|
||||
|
||||
# For findings, we might want to sync them (remove old ones if not in new list, or update)
|
||||
# Simplest for now: add/merge findings
|
||||
for finding in findings:
|
||||
self.db.merge(finding)
|
||||
|
||||
self.db.commit()
|
||||
return self.load_session_detail(session_id, user_id)
|
||||
|
||||
def save_preview(self, session_id: str, user_id: str, preview: CompiledPreview) -> CompiledPreview:
|
||||
"""
|
||||
@PURPOSE: Persist compiled preview attempt and mark older fingerprints stale.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_preview"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
# Mark existing previews for this session as stale if they are not the new one
|
||||
self.db.query(CompiledPreview).filter(
|
||||
CompiledPreview.session_id == session_id
|
||||
).update({"preview_status": "stale"})
|
||||
|
||||
self.db.add(preview)
|
||||
self.db.flush()
|
||||
session.last_preview_id = preview.preview_id
|
||||
|
||||
self.db.commit()
|
||||
self.db.refresh(preview)
|
||||
return preview
|
||||
|
||||
def save_run_context(self, session_id: str, user_id: str, run_context: DatasetRunContext) -> DatasetRunContext:
|
||||
"""
|
||||
@PURPOSE: Persist immutable launch audit snapshot.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.save_run_context"):
|
||||
session = self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.session_id == session_id,
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).first()
|
||||
|
||||
if not session:
|
||||
raise ValueError("Session not found or access denied")
|
||||
|
||||
self.db.add(run_context)
|
||||
self.db.flush()
|
||||
session.last_run_context_id = run_context.run_context_id
|
||||
|
||||
self.db.commit()
|
||||
self.db.refresh(run_context)
|
||||
return run_context
|
||||
|
||||
def list_sessions_for_user(self, user_id: str) -> List[DatasetReviewSession]:
|
||||
"""
|
||||
@PURPOSE: List all review sessions owned by a user.
|
||||
"""
|
||||
with belief_scope("DatasetReviewSessionRepository.list_sessions_for_user"):
|
||||
return self.db.query(DatasetReviewSession).filter(
|
||||
DatasetReviewSession.user_id == user_id
|
||||
).order_by(DatasetReviewSession.updated_at.desc()).all()
|
||||
|
||||
# [/DEF:DatasetReviewSessionRepository:Module]
|
||||
Reference in New Issue
Block a user