From 064ffea330291adf9999523b65134d0c0aa0e715 Mon Sep 17 00:00:00 2001
From: busya
Date: Fri, 24 Apr 2026 17:10:02 +0300
Subject: [PATCH] refactor: split dataset_review monoliths into packages

---
 .axiom/axiom_config.yaml                      |   25 +-
 .axiom/runtime/belief_events.jsonl            |  321 +++
 .../dataset_review_session_repository.sqlite  |  Bin 0 -> 557056 bytes
 .../test_save_profile_and_findingscurrent     |    1 +
 .axiom/temp/pytest-of-busya/pytest-current    |    1 +
 .kilo/agents/mcp-backend-coder.md             |  139 +
 .kilo/agents/qa-tester.md                     |    5 +-
 .kilo/mcp.json                                |   44 +-
 .kilo/skills/semantics-testing/SKILL.md       |    8 +
 .kilocode/mcp.json                            |   16 +-
 backend/src/api/routes/dataset_review.py      | 2507 +----------------
 .../dataset_review_pkg/_dependencies.py       |  900 ++++++
 .../api/routes/dataset_review_pkg/_routes.py  |  923 ++++++
 backend/src/core/async_superset_client.py     |   36 +-
 backend/src/models/dataset_review.py          | 1055 +------
 .../src/models/dataset_review_pkg/__init__.py |  122 +
 .../_clarification_models.py                  |  125 +
 .../src/models/dataset_review_pkg/_enums.py   |  463 +++
 .../dataset_review_pkg/_execution_models.py   |  140 +
 .../dataset_review_pkg/_filter_models.py      |   95 +
 .../dataset_review_pkg/_finding_models.py     |   59 +
 .../dataset_review_pkg/_mapping_models.py     |   61 +
 .../dataset_review_pkg/_profile_models.py     |   68 +
 .../dataset_review_pkg/_semantic_models.py    |  139 +
 .../dataset_review_pkg/_session_models.py     |  156 +
 backend/src/schemas/dataset_review.py         |  439 +--
 .../schemas/dataset_review_pkg/_composites.py |  219 ++
 .../src/schemas/dataset_review_pkg/_dtos.py   |  262 ++
 .../dataset_review/clarification_engine.py    |  470 +--
 .../clarification_pkg/_helpers.py             |  220 ++
 .../services/dataset_review/orchestrator.py   |  784 +----
 .../orchestrator_pkg/_commands.py             |  102 +
 .../orchestrator_pkg/_helpers.py              |  356 +++
 .../repositories/repository_pkg/_mutations.py |  202 ++
 .../repositories/session_repository.py        |  524 +---
 35 files changed, 5541 insertions(+), 5446 deletions(-)
 create mode 100644 .axiom/runtime/belief_events.jsonl
 create mode 100644 .axiom/temp/pytest-of-busya/pytest-0/test_save_profile_and_findings0/dataset_review_session_repository.sqlite
 create mode 120000 .axiom/temp/pytest-of-busya/pytest-0/test_save_profile_and_findingscurrent
 create mode 120000 .axiom/temp/pytest-of-busya/pytest-current
 create mode 100644 .kilo/agents/mcp-backend-coder.md
 create mode 100644 backend/src/api/routes/dataset_review_pkg/_dependencies.py
 create mode 100644 backend/src/api/routes/dataset_review_pkg/_routes.py
 create mode 100644 backend/src/models/dataset_review_pkg/__init__.py
 create mode 100644 backend/src/models/dataset_review_pkg/_clarification_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_enums.py
 create mode 100644 backend/src/models/dataset_review_pkg/_execution_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_filter_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_finding_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_mapping_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_profile_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_semantic_models.py
 create mode 100644 backend/src/models/dataset_review_pkg/_session_models.py
 create mode 100644 backend/src/schemas/dataset_review_pkg/_composites.py
 create mode 100644 backend/src/schemas/dataset_review_pkg/_dtos.py
 create mode 100644 backend/src/services/dataset_review/clarification_pkg/_helpers.py
 create mode 100644 backend/src/services/dataset_review/orchestrator_pkg/_commands.py
 create mode 100644 backend/src/services/dataset_review/orchestrator_pkg/_helpers.py
 create mode 100644 backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py

diff --git a/.axiom/axiom_config.yaml b/.axiom/axiom_config.yaml
index e6057960..48d825bf 100644
--- a/.axiom/axiom_config.yaml
+++ b/.axiom/axiom_config.yaml
@@ -15,16 +15,15 @@ indexing:
   # If empty, indexes the entire workspace (default behavior).
   # If specified, only these directories are scanned for contracts.
-  include:
-    - "backend/src/"
-    - "frontend/src/"
+  # include:
+  #   - "src/"
   #   - "tests/"

   # Excluded paths/patterns applied on top of include (or full workspace).
   # Supports directory names and glob patterns.
   exclude:
     # Directories
-    - "specs/"
+    #- "specs/"
     - ".ai/"
     - ".git/"
     - ".venv/"
@@ -35,7 +34,7 @@ indexing:
     - ".ruff_cache/"
     - ".axiom/"
     # File patterns
-    - "*.md"
+    #- "*.md"
     - "*.txt"
     - "*.log"
     - "*.yaml"
@@ -88,12 +87,14 @@ tags:
       - IMPLEMENTS
       - DISPATCHES
       - BINDS_TO
-    min_complexity: 3
+      - VERIFIES  # Added for tests
+    # min_complexity: 3  <-- Removed: a RELATION may live in an ADR (C1-C5) or in tests (C1-C2)
     contract_types:
       - Module
       - Function
       - Class
       - Component
+      - ADR  # Added: an ADR is required to be linked

   LAYER:
     type: string
@@ -193,8 +194,8 @@ tags:
   RATIONALE:
     type: string
     multiline: true
-    protected: true
     description: "Why this path was chosen, and which constraint/goal it protects"
+    protected: true
     contract_types:
       - Module
       - Function
@@ -204,8 +205,8 @@ tags:
   REJECTED:
     type: string
     multiline: true
-    protected: true
     description: "Which path is forbidden, and which risk makes it unacceptable"
+    protected: true
     contract_types:
       - Module
       - Function
@@ -285,3 +286,11 @@ tags:
       - Function
       - Class
       - Component
+
+  STATUS:
+    type: string
+    description: "Lifecycle status of a node (e.g., DEPRECATED -> REPLACED_BY: [ID])"
+    contract_types:
+      - Tombstone
+      - Module
+      - ADR
\ No newline at end of file
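[Editor's sketch, not part of this patch: one way the revised tag schema above could be checked from Python. It assumes PyYAML is installed; load_tag_rules and check_tag are illustrative names, not helpers that exist in this repository.]

    import yaml  # PyYAML, assumed available

    def load_tag_rules(path=".axiom/axiom_config.yaml"):
        """Load the 'tags' section of the Axiom config."""
        with open(path, encoding="utf-8") as fh:
            return yaml.safe_load(fh)["tags"]

    def check_tag(rules, tag_name, contract_type):
        """Return True if tag_name may appear on a contract of contract_type."""
        rule = rules.get(tag_name)
        if rule is None:
            return False
        allowed = rule.get("contract_types", [])
        # An empty list is read here as "no restriction"; that is an assumption.
        return not allowed or contract_type in allowed

    rules = load_tag_rules()
    # After this patch, RELATION may sit on an ADR and accepts VERIFIES edges,
    # since min_complexity no longer filters it out.
    assert check_tag(rules, "RELATION", "ADR")
    assert check_tag(rules, "STATUS", "Tombstone")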
+{"recorded_at":"2026-04-21T11:41:43.974055021Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"dfabdf5e-8d53-451a-b5f4-4429fef64b26"}} +{"timestamp":1776771703.974,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"dfabdf5e-8d53-451a-b5f4-4429fef64b26"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:41:43.974133618Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"dfabdf5e-8d53-451a-b5f4-4429fef64b26","path":"backend/src/services/dataset_review/__init__.py"}} +{"timestamp":1776771703.974,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"dfabdf5e-8d53-451a-b5f4-4429fef64b26","path":"backend/src/services/dataset_review/__init__.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:48:58.459567584Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/__init__.py"}} +{"timestamp":1776772138.459,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/__init__.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:48:58.459630181Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/__init__.py"}} +{"timestamp":1776772138.459,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/__init__.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:48:58.459835364Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"ccc6b884-76e7-4953-a4cd-4c0814ebc65d"}} +{"timestamp":1776772138.459,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"ccc6b884-76e7-4953-a4cd-4c0814ebc65d"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:48:58.460202900Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"ccc6b884-76e7-4953-a4cd-4c0814ebc65d","path":"backend/src/models/dataset_review_pkg/__init__.py"}} 
+{"timestamp":1776772138.46,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"ccc6b884-76e7-4953-a4cd-4c0814ebc65d","path":"backend/src/models/dataset_review_pkg/__init__.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:50:09.075447312Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_enums.py"}} +{"timestamp":1776772209.075,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_enums.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:50:09.075511161Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_enums.py"}} +{"timestamp":1776772209.075,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_enums.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:50:09.075710293Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"b4926a22-20f6-4613-bb0b-895806e14f03"}} +{"timestamp":1776772209.075,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"b4926a22-20f6-4613-bb0b-895806e14f03"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:50:09.075779793Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"b4926a22-20f6-4613-bb0b-895806e14f03","path":"backend/src/models/dataset_review_pkg/_enums.py"}} +{"timestamp":1776772209.075,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"b4926a22-20f6-4613-bb0b-895806e14f03","path":"backend/src/models/dataset_review_pkg/_enums.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:50:43.178833122Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_session_models.py"}} +{"timestamp":1776772243.178,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_session_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:50:43.178884929Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace 
mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_session_models.py"}} +{"timestamp":1776772243.178,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_session_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:50:43.179037484Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"5d051c0a-8545-47f0-9c50-a069258ac5fb"}} +{"timestamp":1776772243.179,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"5d051c0a-8545-47f0-9c50-a069258ac5fb"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:50:43.179079562Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"5d051c0a-8545-47f0-9c50-a069258ac5fb","path":"backend/src/models/dataset_review_pkg/_session_models.py"}} +{"timestamp":1776772243.179,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"5d051c0a-8545-47f0-9c50-a069258ac5fb","path":"backend/src/models/dataset_review_pkg/_session_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:51:04.551294334Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_profile_models.py"}} +{"timestamp":1776772264.551,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_profile_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:51:04.551343366Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_profile_models.py"}} +{"timestamp":1776772264.551,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_profile_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:51:04.551505518Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"161bd9d6-9ec3-43fb-bf68-b12d52306175"}} +{"timestamp":1776772264.551,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"161bd9d6-9ec3-43fb-bf68-b12d52306175"},"message":"Checkpoint manifest persisted for future rollback."}} 
+{"recorded_at":"2026-04-21T11:51:04.551556564Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"161bd9d6-9ec3-43fb-bf68-b12d52306175","path":"backend/src/models/dataset_review_pkg/_profile_models.py"}} +{"timestamp":1776772264.551,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"161bd9d6-9ec3-43fb-bf68-b12d52306175","path":"backend/src/models/dataset_review_pkg/_profile_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:51:22.604870200Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_finding_models.py"}} +{"timestamp":1776772282.604,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_finding_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:51:22.604933307Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_finding_models.py"}} +{"timestamp":1776772282.604,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_finding_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:51:22.605109376Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"38318063-de25-44da-a5f5-5df04e2e9348"}} +{"timestamp":1776772282.605,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"38318063-de25-44da-a5f5-5df04e2e9348"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:51:22.605163547Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"38318063-de25-44da-a5f5-5df04e2e9348","path":"backend/src/models/dataset_review_pkg/_finding_models.py"}} +{"timestamp":1776772282.605,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"38318063-de25-44da-a5f5-5df04e2e9348","path":"backend/src/models/dataset_review_pkg/_finding_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:52:03.610121835Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_semantic_models.py"}} 
+{"timestamp":1776772323.61,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_semantic_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:52:03.610173371Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_semantic_models.py"}} +{"timestamp":1776772323.61,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_semantic_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:52:03.610366341Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"d211f8b2-7796-474b-b710-27628643e717"}} +{"timestamp":1776772323.61,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"d211f8b2-7796-474b-b710-27628643e717"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:52:03.610437654Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"d211f8b2-7796-474b-b710-27628643e717","path":"backend/src/models/dataset_review_pkg/_semantic_models.py"}} +{"timestamp":1776772323.61,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"d211f8b2-7796-474b-b710-27628643e717","path":"backend/src/models/dataset_review_pkg/_semantic_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:52:28.642272446Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_filter_models.py"}} +{"timestamp":1776772348.642,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_filter_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:52:28.642317750Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_filter_models.py"}} +{"timestamp":1776772348.642,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_filter_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:52:28.642481155Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future 
rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"c0e7fbfb-9be4-470b-8429-3daf9566929e"}} +{"timestamp":1776772348.642,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"c0e7fbfb-9be4-470b-8429-3daf9566929e"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:52:28.642521911Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"c0e7fbfb-9be4-470b-8429-3daf9566929e","path":"backend/src/models/dataset_review_pkg/_filter_models.py"}} +{"timestamp":1776772348.642,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"c0e7fbfb-9be4-470b-8429-3daf9566929e","path":"backend/src/models/dataset_review_pkg/_filter_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:52:47.300068913Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776772367.3,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:52:47.300122022Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776772367.3,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:52:47.300287681Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"46e74151-044c-4464-b622-f2b13d47ebc5"}} +{"timestamp":1776772367.3,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"46e74151-044c-4464-b622-f2b13d47ebc5"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:52:47.300364304Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"46e74151-044c-4464-b622-f2b13d47ebc5","path":"backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776772367.3,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"46e74151-044c-4464-b622-f2b13d47ebc5","path":"backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Workspace file persisted behind a checkpoint."}} 
+{"recorded_at":"2026-04-21T11:53:27.445472210Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776772407.445,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:53:27.445522093Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776772407.445,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:53:27.445680549Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"f1b340fd-6750-4e26-81a2-dd2164bb89e7"}} +{"timestamp":1776772407.445,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"f1b340fd-6750-4e26-81a2-dd2164bb89e7"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:53:27.445729320Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"f1b340fd-6750-4e26-81a2-dd2164bb89e7","path":"backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776772407.445,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"f1b340fd-6750-4e26-81a2-dd2164bb89e7","path":"backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:54:02.107022337Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_execution_models.py"}} +{"timestamp":1776772442.107,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/models/dataset_review_pkg/_execution_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:54:02.107112916Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_execution_models.py"}} 
+{"timestamp":1776772442.107,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_execution_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:54:02.107338878Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"af023735-9da0-47ea-ae6b-b81f1ea58f0d"}} +{"timestamp":1776772442.107,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"af023735-9da0-47ea-ae6b-b81f1ea58f0d"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:54:02.107404530Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"af023735-9da0-47ea-ae6b-b81f1ea58f0d","path":"backend/src/models/dataset_review_pkg/_execution_models.py"}} +{"timestamp":1776772442.107,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"af023735-9da0-47ea-ae6b-b81f1ea58f0d","path":"backend/src/models/dataset_review_pkg/_execution_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:54:44.486738778Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review.py"}} +{"timestamp":1776772484.486,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:54:44.486794643Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review.py"}} +{"timestamp":1776772484.486,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:54:44.487060449Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"faf0dc2c-2748-47c2-8bf4-8c4c648239f1"}} +{"timestamp":1776772484.487,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"faf0dc2c-2748-47c2-8bf4-8c4c648239f1"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:54:44.487433264Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"faf0dc2c-2748-47c2-8bf4-8c4c648239f1","path":"backend/src/models/dataset_review.py"}} 
+{"timestamp":1776772484.487,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"faf0dc2c-2748-47c2-8bf4-8c4c648239f1","path":"backend/src/models/dataset_review.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:55:50.277753308Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/schemas/dataset_review_pkg/_dtos.py"}} +{"timestamp":1776772550.277,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/schemas/dataset_review_pkg/_dtos.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:55:50.277814472Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review_pkg/_dtos.py"}} +{"timestamp":1776772550.277,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review_pkg/_dtos.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:55:50.277984319Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"d5a40f89-dced-4c08-baf1-bc52a62f5f81"}} +{"timestamp":1776772550.277,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"d5a40f89-dced-4c08-baf1-bc52a62f5f81"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:55:50.278443957Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"d5a40f89-dced-4c08-baf1-bc52a62f5f81","path":"backend/src/schemas/dataset_review_pkg/_dtos.py"}} +{"timestamp":1776772550.278,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"d5a40f89-dced-4c08-baf1-bc52a62f5f81","path":"backend/src/schemas/dataset_review_pkg/_dtos.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:56:26.640999678Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/schemas/dataset_review_pkg/_composites.py"}} +{"timestamp":1776772586.641,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/schemas/dataset_review_pkg/_composites.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:56:26.641050443Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace 
mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review_pkg/_composites.py"}} +{"timestamp":1776772586.641,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review_pkg/_composites.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:56:26.641202607Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a0b5bd62-93de-41d4-8a41-3e25b7e29c47"}} +{"timestamp":1776772586.641,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a0b5bd62-93de-41d4-8a41-3e25b7e29c47"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:56:26.641255355Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"a0b5bd62-93de-41d4-8a41-3e25b7e29c47","path":"backend/src/schemas/dataset_review_pkg/_composites.py"}} +{"timestamp":1776772586.641,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"a0b5bd62-93de-41d4-8a41-3e25b7e29c47","path":"backend/src/schemas/dataset_review_pkg/_composites.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:56:40.292055708Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/schemas/dataset_review.py"}} +{"timestamp":1776772600.292,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/schemas/dataset_review.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:56:40.292112494Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review.py"}} +{"timestamp":1776772600.292,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/schemas/dataset_review.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:56:40.292393067Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"149b0854-553b-430b-ae49-d72110708b13"}} +{"timestamp":1776772600.292,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"149b0854-553b-430b-ae49-d72110708b13"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:56:40.292884584Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a 
checkpoint.","depth":1,"extra":{"checkpoint_id":"149b0854-553b-430b-ae49-d72110708b13","path":"backend/src/schemas/dataset_review.py"}} +{"timestamp":1776772600.292,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"149b0854-553b-430b-ae49-d72110708b13","path":"backend/src/schemas/dataset_review.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:57:31.991831766Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776772651.991,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T11:57:31.991904632Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776772651.991,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:57:31.992111718Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"bbb34bc9-caaf-42d2-95a0-2a8065febc78"}} +{"timestamp":1776772651.992,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"bbb34bc9-caaf-42d2-95a0-2a8065febc78"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:57:31.992193390Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"bbb34bc9-caaf-42d2-95a0-2a8065febc78","path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776772651.992,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"bbb34bc9-caaf-42d2-95a0-2a8065febc78","path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T11:58:35.280110200Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"}} +{"timestamp":1776772715.28,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"},"message":"Creating or updating a workspace file after 
policy checks passed."}} +{"recorded_at":"2026-04-21T11:58:35.280164321Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"}} +{"timestamp":1776772715.28,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T11:58:35.280338766Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"4acaae79-1c83-40ab-a478-b704c628206f"}} +{"timestamp":1776772715.28,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"4acaae79-1c83-40ab-a478-b704c628206f"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T11:58:35.280412083Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"4acaae79-1c83-40ab-a478-b704c628206f","path":"backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"}} +{"timestamp":1776772715.28,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"4acaae79-1c83-40ab-a478-b704c628206f","path":"backend/src/services/dataset_review/orchestrator_pkg/_helpers.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:00:46.134194583Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator.py"}} +{"timestamp":1776772846.134,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:00:46.134274562Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator.py"}} +{"timestamp":1776772846.134,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:00:46.134694335Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"3459b7f2-f48a-4bcf-961a-52e3f580c939"}} 
+{"timestamp":1776772846.134,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"3459b7f2-f48a-4bcf-961a-52e3f580c939"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:00:46.134808338Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"3459b7f2-f48a-4bcf-961a-52e3f580c939","path":"backend/src/services/dataset_review/orchestrator.py"}} +{"timestamp":1776772846.134,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"3459b7f2-f48a-4bcf-961a-52e3f580c939","path":"backend/src/services/dataset_review/orchestrator.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:01:52.807127672Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/clarification_pkg/_helpers.py"}} +{"timestamp":1776772912.807,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/clarification_pkg/_helpers.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:01:52.807195538Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/clarification_pkg/_helpers.py"}} +{"timestamp":1776772912.807,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/clarification_pkg/_helpers.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:01:52.807419496Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a3e02868-ad4f-4e55-8b8f-8914a81c7b1b"}} +{"timestamp":1776772912.807,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a3e02868-ad4f-4e55-8b8f-8914a81c7b1b"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:01:52.807510826Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"a3e02868-ad4f-4e55-8b8f-8914a81c7b1b","path":"backend/src/services/dataset_review/clarification_pkg/_helpers.py"}} +{"timestamp":1776772912.807,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"a3e02868-ad4f-4e55-8b8f-8914a81c7b1b","path":"backend/src/services/dataset_review/clarification_pkg/_helpers.py"},"message":"Workspace file persisted behind a checkpoint."}} 
+{"recorded_at":"2026-04-21T12:03:08.877000922Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/clarification_engine.py"}} +{"timestamp":1776772988.877,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/clarification_engine.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:03:08.877064080Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/clarification_engine.py"}} +{"timestamp":1776772988.877,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/clarification_engine.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:03:08.877357257Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"cc92a1c5-17e1-4720-8f29-2ac28a976763"}} +{"timestamp":1776772988.877,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"cc92a1c5-17e1-4720-8f29-2ac28a976763"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:03:08.877455209Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"cc92a1c5-17e1-4720-8f29-2ac28a976763","path":"backend/src/services/dataset_review/clarification_engine.py"}} +{"timestamp":1776772988.877,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"cc92a1c5-17e1-4720-8f29-2ac28a976763","path":"backend/src/services/dataset_review/clarification_engine.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:04:01.149200407Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"}} +{"timestamp":1776773041.149,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:04:01.149293761Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"}} 
+{"timestamp":1776773041.149,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:04:01.149455864Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"3a4f47ca-e640-4948-ac4f-1aae0265574e"}} +{"timestamp":1776773041.149,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"3a4f47ca-e640-4948-ac4f-1aae0265574e"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:04:01.149515334Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"3a4f47ca-e640-4948-ac4f-1aae0265574e","path":"backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"}} +{"timestamp":1776773041.149,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"3a4f47ca-e640-4948-ac4f-1aae0265574e","path":"backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:05:35.998968983Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/repositories/session_repository.py"}} +{"timestamp":1776773135.998,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/repositories/session_repository.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:05:35.999019277Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/repositories/session_repository.py"}} +{"timestamp":1776773135.999,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/repositories/session_repository.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:05:35.999347509Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"01432c94-49de-4daf-a040-a2b6c5cb6f13"}} +{"timestamp":1776773135.999,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"01432c94-49de-4daf-a040-a2b6c5cb6f13"},"message":"Checkpoint manifest persisted for future rollback."}} 
+{"recorded_at":"2026-04-21T12:05:35.999460419Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"01432c94-49de-4daf-a040-a2b6c5cb6f13","path":"backend/src/services/dataset_review/repositories/session_repository.py"}} +{"timestamp":1776773135.999,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"01432c94-49de-4daf-a040-a2b6c5cb6f13","path":"backend/src/services/dataset_review/repositories/session_repository.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:09:02.219645585Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/api/routes/dataset_review_pkg/_dependencies.py"}} +{"timestamp":1776773342.219,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/api/routes/dataset_review_pkg/_dependencies.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:09:02.219710667Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_dependencies.py"}} +{"timestamp":1776773342.219,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_dependencies.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:09:02.219903025Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"58b67526-11de-4476-93f0-e3a8bfc5255c"}} +{"timestamp":1776773342.219,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"58b67526-11de-4476-93f0-e3a8bfc5255c"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:09:02.219978465Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"58b67526-11de-4476-93f0-e3a8bfc5255c","path":"backend/src/api/routes/dataset_review_pkg/_dependencies.py"}} +{"timestamp":1776773342.219,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"58b67526-11de-4476-93f0-e3a8bfc5255c","path":"backend/src/api/routes/dataset_review_pkg/_dependencies.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T12:56:45.580424061Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":false,"path":"backend/src/api/routes/dataset_review_pkg/_routes.py"}} 
+{"timestamp":1776776205.58,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":false,"path":"backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T12:56:45.580490504Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_routes.py"}} +{"timestamp":1776776205.58,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T12:56:45.581308034Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"c8401e5e-8655-466f-a3cf-db03a6c00266"}} +{"timestamp":1776776205.581,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"c8401e5e-8655-466f-a3cf-db03a6c00266"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T12:56:45.581375229Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"c8401e5e-8655-466f-a3cf-db03a6c00266","path":"backend/src/api/routes/dataset_review_pkg/_routes.py"}} +{"timestamp":1776776205.581,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"c8401e5e-8655-466f-a3cf-db03a6c00266","path":"backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:04:50.481263387Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/api/routes/dataset_review.py"}} +{"timestamp":1776776690.481,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/api/routes/dataset_review.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T13:04:50.481357071Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review.py"}} +{"timestamp":1776776690.481,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:04:50.482354007Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future 
rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"f32a2bbd-93c6-4d55-bf11-d788c014d6b9"}} +{"timestamp":1776776690.482,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"f32a2bbd-93c6-4d55-bf11-d788c014d6b9"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:04:50.482545103Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"f32a2bbd-93c6-4d55-bf11-d788c014d6b9","path":"backend/src/api/routes/dataset_review.py"}} +{"timestamp":1776776690.482,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"f32a2bbd-93c6-4d55-bf11-d788c014d6b9","path":"backend/src/api/routes/dataset_review.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:05:54.208393357Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/api/routes/dataset_review_pkg/_routes.py"}} +{"timestamp":1776776754.208,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T13:05:54.208458769Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_routes.py"}} +{"timestamp":1776776754.208,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:05:54.208871798Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"2b78e865-257f-4cfb-9300-9f231ed83d09"}} +{"timestamp":1776776754.208,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"2b78e865-257f-4cfb-9300-9f231ed83d09"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:05:54.208956676Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"2b78e865-257f-4cfb-9300-9f231ed83d09","path":"backend/src/api/routes/dataset_review_pkg/_routes.py"}} +{"timestamp":1776776754.208,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"2b78e865-257f-4cfb-9300-9f231ed83d09","path":"backend/src/api/routes/dataset_review_pkg/_routes.py"},"message":"Workspace file persisted behind a checkpoint."}} 
+{"recorded_at":"2026-04-21T13:06:18.990074733Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776776778.99,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T13:06:18.990140005Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776776778.99,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:06:18.990518990Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a9f3314c-09a9-4fb9-9e5a-5869fef800f3"}} +{"timestamp":1776776778.99,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"a9f3314c-09a9-4fb9-9e5a-5869fef800f3"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:06:18.990623425Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"a9f3314c-09a9-4fb9-9e5a-5869fef800f3","path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"}} +{"timestamp":1776776778.99,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"a9f3314c-09a9-4fb9-9e5a-5869fef800f3","path":"backend/src/services/dataset_review/orchestrator_pkg/_commands.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:07:43.022613643Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator.py"}} +{"timestamp":1776776863.022,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/services/dataset_review/orchestrator.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T13:07:43.022696688Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator.py"}} 
+{"timestamp":1776776863.022,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/services/dataset_review/orchestrator.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:07:43.023125696Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"980c4ed9-e9b4-4a2d-8c7c-eee0ab25c141"}} +{"timestamp":1776776863.023,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"980c4ed9-e9b4-4a2d-8c7c-eee0ab25c141"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:07:43.023257672Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"980c4ed9-e9b4-4a2d-8c7c-eee0ab25c141","path":"backend/src/services/dataset_review/orchestrator.py"}} +{"timestamp":1776776863.023,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"980c4ed9-e9b4-4a2d-8c7c-eee0ab25c141","path":"backend/src/services/dataset_review/orchestrator.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:07:59.679929264Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/api/routes/__tests__/test_dataset_review_api.py -x --tb=short -q 2>&1 | head -80","timeout_seconds":60}} +{"timestamp":1776776879.679,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/api/routes/__tests__/test_dataset_review_api.py -x --tb=short -q 2>&1 | head -80","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:08:01.551547562Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776881.551,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:08:30.209771091Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776776910.209,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} 
+{"recorded_at":"2026-04-21T13:08:30.209839568Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776776910.209,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:08:30.210221239Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"6be4ec4c-6d6a-4474-a7b8-1cae4ed0574d"}} +{"timestamp":1776776910.21,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"6be4ec4c-6d6a-4474-a7b8-1cae4ed0574d"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:08:30.210372250Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"6be4ec4c-6d6a-4474-a7b8-1cae4ed0574d","path":"backend/src/models/dataset_review_pkg/_clarification_models.py"}} +{"timestamp":1776776910.21,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"6be4ec4c-6d6a-4474-a7b8-1cae4ed0574d","path":"backend/src/models/dataset_review_pkg/_clarification_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:08:33.941160865Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/api/routes/__tests__/test_dataset_review_api.py -x --tb=short -q 2>&1 | head -80","timeout_seconds":60}} +{"timestamp":1776776913.941,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/api/routes/__tests__/test_dataset_review_api.py -x --tb=short -q 2>&1 | head -80","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:08:35.404177322Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776915.404,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:08:56.031344710Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom 
src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationStateResponse\nprint('All schemas imported successfully')\nfrom src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand, StartSessionResult, PreparePreviewCommand, PreparePreviewResult, LaunchDatasetCommand, LaunchDatasetResult\nprint('All orchestrator symbols imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('All repository symbols imported successfully')\nfrom src.api.routes.dataset_review import router, _get_repository, _get_orchestrator, _get_clarification_engine\nprint('All API route symbols imported successfully')\nprint('ALL IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60}} +{"timestamp":1776776936.031,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationStateResponse\nprint('All schemas imported successfully')\nfrom src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand, StartSessionResult, PreparePreviewCommand, PreparePreviewResult, LaunchDatasetCommand, LaunchDatasetResult\nprint('All orchestrator symbols imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('All repository symbols imported successfully')\nfrom src.api.routes.dataset_review import router, _get_repository, _get_orchestrator, _get_clarification_engine\nprint('All API route symbols imported successfully')\nprint('ALL IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:08:56.485555248Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for 
transport.","depth":1,"extra":{"exit_code":1,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776936.485,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":1,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:09:06.502369311Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('1. All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationSessionDto, ClarificationQuestionDto, CompiledPreviewDto, DatasetRunContextDto\nprint('2. All schemas imported successfully')\nfrom src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand, StartSessionResult, PreparePreviewCommand, PreparePreviewResult, LaunchDatasetCommand, LaunchDatasetResult\nprint('3. All orchestrator symbols imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('4. All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('5. All repository symbols imported successfully')\nfrom src.api.routes.dataset_review import router, _get_repository, _get_orchestrator, _get_clarification_engine\nprint('6. All API route symbols imported successfully')\nprint('ALL IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60}} +{"timestamp":1776776946.502,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('1. All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationSessionDto, ClarificationQuestionDto, CompiledPreviewDto, DatasetRunContextDto\nprint('2. All schemas imported successfully')\nfrom src.services.dataset_review.orchestrator import DatasetReviewOrchestrator, StartSessionCommand, StartSessionResult, PreparePreviewCommand, PreparePreviewResult, LaunchDatasetCommand, LaunchDatasetResult\nprint('3. 
All orchestrator symbols imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('4. All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('5. All repository symbols imported successfully')\nfrom src.api.routes.dataset_review import router, _get_repository, _get_orchestrator, _get_clarification_engine\nprint('6. All API route symbols imported successfully')\nprint('ALL IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:07.159610762Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":1,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776947.159,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":1,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:09:17.914135452Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('1. All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationSessionDto, ClarificationQuestionDto, CompiledPreviewDto, DatasetRunContextDto\nprint('2. All schemas imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('3. All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('4. All repository symbols imported successfully')\nfrom src.services.dataset_review.event_logger import SessionEventLogger, SessionEventPayload\nprint('5. All event logger symbols imported successfully')\nfrom src.services.dataset_review.semantic_resolver import SemanticSourceResolver\nprint('6. 
All semantic resolver symbols imported successfully')\nprint('ALL CORE IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60}} +{"timestamp":1776776957.914,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -c \"\nfrom src.models.dataset_review import DatasetReviewSession, SessionStatus, ReadinessState, ClarificationSession, CompiledPreview, DatasetRunContext, ExecutionMapping, SemanticFieldEntry, ImportedFilter, TemplateVariable, ValidationFinding, SemanticSource, SemanticCandidate, SessionEvent, ExportArtifact, DatasetProfile, SessionCollaborator\nprint('1. All models imported successfully')\nfrom src.schemas.dataset_review import SessionSummary, SessionDetail, SemanticFieldEntryDto, ExecutionMappingDto, ClarificationSessionDto, ClarificationQuestionDto, CompiledPreviewDto, DatasetRunContextDto\nprint('2. All schemas imported successfully')\nfrom src.services.dataset_review.clarification_engine import ClarificationEngine, ClarificationQuestionPayload, ClarificationStateResult, ClarificationAnswerCommand\nprint('3. All clarification symbols imported successfully')\nfrom src.services.dataset_review.repositories.session_repository import DatasetReviewSessionRepository, DatasetReviewSessionVersionConflictError\nprint('4. All repository symbols imported successfully')\nfrom src.services.dataset_review.event_logger import SessionEventLogger, SessionEventPayload\nprint('5. All event logger symbols imported successfully')\nfrom src.services.dataset_review.semantic_resolver import SemanticSourceResolver\nprint('6. All semantic resolver symbols imported successfully')\nprint('ALL CORE IMPORTS VERIFIED')\n\" 2>&1","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:18.369911245Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776958.369,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:09:24.102691121Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=short -q 2>&1 | tail -30","timeout_seconds":60}} +{"timestamp":1776776964.102,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=short -q 2>&1 | tail -30","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:25.265294376Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for 
transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776965.265,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:09:31.308160425Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=long -q 2>&1 | grep -A 20 \"ERROR\\|FAILED\\|test_create\"","timeout_seconds":60}} +{"timestamp":1776776971.308,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=long -q 2>&1 | grep -A 20 \"ERROR\\|FAILED\\|test_create\"","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:32.565164501Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776972.565,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T13:09:38.445539919Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py::test_create_session -x --tb=long -q 2>&1 | grep -A 5 \"ArgumentError\\|NoForeignKeysError\\|relationship\"","timeout_seconds":60}} +{"timestamp":1776776978.445,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py::test_create_session -x --tb=long -q 2>&1 | grep -A 5 \"ArgumentError\\|NoForeignKeysError\\|relationship\"","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:39.599916197Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776979.599,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} 
+{"recorded_at":"2026-04-21T13:09:52.949062744Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reason","message":"Creating or updating a workspace file after policy checks passed.","depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776776992.949,"event_type":"belief_reason","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"overwrite":true,"path":"backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Creating or updating a workspace file after policy checks passed."}} +{"recorded_at":"2026-04-21T13:09:52.949123958Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776776992.949,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"path_count":1,"reason":"create/update backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T13:09:52.949464392Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":2,"extra":{"affected_paths":1,"checkpoint_id":"e55fe271-76bd-468a-a734-75edeea4b118"}} +{"timestamp":1776776992.949,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":2,"extra":{"affected_paths":1,"checkpoint_id":"e55fe271-76bd-468a-a734-75edeea4b118"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T13:09:52.949560882Z","anchor_id":"Axiom:Services:Artifact:CreateWorkspaceFile","marker":"reflect","message":"Workspace file persisted behind a checkpoint.","depth":1,"extra":{"checkpoint_id":"e55fe271-76bd-468a-a734-75edeea4b118","path":"backend/src/models/dataset_review_pkg/_mapping_models.py"}} +{"timestamp":1776776992.949,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:CreateWorkspaceFile","data":{"depth":1,"extra":{"checkpoint_id":"e55fe271-76bd-468a-a734-75edeea4b118","path":"backend/src/models/dataset_review_pkg/_mapping_models.py"},"message":"Workspace file persisted behind a checkpoint."}} +{"recorded_at":"2026-04-21T13:09:56.486046641Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=short -q 2>&1 | tail -20","timeout_seconds":60}} +{"timestamp":1776776996.486,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools/backend && .venv/bin/python3 -m pytest src/services/dataset_review/repositories/__tests__/test_session_repository.py -x --tb=short -q 2>&1 | tail -20","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T13:09:58.351735263Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for 
transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776776998.351,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T15:20:26.480288716Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git status","timeout_seconds":60}} +{"timestamp":1776784826.48,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git status","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T15:20:26.630381846Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776784826.63,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T15:20:27.344349926Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git diff --stat","timeout_seconds":60}} +{"timestamp":1776784827.344,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git diff --stat","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T15:20:27.494926086Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776784827.494,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T15:24:30.413442097Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git diff --stat","timeout_seconds":60}} +{"timestamp":1776785070.413,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"cd /home/busya/dev/ss-tools && git diff --stat","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-21T15:24:30.567104141Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for 
transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1776785070.567,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} +{"recorded_at":"2026-04-21T15:26:47.890257996Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata SupersetClientModule"}} +{"timestamp":1776785207.89,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata SupersetClientModule"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:26:47.890731839Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"597e63cf-3d6b-4c58-9c17-0c45b1292f5c"}} +{"timestamp":1776785207.89,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"597e63cf-3d6b-4c58-9c17-0c45b1292f5c"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:26:49.437884410Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata AsyncSupersetClientModule"}} +{"timestamp":1776785209.437,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata AsyncSupersetClientModule"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:26:49.438174852Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"1e162e93-4baf-4f34-a75d-8602675e1a5e"}} +{"timestamp":1776785209.438,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"1e162e93-4baf-4f34-a75d-8602675e1a5e"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:26:58.832185960Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewApi"}} +{"timestamp":1776785218.832,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewApi"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:26:58.832567501Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"2f201c84-04bd-4ee9-b85a-a01dd6e9804b"}} 
+{"timestamp":1776785218.832,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"2f201c84-04bd-4ee9-b85a-a01dd6e9804b"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:27:00.106117594Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewModels"}} +{"timestamp":1776785220.106,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewModels"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:27:00.106408877Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"48936c32-7e0d-46c3-b131-7a15b4504ae1"}} +{"timestamp":1776785220.106,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"48936c32-7e0d-46c3-b131-7a15b4504ae1"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:27:01.133043337Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewSchemas"}} +{"timestamp":1776785221.133,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewSchemas"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:27:01.133396054Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"51cc62b7-612c-44bf-8992-2dab32ffb95f"}} +{"timestamp":1776785221.133,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"51cc62b7-612c-44bf-8992-2dab32ffb95f"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:27:07.650150555Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewDependencies"}} +{"timestamp":1776785227.65,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewDependencies"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:27:07.650480500Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"db0cf57f-3aba-431a-8220-f17c004be1dc"}} 
+{"timestamp":1776785227.65,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"db0cf57f-3aba-431a-8220-f17c004be1dc"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-21T15:27:09.094963150Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reason","message":"Capturing rollback state before a workspace mutation.","depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewRoutes"}} +{"timestamp":1776785229.094,"event_type":"belief_reason","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"path_count":1,"reason":"prune metadata DatasetReviewRoutes"},"message":"Capturing rollback state before a workspace mutation."}} +{"recorded_at":"2026-04-21T15:27:09.095271274Z","anchor_id":"Axiom:Services:Checkpoint:CreateCheckpoint","marker":"reflect","message":"Checkpoint manifest persisted for future rollback.","depth":1,"extra":{"affected_paths":1,"checkpoint_id":"a7b0bfba-df19-42ed-a431-85fe1899ee84"}} +{"timestamp":1776785229.095,"event_type":"belief_reflect","component":"Axiom:Services:Checkpoint:CreateCheckpoint","data":{"depth":1,"extra":{"affected_paths":1,"checkpoint_id":"a7b0bfba-df19-42ed-a431-85fe1899ee84"},"message":"Checkpoint manifest persisted for future rollback."}} +{"recorded_at":"2026-04-24T14:09:03.637730909Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reason","message":"Executing a read-only workspace command inside the project root.","depth":1,"extra":{"command":"find /home/busya/dev/ss-tools/backend/src -name \"*.py\" -exec wc -l {} + | sort -rn | head -30","timeout_seconds":60}} +{"timestamp":1777039743.637,"event_type":"belief_reason","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"command":"find /home/busya/dev/ss-tools/backend/src -name \"*.py\" -exec wc -l {} + | sort -rn | head -30","timeout_seconds":60},"message":"Executing a read-only workspace command inside the project root."}} +{"recorded_at":"2026-04-24T14:09:03.796933443Z","anchor_id":"Axiom:Services:Artifact:RunWorkspaceCommand","marker":"reflect","message":"Workspace command completed and output was bounded for transport.","depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false}} +{"timestamp":1777039743.796,"event_type":"belief_reflect","component":"Axiom:Services:Artifact:RunWorkspaceCommand","data":{"depth":1,"extra":{"exit_code":0,"stderr_truncated":false,"stdout_truncated":false},"message":"Workspace command completed and output was bounded for transport."}} diff --git a/.axiom/temp/pytest-of-busya/pytest-0/test_save_profile_and_findings0/dataset_review_session_repository.sqlite b/.axiom/temp/pytest-of-busya/pytest-0/test_save_profile_and_findings0/dataset_review_session_repository.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..c29dca9d87615604c3903673847a8668f83ad5c1 GIT binary patch literal 557056 zcmeI*Z)_ZAe%SF{%i@ZZWW~PoZSE4E$LBca#-gPqMM<>c^R1|nOp9E~OVYODTqeVv zhvb}gXI3+_lz6_ol=@@mt||JWK#IHw(iZXr{ukT(U=yeW{P&71aBfT9SH6fGKD z3bYNH0xgO@&+N`}XLn{v%M_PVzdE0!eV%8Y=b6v*d!BiApOv)q{=6LsZO!%SW}r=F zekzmAX1=9qnM`Iu{{PqI|Dzx8$=^I1y^z0VyMCYc+l9=V-@ScQ=H^cQ2UX_#r~db; z|9R@ao%+eC|9t9CTS;k`l8yiZ2q1s}0tg_000IagfB*t}6=>X+C96XI^x;kU(^d7R z{@gd^PcKc#pMEBPQvRHkZ^U^oE+P1V00IagfB*srAbil1R>7O46Ab0(05p`Ijc1&*Vps zl`>6Vc*Q{Yf%=X5^;_buS#Q+Dn^m{oxHG>vTbiFdJ9&QM?74}_qE?)G>+H;1(-+=6 
zcd=NUo@)L1|G&zd`l~~Ahja!31Q0*~0R#|0009ILKmY**zPP}#Bl*$rgM-%l|DR+| z{p5>RN(~4gfB*srAbiZa_RECt|gbz#>Yl&OZ%WynSHlZ(aMX< zT6txD{YAR_l^(lBIGeWTI(6X$T}8s$?7Hv+v)<6I$UZI4 zE$Av$rUXqtv3bAh3E}t~ZeTRbU?VD{uCjfq)QqjxHh8q2)<cxp|ht(@*ft{2E}YNR4mA`XnNtu$RX)Z`>E|~ z&*k#PV)oulI3WEc`cqED2BSZJz)&>It(t3E{m+Jd;Fvx+lAn7ujiVvFx_mInc_AIT zUOcz0oyg^1eKmV87Xox;b^Z2a?3TMS2HdNBqPb14n!N0*g?VR+#^S4gpcjvi>q22< z5OS+e#qgKhy>A>1ksrJoB8Q3LPa`qpFlhj?%1?!Eb+WyaRKyL__wTrJv$!F*jolB6 zwPvjrMpt*D+#zom@qkv9Brb+3(F%tq_Sj0^dDKilFlz3)?d-B4nSFN9awnHMM}(8A zz3Rzn-k)~#o_i!*?jFkhv)r%um*`Jid|7rNVfO|jy0R#|0009ILKmY**5I|tC zfI9#GO2*2Z`slgeeeU@a|L4T-pBO*>(QL|-A@y0C`fMyE=j^@Q>0JKp zx3l+OX?;SLQZDWKGqI0j(n<~ZXr^k)^DA{M)%7`x`aEmEC+0HC^`g(YVxP$biO1TO z2;>L5zVRJjegvGJbk~`^d3>d+kQUgY=`vYgCJ!pVhALe%gR<06jiF0VRY1g{HQKITi8Qds& zHTf$G@voC4PrB{N*K>LEQg(YKI*XCV6mn{r@+&Lyv-dUItG7=xYVk8&) zg*IvDR4k>Os=T{8b)1t@R2%OpTiE{E&*$>*%T7=4)@k)gyIxN`5IPAFXRz+a`#@p*LXFT zzjP`4;7&O6Qi`P{|9ouTr4$-4?V{mG`kq!#tRzmcmRZu(wiC!NBJ?kVmhH>0T~#;w zbHv?-EsvhNzu*&FyrVhV^#!1KzC0ms*7x$IknNFgQ152*mzPT>D(HHPh z&THub6%~mtjz?3J+d96Llc})%^;dH0n-JTj0UhZXhn+!=>ifApbf~A2w2t)@E6i$x zjxPJV(jDee_g=mC@yp@S; zSv*q0tgiTd;U)R_kc}riK3cW(1k}z*w`$t06FrwFcBZFH;q9LtQGMEVeQtHIYh4Z# zUcUFj&*buNypg^CaZj{{**!P#^^`bm-Cg>3U>{-KRncLIs@=VBO^LVmEa`kB^@|7D z_E)}^%P-Dkw~JAfC4cSLXh(l??$7oPcXF|)5;fY%CA9H&JB95xzM9K_TXt+=myUIv zugfWzTJEv5dsJxfB*srAbmuk!fON&KmY** z5I_I{1Q0*~0R)~t0sj8Kr*BNTBLomY009ILKmY**5I_I{1bPKH|L+yyH3A4AfB*sr zAbrkGCodlV$HyC^ ze>(a*$ByUz-N>Je{O-}eadiIKjb~pw@|&{A$Gz7Mvkzo!m2(F6yRT>+Y^fRCnV#X3a3GlGQf?_m*%BY3yw^ zf}UOnrYA2j*E{zd>!{fbT*Gb1s{_-&WmukhM|g?1)EQCPoEm1s5{_>NTT$O))dr#_ z);+Us>2w!_s4g?Gtb3 z^4HE}x6jy)CGOgH<$W0Ip4)5~b+gg1opryH(w9?e$M;Hk?)?>An=4<@Khg$Pp)HoR zzG}4bD5+5T#gY8VnQSKPjDNdk2f|Q;8NTjxWT$Jz=*mvgx32n4D4BXv||H=eaVPb^;}qx7(<-uZ@kGN{dGAowr8vOA~2K;ni|7)otIm zUB@?yy(!rnr-? 
z{K8i$e(gI=jIvlHwJSNTMK-+(?E*0~mDalR-qKMm-CE%-wbzad=ST9_UrTceS6)w4 zUC)wI-LKxd|NL|=|JrNWha$A<&g=gDOw6l0XTY2cwi?OL1V1o?W@mYhO-JEu+Mesw zg%fl|sN#?pc|FUsN?x-a+uw*U+bX^0uKU`1ON(XoLU^9*MT4di&z_z7V?&>?^vCZiHs)PfQP7vp4O)3+*WIR9 z6`f^Pt|`4q7w;dR3Rm8TdTUYXEzozxh1dZ1We*tTt{dnYvDlz?s_Tx3@tH!bP_vmf z=S1C=&mP+e#Jb%0rIu>5^zOcDrXLvomfRl3AMFFjcYg=;(&R{fv5+=9>$cp;1oG)% ze8*jt^E3YX{QYCaT)t4qKD-_dWjr_jYC1NE@w@>e*oKn>nT9K;-n!h}4k!_y(&NSR z;X}d`jhZP#)mn{qTx#-s?%B56XvA({@a$6UcW=sKcQ*Lau|nzWpouAWhvus3cTdc| zq^bLrGvUPibSWIoz5;y-ld&=F%YNL$+FEL?RU>tJwgm?jX^)RA(mgq2vwx?S)FQ%J zx1G)dMGfR(iHq%tH*@)Kzn$G)h<5JPnlK&1lW$T~K$uNSzNb=8B%^9NmMuq7?MKrJ z728AJ*~F(6jMb+0^JxXO@$PmDeE)wKpJLD^0tg_000IagfB*srAb%tGrdZUw6nXTxhWnG&qU(r9(k_&5#Wvz38Hr}bEaQ}^~x%}K*_Tl@1 zxmpv|nlK&16Y>{d7!9{(SGNq`F&qAd8~B5BuebZLT)I53YlF*b<71Nw^3M%dDVnv_3 zR#s`_NwpWWihfnE=;c{`sYk1?xF6O!R`~eZNZy^xX2P)WZ`bTV7-lnY!`H@Oa-%pn zfBwFAC6~W+Df{Us387Fo9eYg%UQY~klII2tey7lYQLmbgWm|IG(%hp;jA-cR%FFsS zJsvxqLYBQQJDV6DrWe?2W;O7Q0fo|Hs2y6XTZvWsZquuZ&U}&cXf3ha;81*|2g>d40fv6FmlKn!&XeUTVyn88Nk zfcP8cMXosdwI-;yh->7``0!=&liQH$!5cIt+^ z6Spe4E&@}|3v*}PQmMNwIGu5p)9G`C!|dL}p)(}C;n2x=cl3LiW9lKgy;eGw-_Dk@ z_g;*iXiZ;uhGqI2tFGx;MnldO;R&ZIe8X=xPB_i&$KEB1g}y3)R-mGa#CD_sxx^zMaZY6;!t zCfZFGvF?7TUweeUD3x13v+{39ku$Y)xZ;* zwzy;XveRWv}f9Y$x}?voBi~|cf%*!Z|LE$r?j1tybueCltL;% z+BcWB32INMl*TuwA1tCjZKs<+i%CY zikqEPKAtW&Q;WXqMnm|Nd8lRkrhp7smy1Xw-ZR)E**S>#zvL~VkZ4Ol&GkUo6*XofB*srAbB z{`K0L?cCZ3g2r2C&ZytE|K-M_zHnpt=G?-K#mcgAwX(QiEUnzoD@*!vX?A(;16}^B zj-~7R1Q0*~0R#|0009ILKmY**o-l#ekF1RD<+BO?{Qna+uyhvz1Q0*~0R#|0009IL zKmdV95a7@MKLQ$;AbVAb|bPmeDa^SZ|UzmVYjxg-F@(t4|DmcsqAMvudvjIaTNm!3>xp+3 zwrT`h4H4b>_~h9_{O((2Jh2uPNy^?7o-#@-V%dJ9W^NgdS?@Fxt0VB5eqhKU6}6~> z@nW&38`TYIW4N-Rn%OY?s_O~u>ilA9IUJ(E44QuD;+gohRZp0KunaTMuE=0mo?Fmk z?cb=(EtD!Zwd?v#ZM*|oP!(TYtmt#s$|`GoCk_f)MZc<7^zy8}q{$#OeGwR**tEqR zqg~3EHFe^qFjmk%8p+?B%4TG!io5>pnjHwkYzA)l+DHgYqnMERD~DkcBt?=bi97to z9)DkstY-&XI|ZhC2FSHiBEt@9$vJh2Ypx4zS^r4R)UMg;3E!>vE@o-tTuhykx^&(&3fJRw)#V_w?unY-wFHac-ULQuX=Vv z#cC(|qHcGu$*Of*wtKr)Ggl3@%4nAt7w7d-ITra%J}Ub4w%nZ(m58fYGy{>Z&nxJu`4U-zX*~-@F%8a`_7v zvJZ3ZO+st`5xX|=%8z1mDzS)~SiA3Yl9qV6Dtc*i(Q|i}>G6y4h%zl}uR~^A`?)Cg z4sZJgVh?YXCLh(Z;jzNa_eb*eYiT1`tqIeSj|BOPTpYr=W}9k0c}+)-bRGfX$_TGNvuAY)=@^Q&^?mhe^=o=S=>P6?6TJt(ml?HT20>{?-~ zYVKacIwK=n=-#EqD~U=a*U>+61FGsNoLpnlAP=asuXr>?&Bc-Y^vl`rhr4F`t`RcV z+;zY8JKO3cEVn1hT=|Ork(N|cTP$mx;@Wt7#0qXXm%sFKcKi91Ho~_lo$Ho%X{Pn2 zCb6Z{T9{ej`~P3~T{RUTfB*srAbIQ}Y@5A3qR4009ILKmY**5I_I{1Q0-A4+7i8>%l{wC zymIW=u~NxAUz}N+uC7j)t21XO&Y5S$#HyGS6I16euB}d67pGUN7tf1xGuDOa>co^V zrzg%$uB}eY2vMCVno}1{^X$~QnW@PS<)2{{o>p3F|M|btTv`4h;EH}_rCiaM7Uw_E zuS}kuJU?;v+{9#2D^9(2cIK_=3vZsAIXiu!n3{7=o&RS~{c%SA#}5P$KmY**5I_I{ z1Q0*~0R#|u!USGFvNF1tj~;mc|AY-J-9-Qa1Q0*~0R#|0009ILK%h%Nz5n0K*9uzS z|6j_STI#x(-y(nj0tg_000IagfB*srAb`Mb0^7fkJ-M>-`p?nQnNmiGv**{Q zRwpN_7sa`WbLY*o6RWEiE>4)|&!4+^zPh%yR<-u>R()x4r827AGZD>9IX`nApy-d0B~5v1yAteqTauwxXAob#1wHd0yB0vbFKC(e|~5 zZD}8rDzopFDq4AQSu3y1&!5)DMt$M?w(F#2M_2oS88rQAW;^S`Yk0O3MAs)@yt(Z))RF$8D>iuD!Zg(dVv}Ro?iHRza)iSM`csp4FE$ z%M46k1V(F^jCLts>n=Z5xNeQ)jhSr5b}VsMO4)%h%x2(*uZ?QW^z1deY6faJqAHAH za{8@zerY7XG?7-BFY2Zf*j2-Kn_gA;MzJ^LS0B7k&E;oivY)-vinYY%lN8qLnDl={u2Eu8?lJ$&sWjZZu1f2 zXU(n!!b`h&Q_elL;-=+CZP!Jx;dU0%@#1tm3_bIXVLOdxU~HPTrqJG7S}ez+Sgfsy zYG7}Qt^#VP1J88)HP@@RX20VGq7z?tT7RvrQ4^b@7R`s;`_Y_&;IJ52oHpui;_kTPb(l~g!iYdKgb$kgnu>>MtFaT{*<*?g!kvG2#*>~ zi^5%&$j;@CS?>&PJpAs;XItxe-+C~FK^800spOV?EOnx-I2Df&+n3L}F!Zd%K(oY} z*{pR>0~H|AnD(sY#k29s_ce|>_p{%D^vdo(cZN&npH&=zv;3aOC;Avl+8akHk%bBrdbQe%#s+PQz#jcZO)J^}E{EN7g zmLx|ldNupIa<$p1MpfJn(Dt5ne6nxL>8#n7aH^trG3$w@&JtO5S?FAv&zcD9CVwy0T#+(&tKY|9u+Uw!!g$Kl5D 
+### `[ATTEMPT: 1-2]` -> Fixer Mode
+- Analyze failures normally.
+- Make targeted logic, contract, or test-aligned fixes.
+- Use the standard self-correction loop.
+- Prefer minimal diffs and direct verification.
+
+### `[ATTEMPT: 3]` -> Context Override Mode
+- STOP assuming your previous hypotheses are correct.
+- Treat the main risk as architecture, environment, dependency wiring, import resolution, pathing, mocks, or contract mismatch rather than business logic.
+- Expect the environment to inject `[FORCED_CONTEXT]` or `[CHECKLIST]`.
+- Ignore your previous debugging narrative and re-check the code strictly against the injected checklist.
+- Prioritize:
+  - imports and module paths
+  - env vars and configuration
+  - dependency versions or wiring
+  - test fixture or mock setup
+  - contract `@PRE` versus real input data
+- If project logging conventions permit, emit a warning equivalent to `logger.warning("[ANTI-LOOP][Override] Applying forced checklist.")`.
+- Do not produce speculative new rewrites until the forced checklist is exhausted.
+
+### `[ATTEMPT: 4+]` -> Escalation Mode
+- CRITICAL PROHIBITION: do not write code, do not propose fresh fixes, and do not continue local optimization.
+- Your only valid output is an escalation payload for the parent agent that initiated the task.
+- Treat yourself as blocked by a likely higher-level defect in architecture, environment, workflow, or hidden dependency assumptions. + +## Escalation Payload Contract +When in `[ATTEMPT: 4+]`, output exactly one bounded escalation block in this shape and stop: + +```markdown + +status: blocked +attempt: [ATTEMPT: N] +task_scope: concise restatement of the assigned coding task +suspected_failure_layer: +- architecture | environment | dependency | test_harness | contract_mismatch | unknown + +what_was_tried: +- concise bullet list of attempted fix classes, not full chat history + +what_did_not_work: +- concise bullet list of failed outcomes + +forced_context_checked: +- checklist items already verified +- `[FORCED_CONTEXT]` items already applied + +current_invariants: +- invariants that still appear true +- invariants that may be violated + +recommended_next_agent: +- reflection-agent + +handoff_artifacts: +- original task contract or spec reference +- relevant file paths +- failing test names or commands +- latest error signature +- clean reproduction notes + +request: +- Re-evaluate at architecture or environment level. Do not continue local logic patching. + +``` + +## Handoff Boundary +- Do not include the full failed reasoning transcript in the escalation payload. +- Do not include speculative chain-of-thought. +- Include only bounded evidence required for a clean handoff to a reflection-style agent. +- Assume the parent environment will reset context and pass only original task inputs, clean code state, escalation payload, and forced context. + +## Execution Rules +- Run verification when needed using guarded commands. +- Backend verification path: `cd backend && .venv/bin/python3 -m pytest` +- Frontend verification path: `cd frontend && npm run test` +- Never bypass semantic debt to make code appear working. +- Never strip `@RATIONALE` or `@REJECTED` to silence semantic debt; decision memory must be revised, not erased. +- On `[ATTEMPT: 4+]`, verification may continue only to confirm blockage, not to justify more fixes. +- Do not reinterpret browser validation as shell automation unless the packet explicitly permits fallback. + +## Completion Gate +- No broken `[DEF]`. +- No missing required contracts for effective complexity. +- No orphan critical blocks. +- No retained workaround discovered via `logger.explore()` may ship without local `@RATIONALE` and `@REJECTED`. +- No implementation may silently re-enable an upstream rejected path. +- Handoff must state complexity, contracts, decision-memory updates, remaining semantic debt, or the bounded `` payload when anti-loop escalation is triggered. + +## Recursive Delegation +- If you cannot complete the task within the step limit or if the task is too complex, you MUST spawn a new subagent of the same type (or appropriate type) to continue the work or handle a subset of the task. +- Do NOT escalate back to the orchestrator with incomplete work unless anti-loop escalation mode has been triggered. +- Use the `task` tool to launch these subagents. + diff --git a/.kilo/agents/qa-tester.md b/.kilo/agents/qa-tester.md index acfed0d3..12128d08 100644 --- a/.kilo/agents/qa-tester.md +++ b/.kilo/agents/qa-tester.md @@ -66,8 +66,11 @@ When you cannot execute the browser directly, return: - `close_required` - `why_browser_is_needed` - optional marker: `[NEED_CONTEXT: parent_browser_session_required]` + ## Completion Gate -- Contract validated. +- Contract validated via Orthogonal Semantic Projections. 
+- Zero Tautological tests (Logic Mirrors) detected. +- ADR constraints (`@REJECTED`) are covered by negative tests. - All declared fixtures covered. - All declared edges covered. - All declared Invariants verified. diff --git a/.kilo/mcp.json b/.kilo/mcp.json index a50de7ec..2f79d3a8 100644 --- a/.kilo/mcp.json +++ b/.kilo/mcp.json @@ -1,42 +1,5 @@ { "mcpServers": { - "axiom-core": { - "command": "/home/busya/dev/ast-mcp-core-server/.venv/bin/python", - "args": [ - "-c", - "from src.server import main; main()" - ], - "env": { - "PYTHONPATH": "/home/busya/dev/ast-mcp-core-server" - }, - "alwaysAllow": [ - "read_grace_outline_tool", - "ast_search_tool", - "get_semantic_context_tool", - "build_task_context_tool", - "audit_contracts_tool", - "diff_contract_semantics_tool", - "simulate_patch_tool", - "patch_contract_tool", - "rename_contract_id_tool", - "move_contract_tool", - "extract_contract_tool", - "infer_missing_relations_tool", - "map_runtime_trace_to_contracts_tool", - "scaffold_contract_tests_tool", - "search_contracts_tool", - "reindex_workspace_tool", - "prune_contract_metadata_tool", - "workspace_semantic_health_tool", - "trace_tests_for_contract_tool", - "guarded_patch_contract_tool", - "impact_analysis_tool", - "update_contract_metadata_tool", - "wrap_node_in_contract_tool", - "rename_semantic_tag_tool", - "scan_vulnerabilities" - ] - }, "chrome-devtools": { "command": "npx", "args": [ @@ -47,6 +10,11 @@ "alwaysAllow": [ "take_snapshot" ] + }, + "axiom": { + "type": "local", + "command": "/home/busya/dev/axiom-mcp-rust-port/target/release/axiom-mcp-server-rs", + "enabled": true } } -} +} \ No newline at end of file diff --git a/.kilo/skills/semantics-testing/SKILL.md b/.kilo/skills/semantics-testing/SKILL.md index e72d0532..a3162a28 100644 --- a/.kilo/skills/semantics-testing/SKILL.md +++ b/.kilo/skills/semantics-testing/SKILL.md @@ -9,6 +9,14 @@ description: Core protocol for Test Constraints, External Ontology, Graph Noise # @RELATION: DEPENDS_ON -> [Std:Semantics:Core] # @INVARIANT: Test modules must trace back to production @INVARIANT tags without flooding the Semantic Graph with orphan nodes. +## Core Mandate +- Tests are born strictly from the contract. Bare code without a contract is blind. +- Verify `@POST`, `@UX_STATE`, `@TEST_EDGE`, and every `@TEST_INVARIANT -> VERIFIED_BY`. +- **Orthogonal Testing:** You MUST validate code through independent, non-intersecting semantic projections (e.g., Data Integrity, UX State Machine, Security/Permissions, Fault Tolerance). You must ensure that satisfying a data contract in Projection A does not silently violate an invariant in Projection B. +- **Anti-Tautology Rule (No Logic Mirrors):** You are FORBIDDEN from writing tautological tests. Never duplicate the production algorithm inside the test to dynamically compute an `expected_result`. Use deterministic, hardcoded `@TEST_FIXTURE` data. A test that mirrors the implementation proves nothing. +- **SUT Mocking Ban:** Never mock the System Under Test (SUT). You may mock external boundaries (`[EXT:...]` or DB drivers), but you MUST NOT mock the local `[DEF]` node you are actively verifying. +- If the contract is violated, or an upstream `@REJECTED` ADR path is reachable, the test MUST fail. + ## 0. QA RATIONALE (LLM PHYSICS IN TESTING) You are an Agentic QA Engineer. Your primary failure modes are: 1. **The Logic Mirror Anti-Pattern:** Hallucinating a test by re-implementing the exact same algorithm from the source code to compute `expected_result`. 
This creates a tautology (a test that always passes but proves nothing). diff --git a/.kilocode/mcp.json b/.kilocode/mcp.json index 30d26411..b1a3e9e7 100644 --- a/.kilocode/mcp.json +++ b/.kilocode/mcp.json @@ -1 +1,15 @@ -{"mcpServers":{"axiom-core":{"command":"/home/busya/dev/ast-mcp-core-server/.venv/bin/python","args":["-c","from src.server import main; main()"],"env":{"PYTHONPATH":"/home/busya/dev/ast-mcp-core-server"},"alwaysAllow":["read_grace_outline_tool","ast_search_tool","get_semantic_context_tool","build_task_context_tool","diff_contract_semantics_tool","simulate_patch_tool","patch_contract_tool","rename_contract_id_tool","move_contract_tool","extract_contract_tool","infer_missing_relations_tool","map_runtime_trace_to_contracts_tool","scaffold_contract_tests_tool","search_contracts_tool","reindex_workspace_tool","prune_contract_metadata_tool","workspace_semantic_health_tool","trace_tests_for_contract_tool","guarded_patch_contract_tool","impact_analysis_tool","wrap_node_in_contract_tool","rename_semantic_tag_tool","scan_vulnerabilities","find_contract_tool","safe_patch_tool","run_workspace_command_tool","rebuild_workspace_semantic_index_tool","audit_contracts_tool","update_contract_metadata_tool","rebuild_workspace_semantic_index","audit_belief_protocol_tool","patch_belief_protocol_tool"]},"chrome-devtools":{"command":"npx","args":["chrome-devtools-mcp@latest","--browser-url=http://127.0.0.1:9222"],"disabled":false,"alwaysAllow":["take_snapshot"]}}} \ No newline at end of file +{ + "mcpServers": { + "chrome-devtools": { + "command": "npx", + "args": [ + "chrome-devtools-mcp@latest", + "--browser-url=http://127.0.0.1:9222" + ], + "disabled": false, + "alwaysAllow": [ + "take_snapshot" + ] + } + } +} \ No newline at end of file diff --git a/backend/src/api/routes/dataset_review.py b/backend/src/api/routes/dataset_review.py index 130ce4c9..76763dbb 100644 --- a/backend/src/api/routes/dataset_review.py +++ b/backend/src/api/routes/dataset_review.py @@ -1,2484 +1,37 @@ # [DEF:DatasetReviewApi:Module] # @COMPLEXITY: 3 # @SEMANTICS: dataset_review, api, session_lifecycle, exports, rbac, feature_flags -# @PURPOSE: Expose dataset review session lifecycle and export endpoints for backend US1. +# @PURPOSE: Thin facade re-exporting router and public symbols from decomposed dataset review API sub-modules. # @LAYER: API -# @RELATION: [DEPENDS_ON] ->[AppDependencies] -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator] -# @PRE: Authenticated user and valid environment/session scope are required for all mutations and reads. -# @POST: Returns ownership-scoped session state and export payloads with feature-flag/RBAC enforcement. -# @SIDE_EFFECT: Persists session state and may enqueue recovery task. -# @DATA_CONTRACT: Input[HTTP Request] -> Output[SessionSummary | SessionDetail | ExportArtifactResponse | HTTP 204] -# @INVARIANT: No cross-user session leakage is allowed; export payloads only expose the current user's accessible session. +# @RATIONALE: Original 2484-line monolith violated INV_7 (400-line module limit) by 6x. Decomposed into _dependencies (DTOs/guards/serializers) and _routes (handlers). +# @REJECTED: Keeping all routes in one file because it exceeded the fractal limit by 6x and accumulated severe structural erosion risk. 
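To illustrate what the thin facade buys, here is a sketch of the import-stability property using a hypothetical caller (module paths are the ones shown in this diff; the caller itself is invented): existing code that imports from the old monolith path keeps working because the facade re-exports the decomposed symbols.

```python
# Hypothetical caller. The old import path stays valid because the facade
# module re-exports everything from the decomposed package, so callers are
# unaware of the _dependencies/_routes split.
from src.api.routes.dataset_review import router, StartSessionRequest

# Equivalent direct import from the decomposed package; new code could use
# this path, but nothing forces existing callers to migrate.
from src.api.routes.dataset_review_pkg._dependencies import (
    StartSessionRequest as DirectStartSessionRequest,
)

# Both names resolve to the same class object, so type checks, isinstance
# tests, and FastAPI dependency wiring are unaffected by the decomposition.
assert StartSessionRequest is DirectStartSessionRequest
```

This is the usual trade-off of a re-export facade: callers get a stable public surface while the package behind it can keep splitting to stay under the module-size limit.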
-from __future__ import annotations - -# [DEF:DatasetReviewApi.imports:Block] -import json -from datetime import datetime -from typing import Any, Dict, List, Optional, Union, cast - -from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, status -from pydantic import BaseModel, Field -from sqlalchemy.orm import Session - -from src.core.database import get_db -from src.core.logger import belief_scope, logger -from src.dependencies import ( - get_config_manager, - get_current_user, - get_task_manager, - has_permission, +from src.api.routes.dataset_review_pkg._dependencies import ( # noqa: F401 + StartSessionRequest, + UpdateSessionRequest, + SessionCollectionResponse, + ExportArtifactResponse, + FieldSemanticUpdateRequest, + FeedbackRequest, + ClarificationAnswerRequest, + ClarificationSessionSummaryResponse, + ClarificationStateResponse, + ClarificationAnswerResultResponse, + FeedbackResponse, + ApproveMappingRequest, + BatchApproveSemanticItemRequest, + BatchApproveSemanticRequest, + BatchApproveMappingRequest, + PreviewEnqueueResultResponse, + MappingCollectionResponse, + UpdateExecutionMappingRequest, + LaunchDatasetResponse, + _require_auto_review_flag, + _require_clarification_flag, + _require_execution_flag, + _get_repository, + _get_orchestrator, + _get_clarification_engine, ) -from src.models.auth import User -from src.models.dataset_review import ( - AnswerKind, - ApprovalState, - ArtifactFormat, - CandidateStatus, - ClarificationSession, - DatasetReviewSession, - ExecutionMapping, - FieldProvenance, - MappingMethod, - PreviewStatus, - QuestionState, - ReadinessState, - RecommendedAction, - SemanticCandidate, - SemanticFieldEntry, - SessionStatus, -) -from src.schemas.dataset_review import ( - ClarificationAnswerDto, - ClarificationQuestionDto, - ClarificationSessionDto, - CompiledPreviewDto, - DatasetRunContextDto, - ExecutionMappingDto, - SemanticFieldEntryDto, - SessionDetail, - SessionSummary, - ValidationFindingDto, -) -from src.services.dataset_review.clarification_engine import ( - ClarificationAnswerCommand, - ClarificationEngine, - ClarificationQuestionPayload, - ClarificationStateResult, -) -from src.services.dataset_review.orchestrator import ( - DatasetReviewOrchestrator, - LaunchDatasetCommand, - PreparePreviewCommand, - StartSessionCommand, -) -from src.services.dataset_review.repositories.session_repository import ( - DatasetReviewSessionRepository, - DatasetReviewSessionVersionConflictError, -) -# [/DEF:DatasetReviewApi.imports:Block] - -router = APIRouter(prefix="/api/dataset-orchestration", tags=["Dataset Orchestration"]) - - -# [DEF:StartSessionRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for starting one dataset review session from a Superset link or dataset selection. -class StartSessionRequest(BaseModel): - source_kind: str = Field(..., pattern="^(superset_link|dataset_selection)$") - source_input: str = Field(..., min_length=1) - environment_id: str = Field(..., min_length=1) - - -# [/DEF:StartSessionRequest:Class] - - -# [DEF:UpdateSessionRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for lifecycle state updates on an existing session. -class UpdateSessionRequest(BaseModel): - status: SessionStatus - note: Optional[str] = None - - -# [/DEF:UpdateSessionRequest:Class] - - -# [DEF:SessionCollectionResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Paginated ownership-scoped dataset review session collection response. 
-class SessionCollectionResponse(BaseModel): - items: List[SessionSummary] - total: int - page: int - page_size: int - has_next: bool - - -# [/DEF:SessionCollectionResponse:Class] - - -# [DEF:ExportArtifactResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Inline export response for documentation or validation outputs without introducing unrelated persistence changes. -class ExportArtifactResponse(BaseModel): - artifact_id: str - session_id: str - artifact_type: str - format: str - storage_ref: str - created_by_user_id: str - created_at: Optional[str] = None - content: Dict[str, Any] - - -# [/DEF:ExportArtifactResponse:Class] - - -# [DEF:FieldSemanticUpdateRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for field-level semantic candidate acceptance or manual override. -class FieldSemanticUpdateRequest(BaseModel): - candidate_id: Optional[str] = None - verbose_name: Optional[str] = None - description: Optional[str] = None - display_format: Optional[str] = None - lock_field: bool = False - resolution_note: Optional[str] = None - - -# [/DEF:FieldSemanticUpdateRequest:Class] - - -# [DEF:FeedbackRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for thumbs up/down feedback persistence on AI-assisted content. -class FeedbackRequest(BaseModel): - feedback: str = Field(..., pattern="^(up|down)$") - - -# [/DEF:FeedbackRequest:Class] - - -# [DEF:ClarificationAnswerRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for submitting one clarification answer. -class ClarificationAnswerRequest(BaseModel): - question_id: str = Field(..., min_length=1) - answer_kind: AnswerKind - answer_value: Optional[str] = None - - -# [/DEF:ClarificationAnswerRequest:Class] - - -# [DEF:ClarificationSessionSummaryResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Summary DTO for current clarification session state without exposing historical noise. -class ClarificationSessionSummaryResponse(BaseModel): - clarification_session_id: str - session_id: str - status: str - current_question_id: Optional[str] = None - resolved_count: int - remaining_count: int - summary_delta: Optional[str] = None - - -# [/DEF:ClarificationSessionSummaryResponse:Class] - - -# [DEF:ClarificationStateResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Response DTO for current clarification state and active question payload. -class ClarificationStateResponse(BaseModel): - clarification_session: Optional[ClarificationSessionSummaryResponse] = None - current_question: Optional[ClarificationQuestionDto] = None - - -# [/DEF:ClarificationStateResponse:Class] - - -# [DEF:ClarificationAnswerResultResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Response DTO for one clarification answer mutation result. -class ClarificationAnswerResultResponse(BaseModel): - clarification_state: ClarificationStateResponse - session: SessionSummary - changed_findings: List[ValidationFindingDto] - - -# [/DEF:ClarificationAnswerResultResponse:Class] - - -# [DEF:FeedbackResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Minimal response DTO for persisted AI feedback actions. -class FeedbackResponse(BaseModel): - target_id: str - feedback: str - - -# [/DEF:FeedbackResponse:Class] - - -# [DEF:ApproveMappingRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Optional request DTO for explicit mapping approval audit notes. 
-class ApproveMappingRequest(BaseModel): - approval_note: Optional[str] = None - - -# [/DEF:ApproveMappingRequest:Class] - - -# [DEF:BatchApproveSemanticItemRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for one batch semantic-approval item aligned with single-field acceptance semantics. -class BatchApproveSemanticItemRequest(BaseModel): - field_id: str = Field(..., min_length=1) - candidate_id: str = Field(..., min_length=1) - lock_field: bool = False - - -# [/DEF:BatchApproveSemanticItemRequest:Class] - - -# [DEF:BatchApproveSemanticRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for explicit batch semantic approvals inside one owned session scope. -class BatchApproveSemanticRequest(BaseModel): - items: List[BatchApproveSemanticItemRequest] = Field(..., min_length=1) - - -# [/DEF:BatchApproveSemanticRequest:Class] - - -# [DEF:BatchApproveMappingRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for explicit batch mapping approvals aligned with single-item approval semantics. -class BatchApproveMappingRequest(BaseModel): - mapping_ids: List[str] = Field(..., min_length=1) - approval_note: Optional[str] = None - - -# [/DEF:BatchApproveMappingRequest:Class] - - -# [DEF:PreviewEnqueueResultResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Contract-compliant async preview trigger response exposing only enqueue state. -class PreviewEnqueueResultResponse(BaseModel): - session_id: str - session_version: Optional[int] = None - preview_status: str - task_id: Optional[str] = None - - -# [/DEF:PreviewEnqueueResultResponse:Class] - - -# [DEF:MappingCollectionResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Contract-compliant wrapper for execution mapping list responses. -class MappingCollectionResponse(BaseModel): - items: List[ExecutionMappingDto] - - -# [/DEF:MappingCollectionResponse:Class] - - -# [DEF:UpdateExecutionMappingRequest:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Request DTO for one manual execution-mapping override update without introducing unrelated bulk mutation semantics. -class UpdateExecutionMappingRequest(BaseModel): - effective_value: Optional[Any] = None - mapping_method: Optional[str] = Field( - default=None, - pattern="^(manual_override|direct_match|heuristic_match|semantic_match)$", - ) - transformation_note: Optional[str] = None - - -# [/DEF:UpdateExecutionMappingRequest:Class] - - -# [DEF:LaunchDatasetResponse:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Contract-compliant launch result exposing audited run context and SQL Lab redirect target. -class LaunchDatasetResponse(BaseModel): - run_context: DatasetRunContextDto - redirect_url: str - - -# [/DEF:LaunchDatasetResponse:Class] - - -# [DEF:_require_auto_review_flag:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag. -# @RELATION: [DEPENDS_ON] ->[ConfigManager] -def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool: - with belief_scope("dataset_review.require_auto_review_flag"): - if not config_manager.get_config().settings.ff_dataset_auto_review: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Dataset auto review feature is disabled", - ) - return True - - -# [/DEF:_require_auto_review_flag:Function] - - -# [DEF:_require_clarification_flag:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Guard clarification-specific US2 endpoints behind the configured feature flag. 
-# @RELATION: [DEPENDS_ON] ->[ConfigManager] -def _require_clarification_flag(config_manager=Depends(get_config_manager)) -> bool: - with belief_scope("dataset_review.require_clarification_flag"): - if not config_manager.get_config().settings.ff_dataset_clarification: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Dataset clarification feature is disabled", - ) - return True - - -# [/DEF:_require_clarification_flag:Function] - - -# [DEF:_require_execution_flag:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Guard US3 execution endpoints behind the configured feature flag. -# @RELATION: [DEPENDS_ON] ->[ConfigManager] -def _require_execution_flag(config_manager=Depends(get_config_manager)) -> bool: - with belief_scope("dataset_review.require_execution_flag"): - if not config_manager.get_config().settings.ff_dataset_execution: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Dataset execution feature is disabled", - ) - return True - - -# [/DEF:_require_execution_flag:Function] - - -# [DEF:_get_repository:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Build repository dependency for dataset review session aggregate access. -def _get_repository(db: Session = Depends(get_db)) -> DatasetReviewSessionRepository: - return DatasetReviewSessionRepository(db) - - -# [/DEF:_get_repository:Function] - - -# [DEF:_get_orchestrator:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Build orchestrator dependency for session lifecycle actions. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewOrchestrator] -def _get_orchestrator( - repository: DatasetReviewSessionRepository = Depends(_get_repository), - config_manager=Depends(get_config_manager), - task_manager=Depends(get_task_manager), -) -> DatasetReviewOrchestrator: - return DatasetReviewOrchestrator( - repository=repository, - config_manager=config_manager, - task_manager=task_manager, - ) - - -# [/DEF:_get_orchestrator:Function] - - -# [DEF:_get_clarification_engine:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Build clarification engine dependency for one-question-at-a-time guided clarification mutations. -# @RELATION: [DEPENDS_ON] ->[ClarificationEngine] -def _get_clarification_engine( - repository: DatasetReviewSessionRepository = Depends(_get_repository), -) -> ClarificationEngine: - return ClarificationEngine(repository=repository) - - -# [/DEF:_get_clarification_engine:Function] - - -# [DEF:_serialize_session_summary:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map SQLAlchemy session aggregate root into stable API summary DTO. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @RELATION: [DEPENDS_ON] ->[SessionSummary] -def _serialize_session_summary(session: DatasetReviewSession) -> SessionSummary: - summary = SessionSummary.model_validate(session, from_attributes=True) - summary.session_version = summary.version - return summary - - -# [/DEF:_serialize_session_summary:Function] - - -# [DEF:_serialize_session_detail:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map SQLAlchemy session aggregate root into stable API detail DTO. -# @RELATION: [DEPENDS_ON] ->[SessionDetail] -def _serialize_session_detail(session: DatasetReviewSession) -> SessionDetail: - detail = SessionDetail.model_validate(session, from_attributes=True) - detail.session_version = detail.version - return detail - - -# [/DEF:_serialize_session_detail:Function] - - -# [DEF:_require_session_version_header:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Read the optimistic-lock session version header required by dataset review mutation endpoints. 
-def _require_session_version_header( - session_version: int = Header(..., alias="X-Session-Version", ge=0), -) -> int: - return session_version - - -# [/DEF:_require_session_version_header:Function] - - -# [DEF:_enforce_session_version:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Convert repository optimistic-lock conflicts into deterministic HTTP 409 responses. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @PRE: Session belongs to the active owner-scoped mutation flow and expected_version comes from the caller's optimistic-lock header. -# @POST: Returns the same session when versions match or raises HTTP 409 with deterministic conflict payload. -# @SIDE_EFFECT: none. -# @DATA_CONTRACT: Input[DatasetReviewSessionRepository,DatasetReviewSession,int] -> Output[DatasetReviewSession|HTTPException] -def _enforce_session_version( - repository: DatasetReviewSessionRepository, - session: DatasetReviewSession, - expected_version: int, -) -> DatasetReviewSession: - with belief_scope("_enforce_session_version"): - logger.reason( - "Checking dataset review optimistic-lock version", - extra={ - "session_id": session.session_id, - "expected_version": expected_version, - }, - ) - try: - repository.require_session_version(session, expected_version) - except DatasetReviewSessionVersionConflictError as exc: - logger.explore( - "Dataset review optimistic-lock conflict detected", - extra={ - "session_id": exc.session_id, - "expected_version": exc.expected_version, - "actual_version": exc.actual_version, - }, - ) - raise _build_session_version_conflict_http_exception(exc) from exc - logger.reflect( - "Dataset review optimistic-lock version accepted", - extra={ - "session_id": session.session_id, - "version": getattr(session, "version", None), - }, - ) - return session - - -# [/DEF:_enforce_session_version:Function] - - -# [DEF:_build_session_version_conflict_http_exception:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Normalize optimistic-lock conflict errors into deterministic dataset-review HTTP 409 responses. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionVersionConflictError] -def _build_session_version_conflict_http_exception( - exc: DatasetReviewSessionVersionConflictError, -) -> HTTPException: - return HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail={ - "error_code": "session_version_conflict", - "message": str(exc), - "session_id": exc.session_id, - "expected_version": exc.expected_version, - "actual_version": exc.actual_version, - }, - ) - - -# [/DEF:_build_session_version_conflict_http_exception:Function] - - -# [DEF:_prepare_owned_session_mutation:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Resolve owner-scoped mutation session and enforce optimistic-lock version before changing dataset review state. -# @RELATION: [CALLS] ->[_get_owned_session_or_404] -# @RELATION: [CALLS] ->[_require_owner_mutation_scope] -# @RELATION: [CALLS] ->[_enforce_session_version] -# @PRE: session_id targets an existing session visible to current_user and expected_version comes from the client mutation header. -# @POST: Returns the owned session only when access and optimistic-lock checks both pass. -# @SIDE_EFFECT: none. 
-# @DATA_CONTRACT: Input[DatasetReviewSessionRepository,str,User,int] -> Output[DatasetReviewSession|HTTPException] -def _prepare_owned_session_mutation( - repository: DatasetReviewSessionRepository, - session_id: str, - current_user: User, - expected_version: int, -) -> DatasetReviewSession: - with belief_scope("_prepare_owned_session_mutation"): - logger.reason( - "Preparing owner-scoped dataset review mutation", - extra={"session_id": session_id, "user_id": current_user.id}, - ) - session = _get_owned_session_or_404(repository, session_id, current_user) - _require_owner_mutation_scope(session, current_user) - guarded_session = _enforce_session_version( - repository, session, expected_version - ) - logger.reflect( - "Dataset review mutation session passed ownership and version guards", - extra={ - "session_id": guarded_session.session_id, - "user_id": current_user.id, - "version": getattr(guarded_session, "version", None), - }, - ) - return guarded_session - - -# [/DEF:_prepare_owned_session_mutation:Function] - - -# [DEF:_commit_owned_session_mutation:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Centralize dataset-review session version bumping and commit semantics for owner-scoped mutation endpoints. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @PRE: Session mutation has already passed ownership and optimistic-lock guards. -# @POST: Session version is bumped, changes are committed, and requested targets are refreshed before returning the same session. -# @SIDE_EFFECT: Persists the current transaction and refreshes ORM targets from the database. -# @DATA_CONTRACT: Input[DatasetReviewSessionRepository,DatasetReviewSession,List[Any]|None] -> Output[DatasetReviewSession] -def _commit_owned_session_mutation( - repository: DatasetReviewSessionRepository, - session: DatasetReviewSession, - *, - refresh_targets: Optional[List[Any]] = None, -) -> DatasetReviewSession: - with belief_scope("_commit_owned_session_mutation"): - logger.reason( - "Committing dataset review mutation", - extra={"session_id": session.session_id}, - ) - try: - repository.commit_session_mutation( - session, - refresh_targets=refresh_targets, - ) - except DatasetReviewSessionVersionConflictError as exc: - logger.explore( - "Dataset review mutation commit detected stale version", - extra={ - "session_id": exc.session_id, - "expected_version": exc.expected_version, - "actual_version": exc.actual_version, - }, - ) - raise _build_session_version_conflict_http_exception(exc) from exc - logger.reflect( - "Dataset review mutation committed and refreshed", - extra={ - "session_id": session.session_id, - "version": getattr(session, "version", None), - "refresh_count": len(refresh_targets or []), - }, - ) - return session - - -# [/DEF:_commit_owned_session_mutation:Function] - - -# [DEF:_serialize_semantic_field:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map one semantic field aggregate into stable field-level DTO output. 
-# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntryDto] -def _serialize_semantic_field(field: SemanticFieldEntry) -> SemanticFieldEntryDto: - payload = SemanticFieldEntryDto.model_validate(field, from_attributes=True) - session_ref = getattr(field, "session", None) - version_value = getattr(session_ref, "version", None) - payload.session_version = ( - int(version_value or 0) if version_value is not None else None - ) - return payload - - -# [/DEF:_serialize_semantic_field:Function] - - -# [DEF:_serialize_clarification_question_payload:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Convert clarification engine payload into API DTO aligned with the clarification contract. -# @RELATION: [DEPENDS_ON] ->[ClarificationQuestionDto] -def _serialize_clarification_question_payload( - payload: Optional[ClarificationQuestionPayload], -) -> Optional[ClarificationQuestionDto]: - if payload is None: - return None - return ClarificationQuestionDto.model_validate( - { - "question_id": payload.question_id, - "clarification_session_id": payload.clarification_session_id, - "topic_ref": payload.topic_ref, - "question_text": payload.question_text, - "why_it_matters": payload.why_it_matters, - "current_guess": payload.current_guess, - "priority": payload.priority, - "state": payload.state, - "options": payload.options, - "answer": None, - "created_at": datetime.utcnow(), - "updated_at": datetime.utcnow(), - } - ) - - -# [/DEF:_serialize_clarification_question_payload:Function] - - -# [DEF:_serialize_clarification_state:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Convert clarification engine state into stable API response payload. -# @RELATION: [DEPENDS_ON] ->[ClarificationStateResponse] -def _serialize_clarification_state( - state: ClarificationStateResult, -) -> ClarificationStateResponse: - return ClarificationStateResponse( - clarification_session=ClarificationSessionSummaryResponse( - clarification_session_id=state.clarification_session.clarification_session_id, - session_id=state.clarification_session.session_id, - status=state.clarification_session.status.value, - current_question_id=state.clarification_session.current_question_id, - resolved_count=state.clarification_session.resolved_count, - remaining_count=state.clarification_session.remaining_count, - summary_delta=state.clarification_session.summary_delta, - ), - current_question=_serialize_clarification_question_payload( - state.current_question - ), - ) - - -# [/DEF:_serialize_clarification_state:Function] - - -# [DEF:_serialize_empty_clarification_state:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Return a stable empty clarification payload for sessions that have not started clarification yet. -# @RELATION: [DEPENDS_ON] ->[ClarificationStateResponse] -def _serialize_empty_clarification_state() -> ClarificationStateResponse: - return ClarificationStateResponse( - clarification_session=None, - current_question=None, - ) - - -# [/DEF:_serialize_empty_clarification_state:Function] - - -# [DEF:_get_owned_session_or_404:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Resolve one session for current user or collaborator scope, returning 404 when inaccessible. -# @RELATION: [CALLS] ->[load_detail] -# @PRE: session_id is a non-empty identifier and current_user is authenticated. -# @POST: returns accessible session detail or raises HTTP 404 without leaking foreign-session existence. -# @SIDE_EFFECT: none. 
-# @DATA_CONTRACT: Input[session_id:str,current_user:User] -> Output[DatasetReviewSession|HTTPException] -def _get_owned_session_or_404( - repository: DatasetReviewSessionRepository, - session_id: str, - current_user: User, -) -> DatasetReviewSession: - with belief_scope("_get_owned_session_or_404"): - logger.reason( - "Resolving dataset review session in current ownership scope", - extra={"session_id": session_id, "user_id": current_user.id}, - ) - session = repository.load_session_detail(session_id, current_user.id) - if session is None: - logger.explore( - "Dataset review session not found in current ownership scope", - extra={"session_id": session_id, "user_id": current_user.id}, - ) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail="Session not found" - ) - logger.reflect( - "Dataset review session resolved for current ownership scope", - extra={"session_id": session.session_id, "user_id": current_user.id}, - ) - return session - - -# [/DEF:_get_owned_session_or_404:Function] - - -# [DEF:_require_owner_mutation_scope:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Enforce owner-only mutation scope for dataset review write endpoints. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @PRE: Session is already ownership-visible to the current user. -# @POST: Returns the session when current user is owner, otherwise raises HTTP 403. -# @SIDE_EFFECT: none. -# @DATA_CONTRACT: Input[DatasetReviewSession,User] -> Output[DatasetReviewSession|HTTPException] -def _require_owner_mutation_scope( - session: DatasetReviewSession, - current_user: User, -) -> DatasetReviewSession: - with belief_scope("_require_owner_mutation_scope"): - logger.reason( - "Checking owner-only mutation scope for dataset review session", - extra={"session_id": session.session_id, "user_id": current_user.id}, - ) - if session.user_id != current_user.id: - logger.explore( - "Dataset review mutation blocked for non-owner", - extra={ - "session_id": session.session_id, - "session_owner_id": session.user_id, - "user_id": current_user.id, - }, - ) - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Only the owner can mutate dataset review state", - ) - logger.reflect( - "Dataset review mutation confirmed for session owner", - extra={"session_id": session.session_id, "user_id": current_user.id}, - ) - return session - - -# [/DEF:_require_owner_mutation_scope:Function] - - -# [DEF:_record_session_event:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Persist one explicit audit event for an owned dataset-review mutation endpoint. -# @RELATION: [CALLS] ->[SessionEventLogger.log_for_session] -def _record_session_event( - repository: DatasetReviewSessionRepository, - session: DatasetReviewSession, - current_user: User, - *, - event_type: str, - event_summary: str, - event_details: Optional[Dict[str, Any]] = None, -) -> None: - repository.event_logger.log_for_session( - session, - actor_user_id=current_user.id, - event_type=event_type, - event_summary=event_summary, - event_details=event_details or {}, - ) - - -# [/DEF:_record_session_event:Function] - - -# [DEF:_get_owned_mapping_or_404:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Resolve one execution mapping inside one owned session aggregate without leaking foreign-mapping existence. -# @RELATION: [DEPENDS_ON] ->[ExecutionMapping] -# @PRE: Session is accessible to current user. -# @POST: Returns the requested mapping or raises HTTP 404. -# @SIDE_EFFECT: none. 
-# @DATA_CONTRACT: Input[DatasetReviewSession,mapping_id:str] -> Output[ExecutionMapping|HTTPException] -def _get_owned_mapping_or_404( - session: DatasetReviewSession, - mapping_id: str, -) -> ExecutionMapping: - with belief_scope("_get_owned_mapping_or_404"): - logger.reason( - "Resolving execution mapping inside owned dataset review session", - extra={"session_id": session.session_id, "mapping_id": mapping_id}, - ) - for mapping in session.execution_mappings: - if mapping.mapping_id == mapping_id: - logger.reflect( - "Execution mapping resolved inside owned session", - extra={"session_id": session.session_id, "mapping_id": mapping_id}, - ) - return mapping - logger.explore( - "Execution mapping missing from owned dataset review session", - extra={"session_id": session.session_id, "mapping_id": mapping_id}, - ) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail="Execution mapping not found" - ) - - -# [/DEF:_get_owned_mapping_or_404:Function] - - -# [DEF:_get_owned_field_or_404:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Resolve a semantic field inside one owned session aggregate without leaking foreign-field existence. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @PRE: Session is accessible to current user. -# @POST: Returns the requested field or raises HTTP 404. -# @SIDE_EFFECT: none. -# @DATA_CONTRACT: Input[DatasetReviewSession,field_id:str] -> Output[SemanticFieldEntry|HTTPException] -def _get_owned_field_or_404( - session: DatasetReviewSession, - field_id: str, -) -> SemanticFieldEntry: - with belief_scope("_get_owned_field_or_404"): - logger.reason( - "Resolving semantic field inside owned dataset review session", - extra={"session_id": session.session_id, "field_id": field_id}, - ) - for field in session.semantic_fields: - if field.field_id == field_id: - logger.reflect( - "Semantic field resolved inside owned session", - extra={"session_id": session.session_id, "field_id": field_id}, - ) - return field - logger.explore( - "Semantic field missing from owned dataset review session", - extra={"session_id": session.session_id, "field_id": field_id}, - ) - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, detail="Semantic field not found" - ) - - -# [/DEF:_get_owned_field_or_404:Function] - - -# [DEF:_get_latest_clarification_session_or_404:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Resolve the latest clarification aggregate for one session or raise when clarification is unavailable. -# @RELATION: [DEPENDS_ON] ->[ClarificationSession] -def _get_latest_clarification_session_or_404( - session: DatasetReviewSession, -) -> ClarificationSession: - if not session.clarification_sessions: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Clarification session not found", - ) - return sorted( - session.clarification_sessions, - key=lambda item: (item.started_at, item.clarification_session_id), - reverse=True, - )[0] - - -# [/DEF:_get_latest_clarification_session_or_404:Function] - - -# [DEF:_map_candidate_provenance:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Translate accepted semantic candidate type into stable field provenance. 
-def _map_candidate_provenance(candidate: SemanticCandidate) -> FieldProvenance: - if str(candidate.match_type.value) == "exact": - return FieldProvenance.DICTIONARY_EXACT - if str(candidate.match_type.value) == "reference": - return FieldProvenance.REFERENCE_IMPORTED - if str(candidate.match_type.value) == "generated": - return FieldProvenance.AI_GENERATED - return FieldProvenance.FUZZY_INFERRED - - -# [/DEF:_map_candidate_provenance:Function] - - -# [DEF:_resolve_candidate_source_version:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Resolve the semantic source version for one accepted candidate from the loaded session aggregate. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @RELATION: [DEPENDS_ON] ->[SemanticSource] -def _resolve_candidate_source_version( - field: SemanticFieldEntry, source_id: Optional[str] -) -> Optional[str]: - if not source_id: - return None - session = getattr(field, "session", None) - if session is None: - return None - for source in getattr(session, "semantic_sources", []) or []: - if source.source_id == source_id: - return source.source_version - return None - - -# [/DEF:_resolve_candidate_source_version:Function] - - -# [DEF:_update_semantic_field_state:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Apply field-level semantic manual override or candidate acceptance while preserving lock/provenance invariants. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @RELATION: [DEPENDS_ON] ->[SemanticCandidate] -# @PRE: Field belongs to the owned session and request is owner-authorized. -# @POST: Manual overrides always set manual provenance plus lock; explicit field edits may lock accepted candidate state but later imports cannot silently replace locked values. -# @SIDE_EFFECT: Mutates field state and candidate statuses in persistence. 
-# @DATA_CONTRACT: Input[SemanticFieldEntry,FieldSemanticUpdateRequest,changed_by:str] -> Output[SemanticFieldEntry] -def _update_semantic_field_state( - field: SemanticFieldEntry, - request: FieldSemanticUpdateRequest, - changed_by: str, -) -> SemanticFieldEntry: - has_manual_override = any( - value is not None - for value in [request.verbose_name, request.description, request.display_format] - ) - selected_candidate = None - if request.candidate_id: - selected_candidate = next( - ( - candidate - for candidate in field.candidates - if candidate.candidate_id == request.candidate_id - ), - None, - ) - if selected_candidate is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Semantic candidate not found", - ) - - if has_manual_override: - field.verbose_name = request.verbose_name - field.description = request.description - field.display_format = request.display_format - field.provenance = FieldProvenance.MANUAL_OVERRIDE - field.source_id = None - field.source_version = None - field.confidence_rank = None - field.is_locked = True - field.has_conflict = False - field.needs_review = False - field.last_changed_by = changed_by - for candidate in field.candidates: - candidate.status = CandidateStatus.SUPERSEDED - return field - - if selected_candidate is not None: - field.verbose_name = selected_candidate.proposed_verbose_name - field.description = selected_candidate.proposed_description - field.display_format = selected_candidate.proposed_display_format - field.provenance = _map_candidate_provenance(selected_candidate) - field.source_id = selected_candidate.source_id - field.source_version = _resolve_candidate_source_version( - field, selected_candidate.source_id - ) - field.confidence_rank = selected_candidate.candidate_rank - field.is_locked = bool(request.lock_field or field.is_locked) - field.has_conflict = len(field.candidates) > 1 - field.needs_review = False - field.last_changed_by = changed_by - for candidate in field.candidates: - candidate.status = ( - CandidateStatus.ACCEPTED - if candidate.candidate_id == selected_candidate.candidate_id - else CandidateStatus.SUPERSEDED - ) - return field - - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Provide candidate_id or at least one manual override field", - ) - - -# [/DEF:_update_semantic_field_state:Function] - - -# [DEF:_serialize_execution_mapping:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map one persisted execution mapping into stable API DTO output. -# @RELATION: [DEPENDS_ON] ->[ExecutionMappingDto] -def _serialize_execution_mapping(mapping: ExecutionMapping) -> ExecutionMappingDto: - payload = ExecutionMappingDto.model_validate(mapping, from_attributes=True) - session_ref = getattr(mapping, "session", None) - version_value = getattr(session_ref, "version", None) - payload.session_version = ( - int(version_value or 0) if version_value is not None else None - ) - return payload - - -# [/DEF:_serialize_execution_mapping:Function] - - -# [DEF:_serialize_preview:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map one persisted preview snapshot into stable API DTO output and surface the refreshed session version for follow-up optimistic-lock mutations. 
-# @RELATION: [DEPENDS_ON] ->[CompiledPreviewDto] -def _serialize_preview( - preview: CompiledPreview, *, session_version_fallback: Optional[int] = None -) -> CompiledPreviewDto: - payload = CompiledPreviewDto.model_validate(preview, from_attributes=True) - session_ref = getattr(preview, "session", None) - version_value = getattr(session_ref, "version", None) - if version_value is None: - version_value = session_version_fallback - payload.session_version = ( - int(version_value or 0) if version_value is not None else None - ) - return payload - - -# [/DEF:_serialize_preview:Function] - - -# [DEF:_serialize_run_context:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Map one persisted launch run context into stable API DTO output for SQL Lab handoff confirmation. -# @RELATION: [DEPENDS_ON] ->[DatasetRunContextDto] -def _serialize_run_context(run_context) -> DatasetRunContextDto: - payload = DatasetRunContextDto.model_validate(run_context, from_attributes=True) - session_ref = getattr(run_context, "session", None) - version_value = getattr(session_ref, "version", None) - payload.session_version = ( - int(version_value or 0) if version_value is not None else None - ) - return payload - - -# [/DEF:_serialize_run_context:Function] - - -# [DEF:_build_sql_lab_redirect_url:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Build a stable SQL Lab redirect URL from the configured Superset environment and persisted run context reference. -# @RELATION: [DEPENDS_ON] ->[DatasetRunContextDto] -def _build_sql_lab_redirect_url(environment_url: str, sql_lab_session_ref: str) -> str: - base_url = str(environment_url or "").rstrip("/") - session_ref = str(sql_lab_session_ref or "").strip() - if not base_url: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Superset environment URL is not configured", - ) - if not session_ref: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="SQL Lab session reference is missing", - ) - return f"{base_url}/superset/sqllab?queryId={session_ref}" - - -# [/DEF:_build_sql_lab_redirect_url:Function] - - -# [DEF:_build_documentation_export:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Produce session documentation export content from current persisted review state. 
-# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -def _build_documentation_export( - session: DatasetReviewSession, export_format: ArtifactFormat -) -> Dict[str, Any]: - profile = session.profile - findings = sorted( - session.findings, key=lambda item: (item.severity.value, item.code) - ) - if export_format == ArtifactFormat.MARKDOWN: - lines = [ - f"# Dataset Review: {session.dataset_ref}", - "", - f"- Session ID: {session.session_id}", - f"- Environment: {session.environment_id}", - f"- Readiness: {session.readiness_state.value}", - f"- Recommended action: {session.recommended_action.value}", - "", - "## Business Summary", - profile.business_summary if profile else "No profile summary available.", - "", - "## Findings", - ] - if findings: - for finding in findings: - lines.append( - f"- [{finding.severity.value}] {finding.title}: {finding.message}" - ) - else: - lines.append("- No findings recorded.") - content = {"markdown": "\n".join(lines)} - storage_ref = f"inline://dataset-review/{session.session_id}/documentation.md" - else: - content = { - "session": _serialize_session_summary(session).model_dump(mode="json"), - "profile": profile - and { - "dataset_name": profile.dataset_name, - "business_summary": profile.business_summary, - "confidence_state": profile.confidence_state.value, - "dataset_type": profile.dataset_type, - }, - "findings": [ - { - "code": finding.code, - "severity": finding.severity.value, - "title": finding.title, - "message": finding.message, - "resolution_state": finding.resolution_state.value, - } - for finding in findings - ], - } - storage_ref = f"inline://dataset-review/{session.session_id}/documentation.json" - return {"storage_ref": storage_ref, "content": content} - - -# [/DEF:_build_documentation_export:Function] - - -# [DEF:_build_validation_export:Function] -# @COMPLEXITY: 2 -# @PURPOSE: Produce validation-focused export content from persisted findings and readiness state. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -def _build_validation_export( - session: DatasetReviewSession, export_format: ArtifactFormat -) -> Dict[str, Any]: - findings = sorted( - session.findings, key=lambda item: (item.severity.value, item.code) - ) - if export_format == ArtifactFormat.MARKDOWN: - lines = [ - f"# Validation Report: {session.dataset_ref}", - "", - f"- Session ID: {session.session_id}", - f"- Readiness: {session.readiness_state.value}", - "", - "## Findings", - ] - if findings: - for finding in findings: - lines.append( - f"- `{finding.code}` [{finding.severity.value}] {finding.message}" - ) - else: - lines.append("- No findings recorded.") - content = {"markdown": "\n".join(lines)} - storage_ref = f"inline://dataset-review/{session.session_id}/validation.md" - else: - content = { - "session_id": session.session_id, - "dataset_ref": session.dataset_ref, - "readiness_state": session.readiness_state.value, - "findings": [ - { - "finding_id": finding.finding_id, - "area": finding.area.value, - "severity": finding.severity.value, - "code": finding.code, - "title": finding.title, - "message": finding.message, - "resolution_state": finding.resolution_state.value, - } - for finding in findings - ], - } - storage_ref = f"inline://dataset-review/{session.session_id}/validation.json" - return {"storage_ref": storage_ref, "content": content} - - -# [/DEF:_build_validation_export:Function] - - -# [DEF:list_sessions:Function] -# @COMPLEXITY: 3 -# @PURPOSE: List resumable dataset review sessions for the current user. 
-# @RELATION: [CALLS] ->[list_user_sess] -@router.get( - "/sessions", - response_model=SessionCollectionResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def list_sessions( - page: int = Query(1, ge=1), - page_size: int = Query(20, ge=1, le=100), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.list_sessions"): - logger.reason( - "Listing dataset review sessions for current user", - extra={"user_id": current_user.id, "page": page, "page_size": page_size}, - ) - sessions = repository.list_user_sess(current_user.id) - start = (page - 1) * page_size - end = start + page_size - items = [_serialize_session_summary(session) for session in sessions[start:end]] - response_payload = SessionCollectionResponse( - items=items, - total=len(sessions), - page=page, - page_size=page_size, - has_next=end < len(sessions), - ) - logger.reflect( - "Dataset review session page assembled", - extra={ - "user_id": current_user.id, - "returned_items": len(items), - "total": len(sessions), - }, - ) - return response_payload - - -# [/DEF:list_sessions:Function] - - -# [DEF:start_session:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Start a new dataset review session from a Superset link or dataset selection. -# @RELATION: [CALLS] ->[start_session:Function] -# @PRE: feature flag enabled, user authenticated, and request body valid. -# @POST: returns persisted session summary scoped to the authenticated user. -# @SIDE_EFFECT: persists session/profile/findings and may enqueue recovery task. -# @DATA_CONTRACT: Input[StartSessionRequest] -> Output[SessionSummary] -@router.post( - "/sessions", - response_model=SessionSummary, - status_code=status.HTTP_201_CREATED, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def start_session( - request: StartSessionRequest, - orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator), - current_user: User = Depends(get_current_user), -): - with belief_scope("start_session"): - logger.reason( - "Starting dataset review session", - extra={ - "user_id": current_user.id, - "environment_id": request.environment_id, - "source_kind": request.source_kind, - }, - ) - try: - result = orchestrator.start_session( - StartSessionCommand( - user=current_user, - environment_id=request.environment_id, - source_kind=request.source_kind, - source_input=request.source_input, - ) - ) - except ValueError as exc: - logger.explore( - "Dataset review session start rejected", - extra={"user_id": current_user.id, "error": str(exc)}, - ) - detail = str(exc) - status_code = ( - status.HTTP_404_NOT_FOUND - if detail == "Environment not found" - else status.HTTP_400_BAD_REQUEST - ) - raise HTTPException(status_code=status_code, detail=detail) from exc - logger.reflect( - "Dataset review session started and serialized", - extra={ - "session_id": result.session.session_id, - "user_id": current_user.id, - }, - ) - return _serialize_session_summary(result.session) - - -# [/DEF:start_session:Function] - - -# [DEF:get_session_detail:Function] -# @COMPLEXITY: 3 -# @PURPOSE: Return the full accessible dataset review session aggregate for current user scope. 
-# @RELATION: [CALLS] ->[_get_owned_session_or_404] -@router.get( - "/sessions/{session_id}", - response_model=SessionDetail, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def get_session_detail( - session_id: str, - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.get_session_detail"): - logger.reason( - "Loading dataset review session detail", - extra={"session_id": session_id, "user_id": current_user.id}, - ) - session = _get_owned_session_or_404(repository, session_id, current_user) - detail = _serialize_session_detail(session) - logger.reflect( - "Dataset review session detail serialized", - extra={"session_id": session.session_id, "user_id": current_user.id}, - ) - return detail - - -# [/DEF:get_session_detail:Function] - - -# [DEF:update_session:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Update resumable lifecycle status for an owned dataset review session. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @PRE: session is accessible to current user and requested status is allowed by lifecycle policy. -# @POST: returns updated summary without changing ownership or unrelated aggregates. -# @SIDE_EFFECT: mutates session lifecycle fields in persistence. -# @DATA_CONTRACT: Input[UpdateSessionRequest] -> Output[SessionSummary] -@router.patch( - "/sessions/{session_id}", - response_model=SessionSummary, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def update_session( - session_id: str, - request: UpdateSessionRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("update_session"): - logger.reason( - "Updating dataset review session lifecycle state", - extra={ - "session_id": session_id, - "user_id": current_user.id, - "requested_status": request.status.value, - }, - ) - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - session_record = cast(Any, session) - session_record.status = request.status - if request.status == SessionStatus.PAUSED: - session_record.recommended_action = RecommendedAction.RESUME_SESSION - elif request.status in { - SessionStatus.ARCHIVED, - SessionStatus.CANCELLED, - SessionStatus.COMPLETED, - }: - session_record.active_task_id = None - _commit_owned_session_mutation(repository, session) - _record_session_event( - repository, - session, - current_user, - event_type="session_status_updated", - event_summary="Dataset review session lifecycle updated", - event_details={ - "status": session_record.status.value, - "version": session_record.version, - }, - ) - logger.reflect( - "Dataset review session lifecycle updated", - extra={ - "session_id": session.session_id, - "user_id": current_user.id, - "status": session_record.status.value, - "version": session_record.version, - }, - ) - return _serialize_session_summary(session) - - -# [/DEF:update_session:Function] - - -# [DEF:delete_session:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Archive or hard-delete a session owned by the current user. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @PRE: session is owner-scoped to current user. -# @POST: session is archived or deleted and no foreign-session existence is disclosed. 
-# @SIDE_EFFECT: mutates or deletes persisted session aggregate. -# @DATA_CONTRACT: Input[session_id:str,hard_delete:bool] -> Output[HTTP 204] -@router.delete( - "/sessions/{session_id}", - status_code=status.HTTP_204_NO_CONTENT, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def delete_session( - session_id: str, - hard_delete: bool = Query(False), - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("delete_session"): - logger.reason( - "Deleting or archiving dataset review session", - extra={ - "session_id": session_id, - "user_id": current_user.id, - "hard_delete": hard_delete, - }, - ) - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - if hard_delete: - _record_session_event( - repository, - session, - current_user, - event_type="session_deleted", - event_summary="Dataset review session hard-deleted", - event_details={"hard_delete": True}, - ) - repository.db.delete(session) - repository.db.commit() - logger.reflect( - "Dataset review session hard-delete committed", - extra={"session_id": session_id, "user_id": current_user.id}, - ) - return Response(status_code=status.HTTP_204_NO_CONTENT) - session_record = cast(Any, session) - session_record.status = SessionStatus.ARCHIVED - session_record.active_task_id = None - _commit_owned_session_mutation(repository, session) - _record_session_event( - repository, - session, - current_user, - event_type="session_archived", - event_summary="Dataset review session archived", - event_details={"hard_delete": False, "version": session_record.version}, - ) - logger.reflect( - "Dataset review session archive committed", - extra={ - "session_id": session.session_id, - "user_id": current_user.id, - "version": session_record.version, - }, - ) - return Response(status_code=status.HTTP_204_NO_CONTENT) - - -# [/DEF:delete_session:Function] - - -# [DEF:export_documentation:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Export documentation output for the current session in JSON or Markdown form. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @PRE: session is accessible to current user and requested format is supported. -# @POST: returns ownership-scoped export payload without fabricating unrelated artifacts. -# @SIDE_EFFECT: none beyond response construction. 
-# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse] -@router.get( - "/sessions/{session_id}/exports/documentation", - response_model=ExportArtifactResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def export_documentation( - session_id: str, - format: ArtifactFormat = Query(ArtifactFormat.JSON), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("export_documentation"): - if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Only json and markdown exports are supported", - ) - logger.reason( - "Building dataset review documentation export", - extra={ - "session_id": session_id, - "user_id": current_user.id, - "format": format.value, - }, - ) - session = _get_owned_session_or_404(repository, session_id, current_user) - export_payload = _build_documentation_export(session, format) - logger.reflect( - "Dataset review documentation export assembled", - extra={ - "session_id": session.session_id, - "user_id": current_user.id, - "format": format.value, - }, - ) - return ExportArtifactResponse( - artifact_id=f"documentation-{session.session_id}-{format.value}", - session_id=session.session_id, - artifact_type="documentation", - format=format.value, - storage_ref=export_payload["storage_ref"], - created_by_user_id=current_user.id, - content=export_payload["content"], - ) - - -# [/DEF:export_documentation:Function] - - -# [DEF:export_validation:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Export validation findings for the current session in JSON or Markdown form. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] -# @PRE: session is accessible to current user and requested format is supported. -# @POST: returns explicit validation export payload scoped to current user session access. -# @SIDE_EFFECT: none beyond response construction. 
-# @DATA_CONTRACT: Input[session_id:str,format:ArtifactFormat] -> Output[ExportArtifactResponse] -@router.get( - "/sessions/{session_id}/exports/validation", - response_model=ExportArtifactResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def export_validation( - session_id: str, - format: ArtifactFormat = Query(ArtifactFormat.JSON), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("export_validation"): - if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="Only json and markdown exports are supported", - ) - logger.reason( - "Building dataset review validation export", - extra={ - "session_id": session_id, - "user_id": current_user.id, - "format": format.value, - }, - ) - session = _get_owned_session_or_404(repository, session_id, current_user) - export_payload = _build_validation_export(session, format) - logger.reflect( - "Dataset review validation export assembled", - extra={ - "session_id": session.session_id, - "user_id": current_user.id, - "format": format.value, - }, - ) - return ExportArtifactResponse( - artifact_id=f"validation-{session.session_id}-{format.value}", - session_id=session.session_id, - artifact_type="validation_report", - format=format.value, - storage_ref=export_payload["storage_ref"], - created_by_user_id=current_user.id, - content=export_payload["content"], - ) - - -# [/DEF:export_validation:Function] - - -# [DEF:get_clarification_state:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Return the current clarification session summary and one active question payload, or an empty state when clarification has not started. -# @RELATION: [CALLS] ->[build_question_payload:Function] -# @PRE: Session is accessible to current user and clarification feature is enabled. -# @POST: Returns at most one active clarification question with why_it_matters, current_guess, and ordered options; sessions without a clarification record return a non-blocking empty state. -# @SIDE_EFFECT: May normalize clarification pointer and readiness state in persistence. 
-# @DATA_CONTRACT: Input[session_id:str] -> Output[ClarificationStateResponse] -@router.get( - "/sessions/{session_id}/clarification", - response_model=ClarificationStateResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_clarification_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def get_clarification_state( - session_id: str, - repository: DatasetReviewSessionRepository = Depends(_get_repository), - clarification_engine: ClarificationEngine = Depends(_get_clarification_engine), - current_user: User = Depends(get_current_user), -): - with belief_scope("get_clarification_state"): - logger.reason( - "Belief protocol reasoning checkpoint for get_clarification_state" - ) - session = _get_owned_session_or_404(repository, session_id, current_user) - if not session.clarification_sessions: - logger.reflect( - "Belief protocol postcondition checkpoint for get_clarification_state" - ) - return _serialize_empty_clarification_state() - clarification_session = _get_latest_clarification_session_or_404(session) - current_question = clarification_engine.build_question_payload(session) - logger.reflect( - "Belief protocol postcondition checkpoint for get_clarification_state" - ) - return _serialize_clarification_state( - ClarificationStateResult( - clarification_session=clarification_session, - current_question=current_question, - session=session, - changed_findings=[], - ) - ) - - -# [/DEF:get_clarification_state:Function] - - -# [DEF:resume_clarification:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Resume clarification mode on the highest-priority unresolved question for an owned session. -# @RELATION: [CALLS] ->[build_question_payload:Function] -# @PRE: Session belongs to the current owner and clarification feature is enabled. -# @POST: Clarification session enters active state with one current question or completes deterministically when no unresolved items remain. -# @SIDE_EFFECT: Mutates clarification pointer, readiness, and recommended action. 
-# @DATA_CONTRACT: Input[session_id:str] -> Output[ClarificationStateResponse] -@router.post( - "/sessions/{session_id}/clarification/resume", - response_model=ClarificationStateResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_clarification_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def resume_clarification( - session_id: str, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - clarification_engine: ClarificationEngine = Depends(_get_clarification_engine), - current_user: User = Depends(get_current_user), -): - with belief_scope("resume_clarification"): - logger.reason("Belief protocol reasoning checkpoint for resume_clarification") - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - clarification_session = _get_latest_clarification_session_or_404(session) - current_question = clarification_engine.build_question_payload(session) - logger.reflect( - "Belief protocol postcondition checkpoint for resume_clarification" - ) - return _serialize_clarification_state( - ClarificationStateResult( - clarification_session=clarification_session, - current_question=current_question, - session=session, - changed_findings=[], - ) - ) - - -# [/DEF:resume_clarification:Function] - - -# [DEF:record_clarification_answer:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Persist one clarification answer before advancing the active pointer or readiness state. -# @RELATION: [CALLS] ->[record_answer:Function] -# @PRE: Target question is the session's active clarification question and current user owns the session. -# @POST: Answer is persisted, changed findings are returned, and unresolved skipped/expert-review questions remain visible. -# @SIDE_EFFECT: Inserts answer row and mutates clarification/session state. 
-# @DATA_CONTRACT: Input[ClarificationAnswerRequest] -> Output[ClarificationAnswerResultResponse] -@router.post( - "/sessions/{session_id}/clarification/answers", - response_model=ClarificationAnswerResultResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_clarification_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def record_clarification_answer( - session_id: str, - request: ClarificationAnswerRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - clarification_engine: ClarificationEngine = Depends(_get_clarification_engine), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.record_clarification_answer"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - try: - result = clarification_engine.record_answer( - ClarificationAnswerCommand( - session=session, - question_id=request.question_id, - answer_kind=request.answer_kind, - answer_value=request.answer_value, - user=current_user, - ) - ) - except ValueError as exc: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc) - ) from exc - - return ClarificationAnswerResultResponse( - clarification_state=_serialize_clarification_state(result), - session=_serialize_session_summary(result.session), - changed_findings=[ - ValidationFindingDto.model_validate(item, from_attributes=True) - for item in result.changed_findings - ], - ) - - -# [/DEF:record_clarification_answer:Function] - - -# [DEF:update_field_semantic:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Apply one field-level semantic candidate decision or manual override with lock/provenance safeguards. -# @RELATION: [CALLS] ->[_update_semantic_field_state] -# @PRE: Session and field belong to the current owner, and request contains a candidate selection or manual override values. -# @POST: Manual overrides set manual provenance plus lock; explicit lock state prevents later silent replacement. -# @SIDE_EFFECT: Mutates field state and accepted/superseded candidate statuses in persistence. 
-# @DATA_CONTRACT: Input[FieldSemanticUpdateRequest] -> Output[SemanticFieldEntryDto] -@router.patch( - "/sessions/{session_id}/fields/{field_id}/semantic", - response_model=SemanticFieldEntryDto, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def update_field_semantic( - session_id: str, - field_id: str, - request: FieldSemanticUpdateRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.update_field_semantic"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - field = _get_owned_field_or_404(session, field_id) - _update_semantic_field_state(field, request, changed_by="user") - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session, refresh_targets=[field]) - _record_session_event( - repository, - session, - current_user, - event_type="semantic_field_updated", - event_summary="Semantic field decision persisted", - event_details={ - "field_id": field.field_id, - "candidate_id": request.candidate_id, - "is_locked": field.is_locked, - "source_id": field.source_id, - "source_version": field.source_version, - "version": session_record.version, - }, - ) - return _serialize_semantic_field(field) - - -# [/DEF:update_field_semantic:Function] - - -# [DEF:lock_field_semantic:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Lock one semantic field against later automatic overwrite while preserving the current active value. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @PRE: Field belongs to the current owner. -# @POST: Field remains active and locked; later imports may add candidates but cannot replace the locked value implicitly. -# @SIDE_EFFECT: Mutates field lock state in persistence. -# @DATA_CONTRACT: Input[session_id:str,field_id:str] -> Output[SemanticFieldEntryDto] -@router.post( - "/sessions/{session_id}/fields/{field_id}/lock", - response_model=SemanticFieldEntryDto, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def lock_field_semantic( - session_id: str, - field_id: str, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.lock_field_semantic"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - field = _get_owned_field_or_404(session, field_id) - field.is_locked = True - field.last_changed_by = "user" - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session, refresh_targets=[field]) - _record_session_event( - repository, - session, - current_user, - event_type="semantic_field_locked", - event_summary="Semantic field lock persisted", - event_details={ - "field_id": field.field_id, - "version": session_record.version, - }, - ) - return _serialize_semantic_field(field) - - -# [/DEF:lock_field_semantic:Function] - - -# [DEF:unlock_field_semantic:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Unlock one semantic field so later automated candidate application may replace it explicitly. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @PRE: Field belongs to the current owner. 
-# @POST: Field becomes unlocked; manual-override provenance is downgraded to unresolved to preserve the lock/provenance invariant. -# @SIDE_EFFECT: Mutates field lock/provenance state in persistence. -# @DATA_CONTRACT: Input[session_id:str,field_id:str] -> Output[SemanticFieldEntryDto] -@router.post( - "/sessions/{session_id}/fields/{field_id}/unlock", - response_model=SemanticFieldEntryDto, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def unlock_field_semantic( - session_id: str, - field_id: str, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.unlock_field_semantic"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - field = _get_owned_field_or_404(session, field_id) - field.is_locked = False - field.last_changed_by = "user" - if field.provenance == FieldProvenance.MANUAL_OVERRIDE: - field.provenance = FieldProvenance.UNRESOLVED - field.needs_review = True - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session, refresh_targets=[field]) - _record_session_event( - repository, - session, - current_user, - event_type="semantic_field_unlocked", - event_summary="Semantic field unlock persisted", - event_details={ - "field_id": field.field_id, - "version": session_record.version, - }, - ) - return _serialize_semantic_field(field) - - -# [/DEF:unlock_field_semantic:Function] - - -# [DEF:approve_batch_semantic_fields:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Approve multiple semantic candidate decisions in one owner-authorized batch without bypassing single-field semantics. -# @RELATION: [CALLS] ->[_update_semantic_field_state] -# @PRE: Session belongs to the current owner and each requested field/candidate pair is contained in the session aggregate. -# @POST: Returns updated semantic fields after applying the same candidate/lock invariants as the single-field endpoint. -# @SIDE_EFFECT: Persists multiple semantic field decisions in one transaction and records one explicit session audit event. 
-# @DATA_CONTRACT: Input[BatchApproveSemanticRequest] -> Output[List[SemanticFieldEntryDto]] -@router.post( - "/sessions/{session_id}/fields/semantic/approve-batch", - response_model=List[SemanticFieldEntryDto], - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def approve_batch_semantic_fields( - session_id: str, - request: BatchApproveSemanticRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.approve_batch_semantic_fields"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - - updated_fields: List[SemanticFieldEntry] = [] - for item in request.items: - field = _get_owned_field_or_404(session, item.field_id) - updated_field = _update_semantic_field_state( - field, - FieldSemanticUpdateRequest( - candidate_id=item.candidate_id, lock_field=item.lock_field - ), - changed_by="user", - ) - updated_fields.append(updated_field) - - session_record = cast(Any, session) - _commit_owned_session_mutation( - repository, session, refresh_targets=list(updated_fields) - ) - _record_session_event( - repository, - session, - current_user, - event_type="semantic_fields_batch_approved", - event_summary="Batch semantic approval persisted", - event_details={ - "field_ids": [field.field_id for field in updated_fields], - "count": len(updated_fields), - "version": session_record.version, - }, - ) - return [_serialize_semantic_field(field) for field in updated_fields] - - -# [/DEF:approve_batch_semantic_fields:Function] - - -# [DEF:list_execution_mappings:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Return the current mapping-review set for one accessible session. -# @RELATION: [CALLS] ->[_get_owned_session_or_404] -# @PRE: Session is ownership-accessible to the authenticated user and execution feature is enabled. -# @POST: Returns the persisted mapping review set for the requested session wrapped in the contract collection shape without mutating approval state. -# @SIDE_EFFECT: none. -# @DATA_CONTRACT: Input[session_id:str] -> Output[MappingCollectionResponse] -@router.get( - "/sessions/{session_id}/mappings", - response_model=MappingCollectionResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:session", "READ")), - ], -) -async def list_execution_mappings( - session_id: str, - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.list_execution_mappings"): - session = _get_owned_session_or_404(repository, session_id, current_user) - return MappingCollectionResponse( - items=[ - _serialize_execution_mapping(item) - for item in session.execution_mappings - ] - ) - - -# [/DEF:list_execution_mappings:Function] - - -# [DEF:update_execution_mapping:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Persist one owner-authorized execution-mapping effective value override and invalidate stale preview truth. -# @RELATION: [DEPENDS_ON] ->[ExecutionMapping] -# @PRE: Mapping belongs to the current owner session and request carries an explicit effective value decision. -# @POST: Mapping effective value and override metadata are persisted and any prior preview truth is marked stale for safe relaunch. 
-# @SIDE_EFFECT: Mutates mapping value/approval state, may mark latest preview stale, and updates session readiness cues. -# @DATA_CONTRACT: Input[UpdateExecutionMappingRequest] -> Output[ExecutionMappingDto] -@router.patch( - "/sessions/{session_id}/mappings/{mapping_id}", - response_model=ExecutionMappingDto, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def update_execution_mapping( - session_id: str, - mapping_id: str, - request: UpdateExecutionMappingRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.update_execution_mapping"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - mapping = _get_owned_mapping_or_404(session, mapping_id) - - if request.effective_value is None: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="effective_value is required for execution mapping updates", - ) - - mapping.effective_value = request.effective_value - mapping.mapping_method = MappingMethod( - request.mapping_method or MappingMethod.MANUAL_OVERRIDE.value - ) - mapping.transformation_note = request.transformation_note - mapping.approval_state = ApprovalState.APPROVED - mapping.approved_by_user_id = current_user.id - mapping.approved_at = datetime.utcnow() - - session.last_activity_at = datetime.utcnow() - session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW - if session.readiness_state in { - ReadinessState.MAPPING_REVIEW_NEEDED, - ReadinessState.COMPILED_PREVIEW_READY, - ReadinessState.RUN_READY, - ReadinessState.RUN_IN_PROGRESS, - }: - session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY - - for preview in session.previews: - if preview.preview_status == PreviewStatus.READY: - preview.preview_status = PreviewStatus.STALE - - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session, refresh_targets=[mapping]) - _record_session_event( - repository, - session, - current_user, - event_type="execution_mapping_updated", - event_summary="Execution mapping override persisted", - event_details={ - "mapping_id": mapping.mapping_id, - "approval_state": mapping.approval_state.value, - "preview_state": "stale", - "version": session_record.version, - }, - ) - return _serialize_execution_mapping(mapping) - - -# [/DEF:update_execution_mapping:Function] - - -# [DEF:approve_execution_mapping:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Explicitly approve a warning-sensitive mapping transformation and preserve audit note state. -# @RELATION: [DEPENDS_ON] ->[ExecutionMapping] -# @PRE: Mapping belongs to the current owner session and execution feature is enabled. -# @POST: Mapping approval state becomes approved and owner-scoped audit markers are updated. -# @SIDE_EFFECT: Mutates persisted mapping approval state and session readiness cues. 
-# @DATA_CONTRACT: Input[ApproveMappingRequest] -> Output[ExecutionMappingDto] -@router.post( - "/sessions/{session_id}/mappings/{mapping_id}/approve", - response_model=ExecutionMappingDto, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def approve_execution_mapping( - session_id: str, - mapping_id: str, - request: ApproveMappingRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.approve_execution_mapping"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - mapping = _get_owned_mapping_or_404(session, mapping_id) - mapping.approval_state = ApprovalState.APPROVED - mapping.approved_by_user_id = current_user.id - mapping.approved_at = datetime.utcnow() - if request.approval_note: - mapping.transformation_note = request.approval_note - session.last_activity_at = datetime.utcnow() - if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED: - session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session, refresh_targets=[mapping]) - _record_session_event( - repository, - session, - current_user, - event_type="execution_mapping_approved", - event_summary="Execution mapping approval persisted", - event_details={ - "mapping_id": mapping.mapping_id, - "approval_state": mapping.approval_state.value, - "version": session_record.version, - }, - ) - return _serialize_execution_mapping(mapping) - - -# [/DEF:approve_execution_mapping:Function] - - -# [DEF:approve_batch_execution_mappings:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Approve multiple warning-sensitive execution mappings in one owner-authorized batch. -# @RELATION: [DEPENDS_ON] ->[ExecutionMapping] -# @PRE: Session belongs to the current owner and every requested mapping belongs to the same session aggregate. -# @POST: Returns updated mappings after applying the same approval semantics as the single mapping endpoint. -# @SIDE_EFFECT: Persists multiple approvals and records one explicit audit event. 
-# @DATA_CONTRACT: Input[BatchApproveMappingRequest] -> Output[List[ExecutionMappingDto]] -@router.post( - "/sessions/{session_id}/mappings/approve-batch", - response_model=List[ExecutionMappingDto], - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def approve_batch_execution_mappings( - session_id: str, - request: BatchApproveMappingRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.approve_batch_execution_mappings"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - - updated_mappings: List[ExecutionMapping] = [] - for mapping_id in list(dict.fromkeys(request.mapping_ids)): - mapping = _get_owned_mapping_or_404(session, mapping_id) - mapping.approval_state = ApprovalState.APPROVED - mapping.approved_by_user_id = current_user.id - mapping.approved_at = datetime.utcnow() - if request.approval_note: - mapping.transformation_note = request.approval_note - updated_mappings.append(mapping) - - session.last_activity_at = datetime.utcnow() - if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED: - session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW - - session_record = cast(Any, session) - _commit_owned_session_mutation( - repository, session, refresh_targets=list(updated_mappings) - ) - _record_session_event( - repository, - session, - current_user, - event_type="execution_mappings_batch_approved", - event_summary="Batch mapping approval persisted", - event_details={ - "mapping_ids": [mapping.mapping_id for mapping in updated_mappings], - "count": len(updated_mappings), - "version": session_record.version, - }, - ) - return [_serialize_execution_mapping(mapping) for mapping in updated_mappings] - - -# [/DEF:approve_batch_execution_mappings:Function] - - -# [DEF:trigger_preview_generation:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Trigger Superset-side preview compilation for the current owned execution context. -# @RELATION: [CALLS] ->[prepare_launch_preview:Function] -# @PRE: Session belongs to the current owner and required mapping inputs are available. -# @POST: Returns the compiled preview directly for synchronous success or enqueue-state shape when preview generation remains pending. -# @SIDE_EFFECT: Persists preview attempt and updates readiness state. 
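The batch endpoint above deduplicates mapping_ids with list(dict.fromkeys(...)); unlike set(), this drops duplicates while preserving first-submitted order, so approvals are applied in the order the client sent them. A one-line illustration:

    ids = ["m2", "m1", "m2", "m3", "m1"]
    print(list(dict.fromkeys(ids)))  # ['m2', 'm1', 'm3'] -- duplicates dropped, order kept
    # set(ids) would also deduplicate, but its iteration order is not guaranteed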
-# @DATA_CONTRACT: Input[session_id:str] -> Output[CompiledPreviewDto | PreviewEnqueueResultResponse] -@router.post( - "/sessions/{session_id}/preview", - response_model=Union[CompiledPreviewDto, PreviewEnqueueResultResponse], - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def trigger_preview_generation( - session_id: str, - response: Response, - orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - session_version: int = Depends(_require_session_version_header), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.trigger_preview_generation"): - _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - try: - result = orchestrator.prepare_launch_preview( - PreparePreviewCommand( - user=current_user, - session_id=session_id, - expected_version=session_version, - ) - ) - except DatasetReviewSessionVersionConflictError as exc: - raise _build_session_version_conflict_http_exception(exc) from exc - except ValueError as exc: - detail = str(exc) - status_code = ( - status.HTTP_404_NOT_FOUND - if detail in {"Session not found", "Environment not found"} - else status.HTTP_409_CONFLICT - if detail.startswith("Preview blocked:") - else status.HTTP_400_BAD_REQUEST - ) - raise HTTPException(status_code=status_code, detail=detail) from exc - - if result.preview.preview_status == PreviewStatus.PENDING: - response.status_code = status.HTTP_202_ACCEPTED - return PreviewEnqueueResultResponse( - session_id=result.session.session_id, - session_version=int(getattr(result.session, "version", 0) or 0), - preview_status=result.preview.preview_status.value, - task_id=None, - ) - - response.status_code = status.HTTP_200_OK - return _serialize_preview( - result.preview, - session_version_fallback=int(getattr(result.session, "version", 0) or 0), - ) - - -# [/DEF:trigger_preview_generation:Function] - - -# [DEF:launch_dataset:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Execute the current owned session launch handoff through the orchestrator and return audited SQL Lab run context. -# @RELATION: [CALLS] ->[launch_dataset:Function] -# @PRE: Session belongs to the current owner, execution feature is enabled, and launch gates are satisfied or a deterministic conflict is returned. -# @POST: Returns persisted run context plus redirect URL when launch handoff is accepted. -# @SIDE_EFFECT: Persists launch audit snapshot and may trigger SQL Lab session creation. 
-# @DATA_CONTRACT: Input[session_id:str] -> Output[LaunchDatasetResponse] -@router.post( - "/sessions/{session_id}/launch", - response_model=LaunchDatasetResponse, - status_code=status.HTTP_201_CREATED, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(_require_execution_flag), - Depends(has_permission("dataset:execution:launch", "EXECUTE")), - ], -) -async def launch_dataset( - session_id: str, - orchestrator: DatasetReviewOrchestrator = Depends(_get_orchestrator), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - session_version: int = Depends(_require_session_version_header), - config_manager=Depends(get_config_manager), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.launch_dataset"): - _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - try: - result = orchestrator.launch_dataset( - LaunchDatasetCommand( - user=current_user, - session_id=session_id, - expected_version=session_version, - ) - ) - except DatasetReviewSessionVersionConflictError as exc: - raise _build_session_version_conflict_http_exception(exc) from exc - except ValueError as exc: - detail = str(exc) - status_code = ( - status.HTTP_404_NOT_FOUND - if detail in {"Session not found", "Environment not found"} - else status.HTTP_409_CONFLICT - if detail.startswith("Launch blocked:") - else status.HTTP_400_BAD_REQUEST - ) - raise HTTPException(status_code=status_code, detail=detail) from exc - - environment = config_manager.get_environment(result.session.environment_id) - environment_url = ( - getattr(environment, "url", "") if environment is not None else "" - ) - return LaunchDatasetResponse( - run_context=_serialize_run_context(result.run_context), - redirect_url=_build_sql_lab_redirect_url( - environment_url=environment_url, - sql_lab_session_ref=result.run_context.sql_lab_session_ref, - ), - ) - - -# [/DEF:launch_dataset:Function] - - -# [DEF:record_field_feedback:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Persist thumbs up/down feedback for AI-assisted semantic field content. -# @RELATION: [DEPENDS_ON] ->[SemanticFieldEntry] -# @PRE: Field belongs to the current owner and feedback value is valid. -# @POST: Field feedback is stored without altering lock or active semantic value. -# @SIDE_EFFECT: Updates one persisted semantic field feedback marker. 
-# @DATA_CONTRACT: Input[FeedbackRequest] -> Output[FeedbackResponse] -@router.post( - "/sessions/{session_id}/fields/{field_id}/feedback", - response_model=FeedbackResponse, - dependencies=[ - Depends(_require_auto_review_flag), - Depends(has_permission("dataset:session", "MANAGE")), - ], -) -async def record_field_feedback( - session_id: str, - field_id: str, - request: FeedbackRequest, - session_version: int = Depends(_require_session_version_header), - repository: DatasetReviewSessionRepository = Depends(_get_repository), - current_user: User = Depends(get_current_user), -): - with belief_scope("dataset_review.record_field_feedback"): - session = _prepare_owned_session_mutation( - repository, session_id, current_user, session_version - ) - field = _get_owned_field_or_404(session, field_id) - field.user_feedback = request.feedback - session_record = cast(Any, session) - _commit_owned_session_mutation(repository, session) - _record_session_event( - repository, - session, - current_user, - event_type="semantic_field_feedback_recorded", - event_summary="Semantic field feedback persisted", - event_details={ - "field_id": field.field_id, - "feedback": request.feedback, - "version": session_record.version, - }, - ) - return FeedbackResponse(target_id=field.field_id, feedback=request.feedback) - - -# [/DEF:record_field_feedback:Function] - - -# [DEF:record_clarification_feedback:Function] -# @COMPLEXITY: 4 -# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content used in guided review. -# @RELATION: [DEPENDS_ON] ->[ClarificationAnswer] -# @PRE: Clarification question belongs to the current owner session and already has a persisted answer. -# @POST: Feedback is stored on the clarification answer audit record. -# @SIDE_EFFECT: Updates one clarification answer feedback marker in persistence. 
-# @DATA_CONTRACT: Input[FeedbackRequest] -> Output[FeedbackResponse]
-@router.post(
-    "/sessions/{session_id}/clarification/questions/{question_id}/feedback",
-    response_model=FeedbackResponse,
-    dependencies=[
-        Depends(_require_auto_review_flag),
-        Depends(_require_clarification_flag),
-        Depends(has_permission("dataset:session", "MANAGE")),
-    ],
-)
-async def record_clarification_feedback(
-    session_id: str,
-    question_id: str,
-    request: FeedbackRequest,
-    session_version: int = Depends(_require_session_version_header),
-    repository: DatasetReviewSessionRepository = Depends(_get_repository),
-    current_user: User = Depends(get_current_user),
-):
-    with belief_scope("dataset_review.record_clarification_feedback"):
-        session = _prepare_owned_session_mutation(
-            repository, session_id, current_user, session_version
-        )
-        clarification_session = _get_latest_clarification_session_or_404(session)
-        question = next(
-            (
-                item
-                for item in clarification_session.questions
-                if item.question_id == question_id
-            ),
-            None,
-        )
-        if question is None:
-            raise HTTPException(
-                status_code=status.HTTP_404_NOT_FOUND,
-                detail="Clarification question not found",
-            )
-        if question.answer is None:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Clarification answer not found",
-            )
-        question.answer.user_feedback = request.feedback
-        session_record = cast(Any, session)
-        _commit_owned_session_mutation(repository, session)
-        _record_session_event(
-            repository,
-            session,
-            current_user,
-            event_type="clarification_feedback_recorded",
-            event_summary="Clarification feedback persisted",
-            event_details={
-                "question_id": question.question_id,
-                "feedback": request.feedback,
-                "version": session_record.version,
-            },
-        )
-        return FeedbackResponse(
-            target_id=question.question_id, feedback=request.feedback
-        )
-
-
-# [/DEF:record_clarification_feedback:Function]
-
+from src.api.routes.dataset_review_pkg._routes import router  # noqa: F401
 
 # [/DEF:DatasetReviewApi:Module]
diff --git a/backend/src/api/routes/dataset_review_pkg/_dependencies.py b/backend/src/api/routes/dataset_review_pkg/_dependencies.py
new file mode 100644
index 00000000..6ecd63cf
--- /dev/null
+++ b/backend/src/api/routes/dataset_review_pkg/_dependencies.py
@@ -0,0 +1,900 @@
+# [DEF:DatasetReviewDependencies:Module]
+# @COMPLEXITY: 2
+# @PURPOSE: Shared request/response DTOs, dependency providers, and serialization helpers for the dataset review routes.
+# @LAYER: API
+# @RATIONALE: Extracted from 2484-line monolith to satisfy INV_7 (400-line module limit).
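After the split, the original dataset_review.py route module survives only as a facade: its single remaining added line re-exports router from the new package, so call sites that import from the original path keep working unchanged. A minimal sketch of the pattern, with hypothetical module names:

    # routes/old_module.py -- kept as a one-line facade after the split
    from routes.new_pkg._routes import router  # noqa: F401  (re-export, not unused)

    # callers are untouched:
    # from routes.old_module import router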
+ +from __future__ import annotations + +import json +from datetime import datetime +from typing import Any, Dict, List, Optional, Union, cast + +from fastapi import APIRouter, Depends, Header, HTTPException, Query, Response, status +from pydantic import BaseModel, Field +from sqlalchemy.orm import Session + +from src.core.database import get_db +from src.core.logger import belief_scope, logger +from src.dependencies import ( + get_config_manager, + get_current_user, + get_task_manager, + has_permission, +) +from src.models.auth import User +from src.models.dataset_review import ( + AnswerKind, + ApprovalState, + ArtifactFormat, + CandidateStatus, + ClarificationSession, + DatasetReviewSession, + ExecutionMapping, + FieldProvenance, + MappingMethod, + PreviewStatus, + QuestionState, + ReadinessState, + RecommendedAction, + SemanticCandidate, + SemanticFieldEntry, + SessionStatus, +) +from src.schemas.dataset_review import ( + ClarificationAnswerDto, + ClarificationQuestionDto, + ClarificationSessionDto, + CompiledPreviewDto, + DatasetRunContextDto, + ExecutionMappingDto, + SemanticFieldEntryDto, + SessionDetail, + SessionSummary, + ValidationFindingDto, +) +from src.services.dataset_review.clarification_engine import ( + ClarificationAnswerCommand, + ClarificationEngine, + ClarificationQuestionPayload, + ClarificationStateResult, +) +from src.services.dataset_review.orchestrator import ( + DatasetReviewOrchestrator, + LaunchDatasetCommand, + PreparePreviewCommand, + StartSessionCommand, +) +from src.services.dataset_review.repositories.session_repository import ( + DatasetReviewSessionRepository, + DatasetReviewSessionVersionConflictError, +) + + +# [DEF:StartSessionRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for starting one dataset review session. +class StartSessionRequest(BaseModel): + source_kind: str = Field(..., pattern="^(superset_link|dataset_selection)$") + source_input: str = Field(..., min_length=1) + environment_id: str = Field(..., min_length=1) + + +# [/DEF:StartSessionRequest:Class] + + +# [DEF:UpdateSessionRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for lifecycle state updates on an existing session. +class UpdateSessionRequest(BaseModel): + status: SessionStatus + note: Optional[str] = None + + +# [/DEF:UpdateSessionRequest:Class] + + +# [DEF:SessionCollectionResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Paginated session collection response. +class SessionCollectionResponse(BaseModel): + items: List[SessionSummary] + total: int + page: int + page_size: int + has_next: bool + + +# [/DEF:SessionCollectionResponse:Class] + + +# [DEF:ExportArtifactResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Inline export response for documentation or validation outputs. +class ExportArtifactResponse(BaseModel): + artifact_id: str + session_id: str + artifact_type: str + format: str + storage_ref: str + created_by_user_id: str + created_at: Optional[str] = None + content: Dict[str, Any] + + +# [/DEF:ExportArtifactResponse:Class] + + +# [DEF:FieldSemanticUpdateRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for field-level semantic candidate acceptance or manual override. 
+class FieldSemanticUpdateRequest(BaseModel): + candidate_id: Optional[str] = None + verbose_name: Optional[str] = None + description: Optional[str] = None + display_format: Optional[str] = None + lock_field: bool = False + resolution_note: Optional[str] = None + + +# [/DEF:FieldSemanticUpdateRequest:Class] + + +# [DEF:FeedbackRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for thumbs up/down feedback. +class FeedbackRequest(BaseModel): + feedback: str = Field(..., pattern="^(up|down)$") + + +# [/DEF:FeedbackRequest:Class] + + +# [DEF:ClarificationAnswerRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for submitting one clarification answer. +class ClarificationAnswerRequest(BaseModel): + question_id: str = Field(..., min_length=1) + answer_kind: AnswerKind + answer_value: Optional[str] = None + + +# [/DEF:ClarificationAnswerRequest:Class] + + +# [DEF:ClarificationSessionSummaryResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Summary DTO for current clarification session state. +class ClarificationSessionSummaryResponse(BaseModel): + clarification_session_id: str + session_id: str + status: str + current_question_id: Optional[str] = None + resolved_count: int + remaining_count: int + summary_delta: Optional[str] = None + + +# [/DEF:ClarificationSessionSummaryResponse:Class] + + +# [DEF:ClarificationStateResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Response DTO for current clarification state and active question payload. +class ClarificationStateResponse(BaseModel): + clarification_session: Optional[ClarificationSessionSummaryResponse] = None + current_question: Optional[ClarificationQuestionDto] = None + + +# [/DEF:ClarificationStateResponse:Class] + + +# [DEF:ClarificationAnswerResultResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Response DTO for one clarification answer mutation result. +class ClarificationAnswerResultResponse(BaseModel): + clarification_state: ClarificationStateResponse + session: SessionSummary + changed_findings: List[ValidationFindingDto] + + +# [/DEF:ClarificationAnswerResultResponse:Class] + + +# [DEF:FeedbackResponse:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Minimal response DTO for persisted AI feedback actions. +class FeedbackResponse(BaseModel): + target_id: str + feedback: str + + +# [/DEF:FeedbackResponse:Class] + + +# [DEF:ApproveMappingRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Optional request DTO for explicit mapping approval audit notes. +class ApproveMappingRequest(BaseModel): + approval_note: Optional[str] = None + + +# [/DEF:ApproveMappingRequest:Class] + + +# [DEF:BatchApproveSemanticItemRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for one batch semantic-approval item. +class BatchApproveSemanticItemRequest(BaseModel): + field_id: str = Field(..., min_length=1) + candidate_id: str = Field(..., min_length=1) + lock_field: bool = False + + +# [/DEF:BatchApproveSemanticItemRequest:Class] + + +# [DEF:BatchApproveSemanticRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for explicit batch semantic approvals. +class BatchApproveSemanticRequest(BaseModel): + items: List[BatchApproveSemanticItemRequest] = Field(..., min_length=1) + + +# [/DEF:BatchApproveSemanticRequest:Class] + + +# [DEF:BatchApproveMappingRequest:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Request DTO for explicit batch mapping approvals. 
+class BatchApproveMappingRequest(BaseModel):
+    mapping_ids: List[str] = Field(..., min_length=1)
+    approval_note: Optional[str] = None
+
+
+# [/DEF:BatchApproveMappingRequest:Class]
+
+
+# [DEF:PreviewEnqueueResultResponse:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Async preview trigger response exposing only enqueue state.
+class PreviewEnqueueResultResponse(BaseModel):
+    session_id: str
+    session_version: Optional[int] = None
+    preview_status: str
+    task_id: Optional[str] = None
+
+
+# [/DEF:PreviewEnqueueResultResponse:Class]
+
+
+# [DEF:MappingCollectionResponse:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Wrapper for execution mapping list responses.
+class MappingCollectionResponse(BaseModel):
+    items: List[ExecutionMappingDto]
+
+
+# [/DEF:MappingCollectionResponse:Class]
+
+
+# [DEF:UpdateExecutionMappingRequest:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Request DTO for one manual execution-mapping override update.
+class UpdateExecutionMappingRequest(BaseModel):
+    effective_value: Optional[Any] = None
+    mapping_method: Optional[str] = Field(
+        default=None,
+        pattern="^(manual_override|direct_match|heuristic_match|semantic_match)$",
+    )
+    transformation_note: Optional[str] = None
+
+
+# [/DEF:UpdateExecutionMappingRequest:Class]
+
+
+# [DEF:LaunchDatasetResponse:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Launch result exposing audited run context and SQL Lab redirect target.
+class LaunchDatasetResponse(BaseModel):
+    run_context: DatasetRunContextDto
+    redirect_url: str
+
+
+# [/DEF:LaunchDatasetResponse:Class]
+
+
+# --- Dependency Injection ---
+
+# [DEF:_require_auto_review_flag:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag.
+def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool:
+    with belief_scope("dataset_review.require_auto_review_flag"):
+        if not config_manager.get_config().settings.ff_dataset_auto_review:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="Dataset auto review feature is disabled",
+            )
+        return True
+
+
+# [/DEF:_require_auto_review_flag:Function]
+
+
+# [DEF:_require_clarification_flag:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Guard clarification-specific US2 endpoints behind the configured feature flag.
+def _require_clarification_flag(config_manager=Depends(get_config_manager)) -> bool:
+    with belief_scope("dataset_review.require_clarification_flag"):
+        if not config_manager.get_config().settings.ff_dataset_clarification:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="Dataset clarification feature is disabled",
+            )
+        return True
+
+
+# [/DEF:_require_clarification_flag:Function]
+
+
+# [DEF:_require_execution_flag:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Guard US3 execution endpoints behind the configured feature flag.
+def _require_execution_flag(config_manager=Depends(get_config_manager)) -> bool:
+    with belief_scope("dataset_review.require_execution_flag"):
+        if not config_manager.get_config().settings.ff_dataset_execution:
+            raise HTTPException(
+                status_code=status.HTTP_404_NOT_FOUND,
+                detail="Dataset execution feature is disabled",
+            )
+        return True
+
+
+# [/DEF:_require_execution_flag:Function]
+
+
+# [DEF:_get_repository:Function]
+# @COMPLEXITY: 1
+# @PURPOSE: Build repository dependency.
+def _get_repository(db: Session = Depends(get_db)) -> DatasetReviewSessionRepository:
+    return DatasetReviewSessionRepository(db)
+
+
+# [/DEF:_get_repository:Function]
+
+
+# [DEF:_get_orchestrator:Function]
+# @COMPLEXITY: 1
+# @PURPOSE: Build orchestrator dependency.
+def _get_orchestrator( + repository: DatasetReviewSessionRepository = Depends(_get_repository), + config_manager=Depends(get_config_manager), + task_manager=Depends(get_task_manager), +) -> DatasetReviewOrchestrator: + return DatasetReviewOrchestrator(repository=repository, config_manager=config_manager, task_manager=task_manager) + + +# [/DEF:_get_orchestrator:Function] + + +# [DEF:_get_clarification_engine:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Build clarification engine dependency. +def _get_clarification_engine( + repository: DatasetReviewSessionRepository = Depends(_get_repository), +) -> ClarificationEngine: + return ClarificationEngine(repository=repository) + + +# [/DEF:_get_clarification_engine:Function] + + +# --- Serialization Helpers --- + +# [DEF:_serialize_session_summary:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map session aggregate into stable API summary DTO. +def _serialize_session_summary(session: DatasetReviewSession) -> SessionSummary: + summary = SessionSummary.model_validate(session, from_attributes=True) + summary.session_version = summary.version + return summary + + +# [/DEF:_serialize_session_summary:Function] + + +# [DEF:_serialize_session_detail:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map session aggregate into stable API detail DTO. +def _serialize_session_detail(session: DatasetReviewSession) -> SessionDetail: + detail = SessionDetail.model_validate(session, from_attributes=True) + detail.session_version = detail.version + return detail + + +# [/DEF:_serialize_session_detail:Function] + + +# [DEF:_require_session_version_header:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Read the optimistic-lock session version header. +def _require_session_version_header( + session_version: int = Header(..., alias="X-Session-Version", ge=0), +) -> int: + return session_version + + +# [/DEF:_require_session_version_header:Function] + + +# [DEF:_build_session_version_conflict_http_exception:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Normalize optimistic-lock conflict errors into HTTP 409 responses. +def _build_session_version_conflict_http_exception(exc: DatasetReviewSessionVersionConflictError) -> HTTPException: + return HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail={"error_code": "session_version_conflict", "message": str(exc), "session_id": exc.session_id, "expected_version": exc.expected_version, "actual_version": exc.actual_version}, + ) + + +# [/DEF:_build_session_version_conflict_http_exception:Function] + + +# [DEF:_enforce_session_version:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Convert repository optimistic-lock conflicts into deterministic HTTP 409 responses. +def _enforce_session_version(repository, session, expected_version): + with belief_scope("_enforce_session_version"): + try: + repository.require_session_version(session, expected_version) + except DatasetReviewSessionVersionConflictError as exc: + logger.explore("Dataset review optimistic-lock conflict detected", extra={"session_id": exc.session_id, "expected_version": exc.expected_version, "actual_version": exc.actual_version}) + raise _build_session_version_conflict_http_exception(exc) from exc + return session + + +# [/DEF:_enforce_session_version:Function] + + +# [DEF:_get_owned_session_or_404:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Resolve one session for current user or collaborator scope, returning 404 when inaccessible. 
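+# Note: inaccessible sessions resolve to 404 rather than 403, so the endpoint does not
+# reveal whether a session exists to users outside its owner/collaborator scope.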
+def _get_owned_session_or_404(repository, session_id, current_user): + with belief_scope("_get_owned_session_or_404"): + session = repository.load_session_detail(session_id, current_user.id) + if session is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Session not found") + return session + + +# [/DEF:_get_owned_session_or_404:Function] + + +# [DEF:_require_owner_mutation_scope:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Enforce owner-only mutation scope. +def _require_owner_mutation_scope(session, current_user): + with belief_scope("_require_owner_mutation_scope"): + if session.user_id != current_user.id: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Only the owner can mutate dataset review state") + return session + + +# [/DEF:_require_owner_mutation_scope:Function] + + +# [DEF:_prepare_owned_session_mutation:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Resolve owner-scoped mutation session and enforce optimistic-lock version. +def _prepare_owned_session_mutation(repository, session_id, current_user, expected_version): + with belief_scope("_prepare_owned_session_mutation"): + session = _get_owned_session_or_404(repository, session_id, current_user) + _require_owner_mutation_scope(session, current_user) + return _enforce_session_version(repository, session, expected_version) + + +# [/DEF:_prepare_owned_session_mutation:Function] + + +# [DEF:_commit_owned_session_mutation:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Centralize session version bumping and commit semantics. +def _commit_owned_session_mutation(repository, session, *, refresh_targets=None): + with belief_scope("_commit_owned_session_mutation"): + try: + repository.commit_session_mutation(session, refresh_targets=refresh_targets) + except DatasetReviewSessionVersionConflictError as exc: + raise _build_session_version_conflict_http_exception(exc) from exc + return session + + +# [/DEF:_commit_owned_session_mutation:Function] + + +# [DEF:_record_session_event:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Persist one explicit audit event for an owned mutation endpoint. +def _record_session_event(repository, session, current_user, *, event_type, event_summary, event_details=None): + repository.event_logger.log_for_session(session, actor_user_id=current_user.id, event_type=event_type, event_summary=event_summary, event_details=event_details or {}) + + +# [/DEF:_record_session_event:Function] + + +# [DEF:_serialize_semantic_field:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map one semantic field into stable DTO. +def _serialize_semantic_field(field): + payload = SemanticFieldEntryDto.model_validate(field, from_attributes=True) + session_ref = getattr(field, "session", None) + version_value = getattr(session_ref, "version", None) + payload.session_version = int(version_value or 0) if version_value is not None else None + return payload + + +# [/DEF:_serialize_semantic_field:Function] + + +# [DEF:_serialize_execution_mapping:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map one execution mapping into stable DTO. +def _serialize_execution_mapping(mapping): + payload = ExecutionMappingDto.model_validate(mapping, from_attributes=True) + session_ref = getattr(mapping, "session", None) + version_value = getattr(session_ref, "version", None) + payload.session_version = int(version_value or 0) if version_value is not None else None + return payload + + +# [/DEF:_serialize_execution_mapping:Function] + + +# [DEF:_serialize_preview:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map one preview into stable DTO. 
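+# Note: previews may be serialized without an attached session relationship, in which
+# case session_version falls back to the caller-supplied value (see keyword arg below).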
+def _serialize_preview(preview, *, session_version_fallback=None): + payload = CompiledPreviewDto.model_validate(preview, from_attributes=True) + session_ref = getattr(preview, "session", None) + version_value = getattr(session_ref, "version", None) + if version_value is None: + version_value = session_version_fallback + payload.session_version = int(version_value or 0) if version_value is not None else None + return payload + + +# [/DEF:_serialize_preview:Function] + + +# [DEF:_serialize_run_context:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Map one run context into stable DTO. +def _serialize_run_context(run_context): + payload = DatasetRunContextDto.model_validate(run_context, from_attributes=True) + session_ref = getattr(run_context, "session", None) + version_value = getattr(session_ref, "version", None) + payload.session_version = int(version_value or 0) if version_value is not None else None + return payload + + +# [/DEF:_serialize_run_context:Function] + + +# [DEF:_serialize_clarification_question_payload:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Convert clarification engine payload into API DTO. +def _serialize_clarification_question_payload(payload): + if payload is None: + return None + return ClarificationQuestionDto.model_validate({ + "question_id": payload.question_id, "clarification_session_id": payload.clarification_session_id, + "topic_ref": payload.topic_ref, "question_text": payload.question_text, + "why_it_matters": payload.why_it_matters, "current_guess": payload.current_guess, + "priority": payload.priority, "state": payload.state, "options": payload.options, + "answer": None, "created_at": datetime.utcnow(), "updated_at": datetime.utcnow(), + }) + + +# [/DEF:_serialize_clarification_question_payload:Function] + + +# [DEF:_serialize_clarification_state:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Convert clarification engine state into API response. +def _serialize_clarification_state(state): + return ClarificationStateResponse( + clarification_session=ClarificationSessionSummaryResponse( + clarification_session_id=state.clarification_session.clarification_session_id, + session_id=state.clarification_session.session_id, status=state.clarification_session.status.value, + current_question_id=state.clarification_session.current_question_id, + resolved_count=state.clarification_session.resolved_count, + remaining_count=state.clarification_session.remaining_count, + summary_delta=state.clarification_session.summary_delta, + ), + current_question=_serialize_clarification_question_payload(state.current_question), + ) + + +# [/DEF:_serialize_clarification_state:Function] + + +# [DEF:_serialize_empty_clarification_state:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Return empty clarification payload. +def _serialize_empty_clarification_state(): + return ClarificationStateResponse(clarification_session=None, current_question=None) + + +# [/DEF:_serialize_empty_clarification_state:Function] + + +# [DEF:_get_latest_clarification_session_or_404:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Resolve the latest clarification aggregate or raise. 
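+# Note: "latest" is deterministic; the body below sorts on (started_at,
+# clarification_session_id) descending, so ties on started_at cannot flap between calls.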
+def _get_latest_clarification_session_or_404(session): + if not session.clarification_sessions: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Clarification session not found") + return sorted(session.clarification_sessions, key=lambda item: (item.started_at, item.clarification_session_id), reverse=True)[0] + + +# [/DEF:_get_latest_clarification_session_or_404:Function] + + +# [DEF:_get_owned_mapping_or_404:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Resolve one execution mapping inside one owned session. +def _get_owned_mapping_or_404(session, mapping_id): + for mapping in session.execution_mappings: + if mapping.mapping_id == mapping_id: + return mapping + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Execution mapping not found") + + +# [/DEF:_get_owned_mapping_or_404:Function] + + +# [DEF:_get_owned_field_or_404:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Resolve a semantic field inside one owned session. +def _get_owned_field_or_404(session, field_id): + for field in session.semantic_fields: + if field.field_id == field_id: + return field + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Semantic field not found") + + +# [/DEF:_get_owned_field_or_404:Function] + + +# [DEF:_map_candidate_provenance:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Translate accepted semantic candidate type into stable field provenance. +def _map_candidate_provenance(candidate): + if str(candidate.match_type.value) == "exact": + return FieldProvenance.DICTIONARY_EXACT + if str(candidate.match_type.value) == "reference": + return FieldProvenance.REFERENCE_IMPORTED + if str(candidate.match_type.value) == "generated": + return FieldProvenance.AI_GENERATED + return FieldProvenance.FUZZY_INFERRED + + +# [/DEF:_map_candidate_provenance:Function] + + +# [DEF:_resolve_candidate_source_version:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Resolve the semantic source version for one accepted candidate. +def _resolve_candidate_source_version(field, source_id): + if not source_id: + return None + session = getattr(field, "session", None) + if session is None: + return None + for source in getattr(session, "semantic_sources", []) or []: + if source.source_id == source_id: + return source.source_version + return None + + +# [/DEF:_resolve_candidate_source_version:Function] + + +# [DEF:_update_semantic_field_state:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Apply field-level semantic manual override or candidate acceptance. +# @POST: Manual overrides always set manual provenance plus lock. 
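+# Decision order in the body below: any manual text field present -> manual-override path
+# (locks the field and supersedes all candidates); otherwise a valid candidate_id ->
+# candidate-acceptance path; neither -> HTTP 400.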
+def _update_semantic_field_state(field, request, changed_by): + has_manual_override = any(v is not None for v in [request.verbose_name, request.description, request.display_format]) + selected_candidate = None + if request.candidate_id: + selected_candidate = next((c for c in field.candidates if c.candidate_id == request.candidate_id), None) + if selected_candidate is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Semantic candidate not found") + + if has_manual_override: + field.verbose_name = request.verbose_name + field.description = request.description + field.display_format = request.display_format + field.provenance = FieldProvenance.MANUAL_OVERRIDE + field.source_id = None + field.source_version = None + field.confidence_rank = None + field.is_locked = True + field.has_conflict = False + field.needs_review = False + field.last_changed_by = changed_by + for c in field.candidates: + c.status = CandidateStatus.SUPERSEDED + return field + + if selected_candidate is not None: + field.verbose_name = selected_candidate.proposed_verbose_name + field.description = selected_candidate.proposed_description + field.display_format = selected_candidate.proposed_display_format + field.provenance = _map_candidate_provenance(selected_candidate) + field.source_id = selected_candidate.source_id + field.source_version = _resolve_candidate_source_version(field, selected_candidate.source_id) + field.confidence_rank = selected_candidate.candidate_rank + field.is_locked = bool(request.lock_field or field.is_locked) + field.has_conflict = len(field.candidates) > 1 + field.needs_review = False + field.last_changed_by = changed_by + for c in field.candidates: + c.status = CandidateStatus.ACCEPTED if c.candidate_id == selected_candidate.candidate_id else CandidateStatus.SUPERSEDED + return field + + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Provide candidate_id or at least one manual override field") + + +# [/DEF:_update_semantic_field_state:Function] + + +# [DEF:_build_sql_lab_redirect_url:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Build SQL Lab redirect URL. +def _build_sql_lab_redirect_url(environment_url, sql_lab_session_ref): + base_url = str(environment_url or "").rstrip("/") + session_ref = str(sql_lab_session_ref or "").strip() + if not base_url: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Superset environment URL is not configured") + if not session_ref: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="SQL Lab session reference is missing") + return f"{base_url}/superset/sqllab?queryId={session_ref}" + + +# [/DEF:_build_sql_lab_redirect_url:Function] + + +# [DEF:_build_documentation_export:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Produce session documentation export content. 
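+# Output shape (illustrative): {"storage_ref": "inline://dataset-review/<session_id>/documentation.md",
+# "content": {"markdown": "..."}} for markdown exports, or a structured JSON payload otherwise.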
+def _build_documentation_export(session, export_format):
+    profile = session.profile
+    findings = sorted(session.findings, key=lambda item: (item.severity.value, item.code))
+    if export_format == ArtifactFormat.MARKDOWN:
+        lines = [f"# Dataset Review: {session.dataset_ref}", "", f"- Session ID: {session.session_id}", f"- Environment: {session.environment_id}", f"- Readiness: {session.readiness_state.value}", f"- Recommended action: {session.recommended_action.value}", "", "## Business Summary", profile.business_summary if profile else "No profile summary available.", "", "## Findings"]
+        if findings:
+            for f in findings:
+                lines.append(f"- [{f.severity.value}] {f.title}: {f.message}")
+        else:
+            lines.append("- No findings recorded.")
+        return {"storage_ref": f"inline://dataset-review/{session.session_id}/documentation.md", "content": {"markdown": "\n".join(lines)}}
+    content = {"session": _serialize_session_summary(session).model_dump(mode="json"), "profile": profile and {"dataset_name": profile.dataset_name, "business_summary": profile.business_summary, "confidence_state": profile.confidence_state.value, "dataset_type": profile.dataset_type}, "findings": [{"code": f.code, "severity": f.severity.value, "title": f.title, "message": f.message, "resolution_state": f.resolution_state.value} for f in findings]}
+    return {"storage_ref": f"inline://dataset-review/{session.session_id}/documentation.json", "content": content}
+
+
+# [/DEF:_build_documentation_export:Function]
+
+
+# [DEF:_build_validation_export:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Produce validation-focused export content.
+def _build_validation_export(session, export_format):
+    findings = sorted(session.findings, key=lambda item: (item.severity.value, item.code))
+    if export_format == ArtifactFormat.MARKDOWN:
+        lines = [f"# Validation Report: {session.dataset_ref}", "", f"- Session ID: {session.session_id}", f"- Readiness: {session.readiness_state.value}", "", "## Findings"]
+        if findings:
+            for f in findings:
+                lines.append(f"- `{f.code}` [{f.severity.value}] {f.message}")
+        else:
+            lines.append("- No findings recorded.")
+        return {"storage_ref": f"inline://dataset-review/{session.session_id}/validation.md", "content": {"markdown": "\n".join(lines)}}
+    content = {"session_id": session.session_id, "dataset_ref": session.dataset_ref, "readiness_state": session.readiness_state.value, "findings": [{"finding_id": f.finding_id, "area": f.area.value, "severity": f.severity.value, "code": f.code, "title": f.title, "message": f.message, "resolution_state": f.resolution_state.value} for f in findings]}
+    return {"storage_ref": f"inline://dataset-review/{session.session_id}/validation.json", "content": content}
+
+
+# [/DEF:_build_validation_export:Function]
+
+
+# [/DEF:DatasetReviewDependencies:Module]
diff --git a/backend/src/api/routes/dataset_review_pkg/_routes.py b/backend/src/api/routes/dataset_review_pkg/_routes.py
new file mode 100644
index 00000000..f43becb2
--- /dev/null
+++ b/backend/src/api/routes/dataset_review_pkg/_routes.py
@@ -0,0 +1,923 @@
+# [DEF:DatasetReviewRoutes:Module]
+# @COMPLEXITY: 3
+# @PURPOSE: FastAPI route handlers for the dataset review orchestration API.
+# @LAYER: API
+# @RATIONALE: Extracted from 2484-line monolith to satisfy INV_7 (400-line module limit).
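+#
+# Optimistic-lock protocol (hedged sketch; `client` stands for any HTTP client and the
+# payload is illustrative): every mutating route below requires an X-Session-Version
+# header and answers 409 with error_code "session_version_conflict" on a stale write.
+# A caller can recover by re-reading the version from the conflict detail:
+#
+#   resp = client.patch(f"/api/dataset-orchestration/sessions/{session_id}",
+#                       json={"status": "paused"},
+#                       headers={"X-Session-Version": str(version)})
+#   if resp.status_code == 409:
+#       version = resp.json()["detail"]["actual_version"]  # then re-apply on fresh state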
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, List, Optional, Union, cast
+
+from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
+
+from src.core.logger import belief_scope, logger
+from src.dependencies import get_config_manager, get_current_user, has_permission
+from src.models.auth import User
+from src.models.dataset_review import (
+    ApprovalState,
+    ArtifactFormat,
+    FieldProvenance,
+    MappingMethod,
+    PreviewStatus,
+    ReadinessState,
+    RecommendedAction,
+    SessionStatus,
+)
+from src.schemas.dataset_review import (
+    ClarificationAnswerDto,
+    ClarificationStateResponse as _ClarificationStateResponseSchema,
+    CompiledPreviewDto,
+    ExecutionMappingDto,
+    SemanticFieldEntryDto,
+    SessionSummary,
+    ValidationFindingDto,
+)
+from src.services.dataset_review.clarification_engine import (
+    ClarificationAnswerCommand,
+    ClarificationStateResult,
+)
+from src.services.dataset_review.orchestrator import (
+    LaunchDatasetCommand,
+    PreparePreviewCommand,
+    StartSessionCommand,
+)
+from src.services.dataset_review.repositories.session_repository import (
+    DatasetReviewSessionVersionConflictError,
+)
+from src.api.routes.dataset_review_pkg._dependencies import (
+    ApproveMappingRequest,
+    BatchApproveMappingRequest,
+    BatchApproveSemanticRequest,
+    ClarificationAnswerRequest,
+    ClarificationAnswerResultResponse,
+    ClarificationStateResponse,
+    ExportArtifactResponse,
+    FeedbackRequest,
+    FeedbackResponse,
+    FieldSemanticUpdateRequest,
+    LaunchDatasetResponse,
+    MappingCollectionResponse,
+    PreviewEnqueueResultResponse,
+    SessionCollectionResponse,
+    StartSessionRequest,
+    UpdateExecutionMappingRequest,
+    UpdateSessionRequest,
+    _build_documentation_export,
+    _build_sql_lab_redirect_url,
+    _build_validation_export,
+    _commit_owned_session_mutation,
+    _get_clarification_engine,
+    _get_latest_clarification_session_or_404,
+    _get_owned_field_or_404,
+    _get_owned_mapping_or_404,
+    _get_owned_session_or_404,
+    _get_orchestrator,
+    _get_repository,
+    _prepare_owned_session_mutation,
+    _record_session_event,
+    _require_auto_review_flag,
+    _require_clarification_flag,
+    _require_execution_flag,
+    _require_session_version_header,
+    _serialize_clarification_state,
+    _serialize_empty_clarification_state,
+    _serialize_execution_mapping,
+    _serialize_preview,
+    _serialize_run_context,
+    _serialize_semantic_field,
+    _serialize_session_detail,
+    _serialize_session_summary,
+    _update_semantic_field_state,
+    _build_session_version_conflict_http_exception,
+)
+
+router = APIRouter(prefix="/api/dataset-orchestration", tags=["Dataset Orchestration"])
+
+
+# [DEF:list_sessions:Function]
+# @COMPLEXITY: 3
+# @PURPOSE: List resumable dataset review sessions for the current user.
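+# Note: pagination below is applied in memory over the user's full session list;
+# has_next is derived from the slice bounds rather than a separate count query.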
+@router.get( + "/sessions", + response_model=SessionCollectionResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def list_sessions( + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.list_sessions"): + logger.reason( + "Listing dataset review sessions", + extra={"user_id": current_user.id, "page": page, "page_size": page_size}, + ) + sessions = repository.list_sessions_for_user(current_user.id) + start = (page - 1) * page_size + end = start + page_size + items = [_serialize_session_summary(s) for s in sessions[start:end]] + logger.reflect( + "Session page assembled", + extra={"user_id": current_user.id, "returned": len(items), "total": len(sessions)}, + ) + return SessionCollectionResponse( + items=items, total=len(sessions), page=page, + page_size=page_size, has_next=end < len(sessions), + ) + + +# [/DEF:list_sessions:Function] + + +# [DEF:start_session:Function] +# @COMPLEXITY: 4 +# @PURPOSE: Start a new dataset review session from a Superset link or dataset selection. +@router.post( + "/sessions", + response_model=SessionSummary, + status_code=status.HTTP_201_CREATED, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def start_session( + request: StartSessionRequest, + orchestrator=Depends(_get_orchestrator), + current_user: User = Depends(get_current_user), +): + with belief_scope("start_session"): + logger.reason( + "Starting dataset review session", + extra={"user_id": current_user.id, "environment_id": request.environment_id}, + ) + try: + result = orchestrator.start_session( + StartSessionCommand( + user=current_user, environment_id=request.environment_id, + source_kind=request.source_kind, source_input=request.source_input, + ) + ) + except ValueError as exc: + logger.explore("Session start rejected", extra={"user_id": current_user.id, "error": str(exc)}) + detail = str(exc) + sc = status.HTTP_404_NOT_FOUND if detail == "Environment not found" else status.HTTP_400_BAD_REQUEST + raise HTTPException(status_code=sc, detail=detail) from exc + logger.reflect("Session started", extra={"session_id": result.session.session_id}) + return _serialize_session_summary(result.session) + + +# [/DEF:start_session:Function] + + +# [DEF:get_session_detail:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Return the full accessible dataset review session aggregate. +@router.get( + "/sessions/{session_id}", + response_model=SessionSummary, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def get_session_detail( + session_id: str, + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.get_session_detail"): + session = _get_owned_session_or_404(repository, session_id, current_user) + return _serialize_session_detail(session) + + +# [/DEF:get_session_detail:Function] + + +# [DEF:update_session:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Update resumable lifecycle status for an owned session. 
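+# Lifecycle side effects (per the body below): PAUSED sets recommended_action to
+# RESUME_SESSION; ARCHIVED, CANCELLED, and COMPLETED clear active_task_id.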
+@router.patch( + "/sessions/{session_id}", + response_model=SessionSummary, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def update_session( + session_id: str, + request: UpdateSessionRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("update_session"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + session_record = cast(Any, session) + session_record.status = request.status + if request.status == SessionStatus.PAUSED: + session_record.recommended_action = RecommendedAction.RESUME_SESSION + elif request.status in {SessionStatus.ARCHIVED, SessionStatus.CANCELLED, SessionStatus.COMPLETED}: + session_record.active_task_id = None + _commit_owned_session_mutation(repository, session) + _record_session_event( + repository, session, current_user, + event_type="session_status_updated", + event_summary="Dataset review session lifecycle updated", + event_details={"status": session_record.status.value, "version": session_record.version}, + ) + return _serialize_session_summary(session) + + +# [/DEF:update_session:Function] + + +# [DEF:delete_session:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Archive or hard-delete a session owned by the current user. +@router.delete( + "/sessions/{session_id}", + status_code=status.HTTP_204_NO_CONTENT, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def delete_session( + session_id: str, + hard_delete: bool = Query(False), + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("delete_session"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + if hard_delete: + _record_session_event(repository, session, current_user, event_type="session_deleted", event_summary="Session hard-deleted", event_details={"hard_delete": True}) + repository.db.delete(session) + repository.db.commit() + return Response(status_code=status.HTTP_204_NO_CONTENT) + session_record = cast(Any, session) + session_record.status = SessionStatus.ARCHIVED + session_record.active_task_id = None + _commit_owned_session_mutation(repository, session) + _record_session_event(repository, session, current_user, event_type="session_archived", event_summary="Session archived", event_details={"hard_delete": False, "version": session_record.version}) + return Response(status_code=status.HTTP_204_NO_CONTENT) + + +# [/DEF:delete_session:Function] + + +# [DEF:export_documentation:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Export documentation output for the current session. 
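+# Example (illustrative): GET /api/dataset-orchestration/sessions/{session_id}/exports/documentation?format=markdown
+# returns an inline artifact; only json and markdown formats are accepted.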
+@router.get( + "/sessions/{session_id}/exports/documentation", + response_model=ExportArtifactResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def export_documentation( + session_id: str, + format: ArtifactFormat = Query(ArtifactFormat.JSON), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("export_documentation"): + if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported") + session = _get_owned_session_or_404(repository, session_id, current_user) + payload = _build_documentation_export(session, format) + return ExportArtifactResponse( + artifact_id=f"documentation-{session.session_id}-{format.value}", + session_id=session.session_id, artifact_type="documentation", + format=format.value, storage_ref=payload["storage_ref"], + created_by_user_id=current_user.id, content=payload["content"], + ) + + +# [/DEF:export_documentation:Function] + + +# [DEF:export_validation:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Export validation findings for the current session. +@router.get( + "/sessions/{session_id}/exports/validation", + response_model=ExportArtifactResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def export_validation( + session_id: str, + format: ArtifactFormat = Query(ArtifactFormat.JSON), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("export_validation"): + if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported") + session = _get_owned_session_or_404(repository, session_id, current_user) + payload = _build_validation_export(session, format) + return ExportArtifactResponse( + artifact_id=f"validation-{session.session_id}-{format.value}", + session_id=session.session_id, artifact_type="validation_report", + format=format.value, storage_ref=payload["storage_ref"], + created_by_user_id=current_user.id, content=payload["content"], + ) + + +# [/DEF:export_validation:Function] + + +# [DEF:get_clarification_state:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Return the current clarification session summary and active question payload. 
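+# Note: when no clarification session exists yet, this returns an empty state payload
+# rather than 404 (see the guard in the body below).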
+@router.get( + "/sessions/{session_id}/clarification", + response_model=ClarificationStateResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_clarification_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def get_clarification_state( + session_id: str, + repository=Depends(_get_repository), + clarification_engine=Depends(_get_clarification_engine), + current_user: User = Depends(get_current_user), +): + with belief_scope("get_clarification_state"): + session = _get_owned_session_or_404(repository, session_id, current_user) + if not session.clarification_sessions: + return _serialize_empty_clarification_state() + cs = _get_latest_clarification_session_or_404(session) + question = clarification_engine.build_question_payload(session) + return _serialize_clarification_state( + ClarificationStateResult(clarification_session=cs, current_question=question, session=session, changed_findings=[]) + ) + + +# [/DEF:get_clarification_state:Function] + + +# [DEF:resume_clarification:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Resume clarification mode on the highest-priority unresolved question. +@router.post( + "/sessions/{session_id}/clarification/resume", + response_model=ClarificationStateResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_clarification_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def resume_clarification( + session_id: str, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + clarification_engine=Depends(_get_clarification_engine), + current_user: User = Depends(get_current_user), +): + with belief_scope("resume_clarification"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + cs = _get_latest_clarification_session_or_404(session) + question = clarification_engine.build_question_payload(session) + return _serialize_clarification_state( + ClarificationStateResult(clarification_session=cs, current_question=question, session=session, changed_findings=[]) + ) + + +# [/DEF:resume_clarification:Function] + + +# [DEF:record_clarification_answer:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist one clarification answer before advancing the active pointer. 
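+# Example request body (field names per ClarificationAnswerRequest; values illustrative,
+# answer_kind must be a valid AnswerKind member):
+#   {"question_id": "q-1", "answer_kind": "<AnswerKind value>", "answer_value": "yes"}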
+@router.post( + "/sessions/{session_id}/clarification/answers", + response_model=ClarificationAnswerResultResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_clarification_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def record_clarification_answer( + session_id: str, + request: ClarificationAnswerRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + clarification_engine=Depends(_get_clarification_engine), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.record_clarification_answer"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + try: + result = clarification_engine.record_answer( + ClarificationAnswerCommand( + session=session, question_id=request.question_id, + answer_kind=request.answer_kind, answer_value=request.answer_value, + user=current_user, + ) + ) + except ValueError as exc: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc + return ClarificationAnswerResultResponse( + clarification_state=_serialize_clarification_state(result), + session=_serialize_session_summary(result.session), + changed_findings=[ValidationFindingDto.model_validate(f, from_attributes=True) for f in result.changed_findings], + ) + + +# [/DEF:record_clarification_answer:Function] + + +# [DEF:update_field_semantic:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Apply one field-level semantic candidate decision or manual override. +@router.patch( + "/sessions/{session_id}/fields/{field_id}/semantic", + response_model=SemanticFieldEntryDto, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def update_field_semantic( + session_id: str, field_id: str, request: FieldSemanticUpdateRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.update_field_semantic"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + field = _get_owned_field_or_404(session, field_id) + _update_semantic_field_state(field, request, changed_by="user") + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=[field]) + _record_session_event(repository, session, current_user, event_type="semantic_field_updated", event_summary="Semantic field decision persisted", event_details={"field_id": field.field_id, "version": sr.version}) + return _serialize_semantic_field(field) + + +# [/DEF:update_field_semantic:Function] + + +# [DEF:lock_field_semantic:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Lock one semantic field against later automatic overwrite. 
+@router.post( + "/sessions/{session_id}/fields/{field_id}/lock", + response_model=SemanticFieldEntryDto, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def lock_field_semantic( + session_id: str, field_id: str, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.lock_field_semantic"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + field = _get_owned_field_or_404(session, field_id) + field.is_locked = True + field.last_changed_by = "user" + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=[field]) + _record_session_event(repository, session, current_user, event_type="semantic_field_locked", event_summary="Semantic field lock persisted", event_details={"field_id": field.field_id, "version": sr.version}) + return _serialize_semantic_field(field) + + +# [/DEF:lock_field_semantic:Function] + + +# [DEF:unlock_field_semantic:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Unlock one semantic field so later automated candidate application may replace it. +@router.post( + "/sessions/{session_id}/fields/{field_id}/unlock", + response_model=SemanticFieldEntryDto, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def unlock_field_semantic( + session_id: str, field_id: str, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.unlock_field_semantic"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + field = _get_owned_field_or_404(session, field_id) + field.is_locked = False + field.last_changed_by = "user" + if field.provenance == FieldProvenance.MANUAL_OVERRIDE: + field.provenance = FieldProvenance.UNRESOLVED + field.needs_review = True + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=[field]) + _record_session_event(repository, session, current_user, event_type="semantic_field_unlocked", event_summary="Semantic field unlock persisted", event_details={"field_id": field.field_id, "version": sr.version}) + return _serialize_semantic_field(field) + + +# [/DEF:unlock_field_semantic:Function] + + +# [DEF:approve_batch_semantic_fields:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Approve multiple semantic candidate decisions in one batch. 
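+# Example request body (field names per BatchApproveSemanticRequest; values illustrative):
+#   {"items": [{"field_id": "f-1", "candidate_id": "c-1", "lock_field": true}]}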
+@router.post( + "/sessions/{session_id}/fields/semantic/approve-batch", + response_model=List[SemanticFieldEntryDto], + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def approve_batch_semantic_fields( + session_id: str, request: BatchApproveSemanticRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.approve_batch_semantic_fields"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + updated = [] + for item in request.items: + field = _get_owned_field_or_404(session, item.field_id) + _update_semantic_field_state(field, FieldSemanticUpdateRequest(candidate_id=item.candidate_id, lock_field=item.lock_field), changed_by="user") + updated.append(field) + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=list(updated)) + _record_session_event(repository, session, current_user, event_type="semantic_fields_batch_approved", event_summary="Batch semantic approval persisted", event_details={"count": len(updated), "version": sr.version}) + return [_serialize_semantic_field(f) for f in updated] + + +# [/DEF:approve_batch_semantic_fields:Function] + + +# [DEF:list_execution_mappings:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Return the current mapping-review set for one accessible session. +@router.get( + "/sessions/{session_id}/mappings", + response_model=MappingCollectionResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:session", "READ")), + ], +) +async def list_execution_mappings( + session_id: str, + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.list_execution_mappings"): + session = _get_owned_session_or_404(repository, session_id, current_user) + return MappingCollectionResponse(items=[_serialize_execution_mapping(m) for m in session.execution_mappings]) + + +# [/DEF:list_execution_mappings:Function] + + +# [DEF:update_execution_mapping:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist one owner-authorized execution-mapping effective value override. 
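+# Side effects (per the body below): a successful override auto-approves the mapping,
+# points recommended_action at GENERATE_SQL_PREVIEW, and marks any READY preview STALE.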
+@router.patch( + "/sessions/{session_id}/mappings/{mapping_id}", + response_model=ExecutionMappingDto, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def update_execution_mapping( + session_id: str, mapping_id: str, request: UpdateExecutionMappingRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.update_execution_mapping"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + mapping = _get_owned_mapping_or_404(session, mapping_id) + if request.effective_value is None: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="effective_value is required") + mapping.effective_value = request.effective_value + mapping.mapping_method = MappingMethod(request.mapping_method or MappingMethod.MANUAL_OVERRIDE.value) + mapping.transformation_note = request.transformation_note + mapping.approval_state = ApprovalState.APPROVED + mapping.approved_by_user_id = current_user.id + mapping.approved_at = datetime.utcnow() + session.last_activity_at = datetime.utcnow() + session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW + if session.readiness_state in {ReadinessState.MAPPING_REVIEW_NEEDED, ReadinessState.COMPILED_PREVIEW_READY, ReadinessState.RUN_READY, ReadinessState.RUN_IN_PROGRESS}: + session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY + for preview in session.previews: + if preview.preview_status == PreviewStatus.READY: + preview.preview_status = PreviewStatus.STALE + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=[mapping]) + _record_session_event(repository, session, current_user, event_type="execution_mapping_updated", event_summary="Mapping override persisted", event_details={"mapping_id": mapping.mapping_id, "version": sr.version}) + return _serialize_execution_mapping(mapping) + + +# [/DEF:update_execution_mapping:Function] + + +# [DEF:approve_execution_mapping:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Explicitly approve a warning-sensitive mapping transformation. 
+@router.post( + "/sessions/{session_id}/mappings/{mapping_id}/approve", + response_model=ExecutionMappingDto, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def approve_execution_mapping( + session_id: str, mapping_id: str, request: ApproveMappingRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.approve_execution_mapping"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + mapping = _get_owned_mapping_or_404(session, mapping_id) + mapping.approval_state = ApprovalState.APPROVED + mapping.approved_by_user_id = current_user.id + mapping.approved_at = datetime.utcnow() + if request.approval_note: + mapping.transformation_note = request.approval_note + session.last_activity_at = datetime.utcnow() + if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED: + session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=[mapping]) + _record_session_event(repository, session, current_user, event_type="execution_mapping_approved", event_summary="Mapping approval persisted", event_details={"mapping_id": mapping.mapping_id, "version": sr.version}) + return _serialize_execution_mapping(mapping) + + +# [/DEF:approve_execution_mapping:Function] + + +# [DEF:approve_batch_execution_mappings:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Approve multiple warning-sensitive execution mappings in one batch. +@router.post( + "/sessions/{session_id}/mappings/approve-batch", + response_model=List[ExecutionMappingDto], + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def approve_batch_execution_mappings( + session_id: str, request: BatchApproveMappingRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.approve_batch_execution_mappings"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + updated = [] + for mid in list(dict.fromkeys(request.mapping_ids)): + mapping = _get_owned_mapping_or_404(session, mid) + mapping.approval_state = ApprovalState.APPROVED + mapping.approved_by_user_id = current_user.id + mapping.approved_at = datetime.utcnow() + if request.approval_note: + mapping.transformation_note = request.approval_note + updated.append(mapping) + session.last_activity_at = datetime.utcnow() + if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED: + session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session, refresh_targets=list(updated)) + _record_session_event(repository, session, current_user, event_type="execution_mappings_batch_approved", event_summary="Batch mapping approval persisted", event_details={"count": len(updated), "version": sr.version}) + return [_serialize_execution_mapping(m) for m in updated] + + +# [/DEF:approve_batch_execution_mappings:Function] + + +# [DEF:trigger_preview_generation:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Trigger Superset-side preview 
compilation for the current owned execution context. +@router.post( + "/sessions/{session_id}/preview", + response_model=Union[CompiledPreviewDto, PreviewEnqueueResultResponse], + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def trigger_preview_generation( + session_id: str, response: Response, + orchestrator=Depends(_get_orchestrator), + repository=Depends(_get_repository), + session_version: int = Depends(_require_session_version_header), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.trigger_preview_generation"): + _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + try: + result = orchestrator.prepare_launch_preview( + PreparePreviewCommand(user=current_user, session_id=session_id, expected_version=session_version) + ) + except DatasetReviewSessionVersionConflictError as exc: + raise _build_session_version_conflict_http_exception(exc) from exc + except ValueError as exc: + detail = str(exc) + sc = status.HTTP_404_NOT_FOUND if detail in {"Session not found", "Environment not found"} else status.HTTP_409_CONFLICT if detail.startswith("Preview blocked:") else status.HTTP_400_BAD_REQUEST + raise HTTPException(status_code=sc, detail=detail) from exc + if result.preview.preview_status == PreviewStatus.PENDING: + response.status_code = status.HTTP_202_ACCEPTED + return PreviewEnqueueResultResponse( + session_id=result.session.session_id, + session_version=int(getattr(result.session, "version", 0) or 0), + preview_status=result.preview.preview_status.value, task_id=None, + ) + response.status_code = status.HTTP_200_OK + return _serialize_preview(result.preview, session_version_fallback=int(getattr(result.session, "version", 0) or 0)) + + +# [/DEF:trigger_preview_generation:Function] + + +# [DEF:launch_dataset:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Execute the current owned session launch handoff and return audited SQL Lab run context. 
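+# Returns 201 with the audited run context plus a SQL Lab redirect URL; optimistic-lock
+# conflicts and "Launch blocked:" orchestrator errors both surface as HTTP 409.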
+@router.post( + "/sessions/{session_id}/launch", + response_model=LaunchDatasetResponse, + status_code=status.HTTP_201_CREATED, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(_require_execution_flag), + Depends(has_permission("dataset:execution:launch", "EXECUTE")), + ], +) +async def launch_dataset( + session_id: str, + orchestrator=Depends(_get_orchestrator), + repository=Depends(_get_repository), + session_version: int = Depends(_require_session_version_header), + config_manager=Depends(get_config_manager), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.launch_dataset"): + _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + try: + result = orchestrator.launch_dataset( + LaunchDatasetCommand(user=current_user, session_id=session_id, expected_version=session_version) + ) + except DatasetReviewSessionVersionConflictError as exc: + raise _build_session_version_conflict_http_exception(exc) from exc + except ValueError as exc: + detail = str(exc) + sc = status.HTTP_404_NOT_FOUND if detail in {"Session not found", "Environment not found"} else status.HTTP_409_CONFLICT if detail.startswith("Launch blocked:") else status.HTTP_400_BAD_REQUEST + raise HTTPException(status_code=sc, detail=detail) from exc + environment = config_manager.get_environment(result.session.environment_id) + env_url = getattr(environment, "url", "") if environment is not None else "" + return LaunchDatasetResponse( + run_context=_serialize_run_context(result.run_context), + redirect_url=_build_sql_lab_redirect_url(environment_url=env_url, sql_lab_session_ref=result.run_context.sql_lab_session_ref), + ) + + +# [/DEF:launch_dataset:Function] + + +# [DEF:record_field_feedback:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist thumbs up/down feedback for AI-assisted semantic field content. +@router.post( + "/sessions/{session_id}/fields/{field_id}/feedback", + response_model=FeedbackResponse, + dependencies=[ + Depends(_require_auto_review_flag), + Depends(has_permission("dataset:session", "MANAGE")), + ], +) +async def record_field_feedback( + session_id: str, field_id: str, request: FeedbackRequest, + session_version: int = Depends(_require_session_version_header), + repository=Depends(_get_repository), + current_user: User = Depends(get_current_user), +): + with belief_scope("dataset_review.record_field_feedback"): + session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version) + field = _get_owned_field_or_404(session, field_id) + field.user_feedback = request.feedback + sr = cast(Any, session) + _commit_owned_session_mutation(repository, session) + _record_session_event(repository, session, current_user, event_type="semantic_field_feedback_recorded", event_summary="Feedback persisted", event_details={"field_id": field.field_id, "feedback": request.feedback, "version": sr.version}) + return FeedbackResponse(target_id=field.field_id, feedback=request.feedback) + + +# [/DEF:record_field_feedback:Function] + + +# [DEF:record_clarification_feedback:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content. 
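+# Example request body (illustrative): {"feedback": "up"}; only "up" or "down" pass the
+# FeedbackRequest pattern.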
+@router.post(
+    "/sessions/{session_id}/clarification/questions/{question_id}/feedback",
+    response_model=FeedbackResponse,
+    dependencies=[
+        Depends(_require_auto_review_flag),
+        Depends(_require_clarification_flag),
+        Depends(has_permission("dataset:session", "MANAGE")),
+    ],
+)
+async def record_clarification_feedback(
+    session_id: str, question_id: str, request: FeedbackRequest,
+    session_version: int = Depends(_require_session_version_header),
+    repository=Depends(_get_repository),
+    current_user: User = Depends(get_current_user),
+):
+    with belief_scope("dataset_review.record_clarification_feedback"):
+        session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
+        cs = _get_latest_clarification_session_or_404(session)
+        question = next((q for q in cs.questions if q.question_id == question_id), None)
+        if question is None:
+            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Clarification question not found")
+        if question.answer is None:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Clarification answer not found")
+        question.answer.user_feedback = request.feedback
+        sr = cast(Any, session)
+        _commit_owned_session_mutation(repository, session)
+        _record_session_event(repository, session, current_user, event_type="clarification_feedback_recorded", event_summary="Feedback persisted", event_details={"question_id": question.question_id, "feedback": request.feedback, "version": sr.version})
+        return FeedbackResponse(target_id=question.question_id, feedback=request.feedback)
+
+
+# [/DEF:record_clarification_feedback:Function]
+
+
+# [/DEF:DatasetReviewRoutes:Module]
diff --git a/backend/src/core/async_superset_client.py b/backend/src/core/async_superset_client.py
index 6ed3adc5..17893612 100644
--- a/backend/src/core/async_superset_client.py
+++ b/backend/src/core/async_superset_client.py
@@ -1,16 +1,8 @@
 # [DEF:AsyncSupersetClientModule:Module]
-#
-# @COMPLEXITY: 5
+# @COMPLEXITY: 3
 # @SEMANTICS: superset, async, client, httpx, dashboards, datasets
-# @PURPOSE: Async Superset client for dashboard hot-path requests without blocking FastAPI event loop.
+# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
 # @LAYER: Core
-# @PRE: Environment configuration is valid and Superset endpoint is reachable.
-# @POST: Provides non-blocking API access to Superset resources.
-# @SIDE_EFFECT: Performs network I/O via httpx.
-# @DATA_CONTRACT: Input[Environment] -> Model[dashboard, chart, dataset]
-# @RELATION: [DEPENDS_ON] ->[SupersetClientModule]
-# @RELATION: [DEPENDS_ON] ->[AsyncAPIClient]
-# @INVARIANT: Async dashboard operations reuse shared auth cache and avoid sync requests in async routes.
diff --git a/backend/src/models/dataset_review.py b/backend/src/models/dataset_review.py
index ac6286f6..873cbb85 100644
--- a/backend/src/models/dataset_review.py
+++ b/backend/src/models/dataset_review.py
@@ -1,984 +1,81 @@
 # [DEF:DatasetReviewModels:Module]
-# @COMPLEXITY: 3
+# @COMPLEXITY: 2
 # @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy
-# @PURPOSE: SQLAlchemy models for the dataset review orchestration flow.
-# @LAYER: Domain
-# @RELATION: DEPENDS_ON -> [AuthModels]
-# @RELATION: DEPENDS_ON -> [MappingModels]
-#
-# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user.
+# @PURPOSE: Thin facade re-exporting all dataset review domain models from the decomposed sub-package.
+# @LAYER: Domain
+# @RELATION: EXPORTS -> [DatasetReviewEnums:Module]
+# @RELATION: EXPORTS -> [DatasetReviewSessionModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewProfileModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewFindingModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewSemanticModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewFilterModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewMappingModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewClarificationModels:Module]
+# @RELATION: EXPORTS -> [DatasetReviewExecutionModels:Module]
+# @INVARIANT: All public model classes and enums remain importable from `src.models.dataset_review` without changes.
+# @RATIONALE: Original 984-line monolith violated INV_7 (400-line module limit). Decomposed into domain-focused sub-modules while preserving backward-compatible import paths.
+# @REJECTED: Keeping all models in a single file because it exceeded the fractal limit by 2.5x and accumulated structural erosion risk.
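The facade is the backward-compatibility guarantee of this refactor: every class and enum that used to live in the monolith must still resolve through the old import path. A quick sanity sketch of that invariant, assuming the backend package is importable as `src` (for example when run from backend/):

    # Both import paths should yield the very same class objects, not copies.
    from src.models.dataset_review import SessionStatus, DatasetReviewSession
    from src.models.dataset_review_pkg._enums import SessionStatus as PkgSessionStatus

    assert SessionStatus is PkgSessionStatus
    assert SessionStatus.ACTIVE.value == "active"  # str-based enums survive the move
    assert DatasetReviewSession.__tablename__ == "dataset_review_sessions"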
-# [SECTION: IMPORTS] -import uuid -import enum -from datetime import datetime -from typing import List, Optional -from sqlalchemy import ( - Column, - String, - Integer, - Boolean, - DateTime, - ForeignKey, - Text, - JSON, - Float, - Enum as SQLEnum, - Table, +from src.models.dataset_review_pkg._enums import ( # noqa: F401 + SessionStatus, + SessionPhase, + ReadinessState, + RecommendedAction, + SessionCollaboratorRole, + BusinessSummarySource, + ConfidenceState, + FindingArea, + FindingSeverity, + ResolutionState, + SemanticSourceType, + TrustLevel, + SemanticSourceStatus, + FieldKind, + FieldProvenance, + CandidateMatchType, + CandidateStatus, + FilterSource, + FilterConfidenceState, + FilterRecoveryStatus, + VariableKind, + MappingStatus, + MappingMethod, + MappingWarningLevel, + ApprovalState, + ClarificationStatus, + QuestionState, + AnswerKind, + PreviewStatus, + LaunchStatus, + ArtifactType, + ArtifactFormat, +) +from src.models.dataset_review_pkg._session_models import ( # noqa: F401 + SessionCollaborator, + DatasetReviewSession, +) +from src.models.dataset_review_pkg._profile_models import DatasetProfile # noqa: F401 +from src.models.dataset_review_pkg._finding_models import ValidationFinding # noqa: F401 +from src.models.dataset_review_pkg._semantic_models import ( # noqa: F401 + SemanticSource, + SemanticFieldEntry, + SemanticCandidate, +) +from src.models.dataset_review_pkg._filter_models import ( # noqa: F401 + ImportedFilter, + TemplateVariable, +) +from src.models.dataset_review_pkg._mapping_models import ExecutionMapping # noqa: F401 +from src.models.dataset_review_pkg._clarification_models import ( # noqa: F401 + ClarificationSession, + ClarificationQuestion, + ClarificationOption, + ClarificationAnswer, +) +from src.models.dataset_review_pkg._execution_models import ( # noqa: F401 + CompiledPreview, + DatasetRunContext, + SessionEvent, + ExportArtifact, ) -from sqlalchemy.orm import relationship -from .mapping import Base -# [/SECTION] - - -# [DEF:SessionStatus:Class] -class SessionStatus(str, enum.Enum): - ACTIVE = "active" - PAUSED = "paused" - COMPLETED = "completed" - ARCHIVED = "archived" - CANCELLED = "cancelled" - - -# [/DEF:SessionStatus:Class] - - -# [DEF:SessionPhase:Class] -class SessionPhase(str, enum.Enum): - INTAKE = "intake" - RECOVERY = "recovery" - REVIEW = "review" - SEMANTIC_REVIEW = "semantic_review" - CLARIFICATION = "clarification" - MAPPING_REVIEW = "mapping_review" - PREVIEW = "preview" - LAUNCH = "launch" - POST_RUN = "post_run" - - -# [/DEF:SessionPhase:Class] - - -# [DEF:ReadinessState:Class] -class ReadinessState(str, enum.Enum): - EMPTY = "empty" - IMPORTING = "importing" - REVIEW_READY = "review_ready" - SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed" - CLARIFICATION_NEEDED = "clarification_needed" - CLARIFICATION_ACTIVE = "clarification_active" - MAPPING_REVIEW_NEEDED = "mapping_review_needed" - COMPILED_PREVIEW_READY = "compiled_preview_ready" - PARTIALLY_READY = "partially_ready" - RUN_READY = "run_ready" - RUN_IN_PROGRESS = "run_in_progress" - COMPLETED = "completed" - RECOVERY_REQUIRED = "recovery_required" - - -# [/DEF:ReadinessState:Class] - - -# [DEF:RecommendedAction:Class] -class RecommendedAction(str, enum.Enum): - IMPORT_FROM_SUPERSET = "import_from_superset" - REVIEW_DOCUMENTATION = "review_documentation" - APPLY_SEMANTIC_SOURCE = "apply_semantic_source" - START_CLARIFICATION = "start_clarification" - ANSWER_NEXT_QUESTION = "answer_next_question" - APPROVE_MAPPING = "approve_mapping" - GENERATE_SQL_PREVIEW = 
"generate_sql_preview" - COMPLETE_REQUIRED_VALUES = "complete_required_values" - LAUNCH_DATASET = "launch_dataset" - RESUME_SESSION = "resume_session" - EXPORT_OUTPUTS = "export_outputs" - - -# [/DEF:RecommendedAction:Class] - - -# [DEF:SessionCollaboratorRole:Class] -class SessionCollaboratorRole(str, enum.Enum): - VIEWER = "viewer" - REVIEWER = "reviewer" - APPROVER = "approver" - - -# [/DEF:SessionCollaboratorRole:Class] - - -# [DEF:SessionCollaborator:Class] -class SessionCollaborator(Base): - __tablename__ = "session_collaborators" - - id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - user_id = Column(String, ForeignKey("users.id"), nullable=False) - role = Column(SQLEnum(SessionCollaboratorRole), nullable=False) - added_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="collaborators") - user = relationship("User") - - -# [/DEF:SessionCollaborator:Class] - - -# [DEF:DatasetReviewSession:Class] -class DatasetReviewSession(Base): - __tablename__ = "dataset_review_sessions" - - session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - user_id = Column(String, ForeignKey("users.id"), nullable=False) - environment_id = Column(String, ForeignKey("environments.id"), nullable=False) - source_kind = Column(String, nullable=False) # superset_link, dataset_selection - source_input = Column(String, nullable=False) - dataset_ref = Column(String, nullable=False) - dataset_id = Column(Integer, nullable=True) - dashboard_id = Column(Integer, nullable=True) - readiness_state = Column( - SQLEnum(ReadinessState), nullable=False, default=ReadinessState.EMPTY - ) - recommended_action = Column( - SQLEnum(RecommendedAction), - nullable=False, - default=RecommendedAction.IMPORT_FROM_SUPERSET, - ) - version = Column(Integer, nullable=False, default=0) - __mapper_args__ = {"version_id_col": version, "version_id_generator": False} - status = Column( - SQLEnum(SessionStatus), nullable=False, default=SessionStatus.ACTIVE - ) - current_phase = Column( - SQLEnum(SessionPhase), nullable=False, default=SessionPhase.INTAKE - ) - active_task_id = Column(String, nullable=True) - last_preview_id = Column(String, nullable=True) - last_run_context_id = Column(String, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - last_activity_at = Column(DateTime, default=datetime.utcnow, nullable=False) - closed_at = Column(DateTime, nullable=True) - - owner = relationship("User") - collaborators = relationship( - "SessionCollaborator", back_populates="session", cascade="all, delete-orphan" - ) - profile = relationship( - "DatasetProfile", - back_populates="session", - uselist=False, - cascade="all, delete-orphan", - ) - findings = relationship( - "ValidationFinding", back_populates="session", cascade="all, delete-orphan" - ) - semantic_sources = relationship( - "SemanticSource", back_populates="session", cascade="all, delete-orphan" - ) - semantic_fields = relationship( - "SemanticFieldEntry", back_populates="session", cascade="all, delete-orphan" - ) - imported_filters = relationship( - "ImportedFilter", back_populates="session", cascade="all, delete-orphan" - ) - template_variables = relationship( - "TemplateVariable", back_populates="session", cascade="all, 
delete-orphan" - ) - execution_mappings = relationship( - "ExecutionMapping", back_populates="session", cascade="all, delete-orphan" - ) - clarification_sessions = relationship( - "ClarificationSession", back_populates="session", cascade="all, delete-orphan" - ) - previews = relationship( - "CompiledPreview", back_populates="session", cascade="all, delete-orphan" - ) - run_contexts = relationship( - "DatasetRunContext", back_populates="session", cascade="all, delete-orphan" - ) - export_artifacts = relationship( - "ExportArtifact", back_populates="session", cascade="all, delete-orphan" - ) - events = relationship( - "SessionEvent", back_populates="session", cascade="all, delete-orphan" - ) - - -# [/DEF:DatasetReviewSession:Class] - - -# [DEF:BusinessSummarySource:Class] -class BusinessSummarySource(str, enum.Enum): - CONFIRMED = "confirmed" - IMPORTED = "imported" - INFERRED = "inferred" - AI_DRAFT = "ai_draft" - MANUAL_OVERRIDE = "manual_override" - - -# [/DEF:BusinessSummarySource:Class] - - -# [DEF:ConfidenceState:Class] -class ConfidenceState(str, enum.Enum): - CONFIRMED = "confirmed" - MOSTLY_CONFIRMED = "mostly_confirmed" - MIXED = "mixed" - LOW_CONFIDENCE = "low_confidence" - UNRESOLVED = "unresolved" - - -# [/DEF:ConfidenceState:Class] - - -# [DEF:DatasetProfile:Class] -class DatasetProfile(Base): - __tablename__ = "dataset_profiles" - - profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, - ForeignKey("dataset_review_sessions.session_id"), - nullable=False, - unique=True, - ) - dataset_name = Column(String, nullable=False) - schema_name = Column(String, nullable=True) - database_name = Column(String, nullable=True) - business_summary = Column(Text, nullable=False) - business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False) - description = Column(Text, nullable=True) - dataset_type = Column(String, nullable=True) # table, virtual, sqllab_view, unknown - is_sqllab_view = Column(Boolean, nullable=False, default=False) - completeness_score = Column(Float, nullable=True) - confidence_state = Column(SQLEnum(ConfidenceState), nullable=False) - has_blocking_findings = Column(Boolean, nullable=False, default=False) - has_warning_findings = Column(Boolean, nullable=False, default=False) - manual_summary_locked = Column(Boolean, nullable=False, default=False) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - session = relationship("DatasetReviewSession", back_populates="profile") - - -# [/DEF:DatasetProfile:Class] - - -# [DEF:FindingArea:Class] -class FindingArea(str, enum.Enum): - SOURCE_INTAKE = "source_intake" - DATASET_PROFILE = "dataset_profile" - SEMANTIC_ENRICHMENT = "semantic_enrichment" - CLARIFICATION = "clarification" - FILTER_RECOVERY = "filter_recovery" - TEMPLATE_MAPPING = "template_mapping" - COMPILED_PREVIEW = "compiled_preview" - LAUNCH = "launch" - AUDIT = "audit" - - -# [/DEF:FindingArea:Class] - - -# [DEF:FindingSeverity:Class] -class FindingSeverity(str, enum.Enum): - BLOCKING = "blocking" - WARNING = "warning" - INFORMATIONAL = "informational" - - -# [/DEF:FindingSeverity:Class] - - -# [DEF:ResolutionState:Class] -class ResolutionState(str, enum.Enum): - OPEN = "open" - RESOLVED = "resolved" - APPROVED = "approved" - SKIPPED = "skipped" - DEFERRED = "deferred" - EXPERT_REVIEW = "expert_review" - - -# [/DEF:ResolutionState:Class] - - -# 
[DEF:ValidationFinding:Class] -class ValidationFinding(Base): - __tablename__ = "validation_findings" - - finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - area = Column(SQLEnum(FindingArea), nullable=False) - severity = Column(SQLEnum(FindingSeverity), nullable=False) - code = Column(String, nullable=False) - title = Column(String, nullable=False) - message = Column(Text, nullable=False) - resolution_state = Column( - SQLEnum(ResolutionState), nullable=False, default=ResolutionState.OPEN - ) - resolution_note = Column(Text, nullable=True) - caused_by_ref = Column(String, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - resolved_at = Column(DateTime, nullable=True) - - session = relationship("DatasetReviewSession", back_populates="findings") - - -# [/DEF:ValidationFinding:Class] - - -# [DEF:SemanticSourceType:Class] -class SemanticSourceType(str, enum.Enum): - UPLOADED_FILE = "uploaded_file" - CONNECTED_DICTIONARY = "connected_dictionary" - REFERENCE_DATASET = "reference_dataset" - NEIGHBOR_DATASET = "neighbor_dataset" - AI_GENERATED = "ai_generated" - - -# [/DEF:SemanticSourceType:Class] - - -# [DEF:TrustLevel:Class] -class TrustLevel(str, enum.Enum): - TRUSTED = "trusted" - RECOMMENDED = "recommended" - CANDIDATE = "candidate" - GENERATED = "generated" - - -# [/DEF:TrustLevel:Class] - - -# [DEF:SemanticSourceStatus:Class] -class SemanticSourceStatus(str, enum.Enum): - AVAILABLE = "available" - SELECTED = "selected" - APPLIED = "applied" - REJECTED = "rejected" - PARTIAL = "partial" - FAILED = "failed" - - -# [/DEF:SemanticSourceStatus:Class] - - -# [DEF:SemanticSource:Class] -class SemanticSource(Base): - __tablename__ = "semantic_sources" - - source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - source_type = Column(SQLEnum(SemanticSourceType), nullable=False) - source_ref = Column(String, nullable=False) - source_version = Column(String, nullable=False) - display_name = Column(String, nullable=False) - trust_level = Column(SQLEnum(TrustLevel), nullable=False) - schema_overlap_score = Column(Float, nullable=True) - status = Column( - SQLEnum(SemanticSourceStatus), - nullable=False, - default=SemanticSourceStatus.AVAILABLE, - ) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="semantic_sources") - - -# [/DEF:SemanticSource:Class] - - -# [DEF:FieldKind:Class] -class FieldKind(str, enum.Enum): - COLUMN = "column" - METRIC = "metric" - FILTER_DIMENSION = "filter_dimension" - PARAMETER = "parameter" - - -# [/DEF:FieldKind:Class] - - -# [DEF:FieldProvenance:Class] -class FieldProvenance(str, enum.Enum): - DICTIONARY_EXACT = "dictionary_exact" - REFERENCE_IMPORTED = "reference_imported" - FUZZY_INFERRED = "fuzzy_inferred" - AI_GENERATED = "ai_generated" - MANUAL_OVERRIDE = "manual_override" - UNRESOLVED = "unresolved" - - -# [/DEF:FieldProvenance:Class] - - -# [DEF:SemanticFieldEntry:Class] -class SemanticFieldEntry(Base): - __tablename__ = "semantic_field_entries" - - field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - field_name = Column(String, nullable=False) - field_kind 
= Column(SQLEnum(FieldKind), nullable=False) - verbose_name = Column(String, nullable=True) - description = Column(Text, nullable=True) - display_format = Column(String, nullable=True) - provenance = Column( - SQLEnum(FieldProvenance), nullable=False, default=FieldProvenance.UNRESOLVED - ) - source_id = Column(String, nullable=True) - source_version = Column(String, nullable=True) - confidence_rank = Column(Integer, nullable=True) - is_locked = Column(Boolean, nullable=False, default=False) - has_conflict = Column(Boolean, nullable=False, default=False) - needs_review = Column(Boolean, nullable=False, default=True) - last_changed_by = Column(String, nullable=False) # system, user, agent - user_feedback = Column(String, nullable=True) # up, down, null - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - session = relationship("DatasetReviewSession", back_populates="semantic_fields") - candidates = relationship( - "SemanticCandidate", back_populates="field", cascade="all, delete-orphan" - ) - - -# [/DEF:SemanticFieldEntry:Class] - - -# [DEF:CandidateMatchType:Class] -class CandidateMatchType(str, enum.Enum): - EXACT = "exact" - REFERENCE = "reference" - FUZZY = "fuzzy" - GENERATED = "generated" - - -# [/DEF:CandidateMatchType:Class] - - -# [DEF:CandidateStatus:Class] -class CandidateStatus(str, enum.Enum): - PROPOSED = "proposed" - ACCEPTED = "accepted" - REJECTED = "rejected" - SUPERSEDED = "superseded" - - -# [/DEF:CandidateStatus:Class] - - -# [DEF:SemanticCandidate:Class] -class SemanticCandidate(Base): - __tablename__ = "semantic_candidates" - - candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - field_id = Column( - String, ForeignKey("semantic_field_entries.field_id"), nullable=False - ) - source_id = Column(String, nullable=True) - candidate_rank = Column(Integer, nullable=False) - match_type = Column(SQLEnum(CandidateMatchType), nullable=False) - confidence_score = Column(Float, nullable=False) - proposed_verbose_name = Column(String, nullable=True) - proposed_description = Column(Text, nullable=True) - proposed_display_format = Column(String, nullable=True) - status = Column( - SQLEnum(CandidateStatus), nullable=False, default=CandidateStatus.PROPOSED - ) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - field = relationship("SemanticFieldEntry", back_populates="candidates") - - -# [/DEF:SemanticCandidate:Class] - - -# [DEF:FilterSource:Class] -class FilterSource(str, enum.Enum): - SUPERSET_NATIVE = "superset_native" - SUPERSET_URL = "superset_url" - SUPERSET_PERMALINK = "superset_permalink" - SUPERSET_NATIVE_FILTERS_KEY = "superset_native_filters_key" - MANUAL = "manual" - INFERRED = "inferred" - - -# [/DEF:FilterSource:Class] - - -# [DEF:FilterConfidenceState:Class] -class FilterConfidenceState(str, enum.Enum): - CONFIRMED = "confirmed" - IMPORTED = "imported" - INFERRED = "inferred" - AI_DRAFT = "ai_draft" - UNRESOLVED = "unresolved" - - -# [/DEF:FilterConfidenceState:Class] - - -# [DEF:FilterRecoveryStatus:Class] -class FilterRecoveryStatus(str, enum.Enum): - RECOVERED = "recovered" - PARTIAL = "partial" - MISSING = "missing" - CONFLICTED = "conflicted" - - -# [/DEF:FilterRecoveryStatus:Class] - - -# [DEF:ImportedFilter:Class] -class ImportedFilter(Base): - __tablename__ = "imported_filters" - - filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = 
Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - filter_name = Column(String, nullable=False) - display_name = Column(String, nullable=True) - raw_value = Column(JSON, nullable=False) - raw_value_masked = Column(Boolean, nullable=False, default=False) - normalized_value = Column(JSON, nullable=True) - source = Column(SQLEnum(FilterSource), nullable=False) - confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False) - requires_confirmation = Column(Boolean, nullable=False, default=False) - recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False) - notes = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - session = relationship("DatasetReviewSession", back_populates="imported_filters") - - -# [/DEF:ImportedFilter:Class] - - -# [DEF:VariableKind:Class] -class VariableKind(str, enum.Enum): - NATIVE_FILTER = "native_filter" - PARAMETER = "parameter" - DERIVED = "derived" - UNKNOWN = "unknown" - - -# [/DEF:VariableKind:Class] - - -# [DEF:MappingStatus:Class] -class MappingStatus(str, enum.Enum): - UNMAPPED = "unmapped" - PROPOSED = "proposed" - APPROVED = "approved" - OVERRIDDEN = "overridden" - INVALID = "invalid" - - -# [/DEF:MappingStatus:Class] - - -# [DEF:TemplateVariable:Class] -class TemplateVariable(Base): - __tablename__ = "template_variables" - - variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - variable_name = Column(String, nullable=False) - expression_source = Column(Text, nullable=False) - variable_kind = Column(SQLEnum(VariableKind), nullable=False) - is_required = Column(Boolean, nullable=False, default=True) - default_value = Column(JSON, nullable=True) - mapping_status = Column( - SQLEnum(MappingStatus), nullable=False, default=MappingStatus.UNMAPPED - ) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - session = relationship("DatasetReviewSession", back_populates="template_variables") - - -# [/DEF:TemplateVariable:Class] - - -# [DEF:MappingMethod:Class] -class MappingMethod(str, enum.Enum): - DIRECT_MATCH = "direct_match" - HEURISTIC_MATCH = "heuristic_match" - SEMANTIC_MATCH = "semantic_match" - MANUAL_OVERRIDE = "manual_override" - - -# [/DEF:MappingMethod:Class] - - -# [DEF:MappingWarningLevel:Class] -class MappingWarningLevel(str, enum.Enum): - LOW = "low" - MEDIUM = "medium" - HIGH = "high" - - -# [/DEF:MappingWarningLevel:Class] - - -# [DEF:ApprovalState:Class] -class ApprovalState(str, enum.Enum): - PENDING = "pending" - APPROVED = "approved" - REJECTED = "rejected" - NOT_REQUIRED = "not_required" - - -# [/DEF:ApprovalState:Class] - - -# [DEF:ExecutionMapping:Class] -class ExecutionMapping(Base): - __tablename__ = "execution_mappings" - - mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - filter_id = Column(String, nullable=False) - variable_id = Column(String, nullable=False) - mapping_method = Column(SQLEnum(MappingMethod), nullable=False) - raw_input_value = Column(JSON, nullable=False) - effective_value = Column(JSON, nullable=True) - 
transformation_note = Column(Text, nullable=True) - warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True) - requires_explicit_approval = Column(Boolean, nullable=False, default=False) - approval_state = Column( - SQLEnum(ApprovalState), nullable=False, default=ApprovalState.NOT_REQUIRED - ) - approved_by_user_id = Column(String, nullable=True) - approved_at = Column(DateTime, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - session = relationship("DatasetReviewSession", back_populates="execution_mappings") - - -# [/DEF:ExecutionMapping:Class] - - -# [DEF:ClarificationStatus:Class] -class ClarificationStatus(str, enum.Enum): - PENDING = "pending" - ACTIVE = "active" - PAUSED = "paused" - COMPLETED = "completed" - CANCELLED = "cancelled" - - -# [/DEF:ClarificationStatus:Class] - - -# [DEF:ClarificationSession:Class] -class ClarificationSession(Base): - __tablename__ = "clarification_sessions" - - clarification_session_id = Column( - String, primary_key=True, default=lambda: str(uuid.uuid4()) - ) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - status = Column( - SQLEnum(ClarificationStatus), - nullable=False, - default=ClarificationStatus.PENDING, - ) - current_question_id = Column(String, nullable=True) - resolved_count = Column(Integer, nullable=False, default=0) - remaining_count = Column(Integer, nullable=False, default=0) - summary_delta = Column(Text, nullable=True) - started_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - completed_at = Column(DateTime, nullable=True) - - session = relationship( - "DatasetReviewSession", back_populates="clarification_sessions" - ) - questions = relationship( - "ClarificationQuestion", - back_populates="clarification_session", - cascade="all, delete-orphan", - ) - - -# [/DEF:ClarificationSession:Class] - - -# [DEF:QuestionState:Class] -class QuestionState(str, enum.Enum): - OPEN = "open" - ANSWERED = "answered" - SKIPPED = "skipped" - EXPERT_REVIEW = "expert_review" - SUPERSEDED = "superseded" - - -# [/DEF:QuestionState:Class] - - -# [DEF:ClarificationQuestion:Class] -class ClarificationQuestion(Base): - __tablename__ = "clarification_questions" - - question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - clarification_session_id = Column( - String, - ForeignKey("clarification_sessions.clarification_session_id"), - nullable=False, - ) - topic_ref = Column(String, nullable=False) - question_text = Column(Text, nullable=False) - why_it_matters = Column(Text, nullable=False) - current_guess = Column(Text, nullable=True) - priority = Column(Integer, nullable=False, default=0) - state = Column(SQLEnum(QuestionState), nullable=False, default=QuestionState.OPEN) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - updated_at = Column( - DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False - ) - - clarification_session = relationship( - "ClarificationSession", back_populates="questions" - ) - options = relationship( - "ClarificationOption", back_populates="question", cascade="all, delete-orphan" - ) - answer = relationship( - "ClarificationAnswer", - back_populates="question", - uselist=False, - cascade="all, delete-orphan", - ) - - -# 
[/DEF:ClarificationQuestion:Class] - - -# [DEF:ClarificationOption:Class] -class ClarificationOption(Base): - __tablename__ = "clarification_options" - - option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - question_id = Column( - String, ForeignKey("clarification_questions.question_id"), nullable=False - ) - label = Column(String, nullable=False) - value = Column(String, nullable=False) - is_recommended = Column(Boolean, nullable=False, default=False) - display_order = Column(Integer, nullable=False, default=0) - - question = relationship("ClarificationQuestion", back_populates="options") - - -# [/DEF:ClarificationOption:Class] - - -# [DEF:AnswerKind:Class] -class AnswerKind(str, enum.Enum): - SELECTED = "selected" - CUSTOM = "custom" - SKIPPED = "skipped" - EXPERT_REVIEW = "expert_review" - - -# [/DEF:AnswerKind:Class] - - -# [DEF:ClarificationAnswer:Class] -class ClarificationAnswer(Base): - __tablename__ = "clarification_answers" - - answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - question_id = Column( - String, - ForeignKey("clarification_questions.question_id"), - nullable=False, - unique=True, - ) - answer_kind = Column(SQLEnum(AnswerKind), nullable=False) - answer_value = Column(Text, nullable=True) - answered_by_user_id = Column(String, nullable=False) - impact_summary = Column(Text, nullable=True) - user_feedback = Column(String, nullable=True) # up, down, null - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - question = relationship("ClarificationQuestion", back_populates="answer") - - -# [/DEF:ClarificationAnswer:Class] - - -# [DEF:PreviewStatus:Class] -class PreviewStatus(str, enum.Enum): - PENDING = "pending" - READY = "ready" - FAILED = "failed" - STALE = "stale" - - -# [/DEF:PreviewStatus:Class] - - -# [DEF:CompiledPreview:Class] -class CompiledPreview(Base): - __tablename__ = "compiled_previews" - - preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - preview_status = Column( - SQLEnum(PreviewStatus), nullable=False, default=PreviewStatus.PENDING - ) - compiled_sql = Column(Text, nullable=True) - preview_fingerprint = Column(String, nullable=False) - compiled_by = Column(String, nullable=False, default="superset") - error_code = Column(String, nullable=True) - error_details = Column(Text, nullable=True) - compiled_at = Column(DateTime, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="previews") - - -# [/DEF:CompiledPreview:Class] - - -# [DEF:LaunchStatus:Class] -class LaunchStatus(str, enum.Enum): - STARTED = "started" - SUCCESS = "success" - FAILED = "failed" - - -# [/DEF:LaunchStatus:Class] - - -# [DEF:DatasetRunContext:Class] -class DatasetRunContext(Base): - __tablename__ = "dataset_run_contexts" - - run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - dataset_ref = Column(String, nullable=False) - environment_id = Column(String, nullable=False) - preview_id = Column(String, nullable=False) - sql_lab_session_ref = Column(String, nullable=False) - effective_filters = Column(JSON, nullable=False) - template_params = Column(JSON, nullable=False) - approved_mapping_ids = Column(JSON, nullable=False) - 
semantic_decision_refs = Column(JSON, nullable=False) - open_warning_refs = Column(JSON, nullable=False) - launch_status = Column(SQLEnum(LaunchStatus), nullable=False) - launch_error = Column(Text, nullable=True) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="run_contexts") - - -# [/DEF:DatasetRunContext:Class] - - -# [DEF:SessionEvent:Class] -class SessionEvent(Base): - __tablename__ = "session_events" - - session_event_id = Column( - String, primary_key=True, default=lambda: str(uuid.uuid4()) - ) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - actor_user_id = Column(String, ForeignKey("users.id"), nullable=False) - event_type = Column(String, nullable=False) - event_summary = Column(Text, nullable=False) - current_phase = Column(String, nullable=True) - readiness_state = Column(String, nullable=True) - event_details = Column(JSON, nullable=False, default=dict) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="events") - actor = relationship("User") - - -# [/DEF:SessionEvent:Class] - - -# [DEF:ArtifactType:Class] -class ArtifactType(str, enum.Enum): - DOCUMENTATION = "documentation" - VALIDATION_REPORT = "validation_report" - RUN_SUMMARY = "run_summary" - - -# [/DEF:ArtifactType:Class] - - -# [DEF:ArtifactFormat:Class] -class ArtifactFormat(str, enum.Enum): - JSON = "json" - MARKDOWN = "markdown" - CSV = "csv" - PDF = "pdf" - - -# [/DEF:ArtifactFormat:Class] - - -# [DEF:ExportArtifact:Class] -class ExportArtifact(Base): - __tablename__ = "export_artifacts" - - artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) - session_id = Column( - String, ForeignKey("dataset_review_sessions.session_id"), nullable=False - ) - artifact_type = Column(SQLEnum(ArtifactType), nullable=False) - format = Column(SQLEnum(ArtifactFormat), nullable=False) - storage_ref = Column(String, nullable=False) - created_by_user_id = Column(String, nullable=False) - created_at = Column(DateTime, default=datetime.utcnow, nullable=False) - - session = relationship("DatasetReviewSession", back_populates="export_artifacts") - - -# [/DEF:ExportArtifact:Class] - # [/DEF:DatasetReviewModels:Module] diff --git a/backend/src/models/dataset_review_pkg/__init__.py b/backend/src/models/dataset_review_pkg/__init__.py new file mode 100644 index 00000000..a5824e7e --- /dev/null +++ b/backend/src/models/dataset_review_pkg/__init__.py @@ -0,0 +1,122 @@ +# [DEF:DatasetReviewModels:Module] +# @COMPLEXITY: 3 +# @SEMANTICS: dataset_review, session, profile, findings, semantics, clarification, execution, sqlalchemy +# @PURPOSE: Re-export all dataset review domain models from decomposed sub-modules for backward-compatible imports. 
+# @LAYER: Domain + +from src.models.dataset_review_pkg._enums import ( + SessionStatus, + SessionPhase, + ReadinessState, + RecommendedAction, + SessionCollaboratorRole, + BusinessSummarySource, + ConfidenceState, + FindingArea, + FindingSeverity, + ResolutionState, + SemanticSourceType, + TrustLevel, + SemanticSourceStatus, + FieldKind, + FieldProvenance, + CandidateMatchType, + CandidateStatus, + FilterSource, + FilterConfidenceState, + FilterRecoveryStatus, + VariableKind, + MappingStatus, + MappingMethod, + MappingWarningLevel, + ApprovalState, + ClarificationStatus, + QuestionState, + AnswerKind, + PreviewStatus, + LaunchStatus, + ArtifactType, + ArtifactFormat, +) +from src.models.dataset_review_pkg._session_models import ( + SessionCollaborator, + DatasetReviewSession, +) +from src.models.dataset_review_pkg._profile_models import DatasetProfile +from src.models.dataset_review_pkg._finding_models import ValidationFinding +from src.models.dataset_review_pkg._semantic_models import ( + SemanticSource, + SemanticFieldEntry, + SemanticCandidate, +) +from src.models.dataset_review_pkg._filter_models import ( + ImportedFilter, + TemplateVariable, +) +from src.models.dataset_review_pkg._mapping_models import ExecutionMapping +from src.models.dataset_review_pkg._clarification_models import ( + ClarificationSession, + ClarificationQuestion, + ClarificationOption, + ClarificationAnswer, +) +from src.models.dataset_review_pkg._execution_models import ( + CompiledPreview, + DatasetRunContext, + SessionEvent, + ExportArtifact, +) + +__all__ = [ + "SessionStatus", + "SessionPhase", + "ReadinessState", + "RecommendedAction", + "SessionCollaboratorRole", + "BusinessSummarySource", + "ConfidenceState", + "FindingArea", + "FindingSeverity", + "ResolutionState", + "SemanticSourceType", + "TrustLevel", + "SemanticSourceStatus", + "FieldKind", + "FieldProvenance", + "CandidateMatchType", + "CandidateStatus", + "FilterSource", + "FilterConfidenceState", + "FilterRecoveryStatus", + "VariableKind", + "MappingStatus", + "MappingMethod", + "MappingWarningLevel", + "ApprovalState", + "ClarificationStatus", + "QuestionState", + "AnswerKind", + "PreviewStatus", + "LaunchStatus", + "ArtifactType", + "ArtifactFormat", + "SessionCollaborator", + "DatasetReviewSession", + "DatasetProfile", + "ValidationFinding", + "SemanticSource", + "SemanticFieldEntry", + "SemanticCandidate", + "ImportedFilter", + "TemplateVariable", + "ExecutionMapping", + "ClarificationSession", + "ClarificationQuestion", + "ClarificationOption", + "ClarificationAnswer", + "CompiledPreview", + "DatasetRunContext", + "SessionEvent", + "ExportArtifact", +] +# [/DEF:DatasetReviewModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_clarification_models.py b/backend/src/models/dataset_review_pkg/_clarification_models.py new file mode 100644 index 00000000..76e3e41b --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_clarification_models.py @@ -0,0 +1,125 @@ +# [DEF:DatasetReviewClarificationModels:Module] +# @COMPLEXITY: 3 +# @PURPOSE: Clarification session, question, option, and answer models for guided review flow. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] +# @INVARIANT: Only one active clarification question may exist at a time per session. 
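The single-active-question invariant stated above is not enforced by the schema itself (current_question_id is a plain nullable column), so a service layer has to maintain it. One hypothetical helper showing how that might look, not part of this patch, and assuming a lower priority value sorts first:

    def advance_current_question(cs: ClarificationSession) -> None:
        # Keep at most one current question: point at the highest-priority
        # open question, or clear the pointer when none remain.
        open_questions = sorted(
            (q for q in cs.questions if q.state == QuestionState.OPEN),
            key=lambda q: q.priority,
        )
        cs.current_question_id = open_questions[0].question_id if open_questions else None
        cs.remaining_count = len(open_questions)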
+ +import uuid +from datetime import datetime + +from sqlalchemy import ( + Boolean, + Column, + String, + Integer, + Text, + DateTime, + ForeignKey, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + ClarificationStatus, + QuestionState, + AnswerKind, +) + + +# [DEF:ClarificationSession:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One clarification session aggregate owning questions and tracking resolution progress. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class ClarificationSession(Base): + __tablename__ = "clarification_sessions" + + clarification_session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False) + status = Column(SQLEnum(ClarificationStatus), nullable=False, default=ClarificationStatus.PENDING) + current_question_id = Column(String, nullable=True) + resolved_count = Column(Integer, nullable=False, default=0) + remaining_count = Column(Integer, nullable=False, default=0) + summary_delta = Column(Text, nullable=True) + started_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + completed_at = Column(DateTime, nullable=True) + + session = relationship("DatasetReviewSession", back_populates="clarification_sessions") + questions = relationship("ClarificationQuestion", back_populates="clarification_session", cascade="all, delete-orphan") + + +# [/DEF:ClarificationSession:Class] + + +# [DEF:ClarificationQuestion:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One clarification question with priority ordering, options, and state machine. +# @RELATION: DEPENDS_ON -> [ClarificationSession] +class ClarificationQuestion(Base): + __tablename__ = "clarification_questions" + + question_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + clarification_session_id = Column(String, ForeignKey("clarification_sessions.clarification_session_id"), nullable=False) + topic_ref = Column(String, nullable=False) + question_text = Column(Text, nullable=False) + why_it_matters = Column(Text, nullable=False) + current_guess = Column(Text, nullable=True) + priority = Column(Integer, nullable=False, default=0) + state = Column(SQLEnum(QuestionState), nullable=False, default=QuestionState.OPEN) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + clarification_session = relationship("ClarificationSession", back_populates="questions") + options = relationship("ClarificationOption", back_populates="question", cascade="all, delete-orphan") + answer = relationship("ClarificationAnswer", back_populates="question", uselist=False, cascade="all, delete-orphan") + + +# [/DEF:ClarificationQuestion:Class] + + +# [DEF:ClarificationOption:Class] +# @COMPLEXITY: 1 +# @PURPOSE: One selectable option for a clarification question with recommendation flag. 
+# @RELATION: DEPENDS_ON -> [ClarificationQuestion]
+class ClarificationOption(Base):
+    __tablename__ = "clarification_options"
+
+    option_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
+    question_id = Column(String, ForeignKey("clarification_questions.question_id"), nullable=False)
+    label = Column(String, nullable=False)
+    value = Column(String, nullable=False)
+    is_recommended = Column(Boolean, nullable=False, default=False)
+    display_order = Column(Integer, nullable=False, default=0)
+
+    question = relationship("ClarificationQuestion", back_populates="options")
+
+
+# [/DEF:ClarificationOption:Class]
+
+
+# [DEF:ClarificationAnswer:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: One persisted clarification answer with impact summary and feedback tracking.
+# @RELATION: DEPENDS_ON -> [ClarificationQuestion]
+class ClarificationAnswer(Base):
+    __tablename__ = "clarification_answers"
+
+    answer_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
+    question_id = Column(String, ForeignKey("clarification_questions.question_id"), nullable=False, unique=True)
+    answer_kind = Column(SQLEnum(AnswerKind), nullable=False)
+    answer_value = Column(Text, nullable=True)
+    answered_by_user_id = Column(String, nullable=False)
+    impact_summary = Column(Text, nullable=True)
+    user_feedback = Column(String, nullable=True)  # up, down, null
+    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
+
+    question = relationship("ClarificationQuestion", back_populates="answer")
+
+
+# [/DEF:ClarificationAnswer:Class]
+
+
+# [/DEF:DatasetReviewClarificationModels:Module]
diff --git a/backend/src/models/dataset_review_pkg/_enums.py b/backend/src/models/dataset_review_pkg/_enums.py
new file mode 100644
index 00000000..839dc8d4
--- /dev/null
+++ b/backend/src/models/dataset_review_pkg/_enums.py
@@ -0,0 +1,463 @@
+# [DEF:DatasetReviewEnums:Module]
+# @COMPLEXITY: 2
+# @PURPOSE: All enumeration types for the dataset review domain, grouped for stable cross-module reuse.
+# @LAYER: Domain
+# @INVARIANT: Enum values are string-based for JSON serialization compatibility.
+
+import enum
+
+
+# [DEF:SessionStatus:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Lifecycle status of a dataset review session.
+class SessionStatus(str, enum.Enum):
+    ACTIVE = "active"
+    PAUSED = "paused"
+    COMPLETED = "completed"
+    ARCHIVED = "archived"
+    CANCELLED = "cancelled"
+
+
+# [/DEF:SessionStatus:Class]
+
+
+# [DEF:SessionPhase:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Ordered phase progression for dataset review orchestration.
+class SessionPhase(str, enum.Enum):
+    INTAKE = "intake"
+    RECOVERY = "recovery"
+    REVIEW = "review"
+    SEMANTIC_REVIEW = "semantic_review"
+    CLARIFICATION = "clarification"
+    MAPPING_REVIEW = "mapping_review"
+    PREVIEW = "preview"
+    LAUNCH = "launch"
+    POST_RUN = "post_run"
+
+
+# [/DEF:SessionPhase:Class]
+
+
+# [DEF:ReadinessState:Class]
+# @COMPLEXITY: 1
+# @PURPOSE: Granular readiness indicator driving the recommended-action UX flow.
+class ReadinessState(str, enum.Enum): + EMPTY = "empty" + IMPORTING = "importing" + REVIEW_READY = "review_ready" + SEMANTIC_SOURCE_REVIEW_NEEDED = "semantic_source_review_needed" + CLARIFICATION_NEEDED = "clarification_needed" + CLARIFICATION_ACTIVE = "clarification_active" + MAPPING_REVIEW_NEEDED = "mapping_review_needed" + COMPILED_PREVIEW_READY = "compiled_preview_ready" + PARTIALLY_READY = "partially_ready" + RUN_READY = "run_ready" + RUN_IN_PROGRESS = "run_in_progress" + COMPLETED = "completed" + RECOVERY_REQUIRED = "recovery_required" + + +# [/DEF:ReadinessState:Class] + + +# [DEF:RecommendedAction:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Next-action guidance derived from the current readiness state. +class RecommendedAction(str, enum.Enum): + IMPORT_FROM_SUPERSET = "import_from_superset" + REVIEW_DOCUMENTATION = "review_documentation" + APPLY_SEMANTIC_SOURCE = "apply_semantic_source" + START_CLARIFICATION = "start_clarification" + ANSWER_NEXT_QUESTION = "answer_next_question" + APPROVE_MAPPING = "approve_mapping" + GENERATE_SQL_PREVIEW = "generate_sql_preview" + COMPLETE_REQUIRED_VALUES = "complete_required_values" + LAUNCH_DATASET = "launch_dataset" + RESUME_SESSION = "resume_session" + EXPORT_OUTPUTS = "export_outputs" + + +# [/DEF:RecommendedAction:Class] + + +# [DEF:SessionCollaboratorRole:Class] +# @COMPLEXITY: 1 +# @PURPOSE: RBAC role for session collaborators. +class SessionCollaboratorRole(str, enum.Enum): + VIEWER = "viewer" + REVIEWER = "reviewer" + APPROVER = "approver" + + +# [/DEF:SessionCollaboratorRole:Class] + + +# [DEF:BusinessSummarySource:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Provenance of the dataset business summary text. +class BusinessSummarySource(str, enum.Enum): + CONFIRMED = "confirmed" + IMPORTED = "imported" + INFERRED = "inferred" + AI_DRAFT = "ai_draft" + MANUAL_OVERRIDE = "manual_override" + + +# [/DEF:BusinessSummarySource:Class] + + +# [DEF:ConfidenceState:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Confidence level for dataset profile completeness. +class ConfidenceState(str, enum.Enum): + CONFIRMED = "confirmed" + MOSTLY_CONFIRMED = "mostly_confirmed" + MIXED = "mixed" + LOW_CONFIDENCE = "low_confidence" + UNRESOLVED = "unresolved" + + +# [/DEF:ConfidenceState:Class] + + +# [DEF:FindingArea:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Domain area classification for validation findings. +class FindingArea(str, enum.Enum): + SOURCE_INTAKE = "source_intake" + DATASET_PROFILE = "dataset_profile" + SEMANTIC_ENRICHMENT = "semantic_enrichment" + CLARIFICATION = "clarification" + FILTER_RECOVERY = "filter_recovery" + TEMPLATE_MAPPING = "template_mapping" + COMPILED_PREVIEW = "compiled_preview" + LAUNCH = "launch" + AUDIT = "audit" + + +# [/DEF:FindingArea:Class] + + +# [DEF:FindingSeverity:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Severity classification for validation findings. +class FindingSeverity(str, enum.Enum): + BLOCKING = "blocking" + WARNING = "warning" + INFORMATIONAL = "informational" + + +# [/DEF:FindingSeverity:Class] + + +# [DEF:ResolutionState:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Resolution status for validation findings and clarification items. +class ResolutionState(str, enum.Enum): + OPEN = "open" + RESOLVED = "resolved" + APPROVED = "approved" + SKIPPED = "skipped" + DEFERRED = "deferred" + EXPERT_REVIEW = "expert_review" + + +# [/DEF:ResolutionState:Class] + + +# [DEF:SemanticSourceType:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Classification of semantic enrichment source origins. 
+class SemanticSourceType(str, enum.Enum): + UPLOADED_FILE = "uploaded_file" + CONNECTED_DICTIONARY = "connected_dictionary" + REFERENCE_DATASET = "reference_dataset" + NEIGHBOR_DATASET = "neighbor_dataset" + AI_GENERATED = "ai_generated" + + +# [/DEF:SemanticSourceType:Class] + + +# [DEF:TrustLevel:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Trust classification for semantic source reliability. +class TrustLevel(str, enum.Enum): + TRUSTED = "trusted" + RECOMMENDED = "recommended" + CANDIDATE = "candidate" + GENERATED = "generated" + + +# [/DEF:TrustLevel:Class] + + +# [DEF:SemanticSourceStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Lifecycle status for semantic source application. +class SemanticSourceStatus(str, enum.Enum): + AVAILABLE = "available" + SELECTED = "selected" + APPLIED = "applied" + REJECTED = "rejected" + PARTIAL = "partial" + FAILED = "failed" + + +# [/DEF:SemanticSourceStatus:Class] + + +# [DEF:FieldKind:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Kind classification for semantic field entries. +class FieldKind(str, enum.Enum): + COLUMN = "column" + METRIC = "metric" + FILTER_DIMENSION = "filter_dimension" + PARAMETER = "parameter" + + +# [/DEF:FieldKind:Class] + + +# [DEF:FieldProvenance:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Provenance tracking for semantic field value origin. +class FieldProvenance(str, enum.Enum): + DICTIONARY_EXACT = "dictionary_exact" + REFERENCE_IMPORTED = "reference_imported" + FUZZY_INFERRED = "fuzzy_inferred" + AI_GENERATED = "ai_generated" + MANUAL_OVERRIDE = "manual_override" + UNRESOLVED = "unresolved" + + +# [/DEF:FieldProvenance:Class] + + +# [DEF:CandidateMatchType:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Match type classification for semantic candidates. +class CandidateMatchType(str, enum.Enum): + EXACT = "exact" + REFERENCE = "reference" + FUZZY = "fuzzy" + GENERATED = "generated" + + +# [/DEF:CandidateMatchType:Class] + + +# [DEF:CandidateStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Lifecycle status for semantic candidate proposals. +class CandidateStatus(str, enum.Enum): + PROPOSED = "proposed" + ACCEPTED = "accepted" + REJECTED = "rejected" + SUPERSEDED = "superseded" + + +# [/DEF:CandidateStatus:Class] + + +# [DEF:FilterSource:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Origin classification for imported filters. +class FilterSource(str, enum.Enum): + SUPERSET_NATIVE = "superset_native" + SUPERSET_URL = "superset_url" + SUPERSET_PERMALINK = "superset_permalink" + SUPERSET_NATIVE_FILTERS_KEY = "superset_native_filters_key" + MANUAL = "manual" + INFERRED = "inferred" + + +# [/DEF:FilterSource:Class] + + +# [DEF:FilterConfidenceState:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Confidence classification for imported filter values. +class FilterConfidenceState(str, enum.Enum): + CONFIRMED = "confirmed" + IMPORTED = "imported" + INFERRED = "inferred" + AI_DRAFT = "ai_draft" + UNRESOLVED = "unresolved" + + +# [/DEF:FilterConfidenceState:Class] + + +# [DEF:FilterRecoveryStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Recovery quality status for imported filters. +class FilterRecoveryStatus(str, enum.Enum): + RECOVERED = "recovered" + PARTIAL = "partial" + MISSING = "missing" + CONFLICTED = "conflicted" + + +# [/DEF:FilterRecoveryStatus:Class] + + +# [DEF:VariableKind:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Kind classification for template variables. 
+class VariableKind(str, enum.Enum): + NATIVE_FILTER = "native_filter" + PARAMETER = "parameter" + DERIVED = "derived" + UNKNOWN = "unknown" + + +# [/DEF:VariableKind:Class] + + +# [DEF:MappingStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Lifecycle status for template variable mapping. +class MappingStatus(str, enum.Enum): + UNMAPPED = "unmapped" + PROPOSED = "proposed" + APPROVED = "approved" + OVERRIDDEN = "overridden" + INVALID = "invalid" + + +# [/DEF:MappingStatus:Class] + + +# [DEF:MappingMethod:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Method classification for execution mapping creation. +class MappingMethod(str, enum.Enum): + DIRECT_MATCH = "direct_match" + HEURISTIC_MATCH = "heuristic_match" + SEMANTIC_MATCH = "semantic_match" + MANUAL_OVERRIDE = "manual_override" + + +# [/DEF:MappingMethod:Class] + + +# [DEF:MappingWarningLevel:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Warning severity for execution mapping quality indicators. +class MappingWarningLevel(str, enum.Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + + +# [/DEF:MappingWarningLevel:Class] + + +# [DEF:ApprovalState:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Approval lifecycle for execution mapping gate checks. +class ApprovalState(str, enum.Enum): + PENDING = "pending" + APPROVED = "approved" + REJECTED = "rejected" + NOT_REQUIRED = "not_required" + + +# [/DEF:ApprovalState:Class] + + +# [DEF:ClarificationStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Lifecycle status for clarification sessions. +class ClarificationStatus(str, enum.Enum): + PENDING = "pending" + ACTIVE = "active" + PAUSED = "paused" + COMPLETED = "completed" + CANCELLED = "cancelled" + + +# [/DEF:ClarificationStatus:Class] + + +# [DEF:QuestionState:Class] +# @COMPLEXITY: 1 +# @PURPOSE: State machine for individual clarification questions. +class QuestionState(str, enum.Enum): + OPEN = "open" + ANSWERED = "answered" + SKIPPED = "skipped" + EXPERT_REVIEW = "expert_review" + SUPERSEDED = "superseded" + + +# [/DEF:QuestionState:Class] + + +# [DEF:AnswerKind:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Classification of clarification answer types. +class AnswerKind(str, enum.Enum): + SELECTED = "selected" + CUSTOM = "custom" + SKIPPED = "skipped" + EXPERT_REVIEW = "expert_review" + + +# [/DEF:AnswerKind:Class] + + +# [DEF:PreviewStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Lifecycle status for compiled SQL previews. +class PreviewStatus(str, enum.Enum): + PENDING = "pending" + READY = "ready" + FAILED = "failed" + STALE = "stale" + + +# [/DEF:PreviewStatus:Class] + + +# [DEF:LaunchStatus:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Outcome status for dataset launch handoff. +class LaunchStatus(str, enum.Enum): + STARTED = "started" + SUCCESS = "success" + FAILED = "failed" + + +# [/DEF:LaunchStatus:Class] + + +# [DEF:ArtifactType:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Type classification for export artifacts. +class ArtifactType(str, enum.Enum): + DOCUMENTATION = "documentation" + VALIDATION_REPORT = "validation_report" + RUN_SUMMARY = "run_summary" + + +# [/DEF:ArtifactType:Class] + + +# [DEF:ArtifactFormat:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Format classification for export artifact output. 
+class ArtifactFormat(str, enum.Enum): + JSON = "json" + MARKDOWN = "markdown" + CSV = "csv" + PDF = "pdf" + + +# [/DEF:ArtifactFormat:Class] + + +# [/DEF:DatasetReviewEnums:Module] diff --git a/backend/src/models/dataset_review_pkg/_execution_models.py b/backend/src/models/dataset_review_pkg/_execution_models.py new file mode 100644 index 00000000..4e2b6301 --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_execution_models.py @@ -0,0 +1,140 @@ +# [DEF:DatasetReviewExecutionModels:Module] +# @COMPLEXITY: 3 +# @PURPOSE: Compiled preview, run context, session event, and export artifact models for execution and audit. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Text, + DateTime, + ForeignKey, + JSON, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + PreviewStatus, + LaunchStatus, + ArtifactType, + ArtifactFormat, +) + + +# [DEF:CompiledPreview:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One compiled SQL preview snapshot with fingerprint for staleness detection. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class CompiledPreview(Base): + __tablename__ = "compiled_previews" + + preview_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + preview_status = Column( + SQLEnum(PreviewStatus), nullable=False, default=PreviewStatus.PENDING + ) + compiled_sql = Column(Text, nullable=True) + preview_fingerprint = Column(String, nullable=False) + compiled_by = Column(String, nullable=False, default="superset") + error_code = Column(String, nullable=True) + error_details = Column(Text, nullable=True) + compiled_at = Column(DateTime, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="previews") + + +# [/DEF:CompiledPreview:Class] + + +# [DEF:DatasetRunContext:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Immutable launch audit snapshot capturing effective filters, template params, and approval state at launch time. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class DatasetRunContext(Base): + __tablename__ = "dataset_run_contexts" + + run_context_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + dataset_ref = Column(String, nullable=False) + environment_id = Column(String, nullable=False) + preview_id = Column(String, nullable=False) + sql_lab_session_ref = Column(String, nullable=False) + effective_filters = Column(JSON, nullable=False) + template_params = Column(JSON, nullable=False) + approved_mapping_ids = Column(JSON, nullable=False) + semantic_decision_refs = Column(JSON, nullable=False) + open_warning_refs = Column(JSON, nullable=False) + launch_status = Column(SQLEnum(LaunchStatus), nullable=False) + launch_error = Column(Text, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="run_contexts") + + +# [/DEF:DatasetRunContext:Class] + + +# [DEF:SessionEvent:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One persisted audit event for dataset review session mutations. 
+# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class SessionEvent(Base): + __tablename__ = "session_events" + + session_event_id = Column( + String, primary_key=True, default=lambda: str(uuid.uuid4()) + ) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + actor_user_id = Column(String, ForeignKey("users.id"), nullable=False) + event_type = Column(String, nullable=False) + event_summary = Column(Text, nullable=False) + current_phase = Column(String, nullable=True) + readiness_state = Column(String, nullable=True) + event_details = Column(JSON, nullable=False, default=dict) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="events") + actor = relationship("User") + + +# [/DEF:SessionEvent:Class] + + +# [DEF:ExportArtifact:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One persisted export artifact reference for documentation and validation outputs. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class ExportArtifact(Base): + __tablename__ = "export_artifacts" + + artifact_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + artifact_type = Column(SQLEnum(ArtifactType), nullable=False) + format = Column(SQLEnum(ArtifactFormat), nullable=False) + storage_ref = Column(String, nullable=False) + created_by_user_id = Column(String, nullable=False) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="export_artifacts") + + +# [/DEF:ExportArtifact:Class] + + +# [/DEF:DatasetReviewExecutionModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_filter_models.py b/backend/src/models/dataset_review_pkg/_filter_models.py new file mode 100644 index 00000000..bf255f8c --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_filter_models.py @@ -0,0 +1,95 @@ +# [DEF:DatasetReviewFilterModels:Module] +# @COMPLEXITY: 3 +# @PURPOSE: Imported filter and template variable models for Superset context recovery and execution mapping. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Text, + Boolean, + DateTime, + ForeignKey, + JSON, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + FilterSource, + FilterConfidenceState, + FilterRecoveryStatus, + VariableKind, + MappingStatus, +) + + +# [DEF:ImportedFilter:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Recovered Superset filter with confidence and recovery status tracking. 
+# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class ImportedFilter(Base): + __tablename__ = "imported_filters" + + filter_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + filter_name = Column(String, nullable=False) + display_name = Column(String, nullable=True) + raw_value = Column(JSON, nullable=False) + raw_value_masked = Column(Boolean, nullable=False, default=False) + normalized_value = Column(JSON, nullable=True) + source = Column(SQLEnum(FilterSource), nullable=False) + confidence_state = Column(SQLEnum(FilterConfidenceState), nullable=False) + requires_confirmation = Column(Boolean, nullable=False, default=False) + recovery_status = Column(SQLEnum(FilterRecoveryStatus), nullable=False) + notes = Column(Text, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False + ) + + session = relationship("DatasetReviewSession", back_populates="imported_filters") + + +# [/DEF:ImportedFilter:Class] + + +# [DEF:TemplateVariable:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Discovered template variable from dataset SQL with mapping status tracking. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class TemplateVariable(Base): + __tablename__ = "template_variables" + + variable_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + variable_name = Column(String, nullable=False) + expression_source = Column(Text, nullable=False) + variable_kind = Column(SQLEnum(VariableKind), nullable=False) + is_required = Column(Boolean, nullable=False, default=True) + default_value = Column(JSON, nullable=True) + mapping_status = Column( + SQLEnum(MappingStatus), nullable=False, default=MappingStatus.UNMAPPED + ) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False + ) + + session = relationship("DatasetReviewSession", back_populates="template_variables") + + +# [/DEF:TemplateVariable:Class] + + +# [/DEF:DatasetReviewFilterModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_finding_models.py b/backend/src/models/dataset_review_pkg/_finding_models.py new file mode 100644 index 00000000..53d8f9bd --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_finding_models.py @@ -0,0 +1,59 @@ +# [DEF:DatasetReviewFindingModels:Module] +# @COMPLEXITY: 2 +# @PURPOSE: Validation finding model for tracking blocking, warning, and informational issues during review. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Text, + DateTime, + ForeignKey, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + FindingArea, + FindingSeverity, + ResolutionState, +) + + +# [DEF:ValidationFinding:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Structured finding record for dataset review validation issues with resolution tracking. 
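+# Raise sketch (illustrative; the code string is hypothetical and area/severity
+# members are elided -- see _enums.py):
+#   db.add(ValidationFinding(session_id=sid, area=..., severity=...,
+#                            code="FILTER_UNRECOVERABLE", title="...", message="..."))
+#   # resolution_state starts at ResolutionState.OPEN; resolved_at stays NULL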
+# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class ValidationFinding(Base): + __tablename__ = "validation_findings" + + finding_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + area = Column(SQLEnum(FindingArea), nullable=False) + severity = Column(SQLEnum(FindingSeverity), nullable=False) + code = Column(String, nullable=False) + title = Column(String, nullable=False) + message = Column(Text, nullable=False) + resolution_state = Column( + SQLEnum(ResolutionState), nullable=False, default=ResolutionState.OPEN + ) + resolution_note = Column(Text, nullable=True) + caused_by_ref = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + resolved_at = Column(DateTime, nullable=True) + + session = relationship("DatasetReviewSession", back_populates="findings") + + +# [/DEF:ValidationFinding:Class] + + +# [/DEF:DatasetReviewFindingModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_mapping_models.py b/backend/src/models/dataset_review_pkg/_mapping_models.py new file mode 100644 index 00000000..8a100e65 --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_mapping_models.py @@ -0,0 +1,61 @@ +# [DEF:DatasetReviewMappingModels:Module] +# @COMPLEXITY: 2 +# @PURPOSE: Execution mapping model linking imported filters to template variables with approval gates. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Text, + Boolean, + DateTime, + ForeignKey, + JSON, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + MappingMethod, + MappingWarningLevel, + ApprovalState, +) + + +# [DEF:ExecutionMapping:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One filter-to-variable mapping with approval gate, effective value, and transformation metadata. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +# @INVARIANT: Explicit approval is required before launch when requires_explicit_approval is true. 
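+# Gate sketch (illustrative; the launch check lives in the orchestrator, and
+# the APPROVED member name is an assumption -- only NOT_REQUIRED appears below):
+#   def launch_blocked(m: "ExecutionMapping") -> bool:
+#       return m.requires_explicit_approval and m.approval_state != ApprovalState.APPROVED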
+class ExecutionMapping(Base): + __tablename__ = "execution_mappings" + + mapping_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column(String, ForeignKey("dataset_review_sessions.session_id"), nullable=False) + filter_id = Column(String, nullable=False) + variable_id = Column(String, nullable=False) + mapping_method = Column(SQLEnum(MappingMethod), nullable=False) + raw_input_value = Column(JSON, nullable=False) + effective_value = Column(JSON, nullable=True) + transformation_note = Column(Text, nullable=True) + warning_level = Column(SQLEnum(MappingWarningLevel), nullable=True) + requires_explicit_approval = Column(Boolean, nullable=False, default=False) + approval_state = Column(SQLEnum(ApprovalState), nullable=False, default=ApprovalState.NOT_REQUIRED) + approved_by_user_id = Column(String, nullable=True) + approved_at = Column(DateTime, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="execution_mappings") + + +# [/DEF:ExecutionMapping:Class] + + +# [/DEF:DatasetReviewMappingModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_profile_models.py b/backend/src/models/dataset_review_pkg/_profile_models.py new file mode 100644 index 00000000..f978837a --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_profile_models.py @@ -0,0 +1,68 @@ +# [DEF:DatasetReviewProfileModels:Module] +# @COMPLEXITY: 2 +# @PURPOSE: Dataset profile model capturing business summary, confidence, and completeness metadata. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Text, + Float, + Boolean, + DateTime, + ForeignKey, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + BusinessSummarySource, + ConfidenceState, +) + + +# [DEF:DatasetProfile:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One-to-one profile snapshot for a dataset review session, tracking business summary provenance and completeness. 
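+# Regeneration sketch (illustrative; the generator call is hypothetical):
+#   if not profile.manual_summary_locked:   # manual edits are never clobbered
+#       profile.business_summary = regenerate_summary(profile)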
+# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class DatasetProfile(Base): + __tablename__ = "dataset_profiles" + + profile_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, + ForeignKey("dataset_review_sessions.session_id"), + nullable=False, + unique=True, + ) + dataset_name = Column(String, nullable=False) + schema_name = Column(String, nullable=True) + database_name = Column(String, nullable=True) + business_summary = Column(Text, nullable=False) + business_summary_source = Column(SQLEnum(BusinessSummarySource), nullable=False) + description = Column(Text, nullable=True) + dataset_type = Column(String, nullable=True) + is_sqllab_view = Column(Boolean, nullable=False, default=False) + completeness_score = Column(Float, nullable=True) + confidence_state = Column(SQLEnum(ConfidenceState), nullable=False) + has_blocking_findings = Column(Boolean, nullable=False, default=False) + has_warning_findings = Column(Boolean, nullable=False, default=False) + manual_summary_locked = Column(Boolean, nullable=False, default=False) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False + ) + + session = relationship("DatasetReviewSession", back_populates="profile") + + +# [/DEF:DatasetProfile:Class] + + +# [/DEF:DatasetReviewProfileModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_semantic_models.py b/backend/src/models/dataset_review_pkg/_semantic_models.py new file mode 100644 index 00000000..e4d97f93 --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_semantic_models.py @@ -0,0 +1,139 @@ +# [DEF:DatasetReviewSemanticModels:Module] +# @COMPLEXITY: 3 +# @PURPOSE: Semantic source, field entry, and candidate models for dictionary-driven semantic enrichment. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] +# @INVARIANT: Manual overrides are never silently replaced by imported, inferred, or AI-generated values. + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Integer, + Text, + Float, + Boolean, + DateTime, + ForeignKey, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + SemanticSourceType, + TrustLevel, + SemanticSourceStatus, + FieldKind, + FieldProvenance, + CandidateMatchType, + CandidateStatus, +) + + +# [DEF:SemanticSource:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Registered semantic enrichment source with trust level and application status. 
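+# Registration sketch (illustrative; ref and version values are hypothetical):
+#   src = SemanticSource(session_id=sid, source_type=..., source_ref="dict:hr",
+#                        source_version="v3", display_name="HR dictionary",
+#                        trust_level=...)   # status defaults to AVAILABLE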
+# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class SemanticSource(Base): + __tablename__ = "semantic_sources" + + source_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + source_type = Column(SQLEnum(SemanticSourceType), nullable=False) + source_ref = Column(String, nullable=False) + source_version = Column(String, nullable=False) + display_name = Column(String, nullable=False) + trust_level = Column(SQLEnum(TrustLevel), nullable=False) + schema_overlap_score = Column(Float, nullable=True) + status = Column( + SQLEnum(SemanticSourceStatus), + nullable=False, + default=SemanticSourceStatus.AVAILABLE, + ) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="semantic_sources") + + +# [/DEF:SemanticSource:Class] + + +# [DEF:SemanticFieldEntry:Class] +# @COMPLEXITY: 3 +# @PURPOSE: Per-field semantic metadata entry with provenance tracking, lock state, and candidate set. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +# @RELATION: DEPENDS_ON -> [SemanticCandidate] +# @INVARIANT: Locked fields preserve their active value regardless of later candidate proposals. +class SemanticFieldEntry(Base): + __tablename__ = "semantic_field_entries" + + field_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + field_name = Column(String, nullable=False) + field_kind = Column(SQLEnum(FieldKind), nullable=False) + verbose_name = Column(String, nullable=True) + description = Column(Text, nullable=True) + display_format = Column(String, nullable=True) + provenance = Column( + SQLEnum(FieldProvenance), nullable=False, default=FieldProvenance.UNRESOLVED + ) + source_id = Column(String, nullable=True) + source_version = Column(String, nullable=True) + confidence_rank = Column(Integer, nullable=True) + is_locked = Column(Boolean, nullable=False, default=False) + has_conflict = Column(Boolean, nullable=False, default=False) + needs_review = Column(Boolean, nullable=False, default=True) + last_changed_by = Column(String, nullable=False) + user_feedback = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False + ) + + session = relationship("DatasetReviewSession", back_populates="semantic_fields") + candidates = relationship( + "SemanticCandidate", back_populates="field", cascade="all, delete-orphan" + ) + + +# [/DEF:SemanticFieldEntry:Class] + + +# [DEF:SemanticCandidate:Class] +# @COMPLEXITY: 2 +# @PURPOSE: One proposed semantic value for a field entry, ranked by match type and confidence. 
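+# Acceptance sketch (illustrative; the accepted-status member is an assumption
+# -- only PROPOSED appears in this file) honoring the lock invariant above:
+#   if not entry.is_locked:                # locked fields keep their active value
+#       entry.verbose_name = cand.proposed_verbose_name
+#       entry.source_id = cand.source_id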
+# @RELATION: DEPENDS_ON -> [SemanticFieldEntry] +class SemanticCandidate(Base): + __tablename__ = "semantic_candidates" + + candidate_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + field_id = Column( + String, ForeignKey("semantic_field_entries.field_id"), nullable=False + ) + source_id = Column(String, nullable=True) + candidate_rank = Column(Integer, nullable=False) + match_type = Column(SQLEnum(CandidateMatchType), nullable=False) + confidence_score = Column(Float, nullable=False) + proposed_verbose_name = Column(String, nullable=True) + proposed_description = Column(Text, nullable=True) + proposed_display_format = Column(String, nullable=True) + status = Column( + SQLEnum(CandidateStatus), nullable=False, default=CandidateStatus.PROPOSED + ) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + field = relationship("SemanticFieldEntry", back_populates="candidates") + + +# [/DEF:SemanticCandidate:Class] + + +# [/DEF:DatasetReviewSemanticModels:Module] diff --git a/backend/src/models/dataset_review_pkg/_session_models.py b/backend/src/models/dataset_review_pkg/_session_models.py new file mode 100644 index 00000000..2e421ca2 --- /dev/null +++ b/backend/src/models/dataset_review_pkg/_session_models.py @@ -0,0 +1,156 @@ +# [DEF:DatasetReviewSessionModels:Module] +# @COMPLEXITY: 3 +# @PURPOSE: Session aggregate root and collaborator models for dataset review orchestration. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewEnums:Module] +# @RELATION: DEPENDS_ON -> [MappingModels] +# @INVARIANT: Session and profile entities are strictly scoped to an authenticated user. + +import uuid +from datetime import datetime + +from sqlalchemy import ( + Column, + String, + Integer, + DateTime, + ForeignKey, + Enum as SQLEnum, +) +from sqlalchemy.orm import relationship + +from src.models.mapping import Base +from src.models.dataset_review_pkg._enums import ( + SessionStatus, + SessionPhase, + ReadinessState, + RecommendedAction, + SessionCollaboratorRole, +) + + +# [DEF:SessionCollaborator:Class] +# @COMPLEXITY: 2 +# @PURPOSE: RBAC collaborator record linking a user to a dataset review session with a specific role. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +class SessionCollaborator(Base): + __tablename__ = "session_collaborators" + + id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + session_id = Column( + String, ForeignKey("dataset_review_sessions.session_id"), nullable=False + ) + user_id = Column(String, ForeignKey("users.id"), nullable=False) + role = Column(SQLEnum(SessionCollaboratorRole), nullable=False) + added_at = Column(DateTime, default=datetime.utcnow, nullable=False) + + session = relationship("DatasetReviewSession", back_populates="collaborators") + user = relationship("User") + + +# [/DEF:SessionCollaborator:Class] + + +# [DEF:DatasetReviewSession:Class] +# @COMPLEXITY: 3 +# @PURPOSE: Aggregate root for the dataset review lifecycle, owning all child entities and driving readiness transitions. 
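+# Concurrency sketch (illustrative): version_id_generator=False below means the
+# application bumps `version` itself (repository.bump_session_version does this);
+# SQLAlchemy then adds the version to the UPDATE's WHERE clause, so a stale
+# concurrent writer fails on flush with sqlalchemy.orm.exc.StaleDataError
+# instead of silently losing updates.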
+# @RELATION: DEPENDS_ON -> [SessionCollaborator] +# @RELATION: DEPENDS_ON -> [DatasetProfile] +# @RELATION: DEPENDS_ON -> [ValidationFinding] +# @RELATION: DEPENDS_ON -> [SemanticSource] +# @RELATION: DEPENDS_ON -> [SemanticFieldEntry] +# @RELATION: DEPENDS_ON -> [ImportedFilter] +# @RELATION: DEPENDS_ON -> [TemplateVariable] +# @RELATION: DEPENDS_ON -> [ExecutionMapping] +# @RELATION: DEPENDS_ON -> [ClarificationSession] +# @RELATION: DEPENDS_ON -> [CompiledPreview] +# @RELATION: DEPENDS_ON -> [DatasetRunContext] +# @RELATION: DEPENDS_ON -> [ExportArtifact] +# @RELATION: DEPENDS_ON -> [SessionEvent] +# @INVARIANT: Optimistic-lock version column prevents lost-update races on concurrent mutations. +class DatasetReviewSession(Base): + __tablename__ = "dataset_review_sessions" + + session_id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) + user_id = Column(String, ForeignKey("users.id"), nullable=False) + environment_id = Column(String, ForeignKey("environments.id"), nullable=False) + source_kind = Column(String, nullable=False) + source_input = Column(String, nullable=False) + dataset_ref = Column(String, nullable=False) + dataset_id = Column(Integer, nullable=True) + dashboard_id = Column(Integer, nullable=True) + readiness_state = Column( + SQLEnum(ReadinessState), nullable=False, default=ReadinessState.EMPTY + ) + recommended_action = Column( + SQLEnum(RecommendedAction), + nullable=False, + default=RecommendedAction.IMPORT_FROM_SUPERSET, + ) + version = Column(Integer, nullable=False, default=0) + __mapper_args__ = {"version_id_col": version, "version_id_generator": False} + status = Column( + SQLEnum(SessionStatus), nullable=False, default=SessionStatus.ACTIVE + ) + current_phase = Column( + SQLEnum(SessionPhase), nullable=False, default=SessionPhase.INTAKE + ) + active_task_id = Column(String, nullable=True) + last_preview_id = Column(String, nullable=True) + last_run_context_id = Column(String, nullable=True) + created_at = Column(DateTime, default=datetime.utcnow, nullable=False) + updated_at = Column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False + ) + last_activity_at = Column(DateTime, default=datetime.utcnow, nullable=False) + closed_at = Column(DateTime, nullable=True) + + owner = relationship("User") + collaborators = relationship( + "SessionCollaborator", back_populates="session", cascade="all, delete-orphan" + ) + profile = relationship( + "DatasetProfile", + back_populates="session", + uselist=False, + cascade="all, delete-orphan", + ) + findings = relationship( + "ValidationFinding", back_populates="session", cascade="all, delete-orphan" + ) + semantic_sources = relationship( + "SemanticSource", back_populates="session", cascade="all, delete-orphan" + ) + semantic_fields = relationship( + "SemanticFieldEntry", back_populates="session", cascade="all, delete-orphan" + ) + imported_filters = relationship( + "ImportedFilter", back_populates="session", cascade="all, delete-orphan" + ) + template_variables = relationship( + "TemplateVariable", back_populates="session", cascade="all, delete-orphan" + ) + execution_mappings = relationship( + "ExecutionMapping", back_populates="session", cascade="all, delete-orphan" + ) + clarification_sessions = relationship( + "ClarificationSession", back_populates="session", cascade="all, delete-orphan" + ) + previews = relationship( + "CompiledPreview", back_populates="session", cascade="all, delete-orphan" + ) + run_contexts = relationship( + "DatasetRunContext", back_populates="session", 
cascade="all, delete-orphan" + ) + export_artifacts = relationship( + "ExportArtifact", back_populates="session", cascade="all, delete-orphan" + ) + events = relationship( + "SessionEvent", back_populates="session", cascade="all, delete-orphan" + ) + + +# [/DEF:DatasetReviewSession:Class] + + +# [/DEF:DatasetReviewSessionModels:Module] diff --git a/backend/src/schemas/dataset_review.py b/backend/src/schemas/dataset_review.py index 9139859a..028242d8 100644 --- a/backend/src/schemas/dataset_review.py +++ b/backend/src/schemas/dataset_review.py @@ -1,419 +1,30 @@ # [DEF:DatasetReviewSchemas:Module] -# -# @COMPLEXITY: 3 +# @COMPLEXITY: 2 # @SEMANTICS: dataset_review, schemas, pydantic, session, profile, findings -# @PURPOSE: Defines API schemas for the dataset review orchestration flow. -# @LAYER: API -# @RELATION: DEPENDS_ON -> [DatasetReviewModels] +# @PURPOSE: Thin facade re-exporting all dataset review API schemas from decomposed sub-modules. +# @LAYER: API +# @RATIONALE: Original 419-line file exceeded INV_7 (400-line module limit). Decomposed into DTO and composite sub-modules. +# @REJECTED: Keeping all schemas in a single file because it exceeded the fractal limit. -# [SECTION: IMPORTS] -from datetime import datetime -from typing import List, Optional, Any -from pydantic import BaseModel, Field -from src.models.dataset_review import ( - SessionStatus, - SessionPhase, - ReadinessState, - RecommendedAction, - SessionCollaboratorRole, - BusinessSummarySource, - ConfidenceState, - FindingArea, - FindingSeverity, - ResolutionState, - SemanticSourceType, - TrustLevel, - SemanticSourceStatus, - FieldKind, - FieldProvenance, - CandidateMatchType, - CandidateStatus, - FilterSource, - FilterConfidenceState, - FilterRecoveryStatus, - VariableKind, - MappingStatus, - MappingMethod, - MappingWarningLevel, - ApprovalState, - ClarificationStatus, - QuestionState, - AnswerKind, - PreviewStatus, - LaunchStatus, - ArtifactType, - ArtifactFormat, +from src.schemas.dataset_review_pkg._dtos import ( # noqa: F401 + SessionCollaboratorDto, + DatasetProfileDto, + ValidationFindingDto, + SemanticSourceDto, + SemanticCandidateDto, + SemanticFieldEntryDto, + ImportedFilterDto, + TemplateVariableDto, + ExecutionMappingDto, +) +from src.schemas.dataset_review_pkg._composites import ( # noqa: F401 + ClarificationOptionDto, + ClarificationAnswerDto, + ClarificationQuestionDto, + ClarificationSessionDto, + CompiledPreviewDto, + DatasetRunContextDto, + SessionSummary, + SessionDetail, ) -# [/SECTION] - - -# [DEF:SessionCollaboratorDto:Class] -class SessionCollaboratorDto(BaseModel): - user_id: str - role: SessionCollaboratorRole - added_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:SessionCollaboratorDto:Class] - - -# [DEF:DatasetProfileDto:Class] -class DatasetProfileDto(BaseModel): - profile_id: str - session_id: str - dataset_name: str - schema_name: Optional[str] = None - database_name: Optional[str] = None - business_summary: str - business_summary_source: BusinessSummarySource - description: Optional[str] = None - dataset_type: Optional[str] = None - is_sqllab_view: bool - completeness_score: Optional[float] = None - confidence_state: ConfidenceState - has_blocking_findings: bool - has_warning_findings: bool - manual_summary_locked: bool - created_at: datetime - updated_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:DatasetProfileDto:Class] - - -# [DEF:ValidationFindingDto:Class] -class ValidationFindingDto(BaseModel): - finding_id: str - session_id: str - area: 
FindingArea - severity: FindingSeverity - code: str - title: str - message: str - resolution_state: ResolutionState - resolution_note: Optional[str] = None - caused_by_ref: Optional[str] = None - created_at: datetime - resolved_at: Optional[datetime] = None - - class Config: - from_attributes = True - - -# [/DEF:ValidationFindingDto:Class] - - -# [DEF:SemanticSourceDto:Class] -class SemanticSourceDto(BaseModel): - source_id: str - session_id: str - source_type: SemanticSourceType - source_ref: str - source_version: str - display_name: str - trust_level: TrustLevel - schema_overlap_score: Optional[float] = None - status: SemanticSourceStatus - created_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:SemanticSourceDto:Class] - - -# [DEF:SemanticCandidateDto:Class] -class SemanticCandidateDto(BaseModel): - candidate_id: str - field_id: str - source_id: Optional[str] = None - candidate_rank: int - match_type: CandidateMatchType - confidence_score: float - proposed_verbose_name: Optional[str] = None - proposed_description: Optional[str] = None - proposed_display_format: Optional[str] = None - status: CandidateStatus - created_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:SemanticCandidateDto:Class] - - -# [DEF:SemanticFieldEntryDto:Class] -class SemanticFieldEntryDto(BaseModel): - field_id: str - session_id: str - session_version: Optional[int] = None - field_name: str - field_kind: FieldKind - verbose_name: Optional[str] = None - description: Optional[str] = None - display_format: Optional[str] = None - provenance: FieldProvenance - source_id: Optional[str] = None - source_version: Optional[str] = None - confidence_rank: Optional[int] = None - is_locked: bool - has_conflict: bool - needs_review: bool - last_changed_by: str - user_feedback: Optional[str] = None - created_at: datetime - updated_at: datetime - candidates: List[SemanticCandidateDto] = [] - - class Config: - from_attributes = True - - -# [/DEF:SemanticFieldEntryDto:Class] - - -# [DEF:ImportedFilterDto:Class] -class ImportedFilterDto(BaseModel): - filter_id: str - session_id: str - filter_name: str - display_name: Optional[str] = None - raw_value: Any - raw_value_masked: bool = False - normalized_value: Optional[Any] = None - source: FilterSource - confidence_state: FilterConfidenceState - requires_confirmation: bool - recovery_status: FilterRecoveryStatus - notes: Optional[str] = None - created_at: datetime - updated_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:ImportedFilterDto:Class] - - -# [DEF:TemplateVariableDto:Class] -class TemplateVariableDto(BaseModel): - variable_id: str - session_id: str - variable_name: str - expression_source: str - variable_kind: VariableKind - is_required: bool - default_value: Optional[Any] = None - mapping_status: MappingStatus - created_at: datetime - updated_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:TemplateVariableDto:Class] - - -# [DEF:ExecutionMappingDto:Class] -class ExecutionMappingDto(BaseModel): - mapping_id: str - session_id: str - session_version: Optional[int] = None - filter_id: str - variable_id: str - mapping_method: MappingMethod - raw_input_value: Any - effective_value: Optional[Any] = None - transformation_note: Optional[str] = None - warning_level: Optional[MappingWarningLevel] = None - requires_explicit_approval: bool - approval_state: ApprovalState - approved_by_user_id: Optional[str] = None - approved_at: Optional[datetime] = None - created_at: datetime - updated_at: datetime - - 
class Config: - from_attributes = True - - -# [/DEF:ExecutionMappingDto:Class] - - -# [DEF:ClarificationOptionDto:Class] -class ClarificationOptionDto(BaseModel): - option_id: str - question_id: str - label: str - value: str - is_recommended: bool - display_order: int - - class Config: - from_attributes = True - - -# [/DEF:ClarificationOptionDto:Class] - - -# [DEF:ClarificationAnswerDto:Class] -class ClarificationAnswerDto(BaseModel): - answer_id: str - question_id: str - answer_kind: AnswerKind - answer_value: Optional[str] = None - answered_by_user_id: str - impact_summary: Optional[str] = None - user_feedback: Optional[str] = None - created_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:ClarificationAnswerDto:Class] - - -# [DEF:ClarificationQuestionDto:Class] -class ClarificationQuestionDto(BaseModel): - question_id: str - clarification_session_id: str - topic_ref: str - question_text: str - why_it_matters: str - current_guess: Optional[str] = None - priority: int - state: QuestionState - created_at: datetime - updated_at: datetime - options: List[ClarificationOptionDto] = [] - answer: Optional[ClarificationAnswerDto] = None - - class Config: - from_attributes = True - - -# [/DEF:ClarificationQuestionDto:Class] - - -# [DEF:ClarificationSessionDto:Class] -class ClarificationSessionDto(BaseModel): - clarification_session_id: str - session_id: str - status: ClarificationStatus - current_question_id: Optional[str] = None - resolved_count: int - remaining_count: int - summary_delta: Optional[str] = None - started_at: datetime - updated_at: datetime - completed_at: Optional[datetime] = None - questions: List[ClarificationQuestionDto] = [] - - class Config: - from_attributes = True - - -# [/DEF:ClarificationSessionDto:Class] - - -# [DEF:CompiledPreviewDto:Class] -class CompiledPreviewDto(BaseModel): - preview_id: str - session_id: str - session_version: Optional[int] = None - preview_status: PreviewStatus - compiled_sql: Optional[str] = None - preview_fingerprint: str - compiled_by: str - error_code: Optional[str] = None - error_details: Optional[str] = None - compiled_at: Optional[datetime] = None - created_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:CompiledPreviewDto:Class] - - -# [DEF:DatasetRunContextDto:Class] -class DatasetRunContextDto(BaseModel): - run_context_id: str - session_id: str - session_version: Optional[int] = None - dataset_ref: str - environment_id: str - preview_id: str - sql_lab_session_ref: str - effective_filters: Any - template_params: Any - approved_mapping_ids: List[str] - semantic_decision_refs: List[str] - open_warning_refs: List[str] - launch_status: LaunchStatus - launch_error: Optional[str] = None - created_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:DatasetRunContextDto:Class] - - -# [DEF:SessionSummary:Class] -class SessionSummary(BaseModel): - session_id: str - user_id: str - environment_id: str - source_kind: str - source_input: str - dataset_ref: str - dataset_id: Optional[int] = None - version: int = 0 - session_version: int = 0 - readiness_state: ReadinessState - recommended_action: RecommendedAction - status: SessionStatus - current_phase: SessionPhase - created_at: datetime - updated_at: datetime - last_activity_at: datetime - - class Config: - from_attributes = True - - -# [/DEF:SessionSummary:Class] - - -# [DEF:SessionDetail:Class] -class SessionDetail(SessionSummary): - collaborators: List[SessionCollaboratorDto] = [] - profile: Optional[DatasetProfileDto] = None - findings: 
List[ValidationFindingDto] = [] - semantic_sources: List[SemanticSourceDto] = [] - semantic_fields: List[SemanticFieldEntryDto] = [] - imported_filters: List[ImportedFilterDto] = [] - template_variables: List[TemplateVariableDto] = [] - execution_mappings: List[ExecutionMappingDto] = [] - clarification_sessions: List[ClarificationSessionDto] = [] - previews: List[CompiledPreviewDto] = [] - run_contexts: List[DatasetRunContextDto] = [] - - -# [/DEF:SessionDetail:Class] - # [/DEF:DatasetReviewSchemas:Module] diff --git a/backend/src/schemas/dataset_review_pkg/_composites.py b/backend/src/schemas/dataset_review_pkg/_composites.py new file mode 100644 index 00000000..fd58ce41 --- /dev/null +++ b/backend/src/schemas/dataset_review_pkg/_composites.py @@ -0,0 +1,219 @@ +# [DEF:DatasetReviewSchemaComposites:Module] +# @COMPLEXITY: 2 +# @PURPOSE: Composite Pydantic DTOs for clarification, preview, run context, and session summary/detail responses. +# @LAYER: API +# @RELATION: DEPENDS_ON -> [DatasetReviewSchemaDtos] + +from datetime import datetime +from typing import Any, List, Optional + +from pydantic import BaseModel + +from src.models.dataset_review import ( + ClarificationStatus, + QuestionState, + AnswerKind, + PreviewStatus, + LaunchStatus, + SessionStatus, + SessionPhase, + ReadinessState, + RecommendedAction, +) +from src.schemas.dataset_review_pkg._dtos import ( + SessionCollaboratorDto, + DatasetProfileDto, + ValidationFindingDto, + SemanticSourceDto, + SemanticFieldEntryDto, + ImportedFilterDto, + TemplateVariableDto, + ExecutionMappingDto, +) + + +# [DEF:ClarificationOptionDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Clarification option DTO. +class ClarificationOptionDto(BaseModel): + option_id: str + question_id: str + label: str + value: str + is_recommended: bool + display_order: int + + class Config: + from_attributes = True + + +# [/DEF:ClarificationOptionDto:Class] + + +# [DEF:ClarificationAnswerDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Clarification answer DTO with feedback. +class ClarificationAnswerDto(BaseModel): + answer_id: str + question_id: str + answer_kind: AnswerKind + answer_value: Optional[str] = None + answered_by_user_id: str + impact_summary: Optional[str] = None + user_feedback: Optional[str] = None + created_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:ClarificationAnswerDto:Class] + + +# [DEF:ClarificationQuestionDto:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Clarification question DTO with nested options and answer. +class ClarificationQuestionDto(BaseModel): + question_id: str + clarification_session_id: str + topic_ref: str + question_text: str + why_it_matters: str + current_guess: Optional[str] = None + priority: int + state: QuestionState + created_at: datetime + updated_at: datetime + options: List[ClarificationOptionDto] = [] + answer: Optional[ClarificationAnswerDto] = None + + class Config: + from_attributes = True + + +# [/DEF:ClarificationQuestionDto:Class] + + +# [DEF:ClarificationSessionDto:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Clarification session DTO with nested questions. 
+class ClarificationSessionDto(BaseModel): + clarification_session_id: str + session_id: str + status: ClarificationStatus + current_question_id: Optional[str] = None + resolved_count: int + remaining_count: int + summary_delta: Optional[str] = None + started_at: datetime + updated_at: datetime + completed_at: Optional[datetime] = None + questions: List[ClarificationQuestionDto] = [] + + class Config: + from_attributes = True + + +# [/DEF:ClarificationSessionDto:Class] + + +# [DEF:CompiledPreviewDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Compiled preview DTO with fingerprint and session version. +class CompiledPreviewDto(BaseModel): + preview_id: str + session_id: str + session_version: Optional[int] = None + preview_status: PreviewStatus + compiled_sql: Optional[str] = None + preview_fingerprint: str + compiled_by: str + error_code: Optional[str] = None + error_details: Optional[str] = None + compiled_at: Optional[datetime] = None + created_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:CompiledPreviewDto:Class] + + +# [DEF:DatasetRunContextDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Run context DTO with launch audit data and session version. +class DatasetRunContextDto(BaseModel): + run_context_id: str + session_id: str + session_version: Optional[int] = None + dataset_ref: str + environment_id: str + preview_id: str + sql_lab_session_ref: str + effective_filters: Any + template_params: Any + approved_mapping_ids: List[str] + semantic_decision_refs: List[str] + open_warning_refs: List[str] + launch_status: LaunchStatus + launch_error: Optional[str] = None + created_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:DatasetRunContextDto:Class] + + +# [DEF:SessionSummary:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Lightweight session summary DTO for list responses. +class SessionSummary(BaseModel): + session_id: str + user_id: str + environment_id: str + source_kind: str + source_input: str + dataset_ref: str + dataset_id: Optional[int] = None + version: int = 0 + session_version: int = 0 + readiness_state: ReadinessState + recommended_action: RecommendedAction + status: SessionStatus + current_phase: SessionPhase + created_at: datetime + updated_at: datetime + last_activity_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:SessionSummary:Class] + + +# [DEF:SessionDetail:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Full session detail DTO with all nested aggregates for detail views. 
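+# Serialization sketch (illustrative; assumes Pydantic v2, where the
+# from_attributes config enables validation straight from ORM objects):
+#   detail = SessionDetail.model_validate(orm_session)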
+# @RELATION: INHERITS -> [SessionSummary] +class SessionDetail(SessionSummary): + collaborators: List[SessionCollaboratorDto] = [] + profile: Optional[DatasetProfileDto] = None + findings: List[ValidationFindingDto] = [] + semantic_sources: List[SemanticSourceDto] = [] + semantic_fields: List[SemanticFieldEntryDto] = [] + imported_filters: List[ImportedFilterDto] = [] + template_variables: List[TemplateVariableDto] = [] + execution_mappings: List[ExecutionMappingDto] = [] + clarification_sessions: List[ClarificationSessionDto] = [] + previews: List[CompiledPreviewDto] = [] + run_contexts: List[DatasetRunContextDto] = [] + + +# [/DEF:SessionDetail:Class] + + +# [/DEF:DatasetReviewSchemaComposites:Module] diff --git a/backend/src/schemas/dataset_review_pkg/_dtos.py b/backend/src/schemas/dataset_review_pkg/_dtos.py new file mode 100644 index 00000000..deefce98 --- /dev/null +++ b/backend/src/schemas/dataset_review_pkg/_dtos.py @@ -0,0 +1,262 @@ +# [DEF:DatasetReviewSchemaDtos:Module] +# @COMPLEXITY: 2 +# @PURPOSE: Pydantic DTOs for session, profile, findings, collaborators, and semantic field API payloads. +# @LAYER: API +# @RELATION: DEPENDS_ON -> [DatasetReviewModels] + +from datetime import datetime +from typing import List, Optional, Any + +from pydantic import BaseModel + +from src.models.dataset_review import ( + SessionStatus, + SessionPhase, + ReadinessState, + RecommendedAction, + SessionCollaboratorRole, + BusinessSummarySource, + ConfidenceState, + FindingArea, + FindingSeverity, + ResolutionState, + SemanticSourceType, + TrustLevel, + SemanticSourceStatus, + FieldKind, + FieldProvenance, + CandidateMatchType, + CandidateStatus, + FilterSource, + FilterConfidenceState, + FilterRecoveryStatus, + VariableKind, + MappingStatus, + MappingMethod, + MappingWarningLevel, + ApprovalState, +) + + +# [DEF:SessionCollaboratorDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Collaborator DTO for session access control. +class SessionCollaboratorDto(BaseModel): + user_id: str + role: SessionCollaboratorRole + added_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:SessionCollaboratorDto:Class] + + +# [DEF:DatasetProfileDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Dataset profile DTO with business summary and confidence metadata. +class DatasetProfileDto(BaseModel): + profile_id: str + session_id: str + dataset_name: str + schema_name: Optional[str] = None + database_name: Optional[str] = None + business_summary: str + business_summary_source: BusinessSummarySource + description: Optional[str] = None + dataset_type: Optional[str] = None + is_sqllab_view: bool + completeness_score: Optional[float] = None + confidence_state: ConfidenceState + has_blocking_findings: bool + has_warning_findings: bool + manual_summary_locked: bool + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:DatasetProfileDto:Class] + + +# [DEF:ValidationFindingDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Validation finding DTO with resolution tracking. 
+class ValidationFindingDto(BaseModel): + finding_id: str + session_id: str + area: FindingArea + severity: FindingSeverity + code: str + title: str + message: str + resolution_state: ResolutionState + resolution_note: Optional[str] = None + caused_by_ref: Optional[str] = None + created_at: datetime + resolved_at: Optional[datetime] = None + + class Config: + from_attributes = True + + +# [/DEF:ValidationFindingDto:Class] + + +# [DEF:SemanticSourceDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Semantic source DTO with trust level and status. +class SemanticSourceDto(BaseModel): + source_id: str + session_id: str + source_type: SemanticSourceType + source_ref: str + source_version: str + display_name: str + trust_level: TrustLevel + schema_overlap_score: Optional[float] = None + status: SemanticSourceStatus + created_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:SemanticSourceDto:Class] + + +# [DEF:SemanticCandidateDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Semantic candidate DTO with match type and confidence score. +class SemanticCandidateDto(BaseModel): + candidate_id: str + field_id: str + source_id: Optional[str] = None + candidate_rank: int + match_type: CandidateMatchType + confidence_score: float + proposed_verbose_name: Optional[str] = None + proposed_description: Optional[str] = None + proposed_display_format: Optional[str] = None + status: CandidateStatus + created_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:SemanticCandidateDto:Class] + + +# [DEF:SemanticFieldEntryDto:Class] +# @COMPLEXITY: 2 +# @PURPOSE: Semantic field entry DTO with nested candidates and session version. +class SemanticFieldEntryDto(BaseModel): + field_id: str + session_id: str + session_version: Optional[int] = None + field_name: str + field_kind: FieldKind + verbose_name: Optional[str] = None + description: Optional[str] = None + display_format: Optional[str] = None + provenance: FieldProvenance + source_id: Optional[str] = None + source_version: Optional[str] = None + confidence_rank: Optional[int] = None + is_locked: bool + has_conflict: bool + needs_review: bool + last_changed_by: str + user_feedback: Optional[str] = None + created_at: datetime + updated_at: datetime + candidates: List[SemanticCandidateDto] = [] + + class Config: + from_attributes = True + + +# [/DEF:SemanticFieldEntryDto:Class] + + +# [DEF:ImportedFilterDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Imported filter DTO with confidence and recovery status. +class ImportedFilterDto(BaseModel): + filter_id: str + session_id: str + filter_name: str + display_name: Optional[str] = None + raw_value: Any + raw_value_masked: bool = False + normalized_value: Optional[Any] = None + source: FilterSource + confidence_state: FilterConfidenceState + requires_confirmation: bool + recovery_status: FilterRecoveryStatus + notes: Optional[str] = None + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:ImportedFilterDto:Class] + + +# [DEF:TemplateVariableDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Template variable DTO with mapping status. 
+class TemplateVariableDto(BaseModel): + variable_id: str + session_id: str + variable_name: str + expression_source: str + variable_kind: VariableKind + is_required: bool + default_value: Optional[Any] = None + mapping_status: MappingStatus + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:TemplateVariableDto:Class] + + +# [DEF:ExecutionMappingDto:Class] +# @COMPLEXITY: 1 +# @PURPOSE: Execution mapping DTO with approval state and session version. +class ExecutionMappingDto(BaseModel): + mapping_id: str + session_id: str + session_version: Optional[int] = None + filter_id: str + variable_id: str + mapping_method: MappingMethod + raw_input_value: Any + effective_value: Optional[Any] = None + transformation_note: Optional[str] = None + warning_level: Optional[MappingWarningLevel] = None + requires_explicit_approval: bool + approval_state: ApprovalState + approved_by_user_id: Optional[str] = None + approved_at: Optional[datetime] = None + created_at: datetime + updated_at: datetime + + class Config: + from_attributes = True + + +# [/DEF:ExecutionMappingDto:Class] + + +# [/DEF:DatasetReviewSchemaDtos:Module] diff --git a/backend/src/services/dataset_review/clarification_engine.py b/backend/src/services/dataset_review/clarification_engine.py index d813d2c2..8f6167a8 100644 --- a/backend/src/services/dataset_review/clarification_engine.py +++ b/backend/src/services/dataset_review/clarification_engine.py @@ -3,20 +3,22 @@ # @SEMANTICS: dataset_review, clarification, question_payload, answer_persistence, readiness, findings # @PURPOSE: Manage one-question-at-a-time clarification state, deterministic answer persistence, and readiness/finding updates. # @LAYER: Domain -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @RELATION: [DEPENDS_ON] ->[ClarificationSession] -# @RELATION: [DEPENDS_ON] ->[ClarificationQuestion] -# @RELATION: [DEPENDS_ON] ->[ClarificationAnswer] -# @RELATION: [DEPENDS_ON] ->[ValidationFinding] +# @RELATION: DEPENDS_ON -> [DatasetReviewSessionRepository] +# @RELATION: DEPENDS_ON -> [ClarificationSession] +# @RELATION: DEPENDS_ON -> [ClarificationQuestion] +# @RELATION: DEPENDS_ON -> [ClarificationAnswer] +# @RELATION: DEPENDS_ON -> [ValidationFinding] +# @RELATION: DISPATCHES -> [ClarificationHelpers:Module] # @PRE: Target session contains a persisted clarification aggregate in the current ownership scope. # @POST: Active clarification payload exposes one highest-priority unresolved question, and each recorded answer is persisted before pointer/readiness mutation. # @SIDE_EFFECT: Persists clarification answers, question/session states, and related readiness/finding changes. # @DATA_CONTRACT: Input[DatasetReviewSession|ClarificationAnswerCommand] -> Output[ClarificationStateResult] # @INVARIANT: Only one active clarification question may exist at a time; skipped and expert-review items remain unresolved and visible. +# @RATIONALE: Original 635-line file exceeded INV_7 (400-line module limit). Extracted pure helpers into _helpers sub-module. +# @REJECTED: Keeping all clarification logic in one file because it exceeded the fractal limit. 
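+#
+# Flow sketch (illustrative; the constructor argument is assumed from the
+# repository attribute used in this class):
+#   engine = ClarificationEngine(repository)
+#   payload = engine.build_question_payload(session)   # one OPEN question or None
+#   result = engine.record_answer(ClarificationAnswerCommand(
+#       session=session, question_id=payload.question_id,
+#       answer_kind=AnswerKind.SELECTED, answer_value="...", user=current_user))
+#   # the answer row is flushed before the current-question pointer advances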
+ from __future__ import annotations -# [DEF:imports:Block] -import uuid from dataclasses import dataclass, field from datetime import datetime from typing import List, Optional @@ -30,19 +32,25 @@ from src.models.dataset_review import ( ClarificationSession, ClarificationStatus, DatasetReviewSession, - FindingArea, - FindingSeverity, QuestionState, ReadinessState, RecommendedAction, - ResolutionState, SessionPhase, ValidationFinding, ) from src.services.dataset_review.repositories.session_repository import ( DatasetReviewSessionRepository, ) -# [/DEF:imports:Block] +from src.services.dataset_review.clarification_pkg._helpers import ( + select_next_open_question, + count_resolved_questions, + count_remaining_questions, + normalize_answer_value, + build_impact_summary, + upsert_clarification_finding, + derive_readiness_state, + derive_recommended_action, +) # [DEF:ClarificationQuestionPayload:Class] @@ -96,9 +104,8 @@ class ClarificationAnswerCommand: # [DEF:ClarificationEngine:Class] # @COMPLEXITY: 4 # @PURPOSE: Provide deterministic one-question-at-a-time clarification selection and answer persistence. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @RELATION: [DEPENDS_ON] ->[ClarificationSession] -# @RELATION: [DEPENDS_ON] ->[ValidationFinding] +# @RELATION: DEPENDS_ON -> [DatasetReviewSessionRepository] +# @RELATION: CALLS -> [ClarificationHelpers:Module] # @PRE: Repository is bound to the current request transaction scope. # @POST: Returned clarification state is persistence-backed and aligned with session readiness/recommended action. # @SIDE_EFFECT: Mutates clarification answers, session flags, and related clarification findings. @@ -113,51 +120,33 @@ class ClarificationEngine: # [DEF:build_question_payload:Function] # @COMPLEXITY: 4 - # @PURPOSE: Return the one active highest-priority clarification question payload with why-it-matters, current guess, and options. - # @RELATION: [DEPENDS_ON] ->[ClarificationQuestion] - # @RELATION: [DEPENDS_ON] ->[ClarificationOption] + # @PURPOSE: Return the one active highest-priority clarification question payload. # @PRE: Session contains unresolved clarification state or a resumable clarification session. # @POST: Returns exactly one active/open question payload or None when no unresolved question remains. # @SIDE_EFFECT: Normalizes the active-question pointer and clarification status in persistence. 
- # @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[ClarificationQuestionPayload|None] def build_question_payload( - self, - session: DatasetReviewSession, + self, session: DatasetReviewSession, ) -> Optional[ClarificationQuestionPayload]: with belief_scope("ClarificationEngine.build_question_payload"): clarification_session = self._get_latest_clarification_session(session) if clarification_session is None: - logger.reason( - "Clarification payload requested without clarification session", - extra={"session_id": session.session_id}, - ) + logger.reason("No clarification session found", extra={"session_id": session.session_id}) return None active_questions = [ - question - for question in clarification_session.questions - if question.state == QuestionState.OPEN + q for q in clarification_session.questions if q.state == QuestionState.OPEN ] - active_questions.sort( - key=lambda item: ( - -int(item.priority), - item.created_at, - item.question_id, - ) - ) + active_questions.sort(key=lambda item: (-int(item.priority), item.created_at, item.question_id)) if not active_questions: clarification_session.current_question_id = None clarification_session.status = ClarificationStatus.COMPLETED - session.readiness_state = self._derive_readiness_state(session) - session.recommended_action = self._derive_recommended_action(session) + session.readiness_state = derive_readiness_state(session, clarification_session) + session.recommended_action = derive_recommended_action(session, clarification_session) if session.current_phase == SessionPhase.CLARIFICATION: session.current_phase = SessionPhase.REVIEW self.repository.db.commit() - logger.reflect( - "No unresolved clarification question remains", - extra={"session_id": session.session_id}, - ) + logger.reflect("No unresolved clarification question remains", extra={"session_id": session.session_id}) return None selected_question = active_questions[0] @@ -167,15 +156,7 @@ class ClarificationEngine: session.recommended_action = RecommendedAction.ANSWER_NEXT_QUESTION session.current_phase = SessionPhase.CLARIFICATION - logger.reason( - "Selected active clarification question", - extra={ - "session_id": session.session_id, - "clarification_session_id": clarification_session.clarification_session_id, - "question_id": selected_question.question_id, - "priority": selected_question.priority, - }, - ) + logger.reason("Selected active clarification question", extra={"session_id": session.session_id, "question_id": selected_question.question_id, "priority": selected_question.priority}) self.repository.db.commit() payload = ClarificationQuestionPayload( @@ -188,124 +169,58 @@ class ClarificationEngine: priority=selected_question.priority, state=selected_question.state, options=[ - { - "option_id": option.option_id, - "question_id": option.question_id, - "label": option.label, - "value": option.value, - "is_recommended": option.is_recommended, - "display_order": option.display_order, - } - for option in sorted( - selected_question.options, - key=lambda item: ( - item.display_order, - item.label, - item.option_id, - ), - ) + {"option_id": o.option_id, "question_id": o.question_id, "label": o.label, "value": o.value, "is_recommended": o.is_recommended, "display_order": o.display_order} + for o in sorted(selected_question.options, key=lambda item: (item.display_order, item.label, item.option_id)) ], ) - logger.reflect( - "Clarification payload built", - extra={ - "session_id": session.session_id, - "question_id": payload.question_id, - "option_count": 
len(payload.options), - }, - ) + logger.reflect("Clarification payload built", extra={"session_id": session.session_id, "question_id": payload.question_id, "option_count": len(payload.options)}) return payload # [/DEF:build_question_payload:Function] # [DEF:record_answer:Function] # @COMPLEXITY: 4 - # @PURPOSE: Persist one clarification answer before any pointer/readiness mutation and compute deterministic state impact. - # @RELATION: [DEPENDS_ON] ->[ClarificationAnswer] - # @RELATION: [DEPENDS_ON] ->[ValidationFinding] + # @PURPOSE: Persist one clarification answer before any pointer/readiness mutation. # @PRE: Target question belongs to the session's active clarification session and is still open. - # @POST: Answer row is persisted before current-question pointer advances; skipped/expert-review items remain unresolved and visible. + # @POST: Answer row is persisted before current-question pointer advances. # @SIDE_EFFECT: Inserts answer row, mutates question/session states, updates clarification findings, and commits. - # @DATA_CONTRACT: Input[ClarificationAnswerCommand] -> Output[ClarificationStateResult] - def record_answer( - self, command: ClarificationAnswerCommand - ) -> ClarificationStateResult: + def record_answer(self, command: ClarificationAnswerCommand) -> ClarificationStateResult: with belief_scope("ClarificationEngine.record_answer"): session = command.session clarification_session = self._get_latest_clarification_session(session) if clarification_session is None: - logger.explore( - "Cannot record clarification answer because no clarification session exists", - extra={"session_id": session.session_id}, - ) + logger.explore("Cannot record clarification answer because no clarification session exists", extra={"session_id": session.session_id}) raise ValueError("Clarification session not found") question = self._find_question(clarification_session, command.question_id) if question is None: - logger.explore( - "Cannot record clarification answer for foreign or missing question", - extra={ - "session_id": session.session_id, - "question_id": command.question_id, - }, - ) + logger.explore("Cannot record clarification answer for foreign or missing question", extra={"session_id": session.session_id, "question_id": command.question_id}) raise ValueError("Clarification question not found") if question.answer is not None: - logger.explore( - "Rejected duplicate clarification answer submission", - extra={ - "session_id": session.session_id, - "question_id": command.question_id, - }, - ) + logger.explore("Rejected duplicate clarification answer submission", extra={"session_id": session.session_id, "question_id": command.question_id}) raise ValueError("Clarification question already answered") - if ( - clarification_session.current_question_id - and clarification_session.current_question_id != question.question_id - ): - logger.explore( - "Rejected answer for non-active clarification question", - extra={ - "session_id": session.session_id, - "question_id": question.question_id, - "current_question_id": clarification_session.current_question_id, - }, - ) - raise ValueError( - "Only the active clarification question can be answered" - ) + if clarification_session.current_question_id and clarification_session.current_question_id != question.question_id: + logger.explore("Rejected answer for non-active clarification question", extra={"session_id": session.session_id, "question_id": question.question_id, "current_question_id": clarification_session.current_question_id}) + raise 
ValueError("Only the active clarification question can be answered") - normalized_answer_value = self._normalize_answer_value( - command.answer_kind, command.answer_value, question - ) + normalized_answer_value = normalize_answer_value(command.answer_kind, command.answer_value, question) - logger.reason( - "Persisting clarification answer before state advancement", - extra={ - "session_id": session.session_id, - "question_id": question.question_id, - "answer_kind": command.answer_kind.value, - }, - ) + logger.reason("Persisting clarification answer before state advancement", extra={"session_id": session.session_id, "question_id": question.question_id, "answer_kind": command.answer_kind.value}) persisted_answer = ClarificationAnswer( question_id=question.question_id, answer_kind=command.answer_kind, answer_value=normalized_answer_value, answered_by_user_id=command.user.id, - impact_summary=self._build_impact_summary( - question, command.answer_kind, normalized_answer_value - ), + impact_summary=build_impact_summary(question, command.answer_kind, normalized_answer_value), ) self.repository.db.add(persisted_answer) self.repository.db.flush() - changed_finding = self._upsert_clarification_finding( - session=session, - question=question, - answer_kind=command.answer_kind, - answer_value=normalized_answer_value, + changed_finding = upsert_clarification_finding( + session=session, question=question, answer_kind=command.answer_kind, + answer_value=normalized_answer_value, db_session=self.repository.db, ) if command.answer_kind == AnswerKind.SELECTED: @@ -320,51 +235,26 @@ class ClarificationEngine: question.updated_at = datetime.utcnow() self.repository.db.flush() - clarification_session.resolved_count = self._count_resolved_questions( - clarification_session - ) - clarification_session.remaining_count = self._count_remaining_questions( - clarification_session - ) - clarification_session.summary_delta = self.summarize_progress( - clarification_session - ) + clarification_session.resolved_count = count_resolved_questions(clarification_session) + clarification_session.remaining_count = count_remaining_questions(clarification_session) + clarification_session.summary_delta = self.summarize_progress(clarification_session) clarification_session.updated_at = datetime.utcnow() - next_question = self._select_next_open_question(clarification_session) - clarification_session.current_question_id = ( - next_question.question_id if next_question else None - ) - clarification_session.status = ( - ClarificationStatus.ACTIVE - if next_question - else ClarificationStatus.COMPLETED - ) + next_question = select_next_open_question(clarification_session) + clarification_session.current_question_id = next_question.question_id if next_question else None + clarification_session.status = ClarificationStatus.ACTIVE if next_question else ClarificationStatus.COMPLETED if clarification_session.status == ClarificationStatus.COMPLETED: clarification_session.completed_at = datetime.utcnow() - session.readiness_state = self._derive_readiness_state(session) - session.recommended_action = self._derive_recommended_action(session) - session.current_phase = ( - SessionPhase.CLARIFICATION - if clarification_session.current_question_id - else SessionPhase.REVIEW - ) + session.readiness_state = derive_readiness_state(session, clarification_session) + session.recommended_action = derive_recommended_action(session, clarification_session) + session.current_phase = SessionPhase.CLARIFICATION if clarification_session.current_question_id else 
SessionPhase.REVIEW self.repository.bump_session_version(session) self.repository.db.commit() self.repository.db.refresh(session) - logger.reflect( - "Clarification answer recorded and session advanced", - extra={ - "session_id": session.session_id, - "question_id": question.question_id, - "next_question_id": clarification_session.current_question_id, - "readiness_state": session.readiness_state.value, - "remaining_count": clarification_session.remaining_count, - }, - ) + logger.reflect("Clarification answer recorded and session advanced", extra={"session_id": session.session_id, "question_id": question.question_id, "next_question_id": clarification_session.current_question_id, "readiness_state": session.readiness_state.value, "remaining_count": clarification_session.remaining_count}) return ClarificationStateResult( clarification_session=clarification_session, @@ -376,12 +266,11 @@ class ClarificationEngine: # [/DEF:record_answer:Function] # [DEF:summarize_progress:Function] - # @COMPLEXITY: 2 + # @COMPLEXITY: 1 # @PURPOSE: Produce a compact progress summary for pause/resume and completion UX. - # @RELATION: [DEPENDS_ON] ->[ClarificationSession] def summarize_progress(self, clarification_session: ClarificationSession) -> str: - resolved = self._count_resolved_questions(clarification_session) - remaining = self._count_remaining_questions(clarification_session) + resolved = count_resolved_questions(clarification_session) + remaining = count_remaining_questions(clarification_session) return f"{resolved} resolved, {remaining} unresolved" # [/DEF:summarize_progress:Function] @@ -389,246 +278,25 @@ class ClarificationEngine: # [DEF:_get_latest_clarification_session:Function] # @COMPLEXITY: 2 # @PURPOSE: Select the latest clarification session for the current dataset review aggregate. - def _get_latest_clarification_session( - self, - session: DatasetReviewSession, - ) -> Optional[ClarificationSession]: + def _get_latest_clarification_session(self, session: DatasetReviewSession) -> Optional[ClarificationSession]: if not session.clarification_sessions: return None - ordered_sessions = sorted( - session.clarification_sessions, - key=lambda item: (item.started_at, item.clarification_session_id), - reverse=True, - ) - return ordered_sessions[0] + ordered = sorted(session.clarification_sessions, key=lambda item: (item.started_at, item.clarification_session_id), reverse=True) + return ordered[0] # [/DEF:_get_latest_clarification_session:Function] # [DEF:_find_question:Function] - # @COMPLEXITY: 2 + # @COMPLEXITY: 1 # @PURPOSE: Resolve a clarification question from the active clarification aggregate. - def _find_question( - self, - clarification_session: ClarificationSession, - question_id: str, - ) -> Optional[ClarificationQuestion]: - for question in clarification_session.questions: - if question.question_id == question_id: - return question + def _find_question(self, clarification_session: ClarificationSession, question_id: str) -> Optional[ClarificationQuestion]: + for q in clarification_session.questions: + if q.question_id == question_id: + return q return None # [/DEF:_find_question:Function] - # [DEF:_select_next_open_question:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Select the next unresolved question in deterministic priority order. 
- def _select_next_open_question( - self, - clarification_session: ClarificationSession, - ) -> Optional[ClarificationQuestion]: - open_questions = [ - question - for question in clarification_session.questions - if question.state == QuestionState.OPEN - ] - if not open_questions: - return None - open_questions.sort( - key=lambda item: (-int(item.priority), item.created_at, item.question_id) - ) - return open_questions[0] - - # [/DEF:_select_next_open_question:Function] - - # [DEF:_count_resolved_questions:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Count questions whose answers fully resolved the ambiguity. - def _count_resolved_questions( - self, clarification_session: ClarificationSession - ) -> int: - return sum( - 1 - for question in clarification_session.questions - if question.state == QuestionState.ANSWERED - ) - - # [/DEF:_count_resolved_questions:Function] - - # [DEF:_count_remaining_questions:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Count questions still unresolved or deferred after clarification interaction. - def _count_remaining_questions( - self, clarification_session: ClarificationSession - ) -> int: - return sum( - 1 - for question in clarification_session.questions - if question.state - in {QuestionState.OPEN, QuestionState.SKIPPED, QuestionState.EXPERT_REVIEW} - ) - - # [/DEF:_count_remaining_questions:Function] - - # [DEF:_normalize_answer_value:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Validate and normalize answer payload based on answer kind and active question options. - def _normalize_answer_value( - self, - answer_kind: AnswerKind, - answer_value: Optional[str], - question: ClarificationQuestion, - ) -> Optional[str]: - normalized_answer_value = ( - str(answer_value).strip() if answer_value is not None else None - ) - if ( - answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM} - and not normalized_answer_value - ): - raise ValueError( - "answer_value is required for selected or custom clarification answers" - ) - if answer_kind == AnswerKind.SELECTED: - allowed_values = {option.value for option in question.options} - if normalized_answer_value not in allowed_values: - raise ValueError( - "answer_value must match one of the current clarification options" - ) - if answer_kind == AnswerKind.SKIPPED: - return normalized_answer_value or "skipped" - if answer_kind == AnswerKind.EXPERT_REVIEW: - return normalized_answer_value or "expert_review" - return normalized_answer_value - - # [/DEF:_normalize_answer_value:Function] - - # [DEF:_build_impact_summary:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Build a compact audit note describing how the clarification answer affects session state. - def _build_impact_summary( - self, - question: ClarificationQuestion, - answer_kind: AnswerKind, - answer_value: Optional[str], - ) -> str: - if answer_kind == AnswerKind.SKIPPED: - return f"Clarification for {question.topic_ref} was skipped and remains unresolved." - if answer_kind == AnswerKind.EXPERT_REVIEW: - return f"Clarification for {question.topic_ref} was deferred for expert review." - return f"Clarification for {question.topic_ref} recorded as '{answer_value}'." - - # [/DEF:_build_impact_summary:Function] - - # [DEF:_upsert_clarification_finding:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Keep one finding per clarification topic aligned with answer outcome and unresolved visibility rules. 
- # @RELATION: [DEPENDS_ON] ->[ValidationFinding] - def _upsert_clarification_finding( - self, - session: DatasetReviewSession, - question: ClarificationQuestion, - answer_kind: AnswerKind, - answer_value: Optional[str], - ) -> ValidationFinding: - caused_by_ref = f"clarification:{question.question_id}" - existing = next( - ( - finding - for finding in session.findings - if finding.area == FindingArea.CLARIFICATION - and finding.caused_by_ref == caused_by_ref - ), - None, - ) - - if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}: - resolution_state = ResolutionState.RESOLVED - resolved_at = datetime.utcnow() - message = f"Clarified '{question.topic_ref}' with answer '{answer_value}'." - elif answer_kind == AnswerKind.SKIPPED: - resolution_state = ResolutionState.SKIPPED - resolved_at = None - message = f"Clarification for '{question.topic_ref}' was skipped and still needs review." - else: - resolution_state = ResolutionState.EXPERT_REVIEW - resolved_at = None - message = ( - f"Clarification for '{question.topic_ref}' requires expert review." - ) - - if existing is None: - existing = ValidationFinding( - finding_id=str(uuid.uuid4()), - session_id=session.session_id, - area=FindingArea.CLARIFICATION, - severity=FindingSeverity.WARNING, - code="CLARIFICATION_PENDING", - title="Clarification pending", - message=message, - resolution_state=resolution_state, - resolution_note=None, - caused_by_ref=caused_by_ref, - created_at=datetime.utcnow(), - resolved_at=resolved_at, - ) - self.repository.db.add(existing) - session.findings.append(existing) - else: - existing.message = message - existing.resolution_state = resolution_state - existing.resolved_at = resolved_at - - if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}: - existing.code = "CLARIFICATION_RESOLVED" - existing.title = "Clarification resolved" - elif answer_kind == AnswerKind.SKIPPED: - existing.code = "CLARIFICATION_SKIPPED" - existing.title = "Clarification skipped" - else: - existing.code = "CLARIFICATION_EXPERT_REVIEW" - existing.title = "Clarification requires expert review" - - return existing - - # [/DEF:_upsert_clarification_finding:Function] - - # [DEF:_derive_readiness_state:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Recompute readiness after clarification mutation while preserving unresolved visibility semantics. - # @RELATION: [DEPENDS_ON] ->[ClarificationSession] - # @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] - def _derive_readiness_state(self, session: DatasetReviewSession) -> ReadinessState: - clarification_session = self._get_latest_clarification_session(session) - if clarification_session is None: - return session.readiness_state - - if clarification_session.current_question_id: - return ReadinessState.CLARIFICATION_ACTIVE - - if clarification_session.remaining_count > 0: - return ReadinessState.CLARIFICATION_NEEDED - - return ReadinessState.REVIEW_READY - - # [/DEF:_derive_readiness_state:Function] - - # [DEF:_derive_recommended_action:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Recompute next-action guidance after clarification mutations. 
-    def _derive_recommended_action(
-        self, session: DatasetReviewSession
-    ) -> RecommendedAction:
-        clarification_session = self._get_latest_clarification_session(session)
-        if clarification_session is None:
-            return session.recommended_action
-        if clarification_session.current_question_id:
-            return RecommendedAction.ANSWER_NEXT_QUESTION
-        if clarification_session.remaining_count > 0:
-            return RecommendedAction.START_CLARIFICATION
-        return RecommendedAction.REVIEW_DOCUMENTATION
-
-    # [/DEF:_derive_recommended_action:Function]
-
 # [/DEF:ClarificationEngine:Class]
diff --git a/backend/src/services/dataset_review/clarification_pkg/_helpers.py b/backend/src/services/dataset_review/clarification_pkg/_helpers.py
new file mode 100644
index 00000000..829baee8
--- /dev/null
+++ b/backend/src/services/dataset_review/clarification_pkg/_helpers.py
@@ -0,0 +1,220 @@
+# [DEF:ClarificationHelpers:Module]
+# @COMPLEXITY: 3
+# @PURPOSE: Pure helper functions for the clarification engine: question selection, counting, normalization, finding upsert, and readiness derivation.
+# @LAYER: Domain
+# @RELATION: DEPENDS_ON -> [DatasetReviewModels]
+
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Optional
+
+from src.models.dataset_review import (
+    AnswerKind,
+    ClarificationQuestion,
+    ClarificationSession,
+    DatasetReviewSession,
+    FindingArea,
+    FindingSeverity,
+    QuestionState,
+    ReadinessState,
+    RecommendedAction,
+    ResolutionState,
+    ValidationFinding,
+)
+
+
+# [DEF:select_next_open_question:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Select the next unresolved question in deterministic priority order.
+def select_next_open_question(
+    clarification_session: ClarificationSession,
+) -> Optional[ClarificationQuestion]:
+    open_questions = [
+        q for q in clarification_session.questions if q.state == QuestionState.OPEN
+    ]
+    if not open_questions:
+        return None
+    open_questions.sort(key=lambda item: (-int(item.priority), item.created_at, item.question_id))
+    return open_questions[0]
+
+
+# [/DEF:select_next_open_question:Function]
+
+
+# [DEF:count_resolved_questions:Function]
+# @COMPLEXITY: 1
+# @PURPOSE: Count questions whose answers fully resolved the ambiguity.
+def count_resolved_questions(clarification_session: ClarificationSession) -> int:
+    return sum(1 for q in clarification_session.questions if q.state == QuestionState.ANSWERED)
+
+
+# [/DEF:count_resolved_questions:Function]
+
+
+# [DEF:count_remaining_questions:Function]
+# @COMPLEXITY: 1
+# @PURPOSE: Count questions still unresolved or deferred after clarification interaction.
+def count_remaining_questions(clarification_session: ClarificationSession) -> int:
+    return sum(
+        1
+        for q in clarification_session.questions
+        if q.state in {QuestionState.OPEN, QuestionState.SKIPPED, QuestionState.EXPERT_REVIEW}
+    )
+
+
+# [/DEF:count_remaining_questions:Function]
+
+
+# [DEF:normalize_answer_value:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Validate and normalize answer payload based on answer kind and active question options.
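+# Illustrative behavior (derived from the checks below): SELECTED with " 42 " yields
+# "42" when "42" is among question.options; SKIPPED with no value yields "skipped".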
+def normalize_answer_value(
+    answer_kind: AnswerKind,
+    answer_value: Optional[str],
+    question: ClarificationQuestion,
+) -> Optional[str]:
+    normalized = str(answer_value).strip() if answer_value is not None else None
+    if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM} and not normalized:
+        raise ValueError("answer_value is required for selected or custom clarification answers")
+    if answer_kind == AnswerKind.SELECTED:
+        allowed_values = {option.value for option in question.options}
+        if normalized not in allowed_values:
+            raise ValueError("answer_value must match one of the current clarification options")
+    if answer_kind == AnswerKind.SKIPPED:
+        return normalized or "skipped"
+    if answer_kind == AnswerKind.EXPERT_REVIEW:
+        return normalized or "expert_review"
+    return normalized
+
+
+# [/DEF:normalize_answer_value:Function]
+
+
+# [DEF:build_impact_summary:Function]
+# @COMPLEXITY: 1
+# @PURPOSE: Build a compact audit note describing how the clarification answer affects session state.
+def build_impact_summary(
+    question: ClarificationQuestion,
+    answer_kind: AnswerKind,
+    answer_value: Optional[str],
+) -> str:
+    if answer_kind == AnswerKind.SKIPPED:
+        return f"Clarification for {question.topic_ref} was skipped and remains unresolved."
+    if answer_kind == AnswerKind.EXPERT_REVIEW:
+        return f"Clarification for {question.topic_ref} was deferred for expert review."
+    return f"Clarification for {question.topic_ref} recorded as '{answer_value}'."
+
+
+# [/DEF:build_impact_summary:Function]
+
+
+# [DEF:upsert_clarification_finding:Function]
+# @COMPLEXITY: 3
+# @PURPOSE: Keep one finding per clarification topic aligned with answer outcome and unresolved visibility rules.
+# @RELATION: DEPENDS_ON -> [ValidationFinding]
+def upsert_clarification_finding(
+    session: DatasetReviewSession,
+    question: ClarificationQuestion,
+    answer_kind: AnswerKind,
+    answer_value: Optional[str],
+    db_session,
+) -> ValidationFinding:
+    caused_by_ref = f"clarification:{question.question_id}"
+    existing = next(
+        (f for f in session.findings if f.area == FindingArea.CLARIFICATION and f.caused_by_ref == caused_by_ref),
+        None,
+    )
+
+    if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}:
+        resolution_state = ResolutionState.RESOLVED
+        resolved_at = datetime.utcnow()
+        message = f"Clarified '{question.topic_ref}' with answer '{answer_value}'."
+    elif answer_kind == AnswerKind.SKIPPED:
+        resolution_state = ResolutionState.SKIPPED
+        resolved_at = None
+        message = f"Clarification for '{question.topic_ref}' was skipped and still needs review."
+    else:
+        resolution_state = ResolutionState.EXPERT_REVIEW
+        resolved_at = None
+        message = f"Clarification for '{question.topic_ref}' requires expert review."
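+
+    # One finding per clarification topic: the branch below creates the finding on
+    # the first recorded answer and mutates the same row in place on later answers.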
+ + if existing is None: + existing = ValidationFinding( + finding_id=str(uuid.uuid4()), + session_id=session.session_id, + area=FindingArea.CLARIFICATION, + severity=FindingSeverity.WARNING, + code="CLARIFICATION_PENDING", + title="Clarification pending", + message=message, + resolution_state=resolution_state, + resolution_note=None, + caused_by_ref=caused_by_ref, + created_at=datetime.utcnow(), + resolved_at=resolved_at, + ) + db_session.add(existing) + session.findings.append(existing) + else: + existing.message = message + existing.resolution_state = resolution_state + existing.resolved_at = resolved_at + + if answer_kind in {AnswerKind.SELECTED, AnswerKind.CUSTOM}: + existing.code = "CLARIFICATION_RESOLVED" + existing.title = "Clarification resolved" + elif answer_kind == AnswerKind.SKIPPED: + existing.code = "CLARIFICATION_SKIPPED" + existing.title = "Clarification skipped" + else: + existing.code = "CLARIFICATION_EXPERT_REVIEW" + existing.title = "Clarification requires expert review" + + return existing + + +# [/DEF:upsert_clarification_finding:Function] + + +# [DEF:derive_readiness_state:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Recompute readiness after clarification mutation while preserving unresolved visibility semantics. +def derive_readiness_state( + session: DatasetReviewSession, + clarification_session: Optional[ClarificationSession], +) -> ReadinessState: + if clarification_session is None: + return session.readiness_state + if clarification_session.current_question_id: + return ReadinessState.CLARIFICATION_ACTIVE + if clarification_session.remaining_count > 0: + return ReadinessState.CLARIFICATION_NEEDED + return ReadinessState.REVIEW_READY + + +# [/DEF:derive_readiness_state:Function] + + +# [DEF:derive_recommended_action:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Recompute next-action guidance after clarification mutations. +def derive_recommended_action( + session: DatasetReviewSession, + clarification_session: Optional[ClarificationSession], +) -> RecommendedAction: + if clarification_session is None: + return session.recommended_action + if clarification_session.current_question_id: + return RecommendedAction.ANSWER_NEXT_QUESTION + if clarification_session.remaining_count > 0: + return RecommendedAction.START_CLARIFICATION + return RecommendedAction.REVIEW_DOCUMENTATION + + +# [/DEF:derive_recommended_action:Function] + + +# [/DEF:ClarificationHelpers:Module] diff --git a/backend/src/services/dataset_review/orchestrator.py b/backend/src/services/dataset_review/orchestrator.py index 9260b7a1..d63d7da6 100644 --- a/backend/src/services/dataset_review/orchestrator.py +++ b/backend/src/services/dataset_review/orchestrator.py @@ -3,25 +3,25 @@ # @SEMANTICS: dataset_review, orchestration, session_lifecycle, intake, recovery # @PURPOSE: Coordinate dataset review session startup and lifecycle-safe intake recovery for one authenticated user. 
# @LAYER: Domain -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @RELATION: [DEPENDS_ON] ->[SemanticSourceResolver] -# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor] -# @RELATION: [DEPENDS_ON] ->[SupersetCompilationAdapter] -# @RELATION: [DEPENDS_ON] ->[TaskManager] -# @RELATION: [CONTAINS] ->[DatasetReviewOrchestrator] +# @RELATION: DEPENDS_ON -> [DatasetReviewSessionRepository] +# @RELATION: DEPENDS_ON -> [SemanticSourceResolver] +# @RELATION: DEPENDS_ON -> [SupersetContextExtractor] +# @RELATION: DEPENDS_ON -> [SupersetCompilationAdapter] +# @RELATION: DEPENDS_ON -> [TaskManager] +# @RELATION: DISPATCHES -> [OrchestratorHelpers:Module] +# @RELATION: DISPATCHES -> [OrchestratorCommands:Module] # @PRE: session mutations must execute inside a persisted session boundary scoped to one authenticated user. # @POST: state transitions are persisted atomically and emit observable progress for long-running steps. # @SIDE_EFFECT: creates task records, updates session aggregates, triggers upstream Superset calls, persists audit artifacts. # @DATA_CONTRACT: Input[SessionCommand] -> Output[DatasetReviewSession | CompiledPreview | DatasetRunContext] # @INVARIANT: Launch is blocked unless a current session has no open blocking findings, all launch-sensitive mappings are approved, and a non-stale Superset-generated compiled preview matches the current input fingerprint. +# @RATIONALE: Original 1198-line monolith violated INV_7 (400-line module limit). Decomposed into commands and helpers sub-modules while preserving the orchestrator class as the single entry point. +# @REJECTED: Keeping all orchestration logic in one file because it exceeded the fractal limit by 3x. from __future__ import annotations -# [DEF:imports:Block] from dataclasses import dataclass, field from datetime import datetime -import hashlib -import json from typing import Any, Dict, List, Optional, cast from src.core.config_manager import ConfigManager @@ -70,98 +70,37 @@ from src.services.dataset_review.repositories.session_repository import ( ) from src.services.dataset_review.semantic_resolver import SemanticSourceResolver from src.services.dataset_review.event_logger import SessionEventPayload -# [/DEF:imports:Block] +from src.services.dataset_review.orchestrator_pkg._commands import ( + StartSessionCommand, + StartSessionResult, + PreparePreviewCommand, + PreparePreviewResult, + LaunchDatasetCommand, + LaunchDatasetResult, +) +from src.services.dataset_review.orchestrator_pkg._helpers import ( + parse_dataset_selection, + build_initial_profile, + build_partial_recovery_findings, + build_execution_snapshot, + build_launch_blockers, + get_latest_preview, + compute_preview_fingerprint, + extract_effective_filter_value, +) logger = cast(Any, logger) -# [DEF:StartSessionCommand:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Typed input contract for starting a dataset review session. -@dataclass -class StartSessionCommand: - user: User - environment_id: str - source_kind: str - source_input: str - - -# [/DEF:StartSessionCommand:Class] - - -# [DEF:StartSessionResult:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Session-start result carrying the persisted session and intake recovery metadata. 
-@dataclass -class StartSessionResult: - session: DatasetReviewSession - parsed_context: Optional[SupersetParsedContext] = None - findings: List[ValidationFinding] = field(default_factory=list) - - -# [/DEF:StartSessionResult:Class] - - -# [DEF:PreparePreviewCommand:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Typed input contract for compiling one Superset-backed session preview. -@dataclass -class PreparePreviewCommand: - user: User - session_id: str - expected_version: Optional[int] = None - - -# [/DEF:PreparePreviewCommand:Class] - - -# [DEF:PreparePreviewResult:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Result contract for one persisted compiled preview attempt. -@dataclass -class PreparePreviewResult: - session: DatasetReviewSession - preview: CompiledPreview - blocked_reasons: List[str] = field(default_factory=list) - - -# [/DEF:PreparePreviewResult:Class] - - -# [DEF:LaunchDatasetCommand:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Typed input contract for launching one dataset-review session into SQL Lab. -@dataclass -class LaunchDatasetCommand: - user: User - session_id: str - expected_version: Optional[int] = None - - -# [/DEF:LaunchDatasetCommand:Class] - - -# [DEF:LaunchDatasetResult:Class] -# @COMPLEXITY: 2 -# @PURPOSE: Launch result carrying immutable run context and any gate blockers surfaced before launch. -@dataclass -class LaunchDatasetResult: - session: DatasetReviewSession - run_context: DatasetRunContext - blocked_reasons: List[str] = field(default_factory=list) - - -# [/DEF:LaunchDatasetResult:Class] - - # [DEF:DatasetReviewOrchestrator:Class] # @COMPLEXITY: 5 # @PURPOSE: Coordinate safe session startup while preserving cross-user isolation and explicit partial recovery. -# @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] -# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor] -# @RELATION: [DEPENDS_ON] ->[TaskManager] -# @RELATION: [DEPENDS_ON] ->[ConfigManager] -# @RELATION: [DEPENDS_ON] ->[SemanticSourceResolver] +# @RELATION: DEPENDS_ON -> [DatasetReviewSessionRepository] +# @RELATION: DEPENDS_ON -> [SupersetContextExtractor] +# @RELATION: DEPENDS_ON -> [TaskManager] +# @RELATION: DEPENDS_ON -> [ConfigManager] +# @RELATION: DEPENDS_ON -> [SemanticSourceResolver] +# @RELATION: CALLS -> [OrchestratorHelpers:Module] # @PRE: constructor dependencies are valid and tied to the current request/task scope. # @POST: orchestrator instance can execute session-scoped mutations for one authenticated user. # @SIDE_EFFECT: downstream operations may persist session/profile/finding state and enqueue background tasks. @@ -171,13 +110,8 @@ class DatasetReviewOrchestrator: # [DEF:DatasetReviewOrchestrator_init:Function] # @COMPLEXITY: 3 # @PURPOSE: Bind repository, config, and task dependencies required by the orchestration boundary. - # @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] - # @RELATION: [DEPENDS_ON] ->[ConfigManager] - # @RELATION: [DEPENDS_ON] ->[TaskManager] - # @RELATION: [DEPENDS_ON] ->[SemanticSourceResolver] # @PRE: repository/config_manager are valid collaborators for the current request scope. # @POST: Instance holds collaborator references used by start/preview/launch orchestration methods. - # @SIDE_EFFECT: Stores dependency references for later session lifecycle operations. def __init__( self, repository: DatasetReviewSessionRepository, @@ -195,9 +129,8 @@ class DatasetReviewOrchestrator: # [DEF:start_session:Function] # @COMPLEXITY: 5 # @PURPOSE: Initialize a new session from a Superset link or dataset selection and trigger context recovery. 
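+    # e.g. source_kind="superset_link" with a Superset dashboard URL, or
+    # source_kind="dataset_selection" with "42" / "dataset:42" as source_input.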
- # @RELATION: [DEPENDS_ON] ->[DatasetReviewSessionRepository] - # @RELATION: [CALLS] ->[SupersetContextExtractor.parse_superset_link] - # @RELATION: [CALLS] ->[TaskManager.create_task] + # @RELATION: CALLS -> [SupersetContextExtractor.parse_superset_link] + # @RELATION: CALLS -> [TaskManager.create_task] # @PRE: source input is non-empty and environment is accessible. # @POST: session exists in persisted storage with intake/recovery state and task linkage when async work is required. # @SIDE_EFFECT: persists session and may enqueue recovery task. @@ -210,36 +143,19 @@ class DatasetReviewOrchestrator: normalized_environment_id = str(command.environment_id or "").strip() if not normalized_source_input: - logger.explore( - "Blocked dataset review session start due to empty source input" - ) + logger.explore("Blocked dataset review session start due to empty source input") raise ValueError("source_input must be non-empty") if normalized_source_kind not in {"superset_link", "dataset_selection"}: - logger.explore( - "Blocked dataset review session start due to unsupported source kind", - extra={"source_kind": normalized_source_kind}, - ) - raise ValueError( - "source_kind must be 'superset_link' or 'dataset_selection'" - ) + logger.explore("Blocked dataset review session start due to unsupported source kind", extra={"source_kind": normalized_source_kind}) + raise ValueError("source_kind must be 'superset_link' or 'dataset_selection'") environment = self.config_manager.get_environment(normalized_environment_id) if environment is None: - logger.explore( - "Blocked dataset review session start because environment was not found", - extra={"environment_id": normalized_environment_id}, - ) + logger.explore("Blocked dataset review session start because environment was not found", extra={"environment_id": normalized_environment_id}) raise ValueError("Environment not found") - logger.reason( - "Starting dataset review session", - extra={ - "user_id": command.user.id, - "environment_id": normalized_environment_id, - "source_kind": normalized_source_kind, - }, - ) + logger.reason("Starting dataset review session", extra={"user_id": command.user.id, "environment_id": normalized_environment_id, "source_kind": normalized_source_kind}) parsed_context: Optional[SupersetParsedContext] = None findings: List[ValidationFinding] = [] @@ -260,15 +176,11 @@ class DatasetReviewOrchestrator: if parsed_context.partial_recovery: readiness_state = ReadinessState.RECOVERY_REQUIRED recommended_action = RecommendedAction.REVIEW_DOCUMENTATION - findings.extend( - self._build_partial_recovery_findings(parsed_context) - ) + findings.extend(build_partial_recovery_findings(parsed_context)) else: readiness_state = ReadinessState.REVIEW_READY else: - dataset_ref, dataset_id = self._parse_dataset_selection( - normalized_source_input - ) + dataset_ref, dataset_id = parse_dataset_selection(normalized_source_input) readiness_state = ReadinessState.REVIEW_READY current_phase = SessionPhase.REVIEW @@ -300,7 +212,7 @@ class DatasetReviewOrchestrator: ) ) - profile = self._build_initial_profile( + profile = build_initial_profile( session_id=persisted_session.session_id, parsed_context=parsed_context, dataset_ref=dataset_ref, @@ -318,9 +230,7 @@ class DatasetReviewOrchestrator: "dataset_ref": persisted_session.dataset_ref, "dataset_id": persisted_session.dataset_id, "dashboard_id": persisted_session.dashboard_id, - "partial_recovery": bool( - parsed_context and parsed_context.partial_recovery - ), + "partial_recovery": bool(parsed_context 
and parsed_context.partial_recovery), }, ) ) @@ -360,26 +270,9 @@ class DatasetReviewOrchestrator: event_details={"task_id": active_task_id}, ) ) - logger.reason( - "Linked recovery task to started dataset review session", - extra={ - "session_id": persisted_session.session_id, - "task_id": active_task_id, - }, - ) + logger.reason("Linked recovery task to started dataset review session", extra={"session_id": persisted_session.session_id, "task_id": active_task_id}) - logger.reflect( - "Dataset review session start completed", - extra={ - "session_id": persisted_session.session_id, - "dataset_ref": persisted_session.dataset_ref, - "dataset_id": persisted_session.dataset_id, - "dashboard_id": persisted_session.dashboard_id, - "readiness_state": persisted_session.readiness_state.value, - "active_task_id": persisted_session.active_task_id, - "finding_count": len(findings), - }, - ) + logger.reflect("Dataset review session start completed", extra={"session_id": persisted_session.session_id, "dataset_ref": persisted_session.dataset_ref, "readiness_state": persisted_session.readiness_state.value, "active_task_id": persisted_session.active_task_id, "finding_count": len(findings)}) return StartSessionResult( session=persisted_session, parsed_context=parsed_context, @@ -391,32 +284,20 @@ class DatasetReviewOrchestrator: # [DEF:prepare_launch_preview:Function] # @COMPLEXITY: 4 # @PURPOSE: Assemble effective execution inputs and trigger Superset-side preview compilation. - # @RELATION: [CALLS] ->[SupersetCompilationAdapter.compile_preview] + # @RELATION: CALLS -> [SupersetCompilationAdapter.compile_preview] # @PRE: all required variables have candidate values or explicitly accepted defaults. # @POST: returns preview artifact in pending, ready, failed, or stale state. # @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics. 
# @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult] - def prepare_launch_preview( - self, command: PreparePreviewCommand - ) -> PreparePreviewResult: + def prepare_launch_preview(self, command: PreparePreviewCommand) -> PreparePreviewResult: with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"): - session = self.repository.load_session_detail( - command.session_id, command.user.id - ) + session = self.repository.load_session_detail(command.session_id, command.user.id) if session is None or session.user_id != command.user.id: - logger.explore( - "Preview preparation rejected because owned session was not found", - extra={ - "session_id": command.session_id, - "user_id": command.user.id, - }, - ) + logger.explore("Preview preparation rejected because owned session was not found", extra={"session_id": command.session_id, "user_id": command.user.id}) raise ValueError("Session not found") if command.expected_version is not None: - self.repository.require_session_version( - session, command.expected_version - ) + self.repository.require_session_version(session, command.expected_version) if session.dataset_id is None: raise ValueError("Preview requires a resolved dataset_id") @@ -425,16 +306,10 @@ class DatasetReviewOrchestrator: if environment is None: raise ValueError("Environment not found") - execution_snapshot = self._build_execution_snapshot(session) + execution_snapshot = build_execution_snapshot(session) preview_blockers = execution_snapshot["preview_blockers"] if preview_blockers: - logger.explore( - "Preview preparation blocked by incomplete execution context", - extra={ - "session_id": session.session_id, - "blocked_reasons": preview_blockers, - }, - ) + logger.explore("Preview preparation blocked by incomplete execution context", extra={"session_id": session.session_id, "blocked_reasons": preview_blockers}) raise ValueError("Preview blocked: " + "; ".join(preview_blockers)) adapter = SupersetCompilationAdapter(environment) @@ -457,11 +332,7 @@ class DatasetReviewOrchestrator: session.current_phase = SessionPhase.PREVIEW session.last_activity_at = datetime.utcnow() if persisted_preview.preview_status == PreviewStatus.READY: - launch_blockers = self._build_launch_blockers( - session=session, - execution_snapshot=execution_snapshot, - preview=persisted_preview, - ) + launch_blockers = build_launch_blockers(session=session, execution_snapshot=execution_snapshot, preview=persisted_preview) if launch_blockers: session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY session.recommended_action = RecommendedAction.APPROVE_MAPPING @@ -481,59 +352,33 @@ class DatasetReviewOrchestrator: event_summary="Superset preview generation persisted", current_phase=session.current_phase.value, readiness_state=session.readiness_state.value, - event_details={ - "preview_id": persisted_preview.preview_id, - "preview_status": persisted_preview.preview_status.value, - "preview_fingerprint": persisted_preview.preview_fingerprint, - }, + event_details={"preview_id": persisted_preview.preview_id, "preview_status": persisted_preview.preview_status.value, "preview_fingerprint": persisted_preview.preview_fingerprint}, ) ) - logger.reflect( - "Superset preview preparation completed", - extra={ - "session_id": session.session_id, - "preview_id": persisted_preview.preview_id, - "preview_status": persisted_preview.preview_status.value, - "preview_fingerprint": persisted_preview.preview_fingerprint, - }, - ) - return PreparePreviewResult( - session=session, - 
preview=persisted_preview, - blocked_reasons=[], - ) + logger.reflect("Superset preview preparation completed", extra={"session_id": session.session_id, "preview_id": persisted_preview.preview_id, "preview_status": persisted_preview.preview_status.value}) + return PreparePreviewResult(session=session, preview=persisted_preview, blocked_reasons=[]) # [/DEF:prepare_launch_preview:Function] # [DEF:launch_dataset:Function] # @COMPLEXITY: 5 # @PURPOSE: Start the approved dataset execution through SQL Lab and persist run context for audit/replay. - # @RELATION: [CALLS] ->[SupersetCompilationAdapter.create_sql_lab_session] + # @RELATION: CALLS -> [SupersetCompilationAdapter.create_sql_lab_session] # @PRE: session is run-ready and compiled preview is current. # @POST: returns persisted run context with SQL Lab session reference and launch outcome. # @SIDE_EFFECT: creates SQL Lab execution session and audit snapshot. # @DATA_CONTRACT: Input[LaunchDatasetCommand] -> Output[LaunchDatasetResult] - # @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs. + # @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest preview fingerprint matches current execution inputs. def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult: with belief_scope("DatasetReviewOrchestrator.launch_dataset"): - session = self.repository.load_session_detail( - command.session_id, command.user.id - ) + session = self.repository.load_session_detail(command.session_id, command.user.id) if session is None or session.user_id != command.user.id: - logger.explore( - "Launch rejected because owned session was not found", - extra={ - "session_id": command.session_id, - "user_id": command.user.id, - }, - ) + logger.explore("Launch rejected because owned session was not found", extra={"session_id": command.session_id, "user_id": command.user.id}) raise ValueError("Session not found") if command.expected_version is not None: - self.repository.require_session_version( - session, command.expected_version - ) + self.repository.require_session_version(session, command.expected_version) if session.dataset_id is None: raise ValueError("Launch requires a resolved dataset_id") @@ -542,22 +387,12 @@ class DatasetReviewOrchestrator: if environment is None: raise ValueError("Environment not found") - execution_snapshot = self._build_execution_snapshot(session) - current_preview = self._get_latest_preview(session) - launch_blockers = self._build_launch_blockers( - session=session, - execution_snapshot=execution_snapshot, - preview=current_preview, - ) - if launch_blockers: - logger.explore( - "Launch gate blocked dataset execution", - extra={ - "session_id": session.session_id, - "blocked_reasons": launch_blockers, - }, - ) - raise ValueError("Launch blocked: " + "; ".join(launch_blockers)) + execution_snapshot = build_execution_snapshot(session) + current_preview = get_latest_preview(session) + launch_blockers_list = build_launch_blockers(session=session, execution_snapshot=execution_snapshot, preview=current_preview) + if launch_blockers_list: + logger.explore("Launch gate blocked dataset execution", extra={"session_id": session.session_id, "blocked_reasons": launch_blockers_list}) + raise ValueError("Launch blocked: " + "; ".join(launch_blockers_list)) adapter = SupersetCompilationAdapter(environment) try: @@ -573,10 +408,7 @@ class 
DatasetReviewOrchestrator: launch_status = LaunchStatus.STARTED launch_error = None except Exception as exc: - logger.explore( - "SQL Lab launch failed after passing gates", - extra={"session_id": session.session_id, "error": str(exc)}, - ) + logger.explore("SQL Lab launch failed after passing gates", extra={"session_id": session.session_id, "error": str(exc)}) sql_lab_session_ref = "unavailable" launch_status = LaunchStatus.FAILED launch_error = str(exc) @@ -620,151 +452,28 @@ class DatasetReviewOrchestrator: event_summary="Dataset launch handoff persisted", current_phase=session.current_phase.value, readiness_state=session.readiness_state.value, - event_details={ - "run_context_id": persisted_run_context.run_context_id, - "launch_status": persisted_run_context.launch_status.value, - "preview_id": persisted_run_context.preview_id, - "sql_lab_session_ref": persisted_run_context.sql_lab_session_ref, - }, + event_details={"run_context_id": persisted_run_context.run_context_id, "launch_status": persisted_run_context.launch_status.value, "preview_id": persisted_run_context.preview_id, "sql_lab_session_ref": persisted_run_context.sql_lab_session_ref}, ) ) - logger.reflect( - "Dataset launch orchestration completed with audited run context", - extra={ - "session_id": session.session_id, - "run_context_id": persisted_run_context.run_context_id, - "launch_status": persisted_run_context.launch_status.value, - "preview_id": persisted_run_context.preview_id, - }, - ) - return LaunchDatasetResult( - session=session, - run_context=persisted_run_context, - blocked_reasons=[], - ) + logger.reflect("Dataset launch orchestration completed with audited run context", extra={"session_id": session.session_id, "run_context_id": persisted_run_context.run_context_id, "launch_status": persisted_run_context.launch_status.value}) + return LaunchDatasetResult(session=session, run_context=persisted_run_context, blocked_reasons=[]) # [/DEF:launch_dataset:Function] - # [DEF:_parse_dataset_selection:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Normalize dataset-selection payload into canonical session references. - # @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] - def _parse_dataset_selection(self, source_input: str) -> tuple[str, Optional[int]]: - normalized = str(source_input or "").strip() - if not normalized: - raise ValueError("dataset selection input must be non-empty") - - if normalized.isdigit(): - dataset_id = int(normalized) - return f"dataset:{dataset_id}", dataset_id - - if normalized.startswith("dataset:"): - suffix = normalized.split(":", 1)[1].strip() - if suffix.isdigit(): - return normalized, int(suffix) - return normalized, None - - return normalized, None - - # [/DEF:_parse_dataset_selection:Function] - - # [DEF:_build_initial_profile:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Create the first profile snapshot so exports and detail views remain usable immediately after intake. - # @RELATION: [DEPENDS_ON] ->[DatasetProfile] - def _build_initial_profile( - self, - session_id: str, - parsed_context: Optional[SupersetParsedContext], - dataset_ref: str, - ) -> DatasetProfile: - dataset_name = ( - dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset" - ) - business_summary = ( - f"Review session initialized for {dataset_ref}." - if dataset_ref - else "Review session initialized with unresolved dataset context." 
- ) - confidence_state = ( - ConfidenceState.MIXED - if parsed_context and parsed_context.partial_recovery - else ConfidenceState.MOSTLY_CONFIRMED - ) - return DatasetProfile( - session_id=session_id, - dataset_name=dataset_name or "Unresolved dataset", - schema_name=dataset_ref.split(".")[0] if "." in dataset_ref else None, - business_summary=business_summary, - business_summary_source=BusinessSummarySource.IMPORTED, - description="Initial review profile created from source intake.", - dataset_type="unknown", - is_sqllab_view=False, - completeness_score=0.25, - confidence_state=confidence_state, - has_blocking_findings=False, - has_warning_findings=bool( - parsed_context and parsed_context.partial_recovery - ), - manual_summary_locked=False, - ) - - # [/DEF:_build_initial_profile:Function] - - # [DEF:_build_partial_recovery_findings:Function] - # @COMPLEXITY: 4 - # @PURPOSE: Project partial Superset intake recovery into explicit findings without blocking session usability. - # @RELATION: [DEPENDS_ON] ->[ValidationFinding] - # @PRE: parsed_context.partial_recovery is true. - # @POST: returns warning-level findings that preserve usable but incomplete state. - # @SIDE_EFFECT: none beyond structured finding creation. - # @DATA_CONTRACT: Input[SupersetParsedContext] -> Output[List[ValidationFinding]] - def _build_partial_recovery_findings( - self, - parsed_context: SupersetParsedContext, - ) -> List[ValidationFinding]: - findings: List[ValidationFinding] = [] - for unresolved_ref in parsed_context.unresolved_references: - findings.append( - ValidationFinding( - area=FindingArea.SOURCE_INTAKE, - severity=FindingSeverity.WARNING, - code="PARTIAL_SUPERSET_RECOVERY", - title="Superset context recovered partially", - message=( - "Session remains usable, but some Superset context requires review: " - f"{unresolved_ref.replace('_', ' ')}." - ), - resolution_state=ResolutionState.OPEN, - caused_by_ref=unresolved_ref, - ) - ) - return findings - - # [/DEF:_build_partial_recovery_findings:Function] - # [DEF:_build_recovery_bootstrap:Function] # @COMPLEXITY: 4 # @PURPOSE: Recover and materialize initial imported filters, template variables, and draft execution mappings after session creation. - # @RELATION: [CALLS] ->[SupersetContextExtractor.recover_imported_filters] - # @RELATION: [CALLS] ->[SupersetContextExtractor.discover_template_variables] # @PRE: session belongs to the just-created review aggregate and parsed_context was produced for the same environment scope. # @POST: Returns bootstrap imported filters, template variables, execution mappings, and updated findings without persisting them directly. # @SIDE_EFFECT: Performs Superset reads through the extractor and may append warning findings for incomplete recovery. 
- # @DATA_CONTRACT: Input[Environment, DatasetReviewSession, SupersetParsedContext, List[ValidationFinding]] -> Output[Tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]] def _build_recovery_bootstrap( self, environment, session: DatasetReviewSession, parsed_context: SupersetParsedContext, findings: List[ValidationFinding], - ) -> tuple[ - List[ImportedFilter], - List[TemplateVariable], - List[ExecutionMapping], - List[ValidationFinding], - ]: + ) -> tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]: session_record = cast(Any, session) extractor = SupersetContextExtractor(environment) imported_filters_payload = extractor.recover_imported_filters(parsed_context) @@ -778,22 +487,10 @@ class DatasetReviewOrchestrator: raw_value=item.get("raw_value"), raw_value_masked=bool(item.get("raw_value_masked", False)), normalized_value=item.get("normalized_value"), - source=FilterSource( - str(item.get("source") or FilterSource.SUPERSET_URL.value) - ), - confidence_state=FilterConfidenceState( - str( - item.get("confidence_state") - or FilterConfidenceState.UNRESOLVED.value - ) - ), + source=FilterSource(str(item.get("source") or FilterSource.SUPERSET_URL.value)), + confidence_state=FilterConfidenceState(str(item.get("confidence_state") or FilterConfidenceState.UNRESOLVED.value)), requires_confirmation=bool(item.get("requires_confirmation", False)), - recovery_status=FilterRecoveryStatus( - str( - item.get("recovery_status") - or FilterRecoveryStatus.PARTIAL.value - ) - ), + recovery_status=FilterRecoveryStatus(str(item.get("recovery_status") or FilterRecoveryStatus.PARTIAL.value)), notes=item.get("notes"), ) for index, item in enumerate(imported_filters_payload) @@ -806,46 +503,24 @@ class DatasetReviewOrchestrator: try: dataset_payload = parsed_context.dataset_payload if not isinstance(dataset_payload, dict): - dataset_payload = extractor.client.get_dataset_detail( - session_record.dataset_id - ) - discovered_variables = extractor.discover_template_variables( - dataset_payload - ) + dataset_payload = extractor.client.get_dataset_detail(session_record.dataset_id) + discovered_variables = extractor.discover_template_variables(dataset_payload) template_variables = [ TemplateVariable( session_id=session_record.session_id, - variable_name=str( - item.get("variable_name") or f"variable_{index}" - ), + variable_name=str(item.get("variable_name") or f"variable_{index}"), expression_source=str(item.get("expression_source") or ""), - variable_kind=VariableKind( - str(item.get("variable_kind") or VariableKind.UNKNOWN.value) - ), + variable_kind=VariableKind(str(item.get("variable_kind") or VariableKind.UNKNOWN.value)), is_required=bool(item.get("is_required", True)), default_value=item.get("default_value"), - mapping_status=MappingStatus( - str( - item.get("mapping_status") - or MappingStatus.UNMAPPED.value - ) - ), + mapping_status=MappingStatus(str(item.get("mapping_status") or MappingStatus.UNMAPPED.value)), ) for index, item in enumerate(discovered_variables) ] except Exception as exc: - if ( - "dataset_template_variable_discovery_failed" - not in parsed_context.unresolved_references - ): - parsed_context.unresolved_references.append( - "dataset_template_variable_discovery_failed" - ) - if not any( - finding.caused_by_ref - == "dataset_template_variable_discovery_failed" - for finding in findings - ): + if "dataset_template_variable_discovery_failed" not in parsed_context.unresolved_references: + 
parsed_context.unresolved_references.append("dataset_template_variable_discovery_failed") + if not any(f.caused_by_ref == "dataset_template_variable_discovery_failed" for f in findings): findings.append( ValidationFinding( area=FindingArea.TEMPLATE_MAPPING, @@ -857,46 +532,26 @@ class DatasetReviewOrchestrator: caused_by_ref="dataset_template_variable_discovery_failed", ) ) - logger.explore( - "Template variable discovery failed during session bootstrap", - extra={ - "session_id": session_record.session_id, - "dataset_id": session_record.dataset_id, - "error": str(exc), - }, - ) + logger.explore("Template variable discovery failed during session bootstrap", extra={"session_id": session_record.session_id, "dataset_id": session_record.dataset_id, "error": str(exc)}) - filter_lookup = { - str(imported_filter.filter_name or "").strip().lower(): imported_filter - for imported_filter in imported_filters - if str(imported_filter.filter_name or "").strip() - } - for template_variable in template_variables: - matched_filter = filter_lookup.get( - str(template_variable.variable_name or "").strip().lower() - ) + filter_lookup = {str(f.filter_name or "").strip().lower(): f for f in imported_filters if str(f.filter_name or "").strip()} + for tv in template_variables: + matched_filter = filter_lookup.get(str(tv.variable_name or "").strip().lower()) if matched_filter is None: continue - requires_explicit_approval = bool( - matched_filter.requires_confirmation - or matched_filter.recovery_status != FilterRecoveryStatus.RECOVERED - ) + requires_explicit_approval = bool(matched_filter.requires_confirmation or matched_filter.recovery_status != FilterRecoveryStatus.RECOVERED) execution_mappings.append( ExecutionMapping( session_id=session_record.session_id, filter_id=matched_filter.filter_id, - variable_id=template_variable.variable_id, + variable_id=tv.variable_id, mapping_method=MappingMethod.DIRECT_MATCH, raw_input_value=matched_filter.raw_value, - effective_value=matched_filter.normalized_value - if matched_filter.normalized_value is not None - else matched_filter.raw_value, + effective_value=matched_filter.normalized_value if matched_filter.normalized_value is not None else matched_filter.raw_value, transformation_note="Bootstrapped from Superset recovery context", - warning_level=None if not requires_explicit_approval else None, + warning_level=None, requires_explicit_approval=requires_explicit_approval, - approval_state=ApprovalState.PENDING - if requires_explicit_approval - else ApprovalState.NOT_REQUIRED, + approval_state=ApprovalState.PENDING if requires_explicit_approval else ApprovalState.NOT_REQUIRED, approved_by_user_id=None, approved_at=None, ) @@ -906,240 +561,12 @@ class DatasetReviewOrchestrator: # [/DEF:_build_recovery_bootstrap:Function] - # [DEF:_extract_effective_filter_value:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Separate normalized filter payload metadata from the user-facing effective filter value. - def _extract_effective_filter_value( - self, normalized_value: Any, raw_value: Any - ) -> Any: - if isinstance(normalized_value, dict) and ( - "filter_clauses" in normalized_value - or "extra_form_data" in normalized_value - ): - return raw_value - return normalized_value if normalized_value is not None else raw_value - - # [/DEF:_extract_effective_filter_value:Function] - - # [DEF:_build_execution_snapshot:Function] - # @COMPLEXITY: 4 - # @PURPOSE: Build effective filters, template params, approvals, and fingerprint for preview and launch gating. 
- # @RELATION: [DEPENDS_ON] ->[DatasetReviewSession] - # @PRE: Session aggregate includes imported filters, template variables, and current execution mappings. - # @POST: returns deterministic execution snapshot for current session state without mutating persistence. - # @SIDE_EFFECT: none. - # @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]] - def _build_execution_snapshot( - self, session: DatasetReviewSession - ) -> Dict[str, Any]: - session_record = cast(Any, session) - filter_lookup = { - item.filter_id: item for item in session_record.imported_filters - } - variable_lookup = { - item.variable_id: item for item in session_record.template_variables - } - - effective_filters: List[Dict[str, Any]] = [] - template_params: Dict[str, Any] = {} - approved_mapping_ids: List[str] = [] - open_warning_refs: List[str] = [] - preview_blockers: List[str] = [] - mapped_filter_ids: set[str] = set() - - for mapping in session_record.execution_mappings: - imported_filter = filter_lookup.get(mapping.filter_id) - template_variable = variable_lookup.get(mapping.variable_id) - if imported_filter is None: - preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter") - continue - if template_variable is None: - preview_blockers.append( - f"mapping:{mapping.mapping_id}:missing_variable" - ) - continue - - effective_value = mapping.effective_value - if effective_value is None: - effective_value = self._extract_effective_filter_value( - imported_filter.normalized_value, - imported_filter.raw_value, - ) - if effective_value is None: - effective_value = template_variable.default_value - - if effective_value is None and template_variable.is_required: - preview_blockers.append( - f"variable:{template_variable.variable_name}:missing_required_value" - ) - continue - - mapped_filter_ids.add(imported_filter.filter_id) - if effective_value is not None: - mapped_filter_payload = { - "mapping_id": mapping.mapping_id, - "filter_id": imported_filter.filter_id, - "filter_name": imported_filter.filter_name, - "variable_id": template_variable.variable_id, - "variable_name": template_variable.variable_name, - "effective_value": effective_value, - "raw_input_value": mapping.raw_input_value, - } - if isinstance(imported_filter.normalized_value, dict): - mapped_filter_payload["display_name"] = imported_filter.display_name - mapped_filter_payload["normalized_filter_payload"] = ( - imported_filter.normalized_value - ) - effective_filters.append(mapped_filter_payload) - template_params[template_variable.variable_name] = effective_value - if mapping.approval_state == ApprovalState.APPROVED: - approved_mapping_ids.append(mapping.mapping_id) - if ( - mapping.requires_explicit_approval - and mapping.approval_state != ApprovalState.APPROVED - ): - open_warning_refs.append(mapping.mapping_id) - - for imported_filter in session_record.imported_filters: - if imported_filter.filter_id in mapped_filter_ids: - continue - effective_value = imported_filter.normalized_value - effective_value = self._extract_effective_filter_value( - imported_filter.normalized_value, - imported_filter.raw_value, - ) - if effective_value is None: - continue - effective_filters.append( - { - "filter_id": imported_filter.filter_id, - "filter_name": imported_filter.filter_name, - "display_name": imported_filter.display_name, - "effective_value": effective_value, - "raw_input_value": imported_filter.raw_value, - "normalized_filter_payload": imported_filter.normalized_value, - } - ) - - mapped_variable_ids = { - mapping.variable_id for 
mapping in session_record.execution_mappings - } - for variable in session_record.template_variables: - if variable.variable_id in mapped_variable_ids: - continue - if variable.default_value is not None: - template_params[variable.variable_name] = variable.default_value - continue - if variable.is_required: - preview_blockers.append(f"variable:{variable.variable_name}:unmapped") - - semantic_decision_refs = [ - field.field_id - for field in session.semantic_fields - if field.is_locked - or not field.needs_review - or field.provenance.value != "unresolved" - ] - preview_fingerprint = self._compute_preview_fingerprint( - { - "dataset_id": session_record.dataset_id, - "template_params": template_params, - "effective_filters": effective_filters, - } - ) - return { - "effective_filters": effective_filters, - "template_params": template_params, - "approved_mapping_ids": sorted(approved_mapping_ids), - "semantic_decision_refs": sorted(semantic_decision_refs), - "open_warning_refs": sorted(open_warning_refs), - "preview_blockers": sorted(set(preview_blockers)), - "preview_fingerprint": preview_fingerprint, - } - - # [/DEF:_build_execution_snapshot:Function] - - # [DEF:_build_launch_blockers:Function] - # @COMPLEXITY: 4 - # @PURPOSE: Enforce launch gates from findings, approvals, and current preview truth. - # @RELATION: [DEPENDS_ON] ->[CompiledPreview] - # @PRE: execution_snapshot was computed from current session state and preview is the latest persisted preview or None. - # @POST: returns explicit blocker codes for every unmet launch invariant. - # @SIDE_EFFECT: none. - # @DATA_CONTRACT: Input[DatasetReviewSession,Dict[str,Any],CompiledPreview|None] -> Output[List[str]] - def _build_launch_blockers( - self, - session: DatasetReviewSession, - execution_snapshot: Dict[str, Any], - preview: Optional[CompiledPreview], - ) -> List[str]: - session_record = cast(Any, session) - blockers = list(execution_snapshot["preview_blockers"]) - - for finding in session_record.findings: - if ( - finding.severity == FindingSeverity.BLOCKING - and finding.resolution_state - not in {ResolutionState.RESOLVED, ResolutionState.APPROVED} - ): - blockers.append(f"finding:{finding.code}:blocking") - for mapping in session_record.execution_mappings: - if ( - mapping.requires_explicit_approval - and mapping.approval_state != ApprovalState.APPROVED - ): - blockers.append(f"mapping:{mapping.mapping_id}:approval_required") - - if preview is None: - blockers.append("preview:missing") - else: - if preview.preview_status != PreviewStatus.READY: - blockers.append(f"preview:{preview.preview_status.value}") - if preview.preview_fingerprint != execution_snapshot["preview_fingerprint"]: - blockers.append("preview:fingerprint_mismatch") - - return sorted(set(blockers)) - - # [/DEF:_build_launch_blockers:Function] - - # [DEF:_get_latest_preview:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Resolve the current latest preview snapshot for one session aggregate. 
- def _get_latest_preview( - self, session: DatasetReviewSession - ) -> Optional[CompiledPreview]: - session_record = cast(Any, session) - if not session_record.previews: - return None - if session_record.last_preview_id: - for preview in session_record.previews: - if preview.preview_id == session_record.last_preview_id: - return preview - return sorted( - session_record.previews, - key=lambda item: (item.created_at or datetime.min, item.preview_id), - reverse=True, - )[0] - - # [/DEF:_get_latest_preview:Function] - - # [DEF:_compute_preview_fingerprint:Function] - # @COMPLEXITY: 2 - # @PURPOSE: Produce deterministic execution fingerprint for preview truth and staleness checks. - def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str: - serialized = json.dumps(payload, sort_keys=True, default=str) - return hashlib.sha256(serialized.encode("utf-8")).hexdigest() - - # [/DEF:_compute_preview_fingerprint:Function] - # [DEF:_enqueue_recovery_task:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 3 # @PURPOSE: Link session start to observable async recovery when task infrastructure is available. - # @RELATION: [CALLS] ->[TaskManager.create_task] # @PRE: session is already persisted. # @POST: returns task identifier when a task could be enqueued, otherwise None. # @SIDE_EFFECT: may create one background task for progressive recovery. - # @DATA_CONTRACT: Input[StartSessionCommand,DatasetReviewSession,SupersetParsedContext|None] -> Output[task_id:str|None] def _enqueue_recovery_task( self, command: StartSessionCommand, @@ -1148,10 +575,7 @@ class DatasetReviewOrchestrator: ) -> Optional[str]: session_record = cast(Any, session) if self.task_manager is None: - logger.reason( - "Dataset review session started without task manager; continuing synchronously", - extra={"session_id": session_record.session_id}, - ) + logger.reason("Dataset review session started without task manager; continuing synchronously", extra={"session_id": session_record.session_id}) return None task_params: Dict[str, Any] = { @@ -1163,28 +587,18 @@ class DatasetReviewOrchestrator: "dataset_ref": session_record.dataset_ref, "dataset_id": session_record.dataset_id, "dashboard_id": session_record.dashboard_id, - "partial_recovery": bool( - parsed_context and parsed_context.partial_recovery - ), + "partial_recovery": bool(parsed_context and parsed_context.partial_recovery), } create_task = getattr(self.task_manager, "create_task", None) if create_task is None: - logger.explore( - "Task manager has no create_task method; skipping recovery enqueue" - ) + logger.explore("Task manager has no create_task method; skipping recovery enqueue") return None try: - task_object = create_task( - plugin_id="dataset-review-recovery", - params=task_params, - ) + task_object = create_task(plugin_id="dataset-review-recovery", params=task_params) except TypeError: - logger.explore( - "Recovery task enqueue skipped because task manager create_task contract is incompatible", - extra={"session_id": session_record.session_id}, - ) + logger.explore("Recovery task enqueue skipped because task manager create_task contract is incompatible", extra={"session_id": session_record.session_id}) return None task_id = getattr(task_object, "id", None) diff --git a/backend/src/services/dataset_review/orchestrator_pkg/_commands.py b/backend/src/services/dataset_review/orchestrator_pkg/_commands.py new file mode 100644 index 00000000..ecf1c085 --- /dev/null +++ b/backend/src/services/dataset_review/orchestrator_pkg/_commands.py @@ -0,0 +1,102 @@ +# 
[DEF:OrchestratorCommands:Module]
+# @COMPLEXITY: 2
+# @PURPOSE: Typed command and result dataclasses for the dataset review orchestration boundary.
+# @LAYER: Domain
+# @RELATION: DEPENDS_ON -> [DatasetReviewModels]
+# @RELATION: DEPENDS_ON -> [SupersetContextExtractor]
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import List, Optional
+
+from src.core.utils.superset_context_extractor import SupersetParsedContext
+from src.models.auth import User
+from src.models.dataset_review import (
+    CompiledPreview,
+    DatasetReviewSession,
+    DatasetRunContext,
+    ValidationFinding,
+)
+
+
+# [DEF:StartSessionCommand:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Typed input contract for starting a dataset review session.
+@dataclass
+class StartSessionCommand:
+    user: User
+    environment_id: str
+    source_kind: str
+    source_input: str
+
+
+# [/DEF:StartSessionCommand:Class]
+
+
+# [DEF:StartSessionResult:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Session-start result carrying the persisted session and intake recovery metadata.
+@dataclass
+class StartSessionResult:
+    session: DatasetReviewSession
+    parsed_context: Optional[SupersetParsedContext] = None
+    findings: List[ValidationFinding] = field(default_factory=list)
+
+
+# [/DEF:StartSessionResult:Class]
+
+
+# [DEF:PreparePreviewCommand:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Typed input contract for compiling one Superset-backed session preview.
+@dataclass
+class PreparePreviewCommand:
+    user: User
+    session_id: str
+    expected_version: Optional[int] = None
+
+
+# [/DEF:PreparePreviewCommand:Class]
+
+
+# [DEF:PreparePreviewResult:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Result contract for one persisted compiled preview attempt.
+@dataclass
+class PreparePreviewResult:
+    session: DatasetReviewSession
+    preview: CompiledPreview
+    blocked_reasons: List[str] = field(default_factory=list)
+
+
+# [/DEF:PreparePreviewResult:Class]
+
+
+# [DEF:LaunchDatasetCommand:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Typed input contract for launching one dataset-review session into SQL Lab.
+@dataclass
+class LaunchDatasetCommand:
+    user: User
+    session_id: str
+    expected_version: Optional[int] = None
+
+
+# [/DEF:LaunchDatasetCommand:Class]
+
+
+# [DEF:LaunchDatasetResult:Class]
+# @COMPLEXITY: 2
+# @PURPOSE: Launch result carrying immutable run context and any gate blockers.
+@dataclass
+class LaunchDatasetResult:
+    session: DatasetReviewSession
+    run_context: DatasetRunContext
+    blocked_reasons: List[str] = field(default_factory=list)
+
+
+# [/DEF:LaunchDatasetResult:Class]
+
+
+# [/DEF:OrchestratorCommands:Module]
diff --git a/backend/src/services/dataset_review/orchestrator_pkg/_helpers.py b/backend/src/services/dataset_review/orchestrator_pkg/_helpers.py
new file mode 100644
index 00000000..e01c7462
--- /dev/null
+++ b/backend/src/services/dataset_review/orchestrator_pkg/_helpers.py
@@ -0,0 +1,356 @@
+# [DEF:OrchestratorHelpers:Module]
+# @COMPLEXITY: 4
+# @PURPOSE: Pure helper functions extracted from DatasetReviewOrchestrator for INV_7 compliance: snapshot, blockers, fingerprint, recovery bootstrap.
+# @LAYER: Domain
+# @RELATION: DEPENDS_ON -> [DatasetReviewModels]
+# @RELATION: DEPENDS_ON -> [SupersetContextExtractor]
+# @PRE: Caller provides a loaded session aggregate with hydrated child collections.
+# @POST: Helper results are deterministic and do not mutate persistence directly.
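For orientation, a minimal sketch of driving this command boundary from calling code (the helpers module body continues below). The dataclasses come from the hunk above; the orchestrator method name start_session and the current_user/orchestrator objects are assumptions, not shown in this patch:

    from src.services.dataset_review.orchestrator_pkg._commands import (
        StartSessionCommand,
    )

    # Hypothetical wiring: orchestrator and current_user come from the host app.
    command = StartSessionCommand(
        user=current_user,
        environment_id="env-prod",
        source_kind="dataset",
        source_input="dataset:42",
    )
    result = orchestrator.start_session(command)  # assumed entry point
    if result.findings:
        # Partial-recovery intake produced warning findings; session stays usable.
        print(result.session.session_id, [f.code for f in result.findings])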
+
+from __future__ import annotations
+
+import hashlib
+import json
+from datetime import datetime
+from typing import Any, Dict, List, Optional, cast
+
+from src.models.dataset_review import (
+    ApprovalState,
+    BusinessSummarySource,
+    CompiledPreview,
+    ConfidenceState,
+    DatasetProfile,
+    DatasetReviewSession,
+    FindingArea,
+    FindingSeverity,
+    PreviewStatus,
+    ResolutionState,
+    ValidationFinding,
+)
+
+
+# [DEF:parse_dataset_selection:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Normalize dataset-selection payload into canonical session references.
+def parse_dataset_selection(source_input: str) -> tuple[str, Optional[int]]:
+    normalized = str(source_input or "").strip()
+    if not normalized:
+        raise ValueError("dataset selection input must be non-empty")
+    if normalized.isdigit():
+        dataset_id = int(normalized)
+        return f"dataset:{dataset_id}", dataset_id
+    if normalized.startswith("dataset:"):
+        suffix = normalized.split(":", 1)[1].strip()
+        if suffix.isdigit():
+            return normalized, int(suffix)
+        return normalized, None
+    return normalized, None
+
+
+# [/DEF:parse_dataset_selection:Function]
+
+
+# [DEF:build_initial_profile:Function]
+# @COMPLEXITY: 2
+# @PURPOSE: Create the first profile snapshot so exports and detail views remain usable immediately after intake.
+def build_initial_profile(
+    session_id: str,
+    parsed_context: Optional[Any],
+    dataset_ref: str,
+) -> DatasetProfile:
+    dataset_name = (
+        dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset"
+    )
+    business_summary = (
+        f"Review session initialized for {dataset_ref}."
+        if dataset_ref
+        else "Review session initialized with unresolved dataset context."
+    )
+    confidence_state = (
+        ConfidenceState.MIXED
+        if parsed_context and getattr(parsed_context, "partial_recovery", False)
+        else ConfidenceState.MOSTLY_CONFIRMED
+    )
+    return DatasetProfile(
+        session_id=session_id,
+        dataset_name=dataset_name or "Unresolved dataset",
+        schema_name=dataset_ref.split(".")[0] if "." in dataset_ref else None,
+        business_summary=business_summary,
+        business_summary_source=BusinessSummarySource.IMPORTED,
+        description="Initial review profile created from source intake.",
+        dataset_type="unknown",
+        is_sqllab_view=False,
+        completeness_score=0.25,
+        confidence_state=confidence_state,
+        has_blocking_findings=False,
+        has_warning_findings=bool(
+            parsed_context and getattr(parsed_context, "partial_recovery", False)
+        ),
+        manual_summary_locked=False,
+    )
+
+
+# [/DEF:build_initial_profile:Function]
+
+
+# [DEF:build_partial_recovery_findings:Function]
+# @COMPLEXITY: 3
+# @PURPOSE: Project partial Superset intake recovery into explicit findings without blocking session usability.
+# @PRE: parsed_context.partial_recovery is true.
+# @POST: Returns warning-level findings that preserve usable but incomplete state.
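A quick check of parse_dataset_selection's normalization contract, derived directly from the function above (the findings builder announced by the preceding header follows after this note):

    from src.services.dataset_review.orchestrator_pkg._helpers import (
        parse_dataset_selection,
    )

    assert parse_dataset_selection("42") == ("dataset:42", 42)
    assert parse_dataset_selection("dataset:42") == ("dataset:42", 42)
    assert parse_dataset_selection("dataset:next") == ("dataset:next", None)
    assert parse_dataset_selection("analytics.orders") == ("analytics.orders", None)
    # Empty or whitespace-only input raises ValueError.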
+def build_partial_recovery_findings(parsed_context: Any) -> List[ValidationFinding]: + findings: List[ValidationFinding] = [] + for unresolved_ref in getattr(parsed_context, "unresolved_references", []): + findings.append( + ValidationFinding( + area=FindingArea.SOURCE_INTAKE, + severity=FindingSeverity.WARNING, + code="PARTIAL_SUPERSET_RECOVERY", + title="Superset context recovered partially", + message=( + "Session remains usable, but some Superset context requires review: " + f"{unresolved_ref.replace('_', ' ')}." + ), + resolution_state=ResolutionState.OPEN, + caused_by_ref=unresolved_ref, + ) + ) + return findings + + +# [/DEF:build_partial_recovery_findings:Function] + + +# [DEF:extract_effective_filter_value:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Separate normalized filter payload metadata from the user-facing effective filter value. +def extract_effective_filter_value( + normalized_value: Any, raw_value: Any +) -> Any: + if isinstance(normalized_value, dict) and ( + "filter_clauses" in normalized_value + or "extra_form_data" in normalized_value + ): + return raw_value + return normalized_value if normalized_value is not None else raw_value + + +# [/DEF:extract_effective_filter_value:Function] + + +# [DEF:build_execution_snapshot:Function] +# @COMPLEXITY: 4 +# @PURPOSE: Build effective filters, template params, approvals, and fingerprint for preview and launch gating. +# @PRE: Session aggregate includes imported filters, template variables, and current execution mappings. +# @POST: Returns deterministic execution snapshot for current session state without mutating persistence. +def build_execution_snapshot(session: DatasetReviewSession) -> Dict[str, Any]: + session_record = cast(Any, session) + filter_lookup = { + item.filter_id: item for item in session_record.imported_filters + } + variable_lookup = { + item.variable_id: item for item in session_record.template_variables + } + + effective_filters: List[Dict[str, Any]] = [] + template_params: Dict[str, Any] = {} + approved_mapping_ids: List[str] = [] + open_warning_refs: List[str] = [] + preview_blockers: List[str] = [] + mapped_filter_ids: set[str] = set() + + for mapping in session_record.execution_mappings: + imported_filter = filter_lookup.get(mapping.filter_id) + template_variable = variable_lookup.get(mapping.variable_id) + if imported_filter is None: + preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter") + continue + if template_variable is None: + preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_variable") + continue + + effective_value = mapping.effective_value + if effective_value is None: + effective_value = extract_effective_filter_value( + imported_filter.normalized_value, imported_filter.raw_value, + ) + if effective_value is None: + effective_value = template_variable.default_value + + if effective_value is None and template_variable.is_required: + preview_blockers.append( + f"variable:{template_variable.variable_name}:missing_required_value" + ) + continue + + mapped_filter_ids.add(imported_filter.filter_id) + if effective_value is not None: + mapped_filter_payload = { + "mapping_id": mapping.mapping_id, + "filter_id": imported_filter.filter_id, + "filter_name": imported_filter.filter_name, + "variable_id": template_variable.variable_id, + "variable_name": template_variable.variable_name, + "effective_value": effective_value, + "raw_input_value": mapping.raw_input_value, + } + if isinstance(imported_filter.normalized_value, dict): + mapped_filter_payload["display_name"] = 
imported_filter.display_name + mapped_filter_payload["normalized_filter_payload"] = ( + imported_filter.normalized_value + ) + effective_filters.append(mapped_filter_payload) + template_params[template_variable.variable_name] = effective_value + if mapping.approval_state == ApprovalState.APPROVED: + approved_mapping_ids.append(mapping.mapping_id) + if ( + mapping.requires_explicit_approval + and mapping.approval_state != ApprovalState.APPROVED + ): + open_warning_refs.append(mapping.mapping_id) + + for imported_filter in session_record.imported_filters: + if imported_filter.filter_id in mapped_filter_ids: + continue + effective_value = extract_effective_filter_value( + imported_filter.normalized_value, imported_filter.raw_value, + ) + if effective_value is None: + continue + effective_filters.append( + { + "filter_id": imported_filter.filter_id, + "filter_name": imported_filter.filter_name, + "display_name": imported_filter.display_name, + "effective_value": effective_value, + "raw_input_value": imported_filter.raw_value, + "normalized_filter_payload": imported_filter.normalized_value, + } + ) + + mapped_variable_ids = { + mapping.variable_id for mapping in session_record.execution_mappings + } + for variable in session_record.template_variables: + if variable.variable_id in mapped_variable_ids: + continue + if variable.default_value is not None: + template_params[variable.variable_name] = variable.default_value + continue + if variable.is_required: + preview_blockers.append(f"variable:{variable.variable_name}:unmapped") + + semantic_decision_refs = [ + field.field_id + for field in session.semantic_fields + if field.is_locked + or not field.needs_review + or field.provenance.value != "unresolved" + ] + preview_fingerprint = compute_preview_fingerprint( + { + "dataset_id": session_record.dataset_id, + "template_params": template_params, + "effective_filters": effective_filters, + } + ) + return { + "effective_filters": effective_filters, + "template_params": template_params, + "approved_mapping_ids": sorted(approved_mapping_ids), + "semantic_decision_refs": sorted(semantic_decision_refs), + "open_warning_refs": sorted(open_warning_refs), + "preview_blockers": sorted(set(preview_blockers)), + "preview_fingerprint": preview_fingerprint, + } + + +# [/DEF:build_execution_snapshot:Function] + + +# [DEF:build_launch_blockers:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Enforce launch gates from findings, approvals, and current preview truth. +# @PRE: execution_snapshot was computed from current session state. +# @POST: Returns explicit blocker codes for every unmet launch invariant. 
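The snapshot contract is easiest to see from the consumer side; a minimal sketch, assuming session is a hydrated DatasetReviewSession aggregate (the launch-gate function announced by the preceding header follows after this note):

    snapshot = build_execution_snapshot(session)
    # The key set is fixed by the return statement above; list values are
    # sorted, so equal session states always fingerprint identically.
    assert set(snapshot) == {
        "effective_filters", "template_params", "approved_mapping_ids",
        "semantic_decision_refs", "open_warning_refs",
        "preview_blockers", "preview_fingerprint",
    }
    can_preview = not snapshot["preview_blockers"]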
+def build_launch_blockers( + session: DatasetReviewSession, + execution_snapshot: Dict[str, Any], + preview: Optional[CompiledPreview], +) -> List[str]: + session_record = cast(Any, session) + blockers = list(execution_snapshot["preview_blockers"]) + + for finding in session_record.findings: + if ( + finding.severity == FindingSeverity.BLOCKING + and finding.resolution_state + not in {ResolutionState.RESOLVED, ResolutionState.APPROVED} + ): + blockers.append(f"finding:{finding.code}:blocking") + for mapping in session_record.execution_mappings: + if ( + mapping.requires_explicit_approval + and mapping.approval_state != ApprovalState.APPROVED + ): + blockers.append(f"mapping:{mapping.mapping_id}:approval_required") + + if preview is None: + blockers.append("preview:missing") + else: + if preview.preview_status != PreviewStatus.READY: + blockers.append(f"preview:{preview.preview_status.value}") + if preview.preview_fingerprint != execution_snapshot["preview_fingerprint"]: + blockers.append("preview:fingerprint_mismatch") + + return sorted(set(blockers)) + + +# [/DEF:build_launch_blockers:Function] + + +# [DEF:get_latest_preview:Function] +# @COMPLEXITY: 2 +# @PURPOSE: Resolve the current latest preview snapshot for one session aggregate. +def get_latest_preview(session: DatasetReviewSession) -> Optional[CompiledPreview]: + session_record = cast(Any, session) + if not session_record.previews: + return None + if session_record.last_preview_id: + for preview in session_record.previews: + if preview.preview_id == session_record.last_preview_id: + return preview + return sorted( + session_record.previews, + key=lambda item: (item.created_at or datetime.min, item.preview_id), + reverse=True, + )[0] + + +# [/DEF:get_latest_preview:Function] + + +# [DEF:compute_preview_fingerprint:Function] +# @COMPLEXITY: 1 +# @PURPOSE: Produce deterministic execution fingerprint for preview truth and staleness checks. +def compute_preview_fingerprint(payload: Dict[str, Any]) -> str: + serialized = json.dumps(payload, sort_keys=True, default=str) + return hashlib.sha256(serialized.encode("utf-8")).hexdigest() + + +# [/DEF:compute_preview_fingerprint:Function] + + +# [/DEF:OrchestratorHelpers:Module] diff --git a/backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py b/backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py new file mode 100644 index 00000000..06cce284 --- /dev/null +++ b/backend/src/services/dataset_review/repositories/repository_pkg/_mutations.py @@ -0,0 +1,202 @@ +# [DEF:SessionRepositoryMutations:Module] +# @COMPLEXITY: 4 +# @PURPOSE: Persistence mutation operations for dataset review session aggregates — profile/findings, recovery state, preview, run context. +# @LAYER: Domain +# @RELATION: DEPENDS_ON -> [DatasetReviewModels] +# @RELATION: DEPENDS_ON -> [SessionEventLogger] +# @PRE: All mutations execute within authenticated request or task scope. +# @POST: Session aggregate writes preserve ownership and version semantics. 
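The fingerprint helper above underpins the staleness gate in build_launch_blockers; a minimal determinism check (the mutations module body continues below):

    from src.services.dataset_review.orchestrator_pkg._helpers import (
        compute_preview_fingerprint,
    )

    a = compute_preview_fingerprint({"b": 1, "a": [2, 3]})
    b = compute_preview_fingerprint({"a": [2, 3], "b": 1})
    assert a == b        # json.dumps(sort_keys=True) ignores key order
    assert len(a) == 64  # hex-encoded SHA-256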
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, List, Optional, cast
+
+from sqlalchemy.orm import Session
+
+from src.core.logger import belief_scope, logger
+from src.models.dataset_review import (
+    CompiledPreview,
+    DatasetProfile,
+    DatasetReviewSession,
+    DatasetRunContext,
+    ExecutionMapping,
+    ImportedFilter,
+    TemplateVariable,
+    ValidationFinding,
+)
+from src.services.dataset_review.event_logger import SessionEventLogger
+
+logger = cast(Any, logger)
+
+
+# [DEF:save_profile_and_findings:Function]
+# @COMPLEXITY: 4
+# @PURPOSE: Persist profile state and replace validation findings for an owned session in one transaction.
+# @PRE: session_id belongs to user_id and the supplied profile/findings belong to the same aggregate scope.
+# @POST: stored profile matches the current session and findings are replaced by the supplied collection.
+# @SIDE_EFFECT: updates profile rows, deletes stale findings, inserts current findings, and commits the transaction.
+def save_profile_and_findings(
+    db: Session,
+    event_logger: SessionEventLogger,
+    get_owned_session,
+    require_session_version,
+    commit_session_mutation,
+    session_id: str,
+    user_id: str,
+    profile: DatasetProfile,
+    findings: List[ValidationFinding],
+    expected_version: Optional[int] = None,
+) -> DatasetReviewSession:
+    with belief_scope("save_profile_and_findings"):
+        session = get_owned_session(session_id, user_id)
+        if expected_version is not None:
+            require_session_version(session, expected_version)
+        logger.reason("Persisting dataset profile and replacing validation findings", extra={"session_id": session_id, "user_id": user_id, "has_profile": bool(profile), "findings_count": len(findings)})
+
+        if profile:
+            existing_profile = db.query(DatasetProfile).filter_by(session_id=session_id).first()
+            if existing_profile:
+                profile.profile_id = existing_profile.profile_id
+            db.merge(profile)
+
+        db.query(ValidationFinding).filter(ValidationFinding.session_id == session_id).delete()
+        for finding in findings:
+            cast(Any, finding).session_id = session_id
+            db.add(finding)
+
+        commit_session_mutation(session, expected_version=expected_version)
+        logger.reflect("Dataset profile and validation findings committed", extra={"session_id": session.session_id, "user_id": user_id, "findings_count": len(findings)})
+        return session
+
+
+# [/DEF:save_profile_and_findings:Function]
+
+
+# [DEF:save_recovery_state:Function]
+# @COMPLEXITY: 4
+# @PURPOSE: Persist imported filters, template variables, and initial execution mappings for one owned session.
+# @PRE: session_id belongs to user_id.
+# @POST: Recovery state persisted to database.
+# @SIDE_EFFECT: Writes to database.
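Callers reach save_profile_and_findings, defined above, through the repository facade shown later in this patch; a minimal sketch, assuming db is a live SQLAlchemy Session and the profile/findings were built by the orchestrator helpers (the recovery-state writer announced by the preceding header follows after this note):

    from src.services.dataset_review.repositories.session_repository import (
        DatasetReviewSessionRepository,
    )

    repo = DatasetReviewSessionRepository(db)
    session = repo.save_profile_and_findings(
        session_id=session_id,
        user_id=user_id,
        profile=profile,
        findings=findings,
        expected_version=3,  # last version the client observed
    )
    # Findings are replaced wholesale: stale rows deleted, new rows inserted,
    # and the commit bumps the session version to 4 on success.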
+def save_recovery_state( + db: Session, + get_owned_session, + require_session_version, + commit_session_mutation, + load_session_detail_fn, + session_id: str, + user_id: str, + imported_filters: List[ImportedFilter], + template_variables: List[TemplateVariable], + execution_mappings: List[ExecutionMapping], + expected_version: Optional[int] = None, +) -> DatasetReviewSession: + with belief_scope("save_recovery_state"): + session = get_owned_session(session_id, user_id) + if expected_version is not None: + require_session_version(session, expected_version) + logger.reason("Persisting dataset review recovery bootstrap state", extra={"session_id": session_id, "user_id": user_id, "imported_filters_count": len(imported_filters), "template_variables_count": len(template_variables), "execution_mappings_count": len(execution_mappings)}) + + db.query(ExecutionMapping).filter(ExecutionMapping.session_id == session_id).delete() + db.query(TemplateVariable).filter(TemplateVariable.session_id == session_id).delete() + db.query(ImportedFilter).filter(ImportedFilter.session_id == session_id).delete() + + for f in imported_filters: + cast(Any, f).session_id = session_id + db.add(f) + for tv in template_variables: + cast(Any, tv).session_id = session_id + db.add(tv) + db.flush() + for em in execution_mappings: + cast(Any, em).session_id = session_id + db.add(em) + + commit_session_mutation(session, expected_version=expected_version) + logger.reflect("Dataset review recovery bootstrap state committed", extra={"session_id": session.session_id, "user_id": user_id}) + return load_session_detail_fn(session_id, user_id) + + +# [/DEF:save_recovery_state:Function] + + +# [DEF:save_preview:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist a preview snapshot and mark prior session previews stale. +# @PRE: session_id belongs to user_id and preview is prepared for the same session aggregate. +# @POST: preview is persisted and the session points to the latest preview identifier. +# @SIDE_EFFECT: updates prior preview statuses, inserts a preview row, mutates the parent session, and commits. +def save_preview( + db: Session, + get_owned_session, + require_session_version, + commit_session_mutation, + session_id: str, + user_id: str, + preview: CompiledPreview, + expected_version: Optional[int] = None, +) -> CompiledPreview: + with belief_scope("save_preview"): + session = get_owned_session(session_id, user_id) + session_record = cast(Any, session) + if expected_version is not None: + require_session_version(session, expected_version) + logger.reason("Persisting compiled preview and staling previous preview snapshots", extra={"session_id": session_id, "user_id": user_id}) + + db.query(CompiledPreview).filter(CompiledPreview.session_id == session_id).update({"preview_status": "stale"}) + db.add(preview) + db.flush() + session_record.last_preview_id = preview.preview_id + + commit_session_mutation(session, refresh_targets=[preview], expected_version=expected_version) + logger.reflect("Compiled preview committed as latest session preview", extra={"session_id": session.session_id, "preview_id": preview.preview_id}) + return preview + + +# [/DEF:save_preview:Function] + + +# [DEF:save_run_context:Function] +# @COMPLEXITY: 3 +# @PURPOSE: Persist an immutable launch audit snapshot for an owned session. +# @PRE: session_id belongs to user_id and run_context targets the same aggregate. +# @POST: run context is persisted and linked as the latest launch snapshot for the session. 
+# @SIDE_EFFECT: inserts a run-context row, mutates the parent session pointer, and commits. +def save_run_context( + db: Session, + get_owned_session, + require_session_version, + commit_session_mutation, + session_id: str, + user_id: str, + run_context: DatasetRunContext, + expected_version: Optional[int] = None, +) -> DatasetRunContext: + with belief_scope("save_run_context"): + session = get_owned_session(session_id, user_id) + session_record = cast(Any, session) + if expected_version is not None: + require_session_version(session, expected_version) + logger.reason("Persisting dataset run context audit snapshot", extra={"session_id": session_id, "user_id": user_id}) + + db.add(run_context) + db.flush() + session_record.last_run_context_id = run_context.run_context_id + + commit_session_mutation(session, refresh_targets=[run_context], expected_version=expected_version) + logger.reflect("Dataset run context committed as latest launch snapshot", extra={"session_id": session.session_id, "run_context_id": run_context.run_context_id}) + return run_context + + +# [/DEF:save_run_context:Function] + + +# [/DEF:SessionRepositoryMutations:Module] diff --git a/backend/src/services/dataset_review/repositories/session_repository.py b/backend/src/services/dataset_review/repositories/session_repository.py index ce4c79e5..6e0ea5f7 100644 --- a/backend/src/services/dataset_review/repositories/session_repository.py +++ b/backend/src/services/dataset_review/repositories/session_repository.py @@ -2,15 +2,18 @@ # @COMPLEXITY: 5 # @PURPOSE: Persist and retrieve dataset review session aggregates, including readiness, findings, semantic decisions, clarification state, previews, and run contexts. # @LAYER: Domain -# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] -# @RELATION: [DEPENDS_ON] -> [DatasetProfile] -# @RELATION: [DEPENDS_ON] -> [ValidationFinding] -# @RELATION: [DEPENDS_ON] -> [CompiledPreview] +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +# @RELATION: DEPENDS_ON -> [DatasetProfile] +# @RELATION: DEPENDS_ON -> [ValidationFinding] +# @RELATION: DEPENDS_ON -> [CompiledPreview] +# @RELATION: DISPATCHES -> [SessionRepositoryMutations:Module] # @PRE: repository operations execute within authenticated request or task scope. # @POST: session aggregate reads are structurally consistent and writes preserve ownership and version semantics. # @SIDE_EFFECT: reads and writes SQLAlchemy-backed session aggregates. # @DATA_CONTRACT: Input[SessionMutation] -> Output[PersistedSessionAggregate] # @INVARIANT: answers, mapping approvals, preview artifacts, and launch snapshots are never attributed to the wrong user or session. +# @RATIONALE: Original 627-line file exceeded INV_7 (400-line module limit). Extracted mutation operations into _mutations sub-module. +# @REJECTED: Keeping all repository operations in one file because it exceeded the fractal limit. from datetime import datetime from typing import Any, Optional, List, cast @@ -57,23 +60,17 @@ class DatasetReviewSessionVersionConflictError(ValueError): # [DEF:DatasetReviewSessionRepository:Class] # @COMPLEXITY: 4 # @PURPOSE: Enforce ownership-scoped persistence and retrieval for dataset review session aggregates. 
-# @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] -# @RELATION: [DEPENDS_ON] -> [DatasetProfile] -# @RELATION: [DEPENDS_ON] -> [ValidationFinding] -# @RELATION: [DEPENDS_ON] -> [CompiledPreview] -# @RELATION: [DEPENDS_ON] -> [SessionEventLogger] -# @PRE: constructor receives a live SQLAlchemy session and callers provide authenticated user scope for guarded reads and writes. +# @RELATION: DEPENDS_ON -> [DatasetReviewSession] +# @RELATION: DEPENDS_ON -> [SessionEventLogger] +# @PRE: constructor receives a live SQLAlchemy session and callers provide authenticated user scope. # @POST: repository methods return ownership-scoped aggregates or persisted child records without changing domain meaning. # @SIDE_EFFECT: mutates and queries the persistence layer through the injected database session. -# @DATA_CONTRACT: Input[OwnedSessionQuery|SessionMutation] -> Output[PersistedSessionAggregate|PersistedChildRecord] class DatasetReviewSessionRepository: # [DEF:init_repo:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 2 # @PURPOSE: Bind one live SQLAlchemy session to the repository instance. - # @RELATION: DEPENDS_ON -> DatasetReviewSessionRepository; CALLS -> sqlalchemy # @PRE: db_session is not None # @POST: Repository instance initialized with valid session - # @SIDE_EFFECT: None - pure initialization def __init__(self, db: Session): self.db = db self.event_logger = SessionEventLogger(db) @@ -81,542 +78,205 @@ class DatasetReviewSessionRepository: # [/DEF:init_repo:Function] # [DEF:get_owned_session:Function] - # @COMPLEXITY: 4 - # @PURPOSE: Resolve one owner-scoped dataset review session for mutation paths without leaking foreign-session state. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] + # @COMPLEXITY: 3 + # @PURPOSE: Resolve one owner-scoped dataset review session for mutation paths. # @PRE: session_id and user_id are non-empty identifiers from the authenticated ownership scope. # @POST: returns the owned session or raises a deterministic access error. - # @SIDE_EFFECT: reads one session row from the current database transaction. 
- # @DATA_CONTRACT: Input[OwnedSessionQuery] -> Output[DatasetReviewSession|ValueError] def _get_owned_session(self, session_id: str, user_id: str) -> DatasetReviewSession: with belief_scope("DatasetReviewSessionRepository.get_owned_session"): - logger.reason( - "Resolving owner-scoped dataset review session for mutation path", - extra={"session_id": session_id, "user_id": user_id}, - ) + logger.reason("Resolving owner-scoped dataset review session", extra={"session_id": session_id, "user_id": user_id}) session = ( self.db.query(DatasetReviewSession) - .filter( - DatasetReviewSession.session_id == session_id, - DatasetReviewSession.user_id == user_id, - ) + .filter(DatasetReviewSession.session_id == session_id, DatasetReviewSession.user_id == user_id) .first() ) if not session: - logger.explore( - "Owner-scoped dataset review session lookup failed", - extra={"session_id": session_id, "user_id": user_id}, - ) + logger.explore("Owner-scoped dataset review session lookup failed", extra={"session_id": session_id, "user_id": user_id}) raise ValueError("Session not found or access denied") - logger.reflect( - "Owner-scoped dataset review session resolved", - extra={"session_id": session.session_id, "user_id": session.user_id}, - ) + logger.reflect("Owner-scoped dataset review session resolved", extra={"session_id": session.session_id}) return session # [/DEF:get_owned_session:Function] # [DEF:create_sess:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 3 # @PURPOSE: Persist an initial dataset review session shell. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @PRE: session is a new aggregate root bound to the current ownership scope. # @POST: session is committed, refreshed, and returned with persisted identifiers. - # @SIDE_EFFECT: inserts a session row and commits the active transaction. - # @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[DatasetReviewSession] def create_session(self, session: DatasetReviewSession) -> DatasetReviewSession: with belief_scope("DatasetReviewSessionRepository.create_session"): - logger.reason( - "Persisting dataset review session shell", - extra={ - "user_id": session.user_id, - "environment_id": session.environment_id, - }, - ) + logger.reason("Persisting dataset review session shell", extra={"user_id": session.user_id, "environment_id": session.environment_id}) self.db.add(session) self.db.commit() self.db.refresh(session) - logger.reflect( - "Dataset review session shell persisted with stable identifier", - extra={"session_id": session.session_id, "user_id": session.user_id}, - ) + logger.reflect("Dataset review session shell persisted", extra={"session_id": session.session_id}) return session # [/DEF:create_sess:Function] # [DEF:require_session_version:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 3 # @PURPOSE: Enforce optimistic-lock version matching before a session mutation is persisted. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @PRE: session belongs to the current owner mutation scope and expected_version is the caller's last observed version. # @POST: returns the same session when versions match; otherwise raises deterministic conflict error. - # @SIDE_EFFECT: none. 
- # @DATA_CONTRACT: Input[DatasetReviewSession,int] -> Output[DatasetReviewSession|DatasetReviewSessionVersionConflictError] - def require_session_version( - self, session: DatasetReviewSession, expected_version: int - ) -> DatasetReviewSession: + def require_session_version(self, session: DatasetReviewSession, expected_version: int) -> DatasetReviewSession: with belief_scope("DatasetReviewSessionRepository.require_session_version"): - session_record = cast(Any, session) - actual_version = int(getattr(session_record, "version", 0) or 0) - logger.reason( - "Checking optimistic-lock version for dataset review mutation", - extra={ - "session_id": session.session_id, - "expected_version": expected_version, - "actual_version": actual_version, - }, - ) + actual_version = int(getattr(session, "version", 0) or 0) + logger.reason("Checking optimistic-lock version", extra={"session_id": session.session_id, "expected_version": expected_version, "actual_version": actual_version}) if actual_version != expected_version: - logger.explore( - "Rejected dataset review mutation due to stale session version", - extra={ - "session_id": session.session_id, - "expected_version": expected_version, - "actual_version": actual_version, - }, - ) - raise DatasetReviewSessionVersionConflictError( - str(session_record.session_id), expected_version, actual_version - ) - logger.reflect( - "Optimistic-lock version accepted for dataset review mutation", - extra={"session_id": session.session_id, "version": actual_version}, - ) + logger.explore("Rejected mutation due to stale session version", extra={"session_id": session.session_id, "expected_version": expected_version, "actual_version": actual_version}) + raise DatasetReviewSessionVersionConflictError(str(session.session_id), expected_version, actual_version) + logger.reflect("Optimistic-lock version accepted", extra={"session_id": session.session_id, "version": actual_version}) return session # [/DEF:require_session_version:Function] # [DEF:bump_session_version:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 2 # @PURPOSE: Increment optimistic-lock version after a successful session mutation is assembled. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @PRE: session mutation has passed guards and will be committed in the current transaction. - # @POST: session version increments monotonically and last_activity_at reflects the mutation time. - # @SIDE_EFFECT: mutates the in-memory session aggregate before commit. - # @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[int] + # @POST: session version increments monotonically. 
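The version guards above combine into a simple conflict protocol; a minimal sketch of the caller side (bump_session_version's body follows after this note):

    from src.services.dataset_review.repositories.session_repository import (
        DatasetReviewSessionRepository,
        DatasetReviewSessionVersionConflictError,
    )

    repo = DatasetReviewSessionRepository(db)
    try:
        repo.save_preview(session_id, user_id, preview, expected_version=client_version)
    except DatasetReviewSessionVersionConflictError:
        # Another writer committed first; reload the aggregate, let the
        # client re-observe the new version, then retry the mutation.
        fresh = repo.load_session_detail(session_id, user_id)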
def bump_session_version(self, session: DatasetReviewSession) -> int: with belief_scope("DatasetReviewSessionRepository.bump_session_version"): - session_record = cast(Any, session) - next_version = int(getattr(session_record, "version", 0) or 0) + 1 - session_record.version = next_version - session_record.last_activity_at = datetime.utcnow() - logger.reflect( - "Prepared incremented dataset review session version", - extra={"session_id": session.session_id, "version": next_version}, - ) + next_version = int(getattr(session, "version", 0) or 0) + 1 + setattr(session, "version", next_version) + session.last_activity_at = datetime.utcnow() + logger.reflect("Prepared incremented session version", extra={"session_id": session.session_id, "version": next_version}) return next_version # [/DEF:bump_session_version:Function] # [DEF:commit_session_mutation:Function] # @COMPLEXITY: 4 - # @PURPOSE: Commit one prepared dataset review session mutation and translate stale writes into deterministic optimistic-lock conflicts. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @PRE: session mutation has already been assembled in the current SQLAlchemy transaction. + # @PURPOSE: Commit one prepared session mutation and translate stale writes into deterministic conflicts. # @POST: session mutation is committed with one version increment or a deterministic conflict error is raised. - # @SIDE_EFFECT: increments session version, commits the transaction, refreshes ORM rows, or rolls back failed stale writes. - # @DATA_CONTRACT: Input[DatasetReviewSession,List[Any]|None,int|None] -> Output[DatasetReviewSession|DatasetReviewSessionVersionConflictError] def commit_session_mutation( - self, - session: DatasetReviewSession, - *, - refresh_targets: Optional[List[Any]] = None, - expected_version: Optional[int] = None, + self, session: DatasetReviewSession, *, refresh_targets: Optional[List[Any]] = None, expected_version: Optional[int] = None, ) -> DatasetReviewSession: with belief_scope("DatasetReviewSessionRepository.commit_session_mutation"): - session_record = cast(Any, session) - observed_version = int( - expected_version - if expected_version is not None - else getattr(session_record, "version", 0) or 0 - ) - logger.reason( - "Committing dataset review session mutation with optimistic lock", - extra={ - "session_id": session.session_id, - "observed_version": observed_version, - "refresh_count": len(refresh_targets or []), - }, - ) + observed_version = int(expected_version if expected_version is not None else getattr(session, "version", 0) or 0) + logger.reason("Committing session mutation with optimistic lock", extra={"session_id": session.session_id, "observed_version": observed_version}) self.bump_session_version(session) try: self.db.commit() except StaleDataError as exc: self.db.rollback() - actual_version_row = ( - self.db.query(DatasetReviewSession.version) - .filter(DatasetReviewSession.session_id == session.session_id) - .first() - ) - actual_version = ( - int(actual_version_row[0] or 0) if actual_version_row else 0 - ) - logger.explore( - "Dataset review session commit rejected by optimistic lock", - extra={ - "session_id": session.session_id, - "expected_version": observed_version, - "actual_version": actual_version, - }, - ) - raise DatasetReviewSessionVersionConflictError( - session.session_id, - observed_version, - actual_version, - ) from exc - + actual_version_row = self.db.query(DatasetReviewSession.version).filter(DatasetReviewSession.session_id == session.session_id).first() + 
actual_version = int(actual_version_row[0] or 0) if actual_version_row else 0 + logger.explore("Session commit rejected by optimistic lock", extra={"session_id": session.session_id, "expected_version": observed_version, "actual_version": actual_version}) + raise DatasetReviewSessionVersionConflictError(session.session_id, observed_version, actual_version) from exc self.db.refresh(session) for target in refresh_targets or []: self.db.refresh(target) - logger.reflect( - "Dataset review session mutation committed", - extra={ - "session_id": session.session_id, - "version": getattr(session, "version", None), - "refresh_count": len(refresh_targets or []), - }, - ) + logger.reflect("Session mutation committed", extra={"session_id": session.session_id, "version": getattr(session, "version", None)}) return session # [/DEF:commit_session_mutation:Function] # [DEF:load_detail:Function] - # @COMPLEXITY: 4 + # @COMPLEXITY: 3 # @PURPOSE: Return the full session aggregate for API and frontend resume flows. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @RELATION: [DEPENDS_ON] -> [SessionCollaborator] - # @PRE: session_id is a valid UUID; db_session is active - # @POST: Returns SessionDetail with all fields populated - # @SIDE_EFFECT: Read-only database operation - def load_session_detail( - self, session_id: str, user_id: str - ) -> Optional[DatasetReviewSession]: + # @POST: Returns SessionDetail with all fields populated or None. + def load_session_detail(self, session_id: str, user_id: str) -> Optional[DatasetReviewSession]: with belief_scope("DatasetReviewSessionRepository.load_session_detail"): - logger.reason( - "Loading dataset review session detail for owner-or-collaborator scope", - extra={"session_id": session_id, "user_id": user_id}, - ) + logger.reason("Loading dataset review session detail", extra={"session_id": session_id, "user_id": user_id}) session = ( self.db.query(DatasetReviewSession) - .outerjoin( - SessionCollaborator, - DatasetReviewSession.session_id == SessionCollaborator.session_id, - ) + .outerjoin(SessionCollaborator, DatasetReviewSession.session_id == SessionCollaborator.session_id) .options( joinedload(DatasetReviewSession.profile), joinedload(DatasetReviewSession.findings), joinedload(DatasetReviewSession.collaborators), joinedload(DatasetReviewSession.semantic_sources), - joinedload(DatasetReviewSession.semantic_fields).joinedload( - SemanticFieldEntry.candidates - ), + joinedload(DatasetReviewSession.semantic_fields).joinedload(SemanticFieldEntry.candidates), joinedload(DatasetReviewSession.imported_filters), joinedload(DatasetReviewSession.template_variables), joinedload(DatasetReviewSession.execution_mappings), - joinedload(DatasetReviewSession.clarification_sessions) - .joinedload(ClarificationSession.questions) - .joinedload(ClarificationQuestion.options), - joinedload(DatasetReviewSession.clarification_sessions) - .joinedload(ClarificationSession.questions) - .joinedload(ClarificationQuestion.answer), + joinedload(DatasetReviewSession.clarification_sessions).joinedload(ClarificationSession.questions).joinedload(ClarificationQuestion.options), + joinedload(DatasetReviewSession.clarification_sessions).joinedload(ClarificationSession.questions).joinedload(ClarificationQuestion.answer), joinedload(DatasetReviewSession.previews), joinedload(DatasetReviewSession.run_contexts), joinedload(DatasetReviewSession.events), ) .filter(DatasetReviewSession.session_id == session_id) - .filter( - or_( - DatasetReviewSession.user_id == user_id, - SessionCollaborator.user_id 
== user_id, - ) - ) + .filter(or_(DatasetReviewSession.user_id == user_id, SessionCollaborator.user_id == user_id)) .first() ) - logger.reflect( - "Dataset review session detail lookup completed", - extra={ - "session_id": session_id, - "user_id": user_id, - "found": bool(session), - }, - ) + logger.reflect("Session detail lookup completed", extra={"session_id": session_id, "found": bool(session)}) return session # [/DEF:load_detail:Function] - # [DEF:save_prof_find:Function] + # [DEF:save_profile_and_findings:Function] # @COMPLEXITY: 4 - # @PURPOSE: Persist profile state and replace validation findings for an owned session in one transaction. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @RELATION: [DEPENDS_ON] -> [DatasetProfile] - # @RELATION: [DEPENDS_ON] -> [ValidationFinding] - # @PRE: session_id belongs to user_id and the supplied profile/findings belong to the same aggregate scope. - # @POST: stored profile matches the current session and findings are replaced by the supplied collection. - # @SIDE_EFFECT: updates profile rows, deletes stale findings, inserts current findings, and commits the transaction. - # @DATA_CONTRACT: Input[ProfileAndFindingsMutation] -> Output[DatasetReviewSession] + # @PURPOSE: Persist profile state and replace validation findings for an owned session. + # @POST: stored profile matches the current session and findings are replaced. def save_profile_and_findings( - self, - session_id: str, - user_id: str, - profile: DatasetProfile, - findings: List[ValidationFinding], - expected_version: Optional[int] = None, + self, session_id: str, user_id: str, profile: DatasetProfile, findings: List[ValidationFinding], expected_version: Optional[int] = None, ) -> DatasetReviewSession: - with belief_scope("DatasetReviewSessionRepository.save_profile_and_findings"): - session = self._get_owned_session(session_id, user_id) - session_record = cast(Any, session) - if expected_version is not None: - self.require_session_version(session, expected_version) - logger.reason( - "Persisting dataset profile and replacing validation findings", - extra={ - "session_id": session_id, - "user_id": user_id, - "has_profile": bool(profile), - "findings_count": len(findings), - "expected_version": expected_version, - }, - ) + from src.services.dataset_review.repositories.repository_pkg._mutations import save_profile_and_findings as _save + return _save( + self.db, self.event_logger, self._get_owned_session, self.require_session_version, + self.commit_session_mutation, session_id, user_id, profile, findings, expected_version, + ) - if profile: - existing_profile = ( - self.db.query(DatasetProfile) - .filter_by(session_id=session_id) - .first() - ) - if existing_profile: - profile.profile_id = existing_profile.profile_id - self.db.merge(profile) - - self.db.query(ValidationFinding).filter( - ValidationFinding.session_id == session_id - ).delete() - - for finding in findings: - finding_record = cast(Any, finding) - finding_record.session_id = session_id - self.db.add(finding) - - self.commit_session_mutation(session, expected_version=expected_version) - logger.reflect( - "Dataset profile and validation findings committed", - extra={ - "session_id": session.session_id, - "version": session_record.version, - "user_id": user_id, - "findings_count": len(findings), - }, - ) - return self.load_session_detail(session_id, user_id) - - # [/DEF:save_prof_find:Function] + # [/DEF:save_profile_and_findings:Function] # [DEF:save_recovery_state:Function] - # @COMPLEXITY: 4 - # @PURPOSE: Persist 
imported filters, template variables, and initial execution mappings for one owned session. - # @RELATION: [DEPENDS_ON] -> [ImportedFilter] - # @RELATION: [DEPENDS_ON] -> [TemplateVariable] - # @RELATION: [DEPENDS_ON] -> [ExecutionMapping] - # @PRE: session_id is a valid UUID; recovery_state is a valid dict - # @POST: Recovery state persisted to database - # @SIDE_EFFECT: Writes to database + # @COMPLEXITY: 3 + # @PURPOSE: Persist imported filters, template variables, and initial execution mappings. def save_recovery_state( - self, - session_id: str, - user_id: str, - imported_filters: List[ImportedFilter], - template_variables: List[TemplateVariable], - execution_mappings: List[ExecutionMapping], + self, session_id: str, user_id: str, imported_filters: List[ImportedFilter], + template_variables: List[TemplateVariable], execution_mappings: List[ExecutionMapping], expected_version: Optional[int] = None, ) -> DatasetReviewSession: - with belief_scope("DatasetReviewSessionRepository.save_recovery_state"): - session = self._get_owned_session(session_id, user_id) - session_record = cast(Any, session) - if expected_version is not None: - self.require_session_version(session, expected_version) - logger.reason( - "Persisting dataset review recovery bootstrap state", - extra={ - "session_id": session_id, - "user_id": user_id, - "imported_filters_count": len(imported_filters), - "template_variables_count": len(template_variables), - "execution_mappings_count": len(execution_mappings), - "expected_version": expected_version, - }, - ) - - self.db.query(ExecutionMapping).filter( - ExecutionMapping.session_id == session_id - ).delete() - self.db.query(TemplateVariable).filter( - TemplateVariable.session_id == session_id - ).delete() - self.db.query(ImportedFilter).filter( - ImportedFilter.session_id == session_id - ).delete() - - for imported_filter in imported_filters: - imported_filter_record = cast(Any, imported_filter) - imported_filter_record.session_id = session_id - self.db.add(imported_filter) - - for template_variable in template_variables: - template_variable_record = cast(Any, template_variable) - template_variable_record.session_id = session_id - self.db.add(template_variable) - - self.db.flush() - - for execution_mapping in execution_mappings: - execution_mapping_record = cast(Any, execution_mapping) - execution_mapping_record.session_id = session_id - self.db.add(execution_mapping) - - self.commit_session_mutation(session, expected_version=expected_version) - logger.reflect( - "Dataset review recovery bootstrap state committed", - extra={ - "session_id": session.session_id, - "version": session_record.version, - "user_id": user_id, - "imported_filters_count": len(imported_filters), - "template_variables_count": len(template_variables), - "execution_mappings_count": len(execution_mappings), - }, - ) - return self.load_session_detail(session_id, user_id) + from src.services.dataset_review.repositories.repository_pkg._mutations import save_recovery_state as _save + return _save( + self.db, self._get_owned_session, self.require_session_version, + self.commit_session_mutation, self.load_session_detail, + session_id, user_id, imported_filters, template_variables, execution_mappings, expected_version, + ) # [/DEF:save_recovery_state:Function] - # [DEF:save_prev:Function] - # @COMPLEXITY: 4 + # [DEF:save_preview:Function] + # @COMPLEXITY: 3 # @PURPOSE: Persist a preview snapshot and mark prior session previews stale. 
- # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @RELATION: [DEPENDS_ON] -> [CompiledPreview] - # @PRE: session_id belongs to user_id and preview is prepared for the same session aggregate. - # @POST: preview is persisted and the session points to the latest preview identifier. - # @SIDE_EFFECT: updates prior preview statuses, inserts a preview row, mutates the parent session, and commits. - # @DATA_CONTRACT: Input[PreviewMutation] -> Output[CompiledPreview] def save_preview( - self, - session_id: str, - user_id: str, - preview: CompiledPreview, - expected_version: Optional[int] = None, + self, session_id: str, user_id: str, preview: CompiledPreview, expected_version: Optional[int] = None, ) -> CompiledPreview: - with belief_scope("DatasetReviewSessionRepository.save_preview"): - session = self._get_owned_session(session_id, user_id) - session_record = cast(Any, session) - if expected_version is not None: - self.require_session_version(session, expected_version) - logger.reason( - "Persisting compiled preview and staling previous preview snapshots", - extra={ - "session_id": session_id, - "user_id": user_id, - "expected_version": expected_version, - }, - ) + from src.services.dataset_review.repositories.repository_pkg._mutations import save_preview as _save + return _save( + self.db, self._get_owned_session, self.require_session_version, + self.commit_session_mutation, session_id, user_id, preview, expected_version, + ) - self.db.query(CompiledPreview).filter( - CompiledPreview.session_id == session_id - ).update({"preview_status": "stale"}) + # [/DEF:save_preview:Function] - self.db.add(preview) - self.db.flush() - session_record.last_preview_id = preview.preview_id - - self.commit_session_mutation( - session, - refresh_targets=[preview], - expected_version=expected_version, - ) - logger.reflect( - "Compiled preview committed as latest session preview", - extra={ - "session_id": session.session_id, - "version": session_record.version, - "preview_id": preview.preview_id, - "user_id": user_id, - }, - ) - return preview - - # [/DEF:save_prev:Function] - - # [DEF:save_run_ctx:Function] - # @COMPLEXITY: 4 + # [DEF:save_run_context:Function] + # @COMPLEXITY: 3 # @PURPOSE: Persist an immutable launch audit snapshot for an owned session. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] - # @RELATION: [DEPENDS_ON] -> [DatasetRunContext] - # @PRE: session_id belongs to user_id and run_context targets the same aggregate. - # @POST: run context is persisted and linked as the latest launch snapshot for the session. - # @SIDE_EFFECT: inserts a run-context row, mutates the parent session pointer, and commits. 
- # @DATA_CONTRACT: Input[RunContextMutation] -> Output[DatasetRunContext] def save_run_context( - self, - session_id: str, - user_id: str, - run_context: DatasetRunContext, - expected_version: Optional[int] = None, + self, session_id: str, user_id: str, run_context: DatasetRunContext, expected_version: Optional[int] = None, ) -> DatasetRunContext: - with belief_scope("DatasetReviewSessionRepository.save_run_context"): - session = self._get_owned_session(session_id, user_id) - session_record = cast(Any, session) - if expected_version is not None: - self.require_session_version(session, expected_version) - logger.reason( - "Persisting dataset run context audit snapshot", - extra={ - "session_id": session_id, - "user_id": user_id, - "expected_version": expected_version, - }, - ) + from src.services.dataset_review.repositories.repository_pkg._mutations import save_run_context as _save + return _save( + self.db, self._get_owned_session, self.require_session_version, + self.commit_session_mutation, session_id, user_id, run_context, expected_version, + ) - self.db.add(run_context) - self.db.flush() - session_record.last_run_context_id = run_context.run_context_id - - self.commit_session_mutation( - session, - refresh_targets=[run_context], - expected_version=expected_version, - ) - logger.reflect( - "Dataset run context committed as latest launch snapshot", - extra={ - "session_id": session.session_id, - "version": session_record.version, - "run_context_id": run_context.run_context_id, - "user_id": user_id, - }, - ) - return run_context - - # [/DEF:save_run_ctx:Function] + # [/DEF:save_run_context:Function] # [DEF:list_user_sess:Function] # @COMPLEXITY: 2 # @PURPOSE: List review sessions owned by a specific user ordered by most recent update. - # @RELATION: [DEPENDS_ON] -> [DatasetReviewSession] def list_sessions_for_user(self, user_id: str) -> List[DatasetReviewSession]: with belief_scope("DatasetReviewSessionRepository.list_sessions_for_user"): - logger.reason( - "Listing dataset review sessions for owner scope", - extra={"user_id": user_id}, - ) + logger.reason("Listing dataset review sessions for owner scope", extra={"user_id": user_id}) sessions = ( self.db.query(DatasetReviewSession) .filter(DatasetReviewSession.user_id == user_id) .order_by(DatasetReviewSession.updated_at.desc()) .all() ) - logger.reflect( - "Dataset review session list assembled", - extra={"user_id": user_id, "session_count": len(sessions)}, - ) + logger.reflect("Session list assembled", extra={"user_id": user_id, "session_count": len(sessions)}) return sessions # [/DEF:list_user_sess:Function]
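To close the loop, resuming work starts from the owner-scoped listing above; a minimal sketch, assuming db and user_id come from the authenticated request scope:

    repo = DatasetReviewSessionRepository(db)
    for s in repo.list_sessions_for_user(user_id):
        # Ordered by updated_at descending, so the first row is the
        # most recently touched review session.
        print(s.session_id, s.updated_at)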