---
# OpenAPI 3.0.3 contract for the LLM Dataset Orchestration API.
# Covers session lifecycle, semantic review, clarification, mapping review,
# preview/launch and export endpoints, plus the shared component schemas.
openapi: 3.0.3
info:
  title: LLM Dataset Orchestration API
  description: >-
    API for managing dataset review sessions, semantic enrichment,
    clarification, preview, exports, and audited SQL Lab launch
    orchestration.
  version: '1.0.0'

tags:
  - name: Dataset Orchestration
  - name: Session Lifecycle
  - name: Semantic Review
  - name: Clarification
  - name: Mapping Review
  - name: Preview and Launch
  - name: Exports

# Applies to every operation: a bearer token is required unless an operation
# overrides `security` itself (none do).
security:
  - bearerAuth: []

paths:
  # ---- Session lifecycle ---------------------------------------------------
  /api/dataset-orchestration/sessions:
    get:
      tags: [Session Lifecycle]
      summary: List resumable dataset review sessions for the current user
      x-required-permissions:
        - 'dataset:session:read'
      parameters:
        - $ref: '#/components/parameters/StatusFilter'
        - $ref: '#/components/parameters/ReadinessFilter'
        - $ref: '#/components/parameters/PageParam'
        - $ref: '#/components/parameters/PageSizeParam'
      responses:
        '200':
          description: Session list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionCollection'
        '401':
          $ref: '#/components/responses/Unauthorized'
    post:
      tags: [Session Lifecycle]
      summary: Start a new dataset review session
      x-required-permissions:
        - 'dataset:session:manage'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/StartSessionRequest'
      responses:
        '201':
          description: Session started
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionSummary'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '422':
          $ref: '#/components/responses/ValidationError'

  /api/dataset-orchestration/sessions/{session_id}:
    get:
      tags: [Session Lifecycle]
      summary: Get full dataset review session state
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '200':
          description: Full session state
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionDetail'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
    patch:
      tags: [Session Lifecycle]
      summary: Update resumable session lifecycle state
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateSessionRequest'
      responses:
        '200':
          description: Session updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionSummary'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
    delete:
      tags: [Session Lifecycle]
      summary: Archive or delete a session owned by the current user
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - name: hard_delete
          in: query
          required: false
          schema:
            type: boolean
            default: false
      responses:
        '204':
          description: Session removed or archived
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'

  # ---- Semantic review -----------------------------------------------------
  /api/dataset-orchestration/sessions/{session_id}/semantic-source:
    post:
      tags: [Semantic Review]
      summary: Apply a semantic source to the current session
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ApplySemanticSourceRequest'
      responses:
        '200':
          description: Semantic source applied and session updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionDetail'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/fields/{field_id}/semantic:
    patch:
      tags: [Semantic Review]
      summary: Apply a field-level semantic decision or manual override
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - $ref: '#/components/parameters/FieldId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/FieldSemanticUpdateRequest'
      responses:
        '200':
          description: Field semantic state updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SemanticFieldEntry'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/fields/{field_id}/lock:
    post:
      tags: [Semantic Review]
      summary: Lock a field against automatic overwrite
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - $ref: '#/components/parameters/FieldId'
      responses:
        '200':
          description: Field locked
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SemanticFieldEntry'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/fields/{field_id}/unlock:
    post:
      tags: [Semantic Review]
      summary: Unlock a manually protected field
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - $ref: '#/components/parameters/FieldId'
      responses:
        '200':
          description: Field unlocked
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SemanticFieldEntry'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'

  # ---- Clarification -------------------------------------------------------
  /api/dataset-orchestration/sessions/{session_id}/clarification:
    get:
      tags: [Clarification]
      summary: Get current clarification session state and active question
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '200':
          description: Clarification session state
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ClarificationState'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/clarification/answers:
    post:
      tags: [Clarification]
      summary: Submit an answer to the current clarification question
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ClarificationAnswerRequest'
      responses:
        '200':
          description: Answer recorded and session updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ClarificationAnswerResult'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '422':
          $ref: '#/components/responses/ValidationError'

  /api/dataset-orchestration/sessions/{session_id}/clarification/resume:
    post:
      tags: [Clarification]
      summary: Resume or start clarification mode for the next unresolved question
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '200':
          description: Clarification resumed
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ClarificationState'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # ---- Mapping review ------------------------------------------------------
  /api/dataset-orchestration/sessions/{session_id}/mappings:
    get:
      tags: [Mapping Review]
      summary: List current imported-filter to template-variable mappings
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '200':
          description: Mapping list
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MappingCollection'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/mappings/{mapping_id}:
    patch:
      tags: [Mapping Review]
      summary: Update one mapping value or override method
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - $ref: '#/components/parameters/MappingId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateMappingRequest'
      responses:
        '200':
          description: Mapping updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExecutionMapping'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/mappings/{mapping_id}/approve:
    post:
      tags: [Mapping Review]
      summary: Explicitly approve a warning-level mapping transformation
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - $ref: '#/components/parameters/MappingId'
      requestBody:
        required: false
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ApproveMappingRequest'
      responses:
        '200':
          description: Mapping approved
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExecutionMapping'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # ---- Preview and launch --------------------------------------------------
  /api/dataset-orchestration/sessions/{session_id}/preview:
    post:
      tags: [Preview and Launch]
      summary: Trigger Superset-side SQL compilation preview
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '200':
          description: Preview generated synchronously
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompiledPreview'
        '202':
          description: Preview generation started
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PreviewEnqueueResult'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '409':
          description: Preview blocked by incomplete execution context
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  /api/dataset-orchestration/sessions/{session_id}/launch:
    post:
      tags: [Preview and Launch]
      summary: Launch the approved dataset run through SQL Lab
      x-required-permissions:
        - 'dataset:execution:launch'
      parameters:
        - $ref: '#/components/parameters/SessionId'
      responses:
        '201':
          description: Dataset launched
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/LaunchResult'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '409':
          description: Launch blocked by unresolved gates
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ---- Exports -------------------------------------------------------------
  /api/dataset-orchestration/sessions/{session_id}/exports/documentation:
    get:
      tags: [Exports]
      summary: Export generated documentation output for the session
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - name: format
          in: query
          required: false
          schema:
            $ref: '#/components/schemas/ArtifactFormat'
      responses:
        '200':
          description: Documentation export metadata or artifact response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExportArtifact'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  /api/dataset-orchestration/sessions/{session_id}/exports/validation:
    get:
      tags: [Exports]
      summary: Export validation findings output for the session
      parameters:
        - $ref: '#/components/parameters/SessionId'
        - name: format
          in: query
          required: false
          schema:
            $ref: '#/components/schemas/ArtifactFormat'
      responses:
        '200':
          description: Validation export metadata or artifact response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExportArtifact'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

components:
  parameters:
    SessionId:
      name: session_id
      in: path
      required: true
      schema:
        type: string
    FieldId:
      name: field_id
      in: path
      required: true
      schema:
        type: string
    MappingId:
      name: mapping_id
      in: path
      required: true
      schema:
        type: string
    StatusFilter:
      name: status
      in: query
      required: false
      schema:
        $ref: '#/components/schemas/SessionStatus'
    ReadinessFilter:
      name: readiness_state
      in: query
      required: false
      schema:
        $ref: '#/components/schemas/ReadinessState'
    PageParam:
      name: page
      in: query
      required: false
      schema:
        type: integer
        minimum: 1
        default: 1
    PageSizeParam:
      name: page_size
      in: query
      required: false
      schema:
        type: integer
        minimum: 1
        maximum: 100
        default: 20

  securitySchemes:
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT

  # Shared error responses referenced by the operations above.
  responses:
    BadRequest:
      description: Invalid request
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    Unauthorized:
      description: Authentication required
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    Forbidden:
      description: Access denied
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    NotFound:
      description: Resource not found
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
    ValidationError:
      description: Schema validation error
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ValidationErrorResponse'

  schemas:
    # ---- Session -----------------------------------------------------------
    SessionStatus:
      type: string
      enum: [active, paused, completed, archived, cancelled]
    SessionPhase:
      type: string
      enum:
        - intake
        - recovery
        - review
        - semantic_review
        - clarification
        - mapping_review
        - preview
        - launch
        - post_run
    ReadinessState:
      type: string
      enum:
        - empty
        - importing
        - review_ready
        - semantic_source_review_needed
        - clarification_needed
        - clarification_active
        - mapping_review_needed
        - compiled_preview_ready
        - partially_ready
        - run_ready
        - run_in_progress
        - completed
        - recovery_required
    RecommendedAction:
      type: string
      enum:
        - import_from_superset
        - review_documentation
        - apply_semantic_source
        - start_clarification
        - answer_next_question
        - approve_mapping
        - generate_sql_preview
        - complete_required_values
        - launch_dataset
        - resume_session
        - export_outputs
    StartSessionRequest:
      type: object
      required: [source_kind, source_input, environment_id]
      properties:
        source_kind:
          type: string
          enum: [superset_link, dataset_selection]
        source_input:
          type: string
        environment_id:
          type: string
    UpdateSessionRequest:
      type: object
      required: [status]
      properties:
        status:
          $ref: '#/components/schemas/SessionStatus'
        note:
          type: string
          nullable: true
    SessionSummary:
      type: object
      required:
        - session_id
        - dataset_ref
        - environment_id
        - readiness_state
        - recommended_action
        - status
        - current_phase
        - created_at
        - updated_at
      properties:
        session_id:
          type: string
        dataset_ref:
          type: string
        dataset_id:
          type: integer
          nullable: true
        dashboard_id:
          type: integer
          nullable: true
        environment_id:
          type: string
        readiness_state:
          $ref: '#/components/schemas/ReadinessState'
        recommended_action:
          $ref: '#/components/schemas/RecommendedAction'
        status:
          $ref: '#/components/schemas/SessionStatus'
        current_phase:
          $ref: '#/components/schemas/SessionPhase'
        active_task_id:
          type: string
          nullable: true
        created_at:
          type: string
          format: date-time
        updated_at:
          type: string
          format: date-time
        last_activity_at:
          type: string
          format: date-time
          nullable: true
    SessionCollection:
      type: object
      required: [items, total, page, page_size, has_next]
      properties:
        items:
          type: array
          items:
            $ref: '#/components/schemas/SessionSummary'
        total:
          type: integer
        page:
          type: integer
        page_size:
          type: integer
        has_next:
          type: boolean

    # ---- Dataset profile ---------------------------------------------------
    BusinessSummarySource:
      type: string
      enum: [confirmed, imported, inferred, ai_draft, manual_override]
    ConfidenceState:
      type: string
      enum: [confirmed, mostly_confirmed, mixed, low_confidence, unresolved]
    DatasetProfile:
      type: object
      required:
        - profile_id
        - session_id
        - dataset_name
        - business_summary
        - business_summary_source
        - is_sqllab_view
        - confidence_state
        - has_blocking_findings
        - has_warning_findings
        - manual_summary_locked
      properties:
        profile_id:
          type: string
        session_id:
          type: string
        dataset_name:
          type: string
        schema_name:
          type: string
          nullable: true
        database_name:
          type: string
          nullable: true
        business_summary:
          type: string
        business_summary_source:
          $ref: '#/components/schemas/BusinessSummarySource'
        description:
          type: string
          nullable: true
        dataset_type:
          type: string
          enum: [table, virtual, sqllab_view, unknown]
          nullable: true
        is_sqllab_view:
          type: boolean
        completeness_score:
          type: number
          nullable: true
        confidence_state:
          $ref: '#/components/schemas/ConfidenceState'
        has_blocking_findings:
          type: boolean
        has_warning_findings:
          type: boolean
        manual_summary_locked:
          type: boolean
        created_at:
          type: string
          format: date-time
          nullable: true
        updated_at:
          type: string
          format: date-time
          nullable: true

    # ---- Validation findings -----------------------------------------------
    FindingArea:
      type: string
      enum:
        - source_intake
        - dataset_profile
        - semantic_enrichment
        - clarification
        - filter_recovery
        - template_mapping
        - compiled_preview
        - launch
        - audit
    FindingSeverity:
      type: string
      enum: [blocking, warning, informational]
    ResolutionState:
      type: string
      enum: [open, resolved, approved, skipped, deferred, expert_review]
    ValidationFinding:
      type: object
      required:
        - finding_id
        - session_id
        - area
        - severity
        - code
        - title
        - message
        - resolution_state
      properties:
        finding_id:
          type: string
        session_id:
          type: string
        area:
          $ref: '#/components/schemas/FindingArea'
        severity:
          $ref: '#/components/schemas/FindingSeverity'
        code:
          type: string
        title:
          type: string
        message:
          type: string
        resolution_state:
          $ref: '#/components/schemas/ResolutionState'
        resolution_note:
          type: string
          nullable: true
        caused_by_ref:
          type: string
          nullable: true
        created_at:
          type: string
          format: date-time
          nullable: true
        resolved_at:
          type: string
          format: date-time
          nullable: true

    # ---- Semantic sources and fields ---------------------------------------
    SemanticSourceType:
      type: string
      enum:
        - uploaded_file
        - connected_dictionary
        - reference_dataset
        - neighbor_dataset
        - ai_generated
    TrustLevel:
      type: string
      enum: [trusted, recommended, candidate, generated]
    SemanticSourceStatus:
      type: string
      enum: [available, selected, applied, rejected, partial, failed]
    SemanticSource:
      type: object
      required:
        - source_id
        - session_id
        - source_type
        - source_ref
        - display_name
        - trust_level
        - status
      properties:
        source_id:
          type: string
        session_id:
          type: string
        source_type:
          $ref: '#/components/schemas/SemanticSourceType'
        source_ref:
          type: string
        display_name:
          type: string
        trust_level:
          $ref: '#/components/schemas/TrustLevel'
        schema_overlap_score:
          type: number
          nullable: true
        status:
          $ref: '#/components/schemas/SemanticSourceStatus'
        created_at:
          type: string
          format: date-time
          nullable: true
    ApplySemanticSourceRequest:
      type: object
      required: [source_type, source_ref]
      properties:
        source_type:
          $ref: '#/components/schemas/SemanticSourceType'
        source_ref:
          type: string
    FieldKind:
      type: string
      enum: [column, metric, filter_dimension, parameter]
    FieldProvenance:
      type: string
      enum:
        - dictionary_exact
        - reference_imported
        - fuzzy_inferred
        - ai_generated
        - manual_override
        - unresolved
    ChangedByKind:
      type: string
      enum: [system, user, agent]
    SemanticCandidate:
      type: object
      required:
        - candidate_id
        - candidate_rank
        - match_type
        - confidence_score
        - status
      properties:
        candidate_id:
          type: string
        source_id:
          type: string
          nullable: true
        candidate_rank:
          type: integer
        match_type:
          type: string
          enum: [exact, reference, fuzzy, generated]
        confidence_score:
          type: number
        proposed_verbose_name:
          type: string
          nullable: true
        proposed_description:
          type: string
          nullable: true
        proposed_display_format:
          type: string
          nullable: true
        status:
          type: string
          enum: [proposed, accepted, rejected, superseded]
    SemanticFieldEntry:
      type: object
      required:
        - field_id
        - session_id
        - field_name
        - field_kind
        - provenance
        - is_locked
        - has_conflict
        - needs_review
        - last_changed_by
      properties:
        field_id:
          type: string
        session_id:
          type: string
        field_name:
          type: string
        field_kind:
          $ref: '#/components/schemas/FieldKind'
        verbose_name:
          type: string
          nullable: true
        description:
          type: string
          nullable: true
        display_format:
          type: string
          nullable: true
        provenance:
          $ref: '#/components/schemas/FieldProvenance'
        source_id:
          type: string
          nullable: true
        confidence_rank:
          type: integer
          nullable: true
        is_locked:
          type: boolean
        has_conflict:
          type: boolean
        needs_review:
          type: boolean
        last_changed_by:
          $ref: '#/components/schemas/ChangedByKind'
        candidates:
          type: array
          items:
            $ref: '#/components/schemas/SemanticCandidate'
    FieldSemanticUpdateRequest:
      type: object
      properties:
        candidate_id:
          type: string
          nullable: true
        verbose_name:
          type: string
          nullable: true
        description:
          type: string
          nullable: true
        display_format:
          type: string
          nullable: true
        lock_field:
          type: boolean
          default: false
        resolution_note:
          type: string
          nullable: true
      additionalProperties: false

    # ---- Imported filters and template variables ---------------------------
    FilterSource:
      type: string
      enum: [superset_native, superset_url, manual, inferred]
    FilterConfidenceState:
      type: string
      enum: [confirmed, imported, inferred, ai_draft, unresolved]
    FilterRecoveryStatus:
      type: string
      enum: [recovered, partial, missing, conflicted]
    ImportedFilter:
      type: object
      required:
        - filter_id
        - session_id
        - filter_name
        - raw_value
        - source
        - confidence_state
        - requires_confirmation
        - recovery_status
      properties:
        filter_id:
          type: string
        session_id:
          type: string
        filter_name:
          type: string
        display_name:
          type: string
          nullable: true
        # Empty schema: no type constraint is declared for the raw value.
        raw_value: {}
        normalized_value:
          nullable: true
        source:
          $ref: '#/components/schemas/FilterSource'
        confidence_state:
          $ref: '#/components/schemas/FilterConfidenceState'
        requires_confirmation:
          type: boolean
        recovery_status:
          $ref: '#/components/schemas/FilterRecoveryStatus'
        notes:
          type: string
          nullable: true
    VariableKind:
      type: string
      enum: [native_filter, parameter, derived, unknown]
    MappingStatus:
      type: string
      enum: [unmapped, proposed, approved, overridden, invalid]
    TemplateVariable:
      type: object
      required:
        - variable_id
        - session_id
        - variable_name
        - expression_source
        - variable_kind
        - is_required
        - mapping_status
      properties:
        variable_id:
          type: string
        session_id:
          type: string
        variable_name:
          type: string
        expression_source:
          type: string
        variable_kind:
          $ref: '#/components/schemas/VariableKind'
        is_required:
          type: boolean
        default_value:
          nullable: true
        mapping_status:
          $ref: '#/components/schemas/MappingStatus'

    # ---- Execution mappings ------------------------------------------------
    MappingMethod:
      type: string
      enum: [direct_match, heuristic_match, semantic_match, manual_override]
    MappingWarningLevel:
      type: string
      enum: [low, medium, high]
    ApprovalState:
      type: string
      enum: [pending, approved, rejected, not_required]
    ExecutionMapping:
      type: object
      required:
        - mapping_id
        - session_id
        - filter_id
        - variable_id
        - mapping_method
        - raw_input_value
        - requires_explicit_approval
        - approval_state
      properties:
        mapping_id:
          type: string
        session_id:
          type: string
        filter_id:
          type: string
        variable_id:
          type: string
        mapping_method:
          $ref: '#/components/schemas/MappingMethod'
        raw_input_value: {}
        effective_value:
          nullable: true
        transformation_note:
          type: string
          nullable: true
        # OpenAPI 3.0 ignores keywords placed beside $ref, so the nullable
        # flag is carried by an allOf wrapper instead of a $ref sibling.
        warning_level:
          nullable: true
          allOf:
            - $ref: '#/components/schemas/MappingWarningLevel'
        requires_explicit_approval:
          type: boolean
        approval_state:
          $ref: '#/components/schemas/ApprovalState'
        approved_by_user_id:
          type: string
          nullable: true
        approved_at:
          type: string
          format: date-time
          nullable: true
    MappingCollection:
      type: object
      required: [items]
      properties:
        items:
          type: array
          items:
            $ref: '#/components/schemas/ExecutionMapping'
    UpdateMappingRequest:
      type: object
      properties:
        effective_value:
          nullable: true
        mapping_method:
          $ref: '#/components/schemas/MappingMethod'
        transformation_note:
          type: string
          nullable: true
      additionalProperties: false
    ApproveMappingRequest:
      type: object
      properties:
        approval_note:
          type: string
          nullable: true

    # ---- Clarification -----------------------------------------------------
    ClarificationStatus:
      type: string
      enum: [pending, active, paused, completed, cancelled]
    QuestionState:
      type: string
      enum: [open, answered, skipped, expert_review, superseded]
    AnswerKind:
      type: string
      enum: [selected, custom, skipped, expert_review]
    ClarificationOption:
      type: object
      required: [option_id, label, value, is_recommended, display_order]
      properties:
        option_id:
          type: string
        label:
          type: string
        value:
          type: string
        is_recommended:
          type: boolean
        display_order:
          type: integer
    ClarificationQuestion:
      type: object
      required:
        - question_id
        - topic_ref
        - question_text
        - why_it_matters
        - priority
        - state
      properties:
        question_id:
          type: string
        topic_ref:
          type: string
        question_text:
          type: string
        why_it_matters:
          type: string
        current_guess:
          type: string
          nullable: true
        priority:
          type: integer
        state:
          $ref: '#/components/schemas/QuestionState'
        options:
          type: array
          items:
            $ref: '#/components/schemas/ClarificationOption'
    ClarificationSessionSummary:
      type: object
      required:
        - clarification_session_id
        - session_id
        - status
        - resolved_count
        - remaining_count
      properties:
        clarification_session_id:
          type: string
        session_id:
          type: string
        status:
          $ref: '#/components/schemas/ClarificationStatus'
        current_question_id:
          type: string
          nullable: true
        resolved_count:
          type: integer
        remaining_count:
          type: integer
        summary_delta:
          type: string
          nullable: true
    ClarificationState:
      type: object
      required: [clarification_session]
      properties:
        clarification_session:
          $ref: '#/components/schemas/ClarificationSessionSummary'
        # allOf wrapper: siblings of a bare $ref are ignored in OpenAPI 3.0.
        current_question:
          nullable: true
          allOf:
            - $ref: '#/components/schemas/ClarificationQuestion'
    ClarificationAnswerRequest:
      type: object
      required: [question_id, answer_kind]
      properties:
        question_id:
          type: string
        answer_kind:
          $ref: '#/components/schemas/AnswerKind'
        answer_value:
          type: string
          nullable: true
    ClarificationAnswerResult:
      type: object
      required: [clarification_state, session]
      properties:
        clarification_state:
          $ref: '#/components/schemas/ClarificationState'
        session:
          $ref: '#/components/schemas/SessionSummary'
        changed_findings:
          type: array
          items:
            $ref: '#/components/schemas/ValidationFinding'

    # ---- Preview and launch ------------------------------------------------
    PreviewStatus:
      type: string
      enum: [pending, ready, failed, stale]
    CompiledPreview:
      type: object
      required:
        - preview_id
        - session_id
        - preview_status
        - preview_fingerprint
        - compiled_by
      properties:
        preview_id:
          type: string
        session_id:
          type: string
        preview_status:
          $ref: '#/components/schemas/PreviewStatus'
        compiled_sql:
          type: string
          nullable: true
        preview_fingerprint:
          type: string
        compiled_by:
          type: string
          enum: [superset]
        error_code:
          type: string
          nullable: true
        error_details:
          type: string
          nullable: true
        compiled_at:
          type: string
          format: date-time
          nullable: true
        created_at:
          type: string
          format: date-time
          nullable: true
    PreviewEnqueueResult:
      type: object
      required: [session_id, preview_status]
      properties:
        session_id:
          type: string
        preview_status:
          $ref: '#/components/schemas/PreviewStatus'
        task_id:
          type: string
          nullable: true
    LaunchStatus:
      type: string
      enum: [started, success, failed]
    DatasetRunContextSummary:
      type: object
      required:
        - run_context_id
        - session_id
        - dataset_ref
        - environment_id
        - preview_id
        - sql_lab_session_ref
        - launch_status
      properties:
        run_context_id:
          type: string
        session_id:
          type: string
        dataset_ref:
          type: string
        environment_id:
          type: string
        preview_id:
          type: string
        sql_lab_session_ref:
          type: string
        effective_filters:
          type: array
          items:
            $ref: '#/components/schemas/ImportedFilter'
        template_params: {}
        approved_mapping_ids:
          type: array
          items:
            type: string
        semantic_decision_refs:
          type: array
          items:
            type: string
        open_warning_refs:
          type: array
          items:
            type: string
        launch_status:
          $ref: '#/components/schemas/LaunchStatus'
        launch_error:
          type: string
          nullable: true
        created_at:
          type: string
          format: date-time
          nullable: true
    LaunchResult:
      type: object
      required: [run_context, redirect_url]
      properties:
        run_context:
          $ref: '#/components/schemas/DatasetRunContextSummary'
        redirect_url:
          type: string

    # ---- Exports -----------------------------------------------------------
    ArtifactType:
      type: string
      enum: [documentation, validation_report, run_summary]
    ArtifactFormat:
      type: string
      enum: [json, markdown, csv, pdf]
    ExportArtifact:
      type: object
      required:
        - artifact_id
        - session_id
        - artifact_type
        - format
        - storage_ref
      properties:
        artifact_id:
          type: string
        session_id:
          type: string
        artifact_type:
          $ref: '#/components/schemas/ArtifactType'
        format:
          $ref: '#/components/schemas/ArtifactFormat'
        storage_ref:
          type: string
        created_by_user_id:
          type: string
          nullable: true
        created_at:
          type: string
          format: date-time
          nullable: true

    # ---- Aggregate session view --------------------------------------------
    SessionDetail:
      type: object
      required:
        - session
        - profile
        - findings
        - semantic_sources
        - semantic_fields
        - filters
        - template_variables
        - mappings
      properties:
        session:
          $ref: '#/components/schemas/SessionSummary'
        profile:
          $ref: '#/components/schemas/DatasetProfile'
        findings:
          type: array
          items:
            $ref: '#/components/schemas/ValidationFinding'
        semantic_sources:
          type: array
          items:
            $ref: '#/components/schemas/SemanticSource'
        semantic_fields:
          type: array
          items:
            $ref: '#/components/schemas/SemanticFieldEntry'
        filters:
          type: array
          items:
            $ref: '#/components/schemas/ImportedFilter'
        template_variables:
          type: array
          items:
            $ref: '#/components/schemas/TemplateVariable'
        mappings:
          type: array
          items:
            $ref: '#/components/schemas/ExecutionMapping'
        # The three optional sections below are nullable. The flag sits on an
        # allOf wrapper because OpenAPI 3.0 ignores siblings of a bare $ref.
        clarification:
          nullable: true
          allOf:
            - $ref: '#/components/schemas/ClarificationState'
        preview:
          nullable: true
          allOf:
            - $ref: '#/components/schemas/CompiledPreview'
        latest_run_context:
          nullable: true
          allOf:
            - $ref: '#/components/schemas/DatasetRunContextSummary'

    # ---- Errors ------------------------------------------------------------
    ErrorResponse:
      type: object
      required: [error_code, message]
      properties:
        error_code:
          type: string
        message:
          type: string
        details:
          nullable: true
    ValidationErrorItem:
      type: object
      required: [field, message]
      properties:
        field:
          type: string
        message:
          type: string
    ValidationErrorResponse:
      type: object
      required: [error_code, message, errors]
      properties:
        error_code:
          type: string
          enum: [validation_error]
        message:
          type: string
        errors:
          type: array
          items:
            $ref: '#/components/schemas/ValidationErrorItem'