semantic cleanup

This commit is contained in:
2026-05-08 10:07:05 +03:00
parent 505864438e
commit d8df1fff59
90 changed files with 148541 additions and 2251 deletions

View File

@@ -0,0 +1,115 @@
# [DEF:AssistantApi:Module]
# @COMPLEXITY: 5
# @SEMANTICS: api, assistant, chat, command, confirmation
# @PURPOSE: API routes for LLM assistant command parsing and safe execution orchestration.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [TaskManager]
# @RELATION: DEPENDS_ON -> [AssistantMessageRecord]
# @RELATION: DEPENDS_ON -> [AssistantConfirmationRecord]
# @RELATION: DEPENDS_ON -> [AssistantAuditRecord]
# @INVARIANT: Risky operations are never executed without valid confirmation token.
# Re-export public API for backward compatibility.
from ._routes import router, send_message, confirm_operation, cancel_operation
from ._routes import list_conversations, delete_conversation, get_history, get_assistant_audit
from ._schemas import (
AssistantMessageRequest,
AssistantMessageResponse,
AssistantAction,
ConfirmationRecord,
CONVERSATIONS,
USER_ACTIVE_CONVERSATION,
CONFIRMATIONS,
ASSISTANT_AUDIT,
INTENT_PERMISSION_CHECKS,
_SAFE_OPS,
_DATASET_REVIEW_OPS,
)
from ._command_parser import _parse_command
from ._llm_planner import _plan_intent_with_llm, _build_tool_catalog, _authorize_intent
from ._dispatch import _dispatch_intent, _async_confirmation_summary, _clarification_text_for_intent
from ._dataset_review import (
_load_dataset_review_context,
_plan_dataset_review_intent,
_dispatch_dataset_review_intent,
)
from ._history import (
_append_history,
_persist_message,
_audit,
_persist_audit,
_persist_confirmation,
_update_confirmation_state,
_load_confirmation_from_db,
_resolve_or_create_conversation,
_cleanup_history_ttl,
_is_conversation_archived,
_coerce_query_bool,
)
from ._resolvers import (
_extract_id,
_resolve_env_id,
_is_production_env,
_resolve_provider_id,
_get_default_environment_id,
_resolve_dashboard_id_by_ref,
_resolve_dashboard_id_entity,
_get_environment_name_by_id,
_extract_result_deep_links,
_build_task_observability_summary,
)
__all__ = [
"router",
"send_message",
"confirm_operation",
"cancel_operation",
"list_conversations",
"delete_conversation",
"get_history",
"get_assistant_audit",
"AssistantMessageRequest",
"AssistantMessageResponse",
"AssistantAction",
"ConfirmationRecord",
"CONVERSATIONS",
"USER_ACTIVE_CONVERSATION",
"CONFIRMATIONS",
"ASSISTANT_AUDIT",
"INTENT_PERMISSION_CHECKS",
"_SAFE_OPS",
"_DATASET_REVIEW_OPS",
"_parse_command",
"_plan_intent_with_llm",
"_build_tool_catalog",
"_authorize_intent",
"_dispatch_intent",
"_async_confirmation_summary",
"_clarification_text_for_intent",
"_load_dataset_review_context",
"_plan_dataset_review_intent",
"_dispatch_dataset_review_intent",
"_append_history",
"_persist_message",
"_audit",
"_persist_audit",
"_persist_confirmation",
"_update_confirmation_state",
"_load_confirmation_from_db",
"_resolve_or_create_conversation",
"_cleanup_history_ttl",
"_is_conversation_archived",
"_coerce_query_bool",
"_extract_id",
"_resolve_env_id",
"_is_production_env",
"_resolve_provider_id",
"_get_default_environment_id",
"_resolve_dashboard_id_by_ref",
"_resolve_dashboard_id_entity",
"_get_environment_name_by_id",
"_extract_result_deep_links",
"_build_task_observability_summary",
]
# [/DEF:AssistantApi:Module]

View File

@@ -0,0 +1,91 @@
# [DEF:AssistantCommandParser:Module]
# @COMPLEXITY: 4
# @PURPOSE: Deterministic RU/EN command text parser that converts user messages into intent payloads.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [AssistantResolvers]
# @INVARIANT: Every return path includes domain, operation, entities, confidence, risk_level, requires_confirmation.
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional
from src.core.logger import belief_scope, logger
from src.core.config_manager import ConfigManager
from ._resolvers import _extract_id, _is_production_env
# [DEF:_parse_command:Function]
# @COMPLEXITY: 4
# @PURPOSE: Deterministically parse RU/EN command text into intent payload.
# @DATA_CONTRACT: Input[message:str, config_manager:ConfigManager] -> Output[Dict[str,Any]{domain,operation,entities,confidence,risk_level,requires_confirmation}]
# @RELATION: DEPENDS_ON -> [_extract_id]
# @RELATION: DEPENDS_ON -> [_is_production_env]
# @SIDE_EFFECT: None (pure parsing logic).
# @PRE: message contains raw user text and config manager resolves environments.
# @POST: Returns intent dict with domain/operation/entities/confidence/risk fields.
# @INVARIANT: every return path includes domain, operation, entities, confidence, risk_level, requires_confirmation.
def _parse_command(message: str, config_manager: ConfigManager) -> Dict[str, Any]:
    """Deterministically parse RU/EN command text into an intent payload.

    Branches are evaluated in priority order against the lowercased text and
    the first keyword match wins; e.g. 'проверь задачу' is captured by the
    status branch before the broader 'провер' validation branch further down.

    Args:
        message: Raw user text (Russian or English).
        config_manager: Used only to classify a target environment as
            production, which escalates risk_level/requires_confirmation.

    Returns:
        Intent dict; every return path includes domain, operation, entities,
        confidence, risk_level and requires_confirmation.
    """
    with belief_scope('_parse_command'):
        logger.reason('Belief protocol reasoning checkpoint for _parse_command')
        text = message.strip()
        lower = text.lower()
        # Capability/help questions are answered directly without entities.
        if any((phrase in lower for phrase in ['что ты умеешь', 'что умеешь', 'что ты можешь', 'help', 'помощь', 'доступные команды', 'какие команды'])):
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'assistant', 'operation': 'show_capabilities', 'entities': {}, 'confidence': 0.98, 'risk_level': 'safe', 'requires_confirmation': False}
        # Extract ids up front; later branches reuse whichever they need.
        dashboard_id = _extract_id(lower, ['(?:дашборд\\w*|dashboard)\\s*(?:id\\s*)?(\\d+)'])
        # Slug-like dashboard reference; may duplicate dashboard_id when numeric.
        dashboard_ref = _extract_id(lower, ['(?:дашборд\\w*|dashboard)\\s*(?:id\\s*)?([a-zа-я0-9._-]+)'])
        dataset_id = _extract_id(lower, ['(?:датасет\\w*|dataset)\\s*(?:id\\s*)?(\\d+)'])
        # Matches 'task-...' style ids or UUID-like hex strings.
        task_id = _extract_id(lower, ['(task[-_a-z0-9]{1,}|[0-9a-f]{8}-[0-9a-f-]{27,})'])
        # Task status lookup; confidence drops when no task id was found.
        if any((k in lower for k in ['статус', 'status', 'state', 'проверь задачу'])):
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'status', 'operation': 'get_task_status', 'entities': {'task_id': task_id}, 'confidence': 0.92 if task_id else 0.66, 'risk_level': 'safe', 'requires_confirmation': False}
        # Git branch creation: needs both a branch keyword and a create verb.
        if any((k in lower for k in ['ветк', 'branch'])) and any((k in lower for k in ['созд', 'сделай', 'create'])):
            branch = _extract_id(lower, ['(?:ветк\\w*|branch)\\s+([a-z0-9._/-]+)'])
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'git', 'operation': 'create_branch', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'branch_name': branch}, 'confidence': 0.95 if branch and dashboard_id else 0.7, 'risk_level': 'guarded', 'requires_confirmation': False}
        # Commit: prefer a quoted commit message from the original-cased text.
        if any((k in lower for k in ['коммит', 'commit'])):
            quoted = re.search('"([^"]{3,120})"', text)
            message_text = quoted.group(1) if quoted else 'assistant: update dashboard changes'
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'git', 'operation': 'commit_changes', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'message': message_text}, 'confidence': 0.9 if dashboard_id else 0.7, 'risk_level': 'guarded', 'requires_confirmation': False}
        # Deploy: production targets are dangerous and require confirmation.
        if any((k in lower for k in ['деплой', 'deploy', 'разверн'])):
            env_match = _extract_id(lower, ['(?:в|to)\\s+([a-z0-9_-]+)'])
            is_dangerous = _is_production_env(env_match, config_manager)
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'git', 'operation': 'deploy_dashboard', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'environment': env_match}, 'confidence': 0.92 if dashboard_id and env_match else 0.7, 'risk_level': 'dangerous' if is_dangerous else 'guarded', 'requires_confirmation': is_dangerous}
        # Migration: parses CLI-style flags; confirmation is required for a
        # production target or when a dry run was explicitly requested.
        if any((k in lower for k in ['миграц', 'migration', 'migrate'])):
            src = _extract_id(lower, ['(?:с|from)\\s+([a-z0-9_-]+)'])
            tgt = _extract_id(lower, ['(?:на|to)\\s+([a-z0-9_-]+)'])
            dry_run = '--dry-run' in lower or 'dry run' in lower
            replace_db_config = '--replace-db-config' in lower
            # Cross-filter fixing defaults to on unless explicitly disabled.
            fix_cross_filters = '--no-fix-cross-filters' not in lower
            is_dangerous = _is_production_env(tgt, config_manager)
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'migration', 'operation': 'execute_migration', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'source_env': src, 'target_env': tgt, 'dry_run': dry_run, 'replace_db_config': replace_db_config, 'fix_cross_filters': fix_cross_filters}, 'confidence': 0.95 if dashboard_id and src and tgt else 0.72, 'risk_level': 'dangerous' if is_dangerous else 'guarded', 'requires_confirmation': is_dangerous or dry_run}
        # Backup of an environment, optionally scoped to one dashboard.
        if any((k in lower for k in ['бэкап', 'backup', 'резерв'])):
            env_match = _extract_id(lower, ['(?:в|for|из|from)\\s+([a-z0-9_-]+)'])
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'backup', 'operation': 'run_backup', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'environment': env_match}, 'confidence': 0.9 if env_match else 0.7, 'risk_level': 'guarded', 'requires_confirmation': False}
        # Health summary for an environment (read-only).
        if any((k in lower for k in ['здоровье', 'health', 'ошибки', 'failing', 'проблемы'])):
            env_match = _extract_id(lower, ['(?:в|for|env|окружени[ея])\\s+([a-z0-9_-]+)'])
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'health', 'operation': 'get_health_summary', 'entities': {'environment': env_match}, 'confidence': 0.9, 'risk_level': 'safe', 'requires_confirmation': False}
        # LLM validation; 'провер' only reaches here when the status branch
        # above did not already claim the message.
        if any((k in lower for k in ['валидац', 'validate', 'провер'])):
            env_match = _extract_id(lower, ['(?:в|for|env|окружени[ея])\\s+([a-z0-9_-]+)'])
            provider_match = _extract_id(lower, ['(?:provider|провайдер)\\s+([a-z0-9_-]+)'])
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'llm', 'operation': 'run_llm_validation', 'entities': {'dashboard_id': int(dashboard_id) if dashboard_id else None, 'dashboard_ref': dashboard_ref if dashboard_ref and (not dashboard_ref.isdigit()) else None, 'environment': env_match, 'provider': provider_match}, 'confidence': 0.88 if dashboard_id else 0.64, 'risk_level': 'guarded', 'requires_confirmation': False}
        # LLM documentation generation for a dataset.
        if any((k in lower for k in ['документац', 'documentation', 'generate docs', 'сгенерируй док'])):
            env_match = _extract_id(lower, ['(?:в|for|env|окружени[ея])\\s+([a-z0-9_-]+)'])
            provider_match = _extract_id(lower, ['(?:provider|провайдер)\\s+([a-z0-9_-]+)'])
            logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
            return {'domain': 'llm', 'operation': 'run_llm_documentation', 'entities': {'dataset_id': int(dataset_id) if dataset_id else None, 'environment': env_match, 'provider': provider_match}, 'confidence': 0.88 if dataset_id else 0.64, 'risk_level': 'guarded', 'requires_confirmation': False}
        # Fallback: low-confidence clarify intent.
        logger.reflect('Belief protocol postcondition checkpoint for _parse_command')
        return {'domain': 'unknown', 'operation': 'clarify', 'entities': {}, 'confidence': 0.3, 'risk_level': 'safe', 'requires_confirmation': False}
# [/DEF:_parse_command:Function]
# [/DEF:AssistantCommandParser:Module]

View File

@@ -0,0 +1,552 @@
# [DEF:AssistantDatasetReview:Module]
# @COMPLEXITY: 4
# @PURPOSE: Dataset review context loading, intent planning, and dispatch for the assistant API.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [DatasetReviewOrchestrator]
# @RELATION: DEPENDS_ON -> [AssistantSchemas]
# @INVARIANT: Dataset review operations are always scoped to the owner's session.
from __future__ import annotations
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from fastapi import HTTPException, status
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.core.config_manager import ConfigManager
from src.core.utils.superset_context_extractor import (
sanitize_imported_filter_for_assistant,
)
from src.models.dataset_review import (
ApprovalState,
DatasetReviewSession,
ReadinessState,
RecommendedAction,
)
from src.services.dataset_review.orchestrator import (
DatasetReviewOrchestrator,
PreparePreviewCommand,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
DatasetReviewSessionVersionConflictError,
)
from src.schemas.auth import User
from src.api.routes.dataset_review import FieldSemanticUpdateRequest, _update_semantic_field_state
from ._schemas import (
AssistantAction,
)
# [DEF:_serialize_dataset_review_context:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build assistant-safe dataset-review context snapshot with masked imported-filter payloads for session-scoped assistant routing.
# @RELATION: DEPENDS_ON -> [DatasetReviewSession]
# @PRE: session_id is a valid active review session identifier.
# @POST: Returns a serializable dictionary containing the complete review context.
# @SIDE_EFFECT: Reads session data from the database.
def _serialize_dataset_review_context(session: DatasetReviewSession) -> Dict[str, Any]:
    """Build an assistant-safe snapshot dictionary of a review session.

    Imported-filter payloads are routed through
    sanitize_imported_filter_for_assistant so masked raw values stay masked
    in assistant-visible context.
    """
    with belief_scope('_serialize_dataset_review_context'):
        logger.reason('Belief protocol reasoning checkpoint for _serialize_dataset_review_context')
        all_previews = getattr(session, 'previews', []) or []
        newest_preview = all_previews[-1] if all_previews else None
        findings = [
            {
                'finding_id': entry.finding_id,
                'code': entry.code,
                'severity': entry.severity.value,
                'message': entry.message,
                'resolution_state': entry.resolution_state.value,
            }
            for entry in getattr(session, 'findings', [])
        ]
        imported_filters = [
            sanitize_imported_filter_for_assistant({
                'filter_id': entry.filter_id,
                'filter_name': entry.filter_name,
                'display_name': entry.display_name,
                'raw_value': entry.raw_value,
                'raw_value_masked': bool(getattr(entry, 'raw_value_masked', False)),
                'normalized_value': entry.normalized_value,
                'source': getattr(entry.source, 'value', entry.source),
                'confidence_state': getattr(entry.confidence_state, 'value', entry.confidence_state),
                'requires_confirmation': bool(entry.requires_confirmation),
                'recovery_status': getattr(entry.recovery_status, 'value', entry.recovery_status),
                'notes': entry.notes,
            })
            for entry in getattr(session, 'imported_filters', [])
        ]
        mappings = [
            {
                'mapping_id': entry.mapping_id,
                'filter_id': entry.filter_id,
                'variable_id': entry.variable_id,
                'mapping_method': getattr(entry.mapping_method, 'value', entry.mapping_method),
                'effective_value': entry.effective_value,
                'approval_state': getattr(entry.approval_state, 'value', entry.approval_state),
                'requires_explicit_approval': bool(entry.requires_explicit_approval),
            }
            for entry in getattr(session, 'execution_mappings', [])
        ]
        semantic_fields = [
            {
                'field_id': entry.field_id,
                'field_name': entry.field_name,
                'verbose_name': entry.verbose_name,
                'description': entry.description,
                'display_format': entry.display_format,
                'provenance': getattr(entry.provenance, 'value', entry.provenance),
                'is_locked': bool(entry.is_locked),
                'needs_review': bool(entry.needs_review),
                'candidates': [
                    {
                        'candidate_id': option.candidate_id,
                        'semantic_type': getattr(option.semantic_type, 'value', option.semantic_type),
                        'confidence': float(option.confidence or 0),
                        'reasoning': option.reasoning or '',
                    }
                    for option in (getattr(entry, 'candidates', None) or [])
                ],
            }
            for entry in getattr(session, 'semantic_fields', [])
        ]
        preview_snapshot = None
        if newest_preview:
            preview_snapshot = {
                'preview_status': getattr(newest_preview, 'preview_status', None),
                'compiled_sql': getattr(newest_preview, 'compiled_sql', None),
            }
        logger.reflect('Belief protocol postcondition checkpoint for _serialize_dataset_review_context')
        return {
            'session_id': session.session_id,
            'version': int(getattr(session, 'version', 0) or 0),
            'dataset_ref': session.dataset_ref,
            'dataset_id': session.dataset_id,
            'environment_id': session.environment_id,
            'readiness_state': session.readiness_state.value,
            'recommended_action': session.recommended_action.value,
            'status': session.status.value,
            'current_phase': session.current_phase.value,
            'findings': findings,
            'imported_filters': imported_filters,
            'mappings': mappings,
            'semantic_fields': semantic_fields,
            'preview': preview_snapshot,
        }
# [/DEF:_serialize_dataset_review_context:Function]
# [DEF:_load_dataset_review_context:Function]
# @COMPLEXITY: 4
# @PURPOSE: Load owner-scoped dataset-review context for assistant planning and grounded response generation.
# @RELATION: DEPENDS_ON -> [DatasetReviewSessionRepository]
# @PRE: session_id is a valid active review session identifier.
# @POST: Returns a loaded context object with session data and findings.
# @SIDE_EFFECT: Reads session data from the database.
def _load_dataset_review_context(dataset_review_session_id: Optional[str], current_user: User, db: Session) -> Optional[Dict[str, Any]]:
    """Load the owner-scoped review context, or None when no session id given.

    Raises HTTPException(404) when the session is absent or owned by another
    user, so ownership mismatches are indistinguishable from absence.
    """
    with belief_scope('_load_dataset_review_context'):
        if not dataset_review_session_id:
            return None
        logger.reason('Belief protocol reasoning checkpoint for _load_dataset_review_context')
        repo = DatasetReviewSessionRepository(db)
        review_session = repo.load_session_detail(dataset_review_session_id, current_user.id)
        if review_session is None or review_session.user_id != current_user.id:
            raise HTTPException(status_code=404, detail='Dataset review session not found')
        logger.reflect('Belief protocol postcondition checkpoint for _load_dataset_review_context')
        return _serialize_dataset_review_context(review_session)
# [/DEF:_load_dataset_review_context:Function]
# [DEF:_extract_dataset_review_target:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract structured dataset-review focus target hints embedded in assistant prompts.
def _extract_dataset_review_target(message: str) -> Tuple[Optional[str], Optional[str]]:
match = re.search(
r"(?:target|focus)\s*[:=]\s*(field|mapping|finding|filter)[:=]([A-Za-z0-9._-]+)",
str(message or ""),
re.IGNORECASE,
)
if not match:
return None, None
return match.group(1).lower(), match.group(2)
# [/DEF:_extract_dataset_review_target:Function]
# [DEF:_match_dataset_review_field:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve one semantic field from assistant-visible context by id or user-visible label.
def _match_dataset_review_field(
    dataset_context: Dict[str, Any],
    message: str,
) -> Optional[Dict[str, Any]]:
    """Resolve one semantic field from assistant-visible context.

    An explicit 'target: field:<id>' hint wins and short-circuits (no name
    fallback is attempted); otherwise the lowercased message is scanned for
    each field's id, field name, or verbose name as a substring.
    """
    hint_kind, hint_id = _extract_dataset_review_target(message)
    known_fields = dataset_context.get("semantic_fields", []) or []
    if hint_kind == "field" and hint_id:
        wanted = str(hint_id)
        for entry in known_fields:
            if str(entry.get("field_id")) == wanted:
                return entry
        return None
    haystack = str(message or "").lower()
    for entry in known_fields:
        # NOTE(review): an empty field_id matches any message ('' is a
        # substring of everything) — behavior preserved from the original;
        # confirm field_id is always populated upstream.
        if str(entry.get("field_id", "")).lower() in haystack:
            return entry
        name_label = str(entry.get("field_name", "")).lower()
        if name_label and name_label in haystack:
            return entry
        verbose_label = str(entry.get("verbose_name", "")).lower()
        if verbose_label and verbose_label in haystack:
            return entry
    return None
# [/DEF:_match_dataset_review_field:Function]
# [DEF:_extract_quoted_segment:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract one quoted assistant command segment after a label token.
def _extract_quoted_segment(message: str, label: str) -> Optional[str]:
pattern = rf"{label}\s*[=:]?\s*[\"']([^\"']+)[\"']"
match = re.search(pattern, str(message or ""), re.IGNORECASE)
return match.group(1).strip() if match else None
# [/DEF:_extract_quoted_segment:Function]
# [DEF:_dataset_review_conflict_http_exception:Function]
# @COMPLEXITY: 2
# @PURPOSE: Convert dataset-review optimistic-lock conflicts into shared 409 assistant semantics.
def _dataset_review_conflict_http_exception(
    exc: DatasetReviewSessionVersionConflictError,
) -> HTTPException:
    """Wrap an optimistic-lock conflict in the shared 409 assistant payload."""
    conflict_payload = {
        "error_code": "session_version_conflict",
        "message": str(exc),
        "session_id": exc.session_id,
        "expected_version": exc.expected_version,
        "actual_version": exc.actual_version,
    }
    return HTTPException(
        status_code=status.HTTP_409_CONFLICT,
        detail=conflict_payload,
    )
# [/DEF:_dataset_review_conflict_http_exception:Function]
# [DEF:_plan_dataset_review_intent:Function]
# @COMPLEXITY: 3
# @PURPOSE: Parse session-scoped dataset-review assistant commands before falling back to generic assistant tool routing.
# @RELATION: CALLS -> DatasetReviewOrchestrator
def _plan_dataset_review_intent(
    message: str,
    dataset_context: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
    """Parse a session-scoped dataset-review command into an intent payload.

    Branches are checked in priority order: mapping approval, SQL preview
    generation, semantic-field updates, then a read-only context summary.
    Returns None when the message matches no dataset-review command so the
    caller can fall back to generic assistant tool routing.

    Args:
        message: Raw user text; case-insensitive RU/EN keyword matching.
        dataset_context: Assistant-safe snapshot of the review session
            (as produced by the context loader), including session_id,
            version, mappings, findings, semantic_fields and preview.

    Returns:
        Intent dict with domain/operation/entities/confidence/risk_level/
        requires_confirmation, or None.
    """
    lower = message.strip().lower()
    session_id = dataset_context["session_id"]
    session_version = int(dataset_context.get("version", 0) or 0)
    # Optional explicit focus hint like 'target: field:<id>' in the message.
    target_kind, target_id = _extract_dataset_review_target(message)
    # Branch 1: bulk approval of all still-pending warning-sensitive mappings.
    if any(
        token in lower
        for token in [
            "approve mappings",
            "approve mapping",
            "подтверди мапп",
            "одобри мапп",
        ]
    ):
        # Only mappings that require explicit approval and are not yet approved.
        pending_mapping_ids = [
            item["mapping_id"]
            for item in dataset_context.get("mappings", [])
            if item.get("requires_explicit_approval")
            and item.get("approval_state") != ApprovalState.APPROVED.value
        ]
        return {
            "domain": "dataset_review",
            "operation": "dataset_review_approve_mappings",
            "entities": {
                "dataset_review_session_id": session_id,
                "session_version": session_version,
                "mapping_ids": pending_mapping_ids,
            },
            "confidence": 0.95,
            "risk_level": "guarded",
            "requires_confirmation": True,
        }
    # Branch 2: trigger SQL preview generation (state-changing, confirmed).
    if any(
        token in lower
        for token in [
            "generate sql preview",
            "generate preview",
            "сгенерируй превью",
            "собери превью",
        ]
    ):
        return {
            "domain": "dataset_review",
            "operation": "dataset_review_generate_sql_preview",
            "entities": {
                "dataset_review_session_id": session_id,
                "session_version": session_version,
            },
            "confidence": 0.94,
            "risk_level": "guarded",
            "requires_confirmation": True,
        }
    # Branch 3: semantic-field update; needs a resolvable field and at least
    # one concrete change (candidate acceptance or a quoted value).
    if any(
        token in lower
        for token in [
            "set field semantics",
            "apply field semantics",
            "semantic override",
            "update semantic field",
            "установи семантик",
            "обнови семантик",
        ]
    ):
        field = _match_dataset_review_field(dataset_context, message)
        if field is None:
            return None
        candidate_id = None
        # 'accept candidate' shortcuts to the first (top-ranked) candidate.
        if any(
            token in lower for token in ["accept candidate", "apply candidate", "прими кандидат"]
        ):
            candidates = field.get("candidates") or []
            if candidates:
                candidate_id = candidates[0].get("candidate_id")
        # Quoted payloads: verbose_name / description / display_format.
        verbose_name = _extract_quoted_segment(
            message, "verbose_name|verbose name|label"
        )
        description = _extract_quoted_segment(message, "description|desc")
        display_format = _extract_quoted_segment(
            message, "display_format|display format|format"
        )
        # Note the leading space in " lock" — avoids matching words like 'block'.
        lock_field = any(
            token in lower for token in [" lock", "lock it", "зафикс", "закреп"]
        )
        # No actionable change requested -> not a semantics command after all.
        if not any([candidate_id, verbose_name, description, display_format]):
            return None
        return {
            "domain": "dataset_review",
            "operation": "dataset_review_set_field_semantics",
            "entities": {
                "dataset_review_session_id": session_id,
                "session_version": session_version,
                "field_id": field.get("field_id") or target_id,
                "candidate_id": candidate_id,
                "verbose_name": verbose_name,
                "description": description,
                "display_format": display_format,
                "lock_field": lock_field,
            },
            "confidence": 0.9,
            "risk_level": "guarded",
            "requires_confirmation": True,
        }
    # Branch 4: read-only context question -> safe summary, no confirmation.
    if any(
        token in lower
        for token in [
            "filters",
            "фильтр",
            "mapping",
            "маппинг",
            "preview",
            "превью",
            "finding",
            "ошиб",
        ]
    ):
        findings_count = len(dataset_context.get("findings", []))
        mappings_count = len(dataset_context.get("mappings", []))
        filters_count = len(dataset_context.get("imported_filters", []))
        preview = dataset_context.get("preview") or {}
        preview_status = preview.get("preview_status") or "missing"
        # Filters here are already sanitized/masked by the context serializer.
        masked_filters = dataset_context.get("imported_filters", [])
        return {
            "domain": "dataset_review",
            "operation": "dataset_review_answer_context",
            "entities": {
                "dataset_review_session_id": session_id,
                "summary": (
                    f"Session {session_id}: readiness={dataset_context['readiness_state']}, "
                    f"recommended_action={dataset_context['recommended_action']}, "
                    f"filters={filters_count}, mappings={mappings_count}, findings={findings_count}, "
                    f"preview_status={preview_status}, imported_filters={masked_filters}"
                ),
            },
            "confidence": 0.8,
            "risk_level": "safe",
            "requires_confirmation": False,
        }
    # Not a dataset-review command.
    return None
# [/DEF:_plan_dataset_review_intent:Function]
# [DEF:_dispatch_dataset_review_intent:Function]
# @COMPLEXITY: 4
# @PURPOSE: Route confirmed dataset-review assistant intents through existing backend dataset-review APIs and orchestration boundaries.
# @RELATION: CALLS -> DatasetReviewOrchestrator
# @PRE: context contains valid session data and user intent.
# @POST: Returns a structured response with planned actions and confirmations.
# @SIDE_EFFECT: May update session state and enqueue tasks.
async def _dispatch_dataset_review_intent(
    intent: Dict[str, Any],
    current_user: User,
    config_manager: ConfigManager,
    db: Session,
) -> Tuple[str, Optional[str], List[AssistantAction]]:
    """Execute a confirmed dataset-review intent against backend services.

    Args:
        intent: Planned intent dict; entities must carry
            dataset_review_session_id and session_version.
        current_user: Session owner; its id gates access to the session.
        config_manager: Forwarded to the preview orchestrator.
        db: SQLAlchemy session backing the review repository.

    Returns:
        Tuple of (response_text, confirmation_token, actions); the token slot
        is always None here because confirmation happened upstream.

    Raises:
        HTTPException: 422 when session coordinates are missing, 404 for an
            unknown or foreign session, 409 for a stale session version or
            nothing to approve, 400 for an unsupported operation.
    """
    with belief_scope("_dispatch_dataset_review_intent"):
        logger.reason(
            "Dispatching assistant dataset-review intent",
            extra={"operation": intent.get("operation")},
        )
        entities = intent.get("entities", {})
        session_id = entities.get("dataset_review_session_id")
        session_version = entities.get("session_version")
        # Both coordinates are mandatory for every dataset-review operation.
        if not session_id or session_version is None:
            raise HTTPException(
                status_code=422,
                detail="Missing dataset_review_session_id/session_version",
            )
        operation = str(intent.get("operation") or "")
        repository = DatasetReviewSessionRepository(db)
        # Read-only summary short-circuits before any session/version checks.
        if operation == "dataset_review_answer_context":
            summary = str(entities.get("summary") or "")
            logger.reflect(
                "Returned assistant-safe dataset review context summary",
                extra={"session_id": session_id, "operation": operation},
            )
            return summary, None, []
        # Ownership check: a foreign session is reported as not found.
        session = repository.load_session_detail(session_id, current_user.id)
        if session is None or session.user_id != current_user.id:
            logger.explore(
                "Assistant dataset-review intent rejected because session was not found",
                extra={"session_id": session_id, "user_id": current_user.id},
            )
            raise HTTPException(
                status_code=404, detail="Dataset review session not found"
            )
        # Optimistic-lock guard: stale clients get a structured 409.
        try:
            repository.require_session_version(session, int(session_version))
        except DatasetReviewSessionVersionConflictError as exc:
            logger.explore(
                "Assistant dataset-review intent rejected due to stale session version",
                extra={
                    "session_id": exc.session_id,
                    "expected_version": exc.expected_version,
                    "actual_version": exc.actual_version,
                    "operation": operation,
                },
            )
            raise _dataset_review_conflict_http_exception(exc) from exc
        logger.reason(
            "Dispatching confirmed assistant dataset-review intent",
            extra={
                "session_id": session_id,
                "session_version": session_version,
                "operation": operation,
            },
        )
        # --- Operation: approve warning-sensitive execution mappings. -------
        if operation == "dataset_review_approve_mappings":
            # Deduplicate while preserving the order of requested mapping ids.
            mapping_ids = list(dict.fromkeys(entities.get("mapping_ids") or []))
            if not mapping_ids:
                raise HTTPException(
                    status_code=409, detail="No pending mappings to approve"
                )
            updated_count = 0
            for mapping in session.execution_mappings:
                if mapping.mapping_id not in mapping_ids:
                    continue
                mapping.approval_state = ApprovalState.APPROVED
                mapping.approved_by_user_id = current_user.id
                # NOTE(review): naive utcnow() — assumed consistent with how
                # the repository stores timestamps; confirm.
                mapping.approved_at = datetime.utcnow()
                updated_count += 1
            if updated_count == 0:
                raise HTTPException(
                    status_code=409, detail="No matching mappings available to approve"
                )
            session.last_activity_at = datetime.utcnow()
            # Advance the recommended action once mapping review is satisfied.
            if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED:
                session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
            repository.bump_session_version(session)
            repository.db.commit()
            repository.db.refresh(session)
            # Audit trail entry is written after the commit succeeded.
            repository.event_logger.log_for_session(
                session,
                actor_user_id=current_user.id,
                event_type="assistant_mapping_approval",
                event_summary="Assistant-approved warning-sensitive mappings persisted",
                event_details={
                    "mapping_ids": mapping_ids,
                    "count": updated_count,
                    "version": int(getattr(session, "version", 0) or 0),
                },
            )
            logger.reflect(
                "Assistant mapping approval persisted within optimistic-lock boundary",
                extra={
                    "session_id": session_id,
                    "updated_count": updated_count,
                    "version": int(getattr(session, "version", 0) or 0),
                },
            )
            return (
                f"Approved {updated_count} mapping(s) for dataset review session {session_id}.",
                None,
                [
                    AssistantAction(
                        type="focus_target",
                        label="Open mapping review",
                        target="mapping",
                    )
                ],
            )
        # --- Operation: update one semantic field via the shared route helper.
        if operation == "dataset_review_set_field_semantics":
            field_id = str(entities.get("field_id") or "").strip()
            if not field_id:
                raise HTTPException(status_code=422, detail="Missing field_id")
            field = next(
                (item for item in session.semantic_fields if item.field_id == field_id),
                None,
            )
            if field is None:
                raise HTTPException(status_code=404, detail="Semantic field not found")
            update_request = FieldSemanticUpdateRequest(
                candidate_id=entities.get("candidate_id"),
                verbose_name=entities.get("verbose_name"),
                description=entities.get("description"),
                display_format=entities.get("display_format"),
                lock_field=bool(entities.get("lock_field", False)),
            )
            # Reuse the dataset-review route's state-update helper so assistant
            # edits follow the same validation path as UI edits.
            try:
                _update_semantic_field_state(
                    field, update_request, changed_by="assistant"
                )
            except HTTPException:
                raise
            except ValueError as exc:
                raise HTTPException(status_code=400, detail=str(exc)) from exc
            session.last_activity_at = datetime.utcnow()
            repository.bump_session_version(session)
            repository.db.commit()
            repository.db.refresh(session)
            repository.db.refresh(field)
            repository.event_logger.log_for_session(
                session,
                actor_user_id=current_user.id,
                event_type="assistant_field_semantics_updated",
                event_summary="Assistant semantic field update persisted",
                event_details={
                    "field_id": field.field_id,
                    "candidate_id": entities.get("candidate_id"),
                    "lock_field": bool(entities.get("lock_field", False)),
                    "version": int(getattr(session, "version", 0) or 0),
                },
            )
            logger.reflect(
                "Assistant semantic field update committed safely",
                extra={
                    "session_id": session_id,
                    "field_id": field_id,
                    "version": int(getattr(session, "version", 0) or 0),
                },
            )
            return (
                f"Updated semantic field {field.field_name} for dataset review session {session_id}.",
                None,
                [
                    AssistantAction(
                        type="focus_target",
                        label="Open semantic review",
                        target=f"field:{field.field_id}",
                    )
                ],
            )
        # --- Operation: generate the SQL launch preview via the orchestrator.
        if operation == "dataset_review_generate_sql_preview":
            orchestrator = DatasetReviewOrchestrator(
                repository=repository,
                config_manager=config_manager,
            )
            result = orchestrator.prepare_launch_preview(
                PreparePreviewCommand(
                    user=current_user,
                    session_id=session_id,
                    expected_version=int(session_version),
                )
            )
            # preview_status may be an enum or a plain string; normalize both.
            preview_status = getattr(
                result.preview.preview_status, "value", result.preview.preview_status
            )
            logger.reflect(
                "Assistant-triggered Superset preview generation completed",
                extra={
                    "session_id": session_id,
                    "preview_status": preview_status,
                },
            )
            return (
                f"SQL preview {preview_status} for dataset review session {session_id}.",
                None,
                [
                    AssistantAction(
                        type="focus_target",
                        label="Open SQL preview",
                        target="sql-preview",
                    )
                ],
            )
        # Anything else is not a supported dataset-review operation.
        raise HTTPException(
            status_code=400, detail="Unsupported dataset review operation"
        )
# [/DEF:_dispatch_dataset_review_intent:Function]
# [/DEF:AssistantDatasetReview:Module]

View File

@@ -0,0 +1,309 @@
# [DEF:AssistantDispatch:Module]
# @COMPLEXITY: 5
# @PURPOSE: Intent dispatch engine, confirmation summary, and clarification text for the assistant API.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [AssistantSchemas]
# @RELATION: DEPENDS_ON -> [AssistantResolvers]
# @RELATION: DEPENDS_ON -> [AssistantLlmPlanner]
# @RELATION: DEPENDS_ON -> [AssistantDatasetReview]
# @INVARIANT: Unsupported operations are rejected via HTTPException(400).
from __future__ import annotations
from typing import Any, Dict, List, Optional, Tuple
from fastapi import HTTPException
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.core.config_manager import ConfigManager
from src.core.task_manager import TaskManager
from src.schemas.auth import User
from src.services.git_service import GitService
from src.services.llm_provider import LLMProviderService
from src.services.llm_prompt_templates import is_multimodal_model
from ._schemas import (
_DATASET_REVIEW_OPS,
AssistantAction,
)
from ._resolvers import (
_build_task_observability_summary,
_extract_result_deep_links,
_get_environment_name_by_id,
_resolve_dashboard_id_entity,
_resolve_env_id,
_resolve_provider_id,
)
from ._history import _coerce_query_bool
from ._llm_planner import _check_any_permission
from ._dataset_review import _dispatch_dataset_review_intent
git_service = GitService()
# [DEF:_clarification_text_for_intent:Function]
# @COMPLEXITY: 2
# @PURPOSE: Convert technical missing-parameter errors into user-facing clarification prompts.
# @PRE: state was classified as needs_clarification for current intent/error combination.
# @POST: Returned text is human-readable and actionable for target operation.
def _clarification_text_for_intent(
intent: Optional[Dict[str, Any]], detail_text: str
) -> str:
operation = (intent or {}).get("operation")
guidance_by_operation: Dict[str, str] = {
"run_llm_validation": (
"Нужно уточнение для запуска LLM-валидации: Укажите дашборд (id или slug), окружение и провайдер LLM."
),
"run_llm_documentation": (
"Нужно уточнение для генерации документации: Укажите dataset_id, окружение и провайдер LLM."
),
"create_branch": "Нужно уточнение: укажите дашборд (id/slug/title) и имя ветки.",
"commit_changes": "Нужно уточнение: укажите дашборд (id/slug/title) для коммита.",
"deploy_dashboard": "Нужно уточнение: укажите дашборд (id/slug/title) и целевое окружение.",
"execute_migration": "Нужно уточнение: укажите дашборд (id/slug/title), source_env и target_env.",
"run_backup": "Нужно уточнение: укажите окружение и при необходимости дашборд (id/slug/title).",
}
return guidance_by_operation.get(operation, detail_text)
# [/DEF:_clarification_text_for_intent:Function]
# [DEF:_async_confirmation_summary:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build human-readable confirmation prompt for an intent before execution.
# @PRE: intent carries 'operation' and 'entities' fields produced by the planner.
# @POST: Returns a formatted summary string suitable for display to the user.
# @SIDE_EFFECT: For execute_migration with dry_run enabled, executes a real migration
#   dry-run against source and target Superset environments to embed a change report;
#   all other paths are pure formatting.
async def _async_confirmation_summary(intent: Dict[str, Any], config_manager: ConfigManager, db: Session) -> str:
    with belief_scope('_confirmation_summary'):
        logger.reason('Belief protocol reasoning checkpoint for _confirmation_summary')
        operation = intent.get('operation', '')
        entities = intent.get('entities', {})
        # Per-operation Russian summary templates; placeholders are filled via
        # _label so absent entities collapse to empty strings (no dangling spaces).
        descriptions: Dict[str, str] = {'create_branch': 'создание ветки{branch} для дашборда{dashboard}', 'commit_changes': 'коммит изменений для дашборда{dashboard}', 'deploy_dashboard': 'деплой дашборда{dashboard} в окружение{env}', 'execute_migration': 'миграция дашборда{dashboard} с{src} на{tgt}', 'run_backup': 'бэкап окружения{env}{dashboard}', 'run_llm_validation': 'LLM-валидация дашборда{dashboard}{env}', 'run_llm_documentation': 'генерация документации для датасета{dataset}{env}'}
        template = descriptions.get(operation)
        if not template:
            # Unknown/untracked operation: fall back to a generic confirm/cancel prompt.
            logger.reflect('Belief protocol postcondition checkpoint for _confirmation_summary')
            return 'Подтвердите выполнение операции или отмените.'
        # Helper: render a value with a leading separator, or nothing when the
        # entity is missing/falsy.
        def _label(value: Any, prefix: str=' ') -> str:
            logger.reflect('Belief protocol postcondition checkpoint for _confirmation_summary')
            return f'{prefix}{value}' if value else ''
        # dashboard_id takes precedence over the looser dashboard_ref token.
        dashboard = entities.get('dashboard_id') or entities.get('dashboard_ref')
        text = template.format(branch=_label(entities.get('branch_name')), dashboard=_label(dashboard), env=_label(entities.get('environment') or entities.get('target_env')), src=_label(entities.get('source_env')), tgt=_label(entities.get('target_env')), dataset=_label(entities.get('dataset_id')))
        if operation == 'execute_migration':
            # Spell out the effective migration flags so the user confirms them explicitly.
            flags = []
            flags.append('маппинг БД: ' + ('ВКЛ' if _coerce_query_bool(entities.get('replace_db_config', False)) else 'ВЫКЛ'))
            flags.append('исправление кроссфильтров: ' + ('ВКЛ' if _coerce_query_bool(entities.get('fix_cross_filters', True)) else 'ВЫКЛ'))
            dry_run_enabled = _coerce_query_bool(entities.get('dry_run', False))
            flags.append('отчет dry-run: ' + ('ВКЛ' if dry_run_enabled else 'ВЫКЛ'))
            text += f" ({', '.join(flags)})"
            if dry_run_enabled:
                # Best-effort dry-run preview: resolve dashboard and both
                # environments, run the orchestrator, and append the planned
                # create/update/delete counts to the prompt. Failures are
                # reported inline and never block confirmation.
                try:
                    from src.core.migration.dry_run_orchestrator import MigrationDryRunService
                    from src.models.dashboard import DashboardSelection
                    from src.core.superset_client import SupersetClient
                    src_token = entities.get('source_env')
                    tgt_token = entities.get('target_env')
                    dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=src_token)
                    if dashboard_id and src_token and tgt_token:
                        src_env_id = _resolve_env_id(src_token, config_manager)
                        tgt_env_id = _resolve_env_id(tgt_token, config_manager)
                        if src_env_id and tgt_env_id:
                            env_map = {env.id: env for env in config_manager.get_environments()}
                            source_env = env_map.get(src_env_id)
                            target_env = env_map.get(tgt_env_id)
                            # Guard against same-environment "migrations".
                            if source_env and target_env and (source_env.id != target_env.id):
                                selection = DashboardSelection(source_env_id=source_env.id, target_env_id=target_env.id, selected_ids=[dashboard_id], replace_db_config=_coerce_query_bool(entities.get('replace_db_config', False)), fix_cross_filters=_coerce_query_bool(entities.get('fix_cross_filters', True)))
                                service = MigrationDryRunService()
                                source_client = SupersetClient(source_env)
                                target_client = SupersetClient(target_env)
                                report = service.run(selection, source_client, target_client, db)
                                # Aggregate planned changes across dashboards, charts, datasets.
                                s = report.get('summary', {})
                                dash_s = s.get('dashboards', {})
                                charts_s = s.get('charts', {})
                                ds_s = s.get('datasets', {})
                                creates = dash_s.get('create', 0) + charts_s.get('create', 0) + ds_s.get('create', 0)
                                updates = dash_s.get('update', 0) + charts_s.get('update', 0) + ds_s.get('update', 0)
                                deletes = dash_s.get('delete', 0) + charts_s.get('delete', 0) + ds_s.get('delete', 0)
                                text += f'\n\nОтчет dry-run:\n- Будет создано новых объектов: {creates}\n- Будет обновлено: {updates}\n- Будет удалено: {deletes}'
                            else:
                                text += '\n\n(Не удалось загрузить отчет dry-run: неверные окружения).'
                except Exception as e:
                    # Dry-run is advisory: log with traceback and degrade gracefully.
                    import traceback
                    logger.warning('[assistant.dry_run_summary][failed] Exception: %s\n%s', e, traceback.format_exc())
                    text += f'\n\n(Не удалось загрузить отчет dry-run: {e}).'
        logger.reflect('Belief protocol postcondition checkpoint for _confirmation_summary')
        return f'Выполнить: {text}. Подтвердите или отмените.'
# [/DEF:_async_confirmation_summary:Function]
# [DEF:_dispatch_intent:Function]
# @COMPLEXITY: 5
# @PURPOSE: Execute parsed assistant intent via existing task/plugin/git services.
# @DATA_CONTRACT: Input[intent,current_user,task_manager,config_manager,db] -> Output[Tuple[text:str,task_id:Optional[str],actions:List[AssistantAction]]]
# @RELATION: DEPENDS_ON -> [_check_any_permission]
# @RELATION: DEPENDS_ON -> [_resolve_dashboard_id_entity]
# @RELATION: DEPENDS_ON -> [TaskManager]
# @RELATION: DEPENDS_ON -> [GitService]
# @SIDE_EFFECT: May enqueue tasks, invoke git operations, and query/update external service state.
# @PRE: intent operation is known and actor permissions are validated per operation.
# @POST: Returns response text, optional task id, and UI actions for follow-up.
# @INVARIANT: unsupported operations are rejected via HTTPException(400).
async def _dispatch_intent(intent: Dict[str, Any], current_user: User, task_manager: TaskManager, config_manager: ConfigManager, db: Session) -> Tuple[str, Optional[str], List[AssistantAction]]:
    with belief_scope('_dispatch_intent'):
        logger.reason('Belief protocol reasoning checkpoint for _dispatch_intent')
        operation = intent.get('operation')
        entities = intent.get('entities', {})
        # Dataset-review intents have their own dispatcher; delegate early.
        if operation in _DATASET_REVIEW_OPS or operation == 'dataset_review_answer_context':
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return await _dispatch_dataset_review_intent(intent, current_user, config_manager, db)
        # --- show_capabilities: list operations the current user may invoke ---
        if operation == 'show_capabilities':
            # Local import avoids a module-level import cycle with the planner.
            from ._llm_planner import _build_tool_catalog
            tools_catalog = _build_tool_catalog(current_user, config_manager, db)
            labels = {'create_branch': 'Git: создание ветки', 'commit_changes': 'Git: коммит', 'deploy_dashboard': 'Git: деплой дашборда', 'execute_migration': 'Миграции: запуск переноса', 'run_backup': 'Бэкапы: запуск резервного копирования', 'run_llm_validation': 'LLM: валидация дашборда', 'run_llm_documentation': 'LLM: генерация документации', 'get_task_status': 'Статус: проверка задачи', 'get_health_summary': 'Здоровье: сводка по дашбордам'}
            # The catalog is already permission-filtered; map ops to display labels.
            available = [labels[t['operation']] for t in tools_catalog if t['operation'] in labels]
            if not available:
                logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
                return ('Сейчас нет доступных для вас операций ассистента.', None, [])
            commands = '\n'.join((f'- {item}' for item in available))
            text = f'Вот что я могу сделать для вас:\n{commands}\n\nПример: `запусти миграцию с dev на prod для дашборда 42`.'
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (text, None, [])
        # --- get_health_summary: dashboard validation status roll-up ---
        if operation == 'get_health_summary':
            from src.services.health_service import HealthService
            env_token = entities.get('environment')
            # env_id is optional: None means "summarize across all environments".
            env_id = _resolve_env_id(env_token, config_manager)
            service = HealthService(db)
            summary = await service.get_health_summary(environment_id=env_id)
            env_name = _get_environment_name_by_id(env_id, config_manager) if env_id else 'всех окружений'
            text = f'Сводка здоровья дашбордов для {env_name}:\n- ✅ Прошли проверку: {summary.pass_count}\n- ⚠️ С предупреждениями: {summary.warn_count}\n- ❌ Ошибки валидации: {summary.fail_count}\n- ❓ Неизвестно: {summary.unknown_count}'
            actions = [AssistantAction(type='open_route', label='Открыть Health Center', target='/dashboards/health')]
            if summary.fail_count > 0:
                # Enumerate failing dashboards with deep links to their LLM reports.
                text += '\n\nОбнаружены ошибки в следующих дашбордах:'
                for item in summary.items:
                    if item.status == 'FAIL':
                        text += f"\n- {item.dashboard_id} ({item.environment_id}): {item.summary or 'Нет деталей'}"
                        actions.append(AssistantAction(type='open_route', label=f'Отчет {item.dashboard_id}', target=f'/reports/llm/{item.task_id}'))
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            # Cap UI actions at 5 to keep the response manageable.
            return (text, None, actions[:5])
        # --- get_task_status: explicit task_id or the user's latest task ---
        if operation == 'get_task_status':
            _check_any_permission(current_user, [('tasks', 'READ')])
            task_id = entities.get('task_id')
            if not task_id:
                # No id given: report the most recent of the user's own tasks.
                recent = [t for t in task_manager.get_tasks(limit=20, offset=0) if t.user_id == current_user.id]
                if not recent:
                    logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
                    return ('У вас пока нет задач в истории.', None, [])
                task = recent[0]
                actions = [AssistantAction(type='open_task', label='Open Task', target=task.id)]
                # Terminal tasks get result-specific deep links (reports, diffs).
                if str(task.status).upper() in {'SUCCESS', 'FAILED'}:
                    actions.extend(_extract_result_deep_links(task, config_manager))
                summary_line = _build_task_observability_summary(task, config_manager)
                logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
                return (f'Последняя задача: {task.id}, статус: {task.status}.' + (f'\n{summary_line}' if summary_line else ''), task.id, actions)
            task = task_manager.get_task(task_id)
            if not task:
                raise HTTPException(status_code=404, detail=f'Task {task_id} not found')
            actions = [AssistantAction(type='open_task', label='Open Task', target=task.id)]
            if str(task.status).upper() in {'SUCCESS', 'FAILED'}:
                actions.extend(_extract_result_deep_links(task, config_manager))
            summary_line = _build_task_observability_summary(task, config_manager)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'Статус задачи {task.id}: {task.status}.' + (f'\n{summary_line}' if summary_line else ''), task.id, actions)
        # --- create_branch: synchronous git branch creation (no task queue) ---
        if operation == 'create_branch':
            _check_any_permission(current_user, [('plugin:git', 'EXECUTE')])
            dashboard_id = _resolve_dashboard_id_entity(entities, config_manager)
            branch_name = entities.get('branch_name')
            if not dashboard_id or not branch_name:
                raise HTTPException(status_code=422, detail='Missing dashboard_id/dashboard_ref or branch_name')
            # Branches are always cut from 'main'.
            git_service.create_branch(dashboard_id, branch_name, 'main')
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'Ветка `{branch_name}` создана для дашборда {dashboard_id}.', None, [])
        # --- commit_changes: synchronous git commit ---
        if operation == 'commit_changes':
            _check_any_permission(current_user, [('plugin:git', 'EXECUTE')])
            dashboard_id = _resolve_dashboard_id_entity(entities, config_manager)
            commit_message = entities.get('message')
            if not dashboard_id:
                raise HTTPException(status_code=422, detail='Missing dashboard_id/dashboard_ref')
            git_service.commit_changes(dashboard_id, commit_message, None)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return ('Коммит выполнен успешно.', None, [])
        # --- deploy_dashboard: enqueue git-integration deploy task ---
        if operation == 'deploy_dashboard':
            _check_any_permission(current_user, [('plugin:git', 'EXECUTE')])
            env_token = entities.get('environment')
            env_id = _resolve_env_id(env_token, config_manager)
            dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=env_token)
            if not dashboard_id or not env_id:
                raise HTTPException(status_code=422, detail='Missing dashboard_id/dashboard_ref or environment')
            task = await task_manager.create_task(plugin_id='git-integration', params={'operation': 'deploy', 'dashboard_id': dashboard_id, 'environment_id': env_id}, user_id=current_user.id)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'Деплой запущен. task_id={task.id}', task.id, [AssistantAction(type='open_task', label='Open Task', target=task.id), AssistantAction(type='open_reports', label='Open Reports', target='/reports')])
        # --- execute_migration: enqueue superset-migration task ---
        if operation == 'execute_migration':
            # Either of the migration plugin permission aliases authorizes the call.
            _check_any_permission(current_user, [('plugin:migration', 'EXECUTE'), ('plugin:superset-migration', 'EXECUTE')])
            src_token = entities.get('source_env')
            dashboard_ref = entities.get('dashboard_ref')
            dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=src_token)
            src = _resolve_env_id(src_token, config_manager)
            tgt = _resolve_env_id(entities.get('target_env'), config_manager)
            if not src or not tgt:
                raise HTTPException(status_code=422, detail='Missing source_env/target_env')
            if not dashboard_id and (not dashboard_ref):
                raise HTTPException(status_code=422, detail='Missing dashboard_id/dashboard_ref')
            migration_params: Dict[str, Any] = {'source_env_id': src, 'target_env_id': tgt, 'replace_db_config': _coerce_query_bool(entities.get('replace_db_config', False)), 'fix_cross_filters': _coerce_query_bool(entities.get('fix_cross_filters', True))}
            # Resolved id selects a single dashboard; otherwise the raw ref
            # becomes a regex selector on the migration side.
            if dashboard_id:
                migration_params['selected_ids'] = [dashboard_id]
            else:
                migration_params['dashboard_regex'] = str(dashboard_ref)
            task = await task_manager.create_task(plugin_id='superset-migration', params=migration_params, user_id=current_user.id)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            # Dashboard-specific deep links only when a concrete id was resolved.
            return (f'Миграция запущена. task_id={task.id}', task.id, [AssistantAction(type='open_task', label='Open Task', target=task.id), AssistantAction(type='open_reports', label='Open Reports', target='/reports'), *([AssistantAction(type='open_route', label=f'Открыть дашборд в {_get_environment_name_by_id(tgt, config_manager)}', target=f'/dashboards/{dashboard_id}?env_id={tgt}'), AssistantAction(type='open_diff', label='Показать Diff', target=str(dashboard_id))] if dashboard_id else [])])
        # --- run_backup: enqueue superset-backup task, optionally scoped to one dashboard ---
        if operation == 'run_backup':
            _check_any_permission(current_user, [('plugin:superset-backup', 'EXECUTE'), ('plugin:backup', 'EXECUTE')])
            env_token = entities.get('environment')
            env_id = _resolve_env_id(env_token, config_manager)
            if not env_id:
                raise HTTPException(status_code=400, detail='Missing or unknown environment')
            params: Dict[str, Any] = {'environment_id': env_id}
            if entities.get('dashboard_id') or entities.get('dashboard_ref'):
                dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=env_token)
                if not dashboard_id:
                    raise HTTPException(status_code=422, detail='Missing dashboard_id/dashboard_ref')
                params['dashboard_ids'] = [dashboard_id]
            task = await task_manager.create_task(plugin_id='superset-backup', params=params, user_id=current_user.id)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'Бэкап запущен. task_id={task.id}', task.id, [AssistantAction(type='open_task', label='Open Task', target=task.id), AssistantAction(type='open_reports', label='Open Reports', target='/reports'), *([AssistantAction(type='open_route', label=f'Открыть дашборд в {_get_environment_name_by_id(env_id, config_manager)}', target=f'/dashboards/{dashboard_id}?env_id={env_id}'), AssistantAction(type='open_diff', label='Показать Diff', target=str(dashboard_id))] if entities.get('dashboard_id') or entities.get('dashboard_ref') else [])])
        # --- run_llm_validation: requires a multimodal provider model ---
        if operation == 'run_llm_validation':
            _check_any_permission(current_user, [('plugin:llm_dashboard_validation', 'EXECUTE')])
            env_token = entities.get('environment')
            # Fall back to the default environment when none was specified.
            env_id = _resolve_env_id(env_token, config_manager) or _resolve_env_id(None, config_manager)
            dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=env_token)
            provider_id = _resolve_provider_id(entities.get('provider'), db, config_manager=config_manager, task_key='dashboard_validation')
            if not dashboard_id or not env_id or (not provider_id):
                raise HTTPException(status_code=422, detail='Missing dashboard_id/environment/provider. Укажите ID/slug дашборда или окружение.')
            provider = LLMProviderService(db).get_provider(provider_id)
            provider_model = provider.default_model if provider else ''
            # Screenshot-based validation needs a vision-capable model.
            if not is_multimodal_model(provider_model, provider.provider_type if provider else None):
                raise HTTPException(status_code=422, detail='Selected provider model is not multimodal for dashboard validation. Выберите мультимодальную модель (например, gpt-4o).')
            task = await task_manager.create_task(plugin_id='llm_dashboard_validation', params={'dashboard_id': str(dashboard_id), 'environment_id': env_id, 'provider_id': provider_id}, user_id=current_user.id)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'LLM-валидация запущена. task_id={task.id}', task.id, [AssistantAction(type='open_task', label='Open Task', target=task.id), AssistantAction(type='open_reports', label='Open Reports', target='/reports')])
        # --- run_llm_documentation: dataset documentation generation ---
        if operation == 'run_llm_documentation':
            _check_any_permission(current_user, [('plugin:llm_documentation', 'EXECUTE')])
            dataset_id = entities.get('dataset_id')
            env_id = _resolve_env_id(entities.get('environment'), config_manager)
            provider_id = _resolve_provider_id(entities.get('provider'), db, config_manager=config_manager, task_key='documentation')
            if not dataset_id or not env_id or (not provider_id):
                raise HTTPException(status_code=400, detail='Missing dataset_id/environment/provider')
            task = await task_manager.create_task(plugin_id='llm_documentation', params={'dataset_id': str(dataset_id), 'environment_id': env_id, 'provider_id': provider_id}, user_id=current_user.id)
            logger.reflect('Belief protocol postcondition checkpoint for _dispatch_intent')
            return (f'Генерация документации запущена. task_id={task.id}', task.id, [AssistantAction(type='open_task', label='Open Task', target=task.id), AssistantAction(type='open_reports', label='Open Reports', target='/reports')])
        # Invariant: anything not matched above is rejected.
        raise HTTPException(status_code=400, detail='Unsupported operation')
# [/DEF:_dispatch_intent:Function]
# [/DEF:AssistantDispatch:Module]

View File

@@ -0,0 +1,382 @@
# [DEF:AssistantHistory:Module]
# @COMPLEXITY: 2
# @PURPOSE: Conversation history, audit trail, and confirmation persistence helpers for the assistant API.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [AssistantSchemas]
# @INVARIANT: Failed persistence attempts always rollback before returning.
from __future__ import annotations
import uuid
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.models.assistant import (
AssistantAuditRecord,
AssistantConfirmationRecord,
AssistantMessageRecord,
)
from ._schemas import (
ASSISTANT_ARCHIVE_AFTER_DAYS,
ASSISTANT_MESSAGE_TTL_DAYS,
ASSISTANT_AUDIT,
CONFIRMATIONS,
CONVERSATIONS,
ConfirmationRecord,
)
# NOTE: the former `logger = logger` self-assignment was a no-op and has been
# removed; `logger` is already bound by the import from src.core.logger above.
# [DEF:_append_history:Function]
# @COMPLEXITY: 2
# @PURPOSE: Record a single chat message in the in-memory conversation buffer.
# @DATA_CONTRACT: Input[user_id,conversation_id,role,text,state?,task_id?,confirmation_id?] -> Output[None]
# @RELATION: UPDATES -> [CONVERSATIONS]
# @SIDE_EFFECT: Mutates the process-local CONVERSATIONS mapping.
# @PRE: user_id and conversation_id identify target conversation bucket.
# @POST: CONVERSATIONS[(user_id, conversation_id)] gained exactly one new entry.
# @INVARIANT: every appended entry includes generated message_id and created_at timestamp.
def _append_history(
    user_id: str,
    conversation_id: str,
    role: str,
    text: str,
    state: Optional[str] = None,
    task_id: Optional[str] = None,
    confirmation_id: Optional[str] = None,
):
    # Assemble the entry up front so the mutation below is a single append.
    entry = {
        "message_id": str(uuid.uuid4()),
        "conversation_id": conversation_id,
        "role": role,
        "text": text,
        "state": state,
        "task_id": task_id,
        "confirmation_id": confirmation_id,
        "created_at": datetime.utcnow(),
    }
    # setdefault lazily creates the bucket on first message of a conversation.
    CONVERSATIONS.setdefault((user_id, conversation_id), []).append(entry)
# [/DEF:_append_history:Function]
# [DEF:_persist_message:Function]
# @COMPLEXITY: 2
# @PURPOSE: Persist assistant/user message record to database.
# @DATA_CONTRACT: Input[Session,user_id,conversation_id,role,text,state?,task_id?,confirmation_id?,metadata?] -> Output[None]
# @RELATION: DEPENDS_ON -> [AssistantMessageRecord]
# @SIDE_EFFECT: Adds an AssistantMessageRecord row and commits; rolls back on failure.
# @PRE: db session is writable and message payload is serializable.
# @POST: Message row is committed or persistence failure is logged.
# @INVARIANT: failed persistence attempts always rollback before returning.
def _persist_message(
    db: Session,
    user_id: str,
    conversation_id: str,
    role: str,
    text: str,
    state: Optional[str] = None,
    task_id: Optional[str] = None,
    confirmation_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
):
    try:
        # Gather column values first, then build the ORM row in one go.
        values = {
            "id": str(uuid.uuid4()),
            "user_id": user_id,
            "conversation_id": conversation_id,
            "role": role,
            "text": text,
            "state": state,
            "task_id": task_id,
            "confirmation_id": confirmation_id,
            "payload": metadata,
        }
        db.add(AssistantMessageRecord(**values))
        db.commit()
    except Exception as exc:
        # Persistence is best-effort: roll back and keep the request alive.
        db.rollback()
        logger.warning(f"[assistant.message][persist_failed] {exc}")
# [/DEF:_persist_message:Function]
# [DEF:_audit:Function]
# @COMPLEXITY: 2
# @PURPOSE: Append in-memory audit record for assistant decision trace.
# @DATA_CONTRACT: Input[user_id,payload:Dict[str,Any]] -> Output[None]
# @RELATION: UPDATES -> [ASSISTANT_AUDIT]
# @SIDE_EFFECT: Mutates in-memory ASSISTANT_AUDIT store and emits structured log event.
# @PRE: payload describes decision/outcome fields.
# @POST: ASSISTANT_AUDIT list for user contains new timestamped entry.
# @INVARIANT: persisted in-memory audit entry always contains created_at in ISO format.
def _audit(user_id: str, payload: Dict[str, Any]):
    # Copy the payload and stamp it; the original payload is logged untouched.
    record = {**payload, "created_at": datetime.utcnow().isoformat()}
    ASSISTANT_AUDIT.setdefault(user_id, []).append(record)
    logger.info(f"[assistant.audit] {payload}")
# [/DEF:_audit:Function]
# [DEF:_persist_audit:Function]
# @COMPLEXITY: 2
# @PURPOSE: Persist structured assistant audit payload in database.
# @PRE: db session is writable and payload is JSON-serializable.
# @POST: Audit row is committed or failure is logged with rollback.
def _persist_audit(
    db: Session, user_id: str, payload: Dict[str, Any], conversation_id: Optional[str]
):
    try:
        # Promote well-known payload fields to dedicated columns; keep the
        # full payload alongside for forensic completeness.
        values = {
            "id": str(uuid.uuid4()),
            "user_id": user_id,
            "conversation_id": conversation_id,
            "decision": payload.get("decision"),
            "task_id": payload.get("task_id"),
            "message": payload.get("message"),
            "payload": payload,
        }
        db.add(AssistantAuditRecord(**values))
        db.commit()
    except Exception as exc:
        # Audit persistence must never break the request flow.
        db.rollback()
        logger.warning(f"[assistant.audit][persist_failed] {exc}")
# [/DEF:_persist_audit:Function]
# [DEF:_persist_confirmation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Persist confirmation token record to database.
# @PRE: record contains id/user/intent/dispatch/expiry fields.
# @POST: Confirmation row exists in persistent storage.
def _persist_confirmation(db: Session, record: ConfirmationRecord):
    try:
        # merge() upserts so re-persisting an existing token id is safe.
        db.merge(
            AssistantConfirmationRecord(
                id=record.id,
                user_id=record.user_id,
                conversation_id=record.conversation_id,
                state=record.state,
                intent=record.intent,
                dispatch=record.dispatch,
                expires_at=record.expires_at,
                created_at=record.created_at,
                consumed_at=None,
            )
        )
        db.commit()
    except Exception as exc:
        # Roll back on any failure; the caller is notified only via logs.
        db.rollback()
        logger.warning(f"[assistant.confirmation][persist_failed] {exc}")
# [/DEF:_persist_confirmation:Function]
# [DEF:_update_confirmation_state:Function]
# @COMPLEXITY: 2
# @PURPOSE: Update persistent confirmation token lifecycle state.
# @PRE: confirmation_id references existing row.
# @POST: State and consumed_at fields are updated when applicable.
def _update_confirmation_state(db: Session, confirmation_id: str, state: str):
    try:
        record = (
            db.query(AssistantConfirmationRecord)
            .filter(AssistantConfirmationRecord.id == confirmation_id)
            .first()
        )
        # Unknown token: nothing to update, exit quietly.
        if record is None:
            return
        record.state = state
        # Terminal states also stamp the consumption time.
        terminal_states = {"consumed", "expired", "cancelled"}
        if state in terminal_states:
            record.consumed_at = datetime.utcnow()
        db.commit()
    except Exception as exc:
        db.rollback()
        logger.warning(f"[assistant.confirmation][update_failed] {exc}")
# [/DEF:_update_confirmation_state:Function]
# [DEF:_load_confirmation_from_db:Function]
# @COMPLEXITY: 2
# @PURPOSE: Load confirmation token from database into in-memory model.
# @PRE: confirmation_id may or may not exist in storage.
# @POST: Returns ConfirmationRecord when found, otherwise None.
def _load_confirmation_from_db(
    db: Session, confirmation_id: str
) -> Optional[ConfirmationRecord]:
    record = (
        db.query(AssistantConfirmationRecord)
        .filter(AssistantConfirmationRecord.id == confirmation_id)
        .first()
    )
    if record is None:
        return None
    # Hydrate the in-memory model; JSON columns may be NULL, default to {}.
    return ConfirmationRecord(
        id=record.id,
        user_id=record.user_id,
        conversation_id=record.conversation_id,
        intent=record.intent or {},
        dispatch=record.dispatch or {},
        expires_at=record.expires_at,
        state=record.state,
        created_at=record.created_at,
    )
# [/DEF:_load_confirmation_from_db:Function]
# [DEF:_ensure_conversation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve active conversation id in memory or create a new one.
# @PRE: user_id identifies current actor.
# @POST: Returns stable conversation id and updates USER_ACTIVE_CONVERSATION.
def _ensure_conversation(user_id: str, conversation_id: Optional[str]) -> str:
    # Local import avoids a circular import with ._schemas at module load time;
    # hoisted here once instead of duplicated per branch.
    from ._schemas import USER_ACTIVE_CONVERSATION

    # Explicit id wins and becomes the user's active conversation.
    if conversation_id:
        USER_ACTIVE_CONVERSATION[user_id] = conversation_id
        return conversation_id
    # Reuse the cached active conversation when present.
    active = USER_ACTIVE_CONVERSATION.get(user_id)
    if active:
        return active
    # No prior conversation: mint a new id and remember it.
    new_id = str(uuid.uuid4())
    USER_ACTIVE_CONVERSATION[user_id] = new_id
    return new_id
# [/DEF:_ensure_conversation:Function]
# [DEF:_resolve_or_create_conversation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve active conversation using explicit id, memory cache, or persisted history.
# @PRE: user_id and db session are available.
# @POST: Returns conversation id and updates USER_ACTIVE_CONVERSATION cache.
def _resolve_or_create_conversation(
    user_id: str, conversation_id: Optional[str], db: Session
) -> str:
    # Local imports avoid circular dependencies at module load time.
    from ._schemas import USER_ACTIVE_CONVERSATION
    # Priority 1: an explicitly supplied id becomes the active conversation.
    if conversation_id:
        USER_ACTIVE_CONVERSATION[user_id] = conversation_id
        return conversation_id
    # Priority 2: the in-memory active-conversation cache.
    cached = USER_ACTIVE_CONVERSATION.get(user_id)
    if cached:
        return cached
    # Priority 3: resume the conversation of the user's newest persisted message.
    from sqlalchemy import desc
    latest = (
        db.query(AssistantMessageRecord)
        .filter(AssistantMessageRecord.user_id == user_id)
        .order_by(desc(AssistantMessageRecord.created_at))
        .first()
    )
    if latest:
        USER_ACTIVE_CONVERSATION[user_id] = latest.conversation_id
        return latest.conversation_id
    # Priority 4: nothing known — start a fresh conversation.
    fresh_id = str(uuid.uuid4())
    USER_ACTIVE_CONVERSATION[user_id] = fresh_id
    return fresh_id
# [/DEF:_resolve_or_create_conversation:Function]
# [DEF:_cleanup_history_ttl:Function]
# @COMPLEXITY: 2
# @PURPOSE: Enforce assistant message retention window by deleting expired rows and in-memory records.
# @PRE: db session is available and user_id references current actor scope.
# @POST: Messages older than ASSISTANT_MESSAGE_TTL_DAYS are removed from persistence and memory mirrors.
def _cleanup_history_ttl(db: Session, user_id: str):
    cutoff = datetime.utcnow() - timedelta(days=ASSISTANT_MESSAGE_TTL_DAYS)
    # Phase 1: purge expired rows from persistent storage.
    try:
        expired = db.query(AssistantMessageRecord).filter(
            AssistantMessageRecord.user_id == user_id,
            AssistantMessageRecord.created_at < cutoff,
        )
        # hasattr guard keeps direct invocations with stub sessions working.
        if hasattr(expired, "delete"):
            expired.delete(synchronize_session=False)
        db.commit()
    except Exception as exc:
        db.rollback()
        logger.warning(
            f"[assistant.history][ttl_cleanup_failed] user={user_id} error={exc}"
        )
    # Phase 2: mirror the cleanup in the in-memory conversation buffers.
    empty_keys: List[Tuple[str, str]] = []
    for key, messages in CONVERSATIONS.items():
        if key[0] != user_id:
            continue
        # Keep entries whose timestamp is missing/non-datetime or still fresh.
        fresh = [
            m
            for m in messages
            if not (isinstance(m.get("created_at"), datetime) and m["created_at"] < cutoff)
        ]
        if fresh:
            CONVERSATIONS[key] = fresh
        else:
            empty_keys.append(key)
    # Drop conversations whose every message expired.
    for key in empty_keys:
        CONVERSATIONS.pop(key, None)
# [/DEF:_cleanup_history_ttl:Function]
# [DEF:_is_conversation_archived:Function]
# @COMPLEXITY: 2
# @PURPOSE: Determine archived state for a conversation based on last update timestamp.
# @PRE: updated_at can be null for empty conversations.
# @POST: Returns True when conversation inactivity exceeds archive threshold.
def _is_conversation_archived(updated_at: Optional[datetime]) -> bool:
if not updated_at:
return False
cutoff = datetime.utcnow() - timedelta(days=ASSISTANT_ARCHIVE_AFTER_DAYS)
return updated_at < cutoff
# [/DEF:_is_conversation_archived:Function]
# [DEF:_coerce_query_bool:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize bool-like query values for compatibility in direct handler invocations/tests.
# @PRE: value may be bool, string, or FastAPI Query metadata object.
# @POST: Returns deterministic boolean flag.
def _coerce_query_bool(value: Any) -> bool:
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.strip().lower() in {"1", "true", "yes", "on"}
return False
# [/DEF:_coerce_query_bool:Function]
# [/DEF:AssistantHistory:Module]

View File

@@ -0,0 +1,441 @@
# [DEF:AssistantLlmPlanner:Module]
# @COMPLEXITY: 3
# @PURPOSE: LLM-based intent planning, tool catalog construction, and authorization for the assistant API.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [AssistantSchemas]
# @RELATION: DEPENDS_ON -> [AssistantResolvers]
# @INVARIANT: Tool catalog is filtered by user permissions before being sent to LLM.
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional, Tuple
from fastapi import HTTPException
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.core.config_manager import ConfigManager
from src.dependencies import has_permission
from src.schemas.auth import User
from src.services.llm_provider import LLMProviderService
from src.services.llm_prompt_templates import (
normalize_llm_settings,
resolve_bound_provider_id,
)
from src.plugins.llm_analysis.service import LLMClient
from src.plugins.llm_analysis.models import LLMProviderType
from ._schemas import (
INTENT_PERMISSION_CHECKS,
AssistantAction,
)
from ._resolvers import (
_get_default_environment_id,
_is_production_env,
_resolve_provider_id,
)
# [DEF:_check_any_permission:Function]
# @COMPLEXITY: 2
# @PURPOSE: Validate user against alternative permission checks (logical OR).
# @PRE: checks list contains resource-action tuples.
# @POST: Returns on first successful permission; raises HTTPException otherwise.
def _check_any_permission(current_user: User, checks: List[Tuple[str, str]]):
    last_error: Optional[HTTPException] = None
    for resource, action in checks:
        try:
            has_permission(resource, action)(current_user)
        except HTTPException as exc:
            # Remember the denial and keep trying the remaining alternatives.
            last_error = exc
        else:
            # First satisfied check authorizes the whole call.
            return
    # No alternative passed: surface the most recent denial, or a generic 403
    # when the checks list was empty.
    if last_error is not None:
        raise last_error
    raise HTTPException(status_code=403, detail="Permission denied")
# [/DEF:_check_any_permission:Function]
# [DEF:_has_any_permission:Function]
# @COMPLEXITY: 2
# @PURPOSE: Check whether user has at least one permission tuple from the provided list.
# @PRE: current_user and checks list are valid.
# @POST: Returns True when at least one permission check passes.
def _has_any_permission(current_user: User, checks: List[Tuple[str, str]]) -> bool:
    # Delegate to the raising variant and translate its outcome into a boolean.
    try:
        _check_any_permission(current_user, checks)
    except HTTPException:
        return False
    return True
# [/DEF:_has_any_permission:Function]
# [DEF:_build_tool_catalog:Function]
# @COMPLEXITY: 3
# @PURPOSE: Build current-user tool catalog for LLM planner with operation contracts and defaults.
# @PRE: current_user is authenticated; config/db are available.
# @POST: Returns list of executable tools filtered by permission and runtime availability.
# @RELATION: CALLS -> LLMProviderService
def _build_tool_catalog(
    current_user: User,
    config_manager: ConfigManager,
    db: Session,
    dataset_review_context: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Assemble the permission-filtered tool catalog for the LLM planner.

    Combines static operation contracts with runtime defaults (default
    environment id, task-bound LLM providers) and, when a dataset review
    context is supplied, appends the dataset-review operations. Tools whose
    permission checks all fail for the current user are omitted; tools with
    no declared checks are always included.
    """
    envs = config_manager.get_environments()
    default_env_id = _get_default_environment_id(config_manager)
    providers = LLMProviderService(db).get_all_providers()
    llm_settings = {}
    try:
        # Settings may be unavailable/misconfigured; fall back to no bindings.
        llm_settings = config_manager.get_config().settings.llm
    except Exception:
        llm_settings = {}
    # Provider preference: explicit binding -> active provider -> first configured.
    active_provider = next((p.id for p in providers if p.is_active), None)
    fallback_provider = active_provider or (providers[0].id if providers else None)
    validation_provider = (
        resolve_bound_provider_id(llm_settings, "dashboard_validation")
        or fallback_provider
    )
    documentation_provider = (
        resolve_bound_provider_id(llm_settings, "documentation") or fallback_provider
    )
    # Static operation contracts consumed verbatim by the LLM planner prompt.
    candidates: List[Dict[str, Any]] = [
        {
            "operation": "show_capabilities",
            "domain": "assistant",
            "description": "Show available assistant commands and examples",
            "required_entities": [],
            "optional_entities": [],
            "risk_level": "safe",
            "requires_confirmation": False,
        },
        {
            "operation": "get_task_status",
            "domain": "status",
            "description": "Get task status by task_id or latest user task",
            "required_entities": [],
            "optional_entities": ["task_id"],
            "risk_level": "safe",
            "requires_confirmation": False,
        },
        {
            "operation": "create_branch",
            "domain": "git",
            "description": "Create git branch for dashboard by id/slug/title",
            "required_entities": ["branch_name"],
            "optional_entities": ["dashboard_id", "dashboard_ref"],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "commit_changes",
            "domain": "git",
            "description": "Commit dashboard repository changes by dashboard id/slug/title",
            "required_entities": [],
            "optional_entities": ["dashboard_id", "dashboard_ref", "message"],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "deploy_dashboard",
            "domain": "git",
            "description": "Deploy dashboard (id/slug/title) to target environment",
            "required_entities": ["environment"],
            "optional_entities": ["dashboard_id", "dashboard_ref"],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "execute_migration",
            "domain": "migration",
            "description": "Run dashboard migration (id/slug/title) between environments. Optional boolean flags: replace_db_config, fix_cross_filters",
            "required_entities": ["source_env", "target_env"],
            "optional_entities": [
                "dashboard_id",
                "dashboard_ref",
                "replace_db_config",
                "fix_cross_filters",
            ],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "run_backup",
            "domain": "backup",
            "description": "Run backup for environment or specific dashboard by id/slug/title",
            "required_entities": ["environment"],
            "optional_entities": ["dashboard_id", "dashboard_ref"],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "run_llm_validation",
            "domain": "llm",
            "description": "Run LLM dashboard validation by dashboard id/slug/title",
            "required_entities": [],
            "optional_entities": ["dashboard_ref", "environment", "provider"],
            # "defaults" are merged into intent entities by _plan_intent_with_llm.
            "defaults": {
                "environment": default_env_id,
                "provider": validation_provider,
            },
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "run_llm_documentation",
            "domain": "llm",
            "description": "Generate dataset documentation via LLM",
            "required_entities": ["dataset_id"],
            "optional_entities": ["environment", "provider"],
            "defaults": {
                "environment": default_env_id,
                "provider": documentation_provider,
            },
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
        {
            "operation": "get_health_summary",
            "domain": "health",
            "description": "Get summary of dashboard health and failing validations",
            "required_entities": [],
            "optional_entities": ["environment"],
            "risk_level": "safe",
            "requires_confirmation": False,
        },
    ]
    # Keep only tools for which the user passes at least one permission check.
    available: List[Dict[str, Any]] = []
    for tool in candidates:
        checks = INTENT_PERMISSION_CHECKS.get(tool["operation"], [])
        if checks and not _has_any_permission(current_user, checks):
            continue
        available.append(tool)
    if dataset_review_context is not None:
        # Dataset-review tools are only exposed while a review session is bound.
        dataset_tools: List[Dict[str, Any]] = [
            {
                "operation": "dataset_review_answer_context",
                "domain": "dataset_review",
                "description": "Answer questions using the currently bound dataset review session context",
                "required_entities": ["dataset_review_session_id"],
                "optional_entities": [],
                "risk_level": "safe",
                "requires_confirmation": False,
            },
            {
                "operation": "dataset_review_approve_mappings",
                "domain": "dataset_review",
                "description": "Approve warning-sensitive execution mappings in the current dataset review session",
                "required_entities": ["dataset_review_session_id", "session_version"],
                "optional_entities": ["mapping_ids"],
                "risk_level": "guarded",
                "requires_confirmation": True,
            },
            {
                "operation": "dataset_review_set_field_semantics",
                "domain": "dataset_review",
                "description": "Apply explicit semantic field override or candidate selection in the current dataset review session",
                "required_entities": [
                    "dataset_review_session_id",
                    "session_version",
                    "field_id",
                ],
                "optional_entities": [
                    "candidate_id",
                    "verbose_name",
                    "description",
                    "display_format",
                    "lock_field",
                ],
                "risk_level": "guarded",
                "requires_confirmation": True,
            },
            {
                "operation": "dataset_review_generate_sql_preview",
                "domain": "dataset_review",
                "description": "Generate a Superset-compiled SQL preview for the current dataset review session",
                "required_entities": ["dataset_review_session_id", "session_version"],
                "optional_entities": [],
                "risk_level": "guarded",
                "requires_confirmation": True,
            },
        ]
        # Same permission filtering as the static candidates above.
        for tool in dataset_tools:
            checks = INTENT_PERMISSION_CHECKS.get(tool["operation"], [])
            if checks and not _has_any_permission(current_user, checks):
                continue
            available.append(tool)
    return available
# [/DEF:_build_tool_catalog:Function]
# [DEF:_coerce_intent_entities:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize intent entity value types from LLM output to route-compatible values.
# @PRE: intent contains entities dict or missing entities.
# @POST: Returned intent has numeric ids coerced where possible and string values stripped.
def _coerce_intent_entities(intent: Dict[str, Any]) -> Dict[str, Any]:
entities = intent.get("entities")
if not isinstance(entities, dict):
intent["entities"] = {}
entities = intent["entities"]
for key in ("dashboard_id", "dataset_id"):
value = entities.get(key)
if isinstance(value, str) and value.strip().isdigit():
entities[key] = int(value.strip())
for key, value in list(entities.items()):
if isinstance(value, str):
entities[key] = value.strip()
return intent
# [/DEF:_coerce_intent_entities:Function]
# [DEF:_plan_intent_with_llm:Function]
# @COMPLEXITY: 2
# @PURPOSE: Use active LLM provider to select best tool/operation from dynamic catalog.
# @PRE: tools list contains allowed operations for current user.
# @POST: Returns normalized intent dict when planning succeeds; otherwise None.
async def _plan_intent_with_llm(
    message: str,
    tools: List[Dict[str, Any]],
    db: Session,
    config_manager: ConfigManager,
) -> Optional[Dict[str, Any]]:
    """Ask the configured planner LLM to map *message* onto one catalog tool.

    Returns a normalized intent dict (entities coerced, tool defaults
    applied, risk escalated for production deploy/migration targets), a
    "clarify" intent when the model declares ambiguity, or None whenever
    planning is impossible (no tools, no provider/API key, LLM failure, or
    an operation outside the catalog) — callers fall back to regex parsing.
    """
    if not tools:
        return None
    llm_settings = normalize_llm_settings(config_manager.get_config().settings.llm)
    # Optional per-settings override of which provider/model does planning.
    planner_provider_token = llm_settings.get("assistant_planner_provider")
    planner_model_override = llm_settings.get("assistant_planner_model")
    llm_service = LLMProviderService(db)
    providers = llm_service.get_all_providers()
    provider_id = _resolve_provider_id(planner_provider_token, db)
    provider = next((p for p in providers if p.id == provider_id), None)
    if not provider:
        return None
    api_key = llm_service.get_decrypted_api_key(provider.id)
    if not api_key:
        return None
    planner = LLMClient(
        provider_type=LLMProviderType(provider.provider_type),
        api_key=api_key,
        base_url=provider.base_url,
        default_model=planner_model_override or provider.default_model,
    )
    # Prompt pins a strict JSON schema so the response can be consumed directly.
    system_instruction = (
        "You are a deterministic intent planner for backend tools.\n"
        "Choose exactly one operation from available_tools or return clarify.\n"
        "Output strict JSON object:\n"
        "{"
        '"domain": string, '
        '"operation": string, '
        '"entities": object, '
        '"confidence": number, '
        '"risk_level": "safe"|"guarded"|"dangerous", '
        '"requires_confirmation": boolean'
        "}\n"
        "Rules:\n"
        "- Use only operation names from available_tools.\n"
        '- If input is ambiguous, operation must be "clarify" with low confidence.\n'
        "- If dashboard is provided as name/slug (e.g., COVID), put it into entities.dashboard_ref.\n"
        "- Keep entities minimal and factual.\n"
    )
    payload = {
        "available_tools": tools,
        "user_message": message,
        "known_environments": [
            {"id": e.id, "name": e.name} for e in config_manager.get_environments()
        ],
    }
    try:
        response = await planner.get_json_completion(
            [
                {"role": "system", "content": system_instruction},
                {"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
            ]
        )
    except Exception as exc:
        # Planner failures are non-fatal: log with traceback and let the
        # caller fall back to the deterministic command parser.
        import traceback
        logger.warning(
            f"[assistant.planner][fallback] LLM planner unavailable: {exc}\n{traceback.format_exc()}"
        )
        return None
    if not isinstance(response, dict):
        return None
    operation = response.get("operation")
    valid_ops = {tool["operation"] for tool in tools}
    if operation == "clarify":
        # Explicit ambiguity verdict from the model; low default confidence.
        return {
            "domain": "unknown",
            "operation": "clarify",
            "entities": {},
            "confidence": float(response.get("confidence", 0.3)),
            "risk_level": "safe",
            "requires_confirmation": False,
        }
    if operation not in valid_ops:
        # Hallucinated operation name — reject the whole plan.
        return None
    by_operation = {tool["operation"]: tool for tool in tools}
    selected = by_operation[operation]
    # Model fields win; catalog contract supplies fallbacks.
    intent = {
        "domain": response.get("domain") or selected["domain"],
        "operation": operation,
        "entities": response.get("entities", {}),
        "confidence": float(response.get("confidence", 0.75)),
        "risk_level": response.get("risk_level") or selected["risk_level"],
        "requires_confirmation": bool(
            response.get("requires_confirmation", selected["requires_confirmation"])
        ),
    }
    intent = _coerce_intent_entities(intent)
    # Fill catalog-declared defaults only for entities the model left empty.
    defaults = selected.get("defaults") or {}
    for key, value in defaults.items():
        if value and not intent["entities"].get(key):
            intent["entities"][key] = value
    if operation in {"deploy_dashboard", "execute_migration"}:
        # Production targets are always escalated to dangerous + confirmation.
        env_token = intent["entities"].get("environment") or intent["entities"].get(
            "target_env"
        )
        if _is_production_env(env_token, config_manager):
            intent["risk_level"] = "dangerous"
            intent["requires_confirmation"] = True
    return intent
# [/DEF:_plan_intent_with_llm:Function]
# [DEF:_authorize_intent:Function]
# @COMPLEXITY: 2
# @PURPOSE: Validate user permissions for parsed intent before confirmation/dispatch.
# @PRE: intent.operation is present for known assistant command domains.
# @POST: Returns if authorized; raises HTTPException(403) when denied.
def _authorize_intent(intent: Dict[str, Any], current_user: User):
    """Raise a 403 HTTPException unless the user may run the parsed operation."""
    try:
        checks = INTENT_PERMISSION_CHECKS[intent.get("operation")]
    except KeyError:
        # Operations without declared checks are implicitly allowed.
        return
    _check_any_permission(current_user, checks)
# [/DEF:_authorize_intent:Function]
# [/DEF:AssistantLlmPlanner:Module]

View File

@@ -0,0 +1,407 @@
# [DEF:AssistantResolvers:Module]
# @COMPLEXITY: 2
# @PURPOSE: Environment, dashboard, provider, and task resolution utilities for the assistant API.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [ConfigManager]
# @RELATION: DEPENDS_ON -> [SupersetClient]
# @INVARIANT: Resolution functions never raise; they return None on failure.
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional, Tuple, cast
from sqlalchemy.orm import Session
from src.core.logger import belief_scope, logger
from src.core.config_manager import ConfigManager
from src.core.superset_client import SupersetClient
from src.services.llm_provider import LLMProviderService
from src.services.llm_prompt_templates import resolve_bound_provider_id
from src.schemas.auth import User
logger = cast(Any, logger)
# [DEF:_extract_id:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract first regex match group from text by ordered pattern list.
# @PRE: patterns contain at least one capture group.
# @POST: Returns first matched token or None.
def _extract_id(text: str, patterns: List[str]) -> Optional[str]:
for p in patterns:
m = re.search(p, text, flags=re.IGNORECASE)
if m:
return m.group(1)
return None
# [/DEF:_extract_id:Function]
# [DEF:_resolve_env_id:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve environment identifier/name token to canonical environment id.
# @PRE: config_manager provides environment list.
# @POST: Returns matched environment id or None.
def _resolve_env_id(
token: Optional[str], config_manager: ConfigManager
) -> Optional[str]:
if not token:
return None
normalized = token.strip().lower()
envs = config_manager.get_environments()
for env in envs:
if env.id.lower() == normalized or env.name.lower() == normalized:
return env.id
return None
# [/DEF:_resolve_env_id:Function]
# [DEF:_is_production_env:Function]
# @COMPLEXITY: 2
# @PURPOSE: Determine whether environment token resolves to production-like target.
# @PRE: config_manager provides environments or token text is provided.
# @POST: Returns True for production/prod synonyms, else False.
def _is_production_env(token: Optional[str], config_manager: ConfigManager) -> bool:
    """True when *token* names a production-like environment (prod/production/прод)."""
    markers = ("prod", "production", "прод")
    env_id = _resolve_env_id(token, config_manager)
    if not env_id:
        # Unresolved token: accept only literal prod synonyms.
        return (token or "").strip().lower() in set(markers)
    env = next((e for e in config_manager.get_environments() if e.id == env_id), None)
    if env is None:
        return False
    haystack = f"{env.id} {env.name}".lower()
    return any(marker in haystack for marker in markers)
# [/DEF:_is_production_env:Function]
# [DEF:_resolve_provider_id:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve provider token to provider id with active/default fallback.
# @PRE: db session can load provider list through LLMProviderService.
# @POST: Returns provider id or None when no providers configured.
def _resolve_provider_id(
    provider_token: Optional[str],
    db: Session,
    config_manager: Optional[ConfigManager] = None,
    task_key: Optional[str] = None,
) -> Optional[str]:
    """Resolve a provider token to a provider id.

    Preference order: explicit token (id or name, case-insensitive) ->
    task-bound provider from settings (when config_manager/task_key given) ->
    active provider -> first configured provider. None when no providers.
    """
    providers = LLMProviderService(db).get_all_providers()
    if not providers:
        return None
    if provider_token:
        needle = provider_token.strip().lower()
        matched = next(
            (p for p in providers if needle in (p.id.lower(), p.name.lower())),
            None,
        )
        if matched:
            return matched.id
    if config_manager and task_key:
        try:
            bound = resolve_bound_provider_id(
                config_manager.get_config().settings.llm, task_key
            )
            if bound and any(p.id == bound for p in providers):
                return bound
        except Exception:
            # Settings lookup is best-effort; fall through to active/default.
            pass
    for p in providers:
        if p.is_active:
            return p.id
    return providers[0].id
# [/DEF:_resolve_provider_id:Function]
# [DEF:_get_default_environment_id:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve default environment id from settings or first configured environment.
# @PRE: config_manager returns environments list.
# @POST: Returns default environment id or None when environment list is empty.
def _get_default_environment_id(config_manager: ConfigManager) -> Optional[str]:
configured = config_manager.get_environments()
if not configured:
return None
preferred = None
if hasattr(config_manager, "get_config"):
try:
preferred = config_manager.get_config().settings.default_environment_id
except Exception:
preferred = None
if preferred and any(env.id == preferred for env in configured):
return preferred
explicit_default = next(
(env.id for env in configured if getattr(env, "is_default", False)), None
)
return explicit_default or configured[0].id
# [/DEF:_get_default_environment_id:Function]
# [DEF:_resolve_dashboard_id_by_ref:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve dashboard id by title or slug reference in selected environment.
# @PRE: dashboard_ref is a non-empty string-like token.
# @POST: Returns dashboard id when uniquely matched, otherwise None.
def _resolve_dashboard_id_by_ref(
dashboard_ref: Optional[str],
env_id: Optional[str],
config_manager: ConfigManager,
) -> Optional[int]:
if not dashboard_ref or not env_id:
return None
env = next(
(item for item in config_manager.get_environments() if item.id == env_id), None
)
if not env:
return None
needle = dashboard_ref.strip().lower()
try:
client = SupersetClient(env)
_, dashboards = client.get_dashboards(query={"page_size": 200})
except Exception as exc:
logger.warning(
f"[assistant.dashboard_resolve][failed] ref={dashboard_ref} env={env_id} error={exc}"
)
return None
exact = next(
(
d
for d in dashboards
if str(d.get("slug", "")).lower() == needle
or str(d.get("dashboard_title", "")).lower() == needle
or str(d.get("title", "")).lower() == needle
),
None,
)
if exact:
return int(exact.get("id"))
partial = [
d
for d in dashboards
if needle in str(d.get("dashboard_title", d.get("title", ""))).lower()
]
if len(partial) == 1 and partial[0].get("id") is not None:
return int(partial[0]["id"])
return None
# [/DEF:_resolve_dashboard_id_by_ref:Function]
# [DEF:_resolve_dashboard_id_entity:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve dashboard id from intent entities using numeric id or dashboard_ref fallback.
# @PRE: entities may contain dashboard_id as int/str and optional dashboard_ref.
# @POST: Returns resolved dashboard id or None when ambiguous/unresolvable.
def _resolve_dashboard_id_entity(
entities: Dict[str, Any],
config_manager: ConfigManager,
env_hint: Optional[str] = None,
) -> Optional[int]:
raw_dashboard_id = entities.get("dashboard_id")
dashboard_ref = entities.get("dashboard_ref")
if isinstance(raw_dashboard_id, int):
return raw_dashboard_id
if isinstance(raw_dashboard_id, str):
token = raw_dashboard_id.strip()
if token.isdigit():
return int(token)
if token and not dashboard_ref:
dashboard_ref = token
if not dashboard_ref:
return None
env_token = (
env_hint
or entities.get("environment")
or entities.get("source_env")
or entities.get("target_env")
)
env_id = (
_resolve_env_id(env_token, config_manager)
if env_token
else _get_default_environment_id(config_manager)
)
return _resolve_dashboard_id_by_ref(str(dashboard_ref), env_id, config_manager)
# [/DEF:_resolve_dashboard_id_entity:Function]
# [DEF:_get_environment_name_by_id:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve human-readable environment name by id.
# @PRE: environment id may be None.
# @POST: Returns matching environment name or fallback id.
def _get_environment_name_by_id(
env_id: Optional[str], config_manager: ConfigManager
) -> str:
if not env_id:
return "unknown"
env = next(
(item for item in config_manager.get_environments() if item.id == env_id), None
)
return env.name if env else env_id
# [/DEF:_get_environment_name_by_id:Function]
# [DEF:_extract_result_deep_links:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build deep-link actions to verify task result from assistant chat.
# @PRE: task object is available.
# @POST: Returns zero or more assistant actions for dashboard open/diff.
def _extract_result_deep_links(
    task: Any, config_manager: ConfigManager
) -> List:
    """Build deep-link actions (open dashboard / show diff) for a finished task.

    Extracts a dashboard id and environment id from plugin-specific result
    payloads, falling back to the task params when the result lacks them.
    Unknown plugin ids yield no dashboard and therefore no actions.
    NOTE(review): payload field names below mirror each plugin's result
    schema as used here — confirm against the plugin implementations.
    """
    # Local import avoids a circular dependency with the schemas module.
    from ._schemas import AssistantAction
    plugin_id = getattr(task, "plugin_id", None)
    params = getattr(task, "params", {}) or {}
    result = getattr(task, "result", {}) or {}
    actions: list = []
    dashboard_id: Optional[int] = None
    env_id: Optional[str] = None
    if plugin_id == "superset-migration":
        # Prefer the first migrated dashboard; fall back to the selection params.
        migrated = (
            result.get("migrated_dashboards") if isinstance(result, dict) else None
        )
        if isinstance(migrated, list) and migrated:
            first = migrated[0]
            if isinstance(first, dict) and first.get("id") is not None:
                dashboard_id = int(first.get("id"))
        if (
            dashboard_id is None
            and isinstance(params.get("selected_ids"), list)
            and params["selected_ids"]
        ):
            dashboard_id = int(params["selected_ids"][0])
        env_id = params.get("target_env_id")
    elif plugin_id == "superset-backup":
        # Prefer the first backed-up dashboard; fall back to requested ids.
        dashboards = result.get("dashboards") if isinstance(result, dict) else None
        if isinstance(dashboards, list) and dashboards:
            first = dashboards[0]
            if isinstance(first, dict) and first.get("id") is not None:
                dashboard_id = int(first.get("id"))
        if (
            dashboard_id is None
            and isinstance(params.get("dashboard_ids"), list)
            and params["dashboard_ids"]
        ):
            dashboard_id = int(params["dashboard_ids"][0])
        env_id = params.get("environment_id") or _resolve_env_id(
            result.get("environment"), config_manager
        )
    elif plugin_id == "llm_dashboard_validation":
        if params.get("dashboard_id") is not None:
            dashboard_id = int(params["dashboard_id"])
        env_id = params.get("environment_id")
    # "Open dashboard" needs both a dashboard and an environment.
    if dashboard_id is not None and env_id:
        env_name = _get_environment_name_by_id(env_id, config_manager)
        actions.append(
            AssistantAction(
                type="open_route",
                label=f"Открыть дашборд в {env_name}",
                target=f"/dashboards/{dashboard_id}?env_id={env_id}",
            )
        )
    # Diff link only needs a dashboard id.
    if dashboard_id is not None:
        actions.append(
            AssistantAction(
                type="open_diff",
                label="Показать Diff",
                target=str(dashboard_id),
            )
        )
    return actions
# [/DEF:_extract_result_deep_links:Function]
# [DEF:_build_task_observability_summary:Function]
# @COMPLEXITY: 2
# @PURPOSE: Build compact textual summary for completed tasks to reduce "black box" effect.
# @PRE: task may contain plugin-specific result payload.
# @POST: Returns non-empty summary line for known task types or empty string fallback.
def _build_task_observability_summary(task: Any, config_manager: ConfigManager) -> str:
    """Build a one-line, user-facing (Russian) summary of a task's outcome.

    Knows the result payloads of the migration, backup, and LLM-validation
    plugins; other plugins get a generic status line for terminal states and
    an empty string otherwise.
    """
    plugin_id = getattr(task, "plugin_id", None)
    status = str(getattr(task, "status", "")).upper()
    params = getattr(task, "params", {}) or {}
    result = getattr(task, "result", {}) or {}
    if plugin_id == "superset-migration" and isinstance(result, dict):
        migrated = len(result.get("migrated_dashboards") or [])
        failed_rows = result.get("failed_dashboards") or []
        failed = len(failed_rows)
        # When the payload omits the selection count, derive it from outcomes.
        selected = result.get("selected_dashboards", migrated + failed)
        mappings = result.get("mapping_count", 0)
        target_env_id = params.get("target_env_id")
        target_env_name = _get_environment_name_by_id(target_env_id, config_manager)
        warning = ""
        if failed_rows:
            # Surface only the first failure to keep the summary compact.
            first = failed_rows[0]
            warning = (
                f" Внимание: {first.get('title') or first.get('id')}: "
                f"{first.get('error') or 'ошибка'}."
            )
        return (
            f"Сводка миграции: выбрано {selected}, перенесено {migrated}, "
            f"с ошибками {failed}, маппингов {mappings}, целевая среда {target_env_name}."
            f"{warning}"
        )
    if plugin_id == "superset-backup" and isinstance(result, dict):
        total = int(result.get("total_dashboards", 0) or 0)
        ok = int(result.get("backed_up_dashboards", 0) or 0)
        failed = int(result.get("failed_dashboards", 0) or 0)
        env_id = params.get("environment_id") or _resolve_env_id(
            result.get("environment"), config_manager
        )
        env_name = _get_environment_name_by_id(env_id, config_manager)
        failures = result.get("failures") or []
        warning = ""
        if failures:
            first = failures[0]
            warning = (
                f" Внимание: {first.get('title') or first.get('id')}: "
                f"{first.get('error') or 'ошибка'}."
            )
        return (
            f"Сводка бэкапа: среда {env_name}, всего {total}, успешно {ok}, "
            f"с ошибками {failed}. {status}.{warning}"
        )
    if plugin_id == "llm_dashboard_validation" and isinstance(result, dict):
        report_status = result.get("status") or status
        report_summary = result.get("summary") or "Итог недоступен."
        issues = result.get("issues") or []
        return f"Сводка валидации: статус {report_status}, проблем {len(issues)}. {report_summary}"
    # Fallback for unknown task payloads.
    if status in {"SUCCESS", "FAILED"}:
        return f"Задача завершена со статусом {status}."
    return ""
# [/DEF:_build_task_observability_summary:Function]
# [/DEF:AssistantResolvers:Module]

View File

@@ -0,0 +1,601 @@
# [DEF:AssistantRoutes:Module]
# @COMPLEXITY: 5
# @PURPOSE: FastAPI route handlers for the assistant API — message sending, confirmation, conversation management.
# @LAYER: API
# @RELATION: DEPENDS_ON -> [AssistantSchemas]
# @RELATION: DEPENDS_ON -> [AssistantHistory]
# @RELATION: DEPENDS_ON -> [AssistantCommandParser]
# @RELATION: DEPENDS_ON -> [AssistantLlmPlanner]
# @RELATION: DEPENDS_ON -> [AssistantDatasetReview]
# @RELATION: DEPENDS_ON -> [AssistantDispatch]
# @INVARIANT: Risky operations are never executed without valid confirmation token.
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any, Dict, Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status
from sqlalchemy.orm import Session
from sqlalchemy import desc
from src.core.logger import belief_scope, logger
from src.core.task_manager import TaskManager
from src.dependencies import (
get_current_user,
get_task_manager,
get_config_manager,
has_permission,
)
from src.core.config_manager import ConfigManager
from src.core.database import get_db
from src.models.assistant import (
AssistantAuditRecord,
AssistantMessageRecord,
)
from src.schemas.auth import User
from ._schemas import (
_SAFE_OPS,
ASSISTANT_AUDIT,
CONFIRMATIONS,
CONVERSATIONS,
USER_ACTIVE_CONVERSATION,
AssistantAction,
AssistantMessageRequest,
AssistantMessageResponse,
ConfirmationRecord,
)
from ._history import (
_append_history,
_audit,
_cleanup_history_ttl,
_coerce_query_bool,
_is_conversation_archived,
_load_confirmation_from_db,
_persist_audit,
_persist_confirmation,
_persist_message,
_resolve_or_create_conversation,
_update_confirmation_state,
)
from ._command_parser import _parse_command
from ._llm_planner import (
_authorize_intent,
_build_tool_catalog,
_plan_intent_with_llm,
)
from ._dataset_review import (
_load_dataset_review_context,
_plan_dataset_review_intent,
)
from ._dispatch import (
_async_confirmation_summary,
_clarification_text_for_intent,
_dispatch_intent,
)
router = APIRouter(tags=["Assistant"])
@router.post("/messages", response_model=AssistantMessageResponse)
# [DEF:send_message:Function]
# @COMPLEXITY: 5
# @PURPOSE: Parse assistant command, enforce safety gates, and dispatch executable intent.
# @DATA_CONTRACT: Input[AssistantMessageRequest,User,TaskManager,ConfigManager,Session] -> Output[AssistantMessageResponse]
# @RELATION: DEPENDS_ON -> [_plan_intent_with_llm]
# @RELATION: DEPENDS_ON -> [_parse_command]
# @RELATION: DEPENDS_ON -> [_dispatch_intent]
# @RELATION: DEPENDS_ON -> [_append_history]
# @RELATION: DEPENDS_ON -> [_persist_message]
# @RELATION: DEPENDS_ON -> [_audit]
# @SIDE_EFFECT: Persists chat/audit state, mutates in-memory conversation and confirmation stores, and may create confirmation records.
# @PRE: Authenticated user is available and message text is non-empty.
# @POST: Response state is one of clarification/confirmation/started/success/denied/failed.
# @RETURN: AssistantMessageResponse with operation feedback and optional actions.
# @INVARIANT: non-safe operations are gated with confirmation before execution from this endpoint.
async def send_message(
    request: AssistantMessageRequest,
    current_user: User = Depends(get_current_user),
    task_manager: TaskManager = Depends(get_task_manager),
    config_manager: ConfigManager = Depends(get_config_manager),
    db: Session = Depends(get_db),
):
    """Parse an assistant message, enforce safety gates, and dispatch it.

    Flow: plan intent (LLM planner with regex-parser fallback) ->
    clarification on unknown/low-confidence intent -> authorization ->
    confirmation gate for non-safe operations -> dispatch. Every outcome is
    appended to in-memory history, persisted, and audited.
    """
    # Fix: module imports only the `datetime` class; previously the code
    # reached timedelta via an opaque __import__('datetime') hack.
    from datetime import timedelta

    with belief_scope('send_message'):
        logger.reason('Belief protocol reasoning checkpoint for send_message')
        user_id = current_user.id
        dataset_review_context = _load_dataset_review_context(
            request.dataset_review_session_id, current_user, db
        )
        conversation_id = _resolve_or_create_conversation(
            user_id, request.conversation_id, db
        )
        _append_history(user_id, conversation_id, 'user', request.message)
        _persist_message(db, user_id, conversation_id, 'user', request.message)
        # NOTE(review): dataset_review_context is not forwarded here although
        # _build_tool_catalog accepts it — confirm whether dataset-review
        # tools should also be visible to the LLM planner.
        tools_catalog = _build_tool_catalog(current_user, config_manager, db)
        intent = None
        try:
            intent = await _plan_intent_with_llm(
                request.message, tools_catalog, db, config_manager
            )
        except Exception as exc:
            logger.warning(f'[assistant.planner][fallback] Planner error: {exc}')
        if not intent:
            # Deterministic regex parser as fallback when the planner fails.
            intent = _parse_command(request.message, config_manager)
        if dataset_review_context:
            # A bound dataset review session overrides the general plan.
            dataset_review_intent = _plan_dataset_review_intent(
                request.message, dataset_review_context
            )
            if dataset_review_intent is not None:
                intent = dataset_review_intent
        confidence = float(intent.get('confidence', 0.0))
        if intent.get('domain') == 'unknown' or confidence < 0.6:
            # Ambiguous input: ask the user to clarify instead of guessing.
            text = 'Команда неоднозначна. Уточните действие: git / migration / backup / llm / status.'
            _append_history(
                user_id, conversation_id, 'assistant', text,
                state='needs_clarification',
            )
            _persist_message(
                db, user_id, conversation_id, 'assistant', text,
                state='needs_clarification', metadata={'intent': intent},
            )
            audit_payload = {
                'decision': 'needs_clarification',
                'message': request.message,
                'intent': intent,
                'dataset_review_session_id': request.dataset_review_session_id,
            }
            _audit(user_id, audit_payload)
            _persist_audit(db, user_id, audit_payload, conversation_id)
            logger.reflect('Belief protocol postcondition checkpoint for send_message')
            return AssistantMessageResponse(
                conversation_id=conversation_id,
                response_id=str(uuid.uuid4()),
                state='needs_clarification',
                text=text,
                intent=intent,
                actions=[AssistantAction(type='rephrase', label='Rephrase command')],
                created_at=datetime.utcnow(),
            )
        try:
            _authorize_intent(intent, current_user)
            operation = intent.get('operation')
            if operation not in _SAFE_OPS:
                # Invariant: non-safe operations require explicit confirmation.
                confirmation_id = str(uuid.uuid4())
                confirm = ConfirmationRecord(
                    id=confirmation_id,
                    user_id=user_id,
                    conversation_id=conversation_id,
                    intent=intent,
                    dispatch={'intent': intent},
                    expires_at=datetime.utcnow() + timedelta(minutes=5),
                    created_at=datetime.utcnow(),
                )
                CONFIRMATIONS[confirmation_id] = confirm
                _persist_confirmation(db, confirm)
                text = await _async_confirmation_summary(intent, config_manager, db)
                _append_history(
                    user_id, conversation_id, 'assistant', text,
                    state='needs_confirmation', confirmation_id=confirmation_id,
                )
                _persist_message(
                    db, user_id, conversation_id, 'assistant', text,
                    state='needs_confirmation',
                    confirmation_id=confirmation_id,
                    metadata={
                        'intent': intent,
                        'dataset_review_context': dataset_review_context,
                        'actions': [
                            {'type': 'confirm', 'label': '✅ Подтвердить', 'target': confirmation_id},
                            {'type': 'cancel', 'label': '❌ Отменить', 'target': confirmation_id},
                        ],
                    },
                )
                audit_payload = {
                    'decision': 'needs_confirmation',
                    'message': request.message,
                    'intent': intent,
                    'confirmation_id': confirmation_id,
                    'dataset_review_session_id': request.dataset_review_session_id,
                }
                _audit(user_id, audit_payload)
                _persist_audit(db, user_id, audit_payload, conversation_id)
                logger.reflect('Belief protocol postcondition checkpoint for send_message')
                return AssistantMessageResponse(
                    conversation_id=conversation_id,
                    response_id=str(uuid.uuid4()),
                    state='needs_confirmation',
                    text=text,
                    intent=intent,
                    confirmation_id=confirmation_id,
                    actions=[
                        AssistantAction(type='confirm', label='✅ Подтвердить', target=confirmation_id),
                        AssistantAction(type='cancel', label='❌ Отменить', target=confirmation_id),
                    ],
                    created_at=datetime.utcnow(),
                )
            # Safe operation: dispatch immediately.
            text, task_id, actions = await _dispatch_intent(
                intent, current_user, task_manager, config_manager, db
            )
            state = 'started' if task_id else 'success'
            _append_history(
                user_id, conversation_id, 'assistant', text,
                state=state, task_id=task_id,
            )
            _persist_message(
                db, user_id, conversation_id, 'assistant', text,
                state=state, task_id=task_id,
                metadata={
                    'intent': intent,
                    'dataset_review_context': dataset_review_context,
                    'actions': [a.model_dump() for a in actions],
                },
            )
            audit_payload = {
                'decision': 'executed',
                'message': request.message,
                'intent': intent,
                'task_id': task_id,
                'dataset_review_session_id': request.dataset_review_session_id,
            }
            _audit(user_id, audit_payload)
            _persist_audit(db, user_id, audit_payload, conversation_id)
            logger.reflect('Belief protocol postcondition checkpoint for send_message')
            return AssistantMessageResponse(
                conversation_id=conversation_id,
                response_id=str(uuid.uuid4()),
                state=state,
                text=text,
                intent=intent,
                task_id=task_id,
                actions=actions,
                created_at=datetime.utcnow(),
            )
        except HTTPException as exc:
            detail_text = str(exc.detail)
            # Heuristic: 400/422 errors whose message asks for more input are
            # surfaced as clarification prompts rather than hard failures.
            is_clarification_error = exc.status_code in (400, 422) and (
                detail_text.lower().startswith('missing')
                or 'укажите' in detail_text.lower()
                or 'выберите' in detail_text.lower()
            )
            if exc.status_code == status.HTTP_403_FORBIDDEN:
                state = 'denied'
            elif is_clarification_error:
                state = 'needs_clarification'
            else:
                state = 'failed'
            text = (
                _clarification_text_for_intent(intent, detail_text)
                if state == 'needs_clarification'
                else detail_text
            )
            _append_history(user_id, conversation_id, 'assistant', text, state=state)
            _persist_message(
                db, user_id, conversation_id, 'assistant', text,
                state=state, metadata={'intent': intent},
            )
            audit_payload = {
                'decision': state,
                'message': request.message,
                'intent': intent,
                'error': text,
                'dataset_review_session_id': request.dataset_review_session_id,
            }
            _audit(user_id, audit_payload)
            _persist_audit(db, user_id, audit_payload, conversation_id)
            logger.reflect('Belief protocol postcondition checkpoint for send_message')
            return AssistantMessageResponse(
                conversation_id=conversation_id,
                response_id=str(uuid.uuid4()),
                state=state,
                text=text,
                intent=intent,
                actions=(
                    [AssistantAction(type='rephrase', label='Rephrase command')]
                    if state == 'needs_clarification'
                    else []
                ),
                created_at=datetime.utcnow(),
            )
# [/DEF:send_message:Function]
@router.post(
    "/confirmations/{confirmation_id}/confirm", response_model=AssistantMessageResponse
)
# [DEF:confirm_operation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Execute a previously requested risky operation once the user explicitly confirms it.
# @PRE: confirmation_id exists, belongs to current user, is pending, and not expired.
# @POST: Confirmation state becomes consumed and operation result is persisted in history.
# @RETURN: AssistantMessageResponse with task details when async execution starts.
async def confirm_operation(
    confirmation_id: str,
    current_user: User = Depends(get_current_user),
    task_manager: TaskManager = Depends(get_task_manager),
    config_manager: ConfigManager = Depends(get_config_manager),
    db: Session = Depends(get_db),
):
    with belief_scope("assistant.confirm"):
        # Resolve the token: in-memory cache first, persistent store as fallback.
        record = CONFIRMATIONS.get(confirmation_id) or _load_confirmation_from_db(
            db, confirmation_id
        )
        if record is None:
            raise HTTPException(status_code=404, detail="Confirmation not found")
        CONFIRMATIONS[confirmation_id] = record
        # Guard clauses: ownership, lifecycle state, and expiry must all hold.
        if record.user_id != current_user.id:
            raise HTTPException(
                status_code=403, detail="Confirmation does not belong to current user"
            )
        if record.state != "pending":
            raise HTTPException(
                status_code=400, detail=f"Confirmation already {record.state}"
            )
        if datetime.utcnow() > record.expires_at:
            record.state = "expired"
            _update_confirmation_state(db, confirmation_id, "expired")
            raise HTTPException(status_code=400, detail="Confirmation expired")
        # Dispatch the stored intent, then consume the token so it cannot re-run.
        intent = record.intent
        text, task_id, actions = await _dispatch_intent(
            intent, current_user, task_manager, config_manager, db
        )
        record.state = "consumed"
        _update_confirmation_state(db, confirmation_id, "consumed")
        result_state = "started" if task_id else "success"
        _append_history(
            current_user.id,
            record.conversation_id,
            "assistant",
            text,
            state=result_state,
            task_id=task_id,
        )
        _persist_message(
            db,
            current_user.id,
            record.conversation_id,
            "assistant",
            text,
            state=result_state,
            task_id=task_id,
            metadata={"intent": intent, "confirmation_id": confirmation_id},
        )
        audit_payload = {
            "decision": "confirmed_execute",
            "confirmation_id": confirmation_id,
            "task_id": task_id,
            "intent": intent,
        }
        _audit(current_user.id, audit_payload)
        _persist_audit(db, current_user.id, audit_payload, record.conversation_id)
        return AssistantMessageResponse(
            conversation_id=record.conversation_id,
            response_id=str(uuid.uuid4()),
            state=result_state,
            text=text,
            intent=intent,
            task_id=task_id,
            actions=actions,
            created_at=datetime.utcnow(),
        )
# [/DEF:confirm_operation:Function]
@router.post(
    "/confirmations/{confirmation_id}/cancel", response_model=AssistantMessageResponse
)
# [DEF:cancel_operation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Cancel a pending risky operation and mark its confirmation token as cancelled.
# @PRE: confirmation_id exists, belongs to current user, and is still pending.
# @POST: Confirmation becomes cancelled and cannot be executed anymore.
# @RETURN: AssistantMessageResponse confirming cancellation.
async def cancel_operation(
    confirmation_id: str,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    with belief_scope("assistant.cancel"):
        # Resolve the token from memory, falling back to the persistent store.
        record = CONFIRMATIONS.get(confirmation_id) or _load_confirmation_from_db(
            db, confirmation_id
        )
        if record is None:
            raise HTTPException(status_code=404, detail="Confirmation not found")
        CONFIRMATIONS[confirmation_id] = record
        if record.user_id != current_user.id:
            raise HTTPException(
                status_code=403, detail="Confirmation does not belong to current user"
            )
        if record.state != "pending":
            raise HTTPException(
                status_code=400, detail=f"Confirmation already {record.state}"
            )
        # Flip the token in both stores; nothing was ever dispatched.
        record.state = "cancelled"
        _update_confirmation_state(db, confirmation_id, "cancelled")
        text = "Операция отменена. Выполнение не запускалось."
        _append_history(
            current_user.id,
            record.conversation_id,
            "assistant",
            text,
            state="success",
            confirmation_id=confirmation_id,
        )
        _persist_message(
            db,
            current_user.id,
            record.conversation_id,
            "assistant",
            text,
            state="success",
            confirmation_id=confirmation_id,
            metadata={"intent": record.intent},
        )
        audit_payload = {
            "decision": "cancelled",
            "confirmation_id": confirmation_id,
            "intent": record.intent,
        }
        _audit(current_user.id, audit_payload)
        _persist_audit(db, current_user.id, audit_payload, record.conversation_id)
        return AssistantMessageResponse(
            conversation_id=record.conversation_id,
            response_id=str(uuid.uuid4()),
            state="success",
            text=text,
            intent=record.intent,
            confirmation_id=confirmation_id,
            actions=[],
            created_at=datetime.utcnow(),
        )
# [/DEF:cancel_operation:Function]
# [DEF:list_conversations:Function]
# @COMPLEXITY: 2
# @PURPOSE: Return paginated conversation list for current user with archived flag and last message preview.
# @PRE: Authenticated user context and valid pagination params.
# @POST: Conversations are grouped by conversation_id sorted by latest activity descending.
# @RETURN: Dict with items, paging metadata, and archive segmentation counts.
@router.get("/conversations")
async def list_conversations(
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
    include_archived: bool = Query(False),
    archived_only: bool = Query(False),
    search: Optional[str] = Query(None),
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    with belief_scope("assistant.conversations"):
        user_id = current_user.id
        include_archived = _coerce_query_bool(include_archived)
        archived_only = _coerce_query_bool(archived_only)
        _cleanup_history_ttl(db, user_id)
        rows = (
            db.query(AssistantMessageRecord)
            .filter(AssistantMessageRecord.user_id == user_id)
            .order_by(desc(AssistantMessageRecord.created_at))
            .all()
        )
        # Rows arrive newest-first, so the first row seen per conversation
        # carries its latest activity and preview fields.
        summary: Dict[str, Dict[str, Any]] = {}
        for row in rows:
            conv_id = row.conversation_id
            if not conv_id:
                continue
            entry = summary.get(conv_id)
            if entry is None:
                entry = summary[conv_id] = {
                    "conversation_id": conv_id,
                    "title": "",
                    "updated_at": row.created_at or datetime.utcnow(),
                    "last_message": row.text,
                    "last_role": row.role,
                    "last_state": row.state,
                    "last_task_id": row.task_id,
                    "message_count": 0,
                }
            entry["message_count"] += 1
            # Most recent non-empty user message becomes the title candidate.
            if not entry["title"] and row.role == "user" and row.text:
                entry["title"] = row.text.strip()[:80]
        needle = search.lower().strip() if search else ""
        # Segmentation counts are computed over all conversations,
        # before search/archive filters narrow the visible page.
        archived_total = sum(
            1
            for conv in summary.values()
            if _is_conversation_archived(conv.get("updated_at"))
        )
        active_total = len(summary) - archived_total
        items = []
        for conv in summary.values():
            conv["archived"] = _is_conversation_archived(conv.get("updated_at"))
            if not conv.get("title"):
                conv["title"] = f"Conversation {conv['conversation_id'][:8]}"
            if needle:
                haystack = (
                    f"{conv.get('title', '')} {conv.get('last_message', '')}".lower()
                )
                if needle not in haystack:
                    continue
            if archived_only and not conv["archived"]:
                continue
            if conv["archived"] and not archived_only and not include_archived:
                continue
            ts = conv.get("updated_at")
            conv["updated_at"] = ts.isoformat() if isinstance(ts, datetime) else None
            items.append(conv)
        items.sort(key=lambda c: c.get("updated_at") or "", reverse=True)
        total = len(items)
        offset = (page - 1) * page_size
        return {
            "items": items[offset : offset + page_size],
            "total": total,
            "page": page,
            "page_size": page_size,
            "has_next": offset + page_size < total,
            "active_total": active_total,
            "archived_total": archived_total,
        }
# [/DEF:list_conversations:Function]
# [DEF:delete_conversation:Function]
# @COMPLEXITY: 2
# @PURPOSE: Delete a conversation's messages and clear its in-memory trace.
# @PRE: conversation_id belongs to current_user.
# @POST: Conversation records are removed from DB and CONVERSATIONS cache.
@router.delete("/conversations/{conversation_id}")
async def delete_conversation(
    conversation_id: str,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    with belief_scope("assistant.conversations.delete"):
        user_id = current_user.id
        # Drop any cached in-memory trace so cache and DB stay consistent.
        CONVERSATIONS.pop((user_id, conversation_id), None)
        # Remove every persisted message belonging to this conversation.
        deleted_count = (
            db.query(AssistantMessageRecord)
            .filter(
                AssistantMessageRecord.user_id == user_id,
                AssistantMessageRecord.conversation_id == conversation_id,
            )
            .delete()
        )
        db.commit()
        if not deleted_count:
            raise HTTPException(
                status_code=404, detail="Conversation not found or already deleted"
            )
        return {
            "status": "success",
            "deleted": deleted_count,
            "conversation_id": conversation_id,
        }
# [/DEF:delete_conversation:Function]
@router.get("/history")
# [DEF:get_history:Function]
# @PURPOSE: Retrieve paginated assistant conversation history for current user.
# @PRE: Authenticated user is available and page params are valid.
# @POST: Returns persistent messages and mirrored in-memory snapshot for diagnostics.
# @RETURN: Dict with items, paging metadata, and resolved conversation_id.
async def get_history(
page: int = Query(1, ge=1),
page_size: int = Query(20, ge=1, le=100),
conversation_id: Optional[str] = Query(None),
from_latest: bool = Query(False),
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
with belief_scope("assistant.history"):
user_id = current_user.id
_cleanup_history_ttl(db, user_id)
conv_id = _resolve_or_create_conversation(user_id, conversation_id, db)
base_query = db.query(AssistantMessageRecord).filter(
AssistantMessageRecord.user_id == user_id,
AssistantMessageRecord.conversation_id == conv_id,
)
total = base_query.count()
start = (page - 1) * page_size
if from_latest:
rows = (
base_query.order_by(desc(AssistantMessageRecord.created_at))
.offset(start)
.limit(page_size)
.all()
)
rows = list(reversed(rows))
else:
rows = (
base_query.order_by(AssistantMessageRecord.created_at.asc())
.offset(start)
.limit(page_size)
.all()
)
persistent_items = [
{
"message_id": row.id,
"conversation_id": row.conversation_id,
"role": row.role,
"text": row.text,
"state": row.state,
"task_id": row.task_id,
"confirmation_id": row.confirmation_id,
"created_at": row.created_at.isoformat() if row.created_at else None,
"metadata": row.payload,
}
for row in rows
]
memory_items = CONVERSATIONS.get((user_id, conv_id), [])
return {
"items": persistent_items,
"memory_items": memory_items,
"total": total,
"page": page,
"page_size": page_size,
"has_next": start + page_size < total,
"from_latest": from_latest,
"conversation_id": conv_id,
}
# [/DEF:get_history:Function]
@router.get("/audit")
# [DEF:get_assistant_audit:Function]
# @PURPOSE: Return assistant audit decisions for current user from persistent and in-memory stores.
# @PRE: User has tasks:READ permission.
# @POST: Audit payload is returned in reverse chronological order from DB.
# @RETURN: Dict with persistent and memory audit slices.
async def get_assistant_audit(
limit: int = Query(50, ge=1, le=500),
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
_=Depends(has_permission("tasks", "READ")),
):
with belief_scope("assistant.audit"):
memory_rows = ASSISTANT_AUDIT.get(current_user.id, [])
db_rows = (
db.query(AssistantAuditRecord)
.filter(AssistantAuditRecord.user_id == current_user.id)
.order_by(AssistantAuditRecord.created_at.desc())
.limit(limit)
.all()
)
persistent = [
{
"id": row.id,
"user_id": row.user_id,
"conversation_id": row.conversation_id,
"decision": row.decision,
"task_id": row.task_id,
"message": row.message,
"payload": row.payload,
"created_at": row.created_at.isoformat() if row.created_at else None,
}
for row in db_rows
]
return {
"items": persistent,
"memory_items": memory_rows[-limit:],
"total": len(persistent),
"memory_total": len(memory_rows),
}
# [/DEF:get_assistant_audit:Function]
# [/DEF:AssistantRoutes:Module]

View File

@@ -0,0 +1,149 @@
# [DEF:AssistantSchemas:Module]
# @COMPLEXITY: 2
# @PURPOSE: Pydantic models, in-memory stores, and permission mappings for the assistant API.
# @LAYER: API
# @RELATION: USED_BY -> [AssistantRoutes]
# @RELATION: USED_BY -> [AssistantHistory]
# @INVARIANT: In-memory stores are module-level singletons shared across the assistant package.
from __future__ import annotations
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple
from pydantic import BaseModel, Field
from src.schemas.auth import User
# [DEF:AssistantMessageRequest:Class]
# @COMPLEXITY: 1
# @PURPOSE: Input payload for assistant message endpoint.
# @DATA_CONTRACT: Input[conversation_id?:str, message:str(1..4000)] -> Output[AssistantMessageRequest]
# @RELATION: USED_BY -> [send_message]
# @SIDE_EFFECT: None (schema declaration only).
# @PRE: message length is within accepted bounds.
# @POST: Request object provides message text and optional conversation binding.
# @INVARIANT: message is always non-empty and no longer than 4000 characters.
class AssistantMessageRequest(BaseModel):
    # Optional binding to an existing conversation; absent for new threads.
    conversation_id: Optional[str] = None
    # Free-form user command text; length bounds enforced by pydantic.
    message: str = Field(..., min_length=1, max_length=4000)
    # Optional link to an active dataset review session this message targets.
    dataset_review_session_id: Optional[str] = None
# [/DEF:AssistantMessageRequest:Class]
# [DEF:AssistantAction:Class]
# @COMPLEXITY: 1
# @PURPOSE: UI action descriptor returned with assistant responses.
# @DATA_CONTRACT: Input[type:str, label:str, target?:str] -> Output[AssistantAction]
# @RELATION: USED_BY -> [AssistantMessageResponse]
# @SIDE_EFFECT: None (schema declaration only).
# @PRE: type and label are provided by orchestration logic.
# @POST: Action can be rendered as button on frontend.
# @INVARIANT: type and label are required for every UI action.
class AssistantAction(BaseModel):
    # Action discriminator, e.g. "rephrase" (see send_message error path).
    type: str
    # Human-readable caption shown on the action control.
    label: str
    # Optional navigation/reference target; semantics depend on type.
    target: Optional[str] = None
# [/DEF:AssistantAction:Class]
# [DEF:AssistantMessageResponse:Class]
# @COMPLEXITY: 1
# @PURPOSE: Output payload contract for assistant interaction endpoints.
# @DATA_CONTRACT: Input[conversation_id,response_id,state,text,intent?,confirmation_id?,task_id?,actions[],created_at] -> Output[AssistantMessageResponse]
# @RELATION: RETURNED_BY -> [send_message]
# @RELATION: RETURNED_BY -> [confirm_operation]
# @RELATION: RETURNED_BY -> [cancel_operation]
# @SIDE_EFFECT: None (schema declaration only).
# @PRE: Response includes deterministic state and text.
# @POST: Payload may include task_id/confirmation_id/actions for UI follow-up.
# @INVARIANT: created_at and state are always present in endpoint responses.
class AssistantMessageResponse(BaseModel):
    # Conversation this reply belongs to.
    conversation_id: str
    # Unique id of this response (a fresh UUID per endpoint call).
    response_id: str
    # Lifecycle state; observed values include "started", "success",
    # "denied", "needs_clarification", "failed".
    state: str
    # Assistant reply text shown to the user.
    text: str
    # Parsed intent payload, when one was resolved.
    intent: Optional[Dict[str, Any]] = None
    # Token id when a risky operation awaits/was resolved by confirmation.
    confirmation_id: Optional[str] = None
    # Background task id when execution was started asynchronously.
    task_id: Optional[str] = None
    # Follow-up UI actions (e.g. a "Rephrase command" button).
    actions: List[AssistantAction] = Field(default_factory=list)
    # Server-side creation timestamp (naive UTC via datetime.utcnow()).
    created_at: datetime
# [/DEF:AssistantMessageResponse:Class]
# [DEF:ConfirmationRecord:Class]
# @COMPLEXITY: 1
# @PURPOSE: In-memory confirmation token model for risky operation dispatch.
# @DATA_CONTRACT: Input[id,user_id,conversation_id,intent,dispatch,expires_at,state?,created_at] -> Output[ConfirmationRecord]
# @RELATION: USED_BY -> [send_message]
# @RELATION: USED_BY -> [confirm_operation]
# @RELATION: USED_BY -> [cancel_operation]
# @SIDE_EFFECT: None (schema declaration only).
# @PRE: intent/dispatch/user_id are populated at confirmation request time.
# @POST: Record tracks lifecycle state and expiry timestamp.
# @INVARIANT: state defaults to "pending" and expires_at bounds confirmation validity.
class ConfirmationRecord(BaseModel):
    # Confirmation token id (the key used in the CONFIRMATIONS store).
    id: str
    # Owner; confirm/cancel endpoints reject any other user with 403.
    user_id: str
    # Conversation the pending operation was requested from.
    conversation_id: str
    # Parsed intent to execute once the user confirms.
    intent: Dict[str, Any]
    # Dispatch parameters captured at request time — presumably consumed by
    # the intent dispatcher; verify against _dispatch_intent.
    dispatch: Dict[str, Any]
    # Hard deadline; confirm_operation marks the token "expired" past this.
    expires_at: datetime
    # Lifecycle: "pending" -> "consumed" | "cancelled" | "expired".
    state: str = "pending"
    # Token creation timestamp.
    created_at: datetime
# [/DEF:ConfirmationRecord:Class]
# --- In-memory stores ---
# NOTE: module-level singletons shared across the assistant package (see
# module @INVARIANT); state is process-local and lost on restart.
# (user_id, conversation_id) -> ordered list of message dicts mirrored
# alongside the persistent AssistantMessageRecord rows.
CONVERSATIONS: Dict[Tuple[str, str], List[Dict[str, Any]]] = {}
# user_id -> conversation_id; presumably tracks the user's most recently
# active conversation — verify against _resolve_or_create_conversation.
USER_ACTIVE_CONVERSATION: Dict[str, str] = {}
# confirmation_id -> pending/consumed/cancelled/expired confirmation token.
CONFIRMATIONS: Dict[str, ConfirmationRecord] = {}
# user_id -> chronological list of audit decision payloads.
ASSISTANT_AUDIT: Dict[str, List[Dict[str, Any]]] = {}
# Days of inactivity after which a conversation is shown as archived
# (consumed via _is_conversation_archived — TODO confirm).
ASSISTANT_ARCHIVE_AFTER_DAYS = 14
# Retention window for persisted messages; presumably enforced by
# _cleanup_history_ttl — verify.
ASSISTANT_MESSAGE_TTL_DAYS = 90
# Operations that are read-only and do not require confirmation.
_SAFE_OPS = {
    "show_capabilities",
    "get_task_status",
    "get_health_summary",
    "dataset_review_answer_context",
}
# Dataset-review mutations routed through the dedicated review dispatcher.
_DATASET_REVIEW_OPS = {
    "dataset_review_approve_mappings",
    "dataset_review_set_field_semantics",
    "dataset_review_generate_sql_preview",
}
# intent name -> list of (resource, action) permission pairs; multiple pairs
# mean alternative grants (e.g. either migration plugin permission suffices —
# TODO confirm OR semantics against _authorize_intent).
INTENT_PERMISSION_CHECKS: Dict[str, List[Tuple[str, str]]] = {
    "get_task_status": [("tasks", "READ")],
    "create_branch": [("plugin:git", "EXECUTE")],
    "commit_changes": [("plugin:git", "EXECUTE")],
    "deploy_dashboard": [("plugin:git", "EXECUTE")],
    "execute_migration": [
        ("plugin:migration", "EXECUTE"),
        ("plugin:superset-migration", "EXECUTE"),
    ],
    "run_backup": [("plugin:superset-backup", "EXECUTE"), ("plugin:backup", "EXECUTE")],
    "run_llm_validation": [("plugin:llm_dashboard_validation", "EXECUTE")],
    "run_llm_documentation": [("plugin:llm_documentation", "EXECUTE")],
    "get_health_summary": [("plugin:migration", "READ")],
    "dataset_review_answer_context": [("dataset:session", "READ")],
    "dataset_review_approve_mappings": [("dataset:session", "MANAGE")],
    "dataset_review_set_field_semantics": [("dataset:session", "MANAGE")],
    "dataset_review_generate_sql_preview": [("dataset:session", "MANAGE")],
}
# [/DEF:AssistantSchemas:Module]

View File

@@ -32,6 +32,7 @@ from src.api.routes.dataset_review_pkg._dependencies import ( # noqa: F401
_get_repository,
_get_orchestrator,
_get_clarification_engine,
_update_semantic_field_state,
)
from src.api.routes.dataset_review_pkg._routes import router # noqa: F401
# [/DEF:DatasetReviewApi:Module]

View File

@@ -1,110 +1,7 @@
# [DEF:DatasetReviewDependencies:Module]
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @PURPOSE: Dependency injection, serialization helpers, and feature-flag guards for dataset review endpoints.
# @LAYER: API
# @RATIONALE: Extracted from 2484-line monolith to satisfy INV_7 (400-line module limit).
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
# @COMPLEXITY: 2
# @PURPOSE: Produce validation-focused export content.
from __future__ import annotations
@@ -410,7 +307,10 @@ class LaunchDatasetResponse(BaseModel):
# @PURPOSE: Guard US1 dataset review endpoints behind the configured feature flag.
def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> bool:
with belief_scope("dataset_review.require_auto_review_flag"):
if not config_manager.get_config().settings.ff_dataset_auto_review:
settings = config_manager.get_config().settings
if not settings.features.dataset_review:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset review feature is disabled")
if not settings.ff_dataset_auto_review:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset auto review feature is disabled")
return True
@@ -423,7 +323,10 @@ def _require_auto_review_flag(config_manager=Depends(get_config_manager)) -> boo
# @PURPOSE: Guard clarification-specific US2 endpoints behind the configured feature flag.
def _require_clarification_flag(config_manager=Depends(get_config_manager)) -> bool:
with belief_scope("dataset_review.require_clarification_flag"):
if not config_manager.get_config().settings.ff_dataset_clarification:
settings = config_manager.get_config().settings
if not settings.features.dataset_review:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset review feature is disabled")
if not settings.ff_dataset_clarification:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset clarification feature is disabled")
return True
@@ -436,7 +339,10 @@ def _require_clarification_flag(config_manager=Depends(get_config_manager)) -> b
# @PURPOSE: Guard US3 execution endpoints behind the configured feature flag.
def _require_execution_flag(config_manager=Depends(get_config_manager)) -> bool:
with belief_scope("dataset_review.require_execution_flag"):
if not config_manager.get_config().settings.ff_dataset_execution:
settings = config_manager.get_config().settings
if not settings.features.dataset_review:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset review feature is disabled")
if not settings.ff_dataset_execution:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Dataset execution feature is disabled")
return True

View File

@@ -1,52 +1,7 @@
# [DEF:DatasetReviewRoutes:Module]
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @LAYER: API
# @RATIONALE: Extracted from 2484-line monolith to satisfy INV_7 (400-line module limit).
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
# @COMPLEXITY: 3
# @PURPOSE: Persist thumbs up/down feedback for clarification question/answer content.
from __future__ import annotations
@@ -70,7 +25,6 @@ from src.models.dataset_review import (
)
from src.schemas.dataset_review import (
ClarificationAnswerDto,
ClarificationStateResponse as _ClarificationStateResponseSchema,
CompiledPreviewDto,
ExecutionMappingDto,
SemanticFieldEntryDto,
@@ -166,11 +120,18 @@ async def list_sessions(
items = [_serialize_session_summary(s) for s in sessions[start:end]]
logger.reflect(
"Session page assembled",
extra={"user_id": current_user.id, "returned": len(items), "total": len(sessions)},
extra={
"user_id": current_user.id,
"returned": len(items),
"total": len(sessions),
},
)
return SessionCollectionResponse(
items=items, total=len(sessions), page=page,
page_size=page_size, has_next=end < len(sessions),
items=items,
total=len(sessions),
page=page,
page_size=page_size,
has_next=end < len(sessions),
)
@@ -197,21 +158,35 @@ async def start_session(
with belief_scope("start_session"):
logger.reason(
"Starting dataset review session",
extra={"user_id": current_user.id, "environment_id": request.environment_id},
extra={
"user_id": current_user.id,
"environment_id": request.environment_id,
},
)
try:
result = orchestrator.start_session(
StartSessionCommand(
user=current_user, environment_id=request.environment_id,
source_kind=request.source_kind, source_input=request.source_input,
user=current_user,
environment_id=request.environment_id,
source_kind=request.source_kind,
source_input=request.source_input,
)
)
except ValueError as exc:
logger.explore("Session start rejected", extra={"user_id": current_user.id, "error": str(exc)})
logger.explore(
"Session start rejected",
extra={"user_id": current_user.id, "error": str(exc)},
)
detail = str(exc)
sc = status.HTTP_404_NOT_FOUND if detail == "Environment not found" else status.HTTP_400_BAD_REQUEST
sc = (
status.HTTP_404_NOT_FOUND
if detail == "Environment not found"
else status.HTTP_400_BAD_REQUEST
)
raise HTTPException(status_code=sc, detail=detail) from exc
logger.reflect("Session started", extra={"session_id": result.session.session_id})
logger.reflect(
"Session started", extra={"session_id": result.session.session_id}
)
return _serialize_session_summary(result.session)
@@ -261,19 +236,30 @@ async def update_session(
current_user: User = Depends(get_current_user),
):
with belief_scope("update_session"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
session_record = cast(Any, session)
session_record.status = request.status
if request.status == SessionStatus.PAUSED:
session_record.recommended_action = RecommendedAction.RESUME_SESSION
elif request.status in {SessionStatus.ARCHIVED, SessionStatus.CANCELLED, SessionStatus.COMPLETED}:
elif request.status in {
SessionStatus.ARCHIVED,
SessionStatus.CANCELLED,
SessionStatus.COMPLETED,
}:
session_record.active_task_id = None
_commit_owned_session_mutation(repository, session)
_record_session_event(
repository, session, current_user,
repository,
session,
current_user,
event_type="session_status_updated",
event_summary="Dataset review session lifecycle updated",
event_details={"status": session_record.status.value, "version": session_record.version},
event_details={
"status": session_record.status.value,
"version": session_record.version,
},
)
return _serialize_session_summary(session)
@@ -300,9 +286,18 @@ async def delete_session(
current_user: User = Depends(get_current_user),
):
with belief_scope("delete_session"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
if hard_delete:
_record_session_event(repository, session, current_user, event_type="session_deleted", event_summary="Session hard-deleted", event_details={"hard_delete": True})
_record_session_event(
repository,
session,
current_user,
event_type="session_deleted",
event_summary="Session hard-deleted",
event_details={"hard_delete": True},
)
repository.db.delete(session)
repository.db.commit()
return Response(status_code=status.HTTP_204_NO_CONTENT)
@@ -310,7 +305,14 @@ async def delete_session(
session_record.status = SessionStatus.ARCHIVED
session_record.active_task_id = None
_commit_owned_session_mutation(repository, session)
_record_session_event(repository, session, current_user, event_type="session_archived", event_summary="Session archived", event_details={"hard_delete": False, "version": session_record.version})
_record_session_event(
repository,
session,
current_user,
event_type="session_archived",
event_summary="Session archived",
event_details={"hard_delete": False, "version": session_record.version},
)
return Response(status_code=status.HTTP_204_NO_CONTENT)
@@ -336,14 +338,20 @@ async def export_documentation(
):
with belief_scope("export_documentation"):
if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Only json and markdown exports are supported",
)
session = _get_owned_session_or_404(repository, session_id, current_user)
payload = _build_documentation_export(session, format)
return ExportArtifactResponse(
artifact_id=f"documentation-{session.session_id}-{format.value}",
session_id=session.session_id, artifact_type="documentation",
format=format.value, storage_ref=payload["storage_ref"],
created_by_user_id=current_user.id, content=payload["content"],
session_id=session.session_id,
artifact_type="documentation",
format=format.value,
storage_ref=payload["storage_ref"],
created_by_user_id=current_user.id,
content=payload["content"],
)
@@ -369,14 +377,20 @@ async def export_validation(
):
with belief_scope("export_validation"):
if format not in {ArtifactFormat.JSON, ArtifactFormat.MARKDOWN}:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only json and markdown exports are supported")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Only json and markdown exports are supported",
)
session = _get_owned_session_or_404(repository, session_id, current_user)
payload = _build_validation_export(session, format)
return ExportArtifactResponse(
artifact_id=f"validation-{session.session_id}-{format.value}",
session_id=session.session_id, artifact_type="validation_report",
format=format.value, storage_ref=payload["storage_ref"],
created_by_user_id=current_user.id, content=payload["content"],
session_id=session.session_id,
artifact_type="validation_report",
format=format.value,
storage_ref=payload["storage_ref"],
created_by_user_id=current_user.id,
content=payload["content"],
)
@@ -408,7 +422,12 @@ async def get_clarification_state(
cs = _get_latest_clarification_session_or_404(session)
question = clarification_engine.build_question_payload(session)
return _serialize_clarification_state(
ClarificationStateResult(clarification_session=cs, current_question=question, session=session, changed_findings=[])
ClarificationStateResult(
clarification_session=cs,
current_question=question,
session=session,
changed_findings=[],
)
)
@@ -435,11 +454,18 @@ async def resume_clarification(
current_user: User = Depends(get_current_user),
):
with belief_scope("resume_clarification"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
cs = _get_latest_clarification_session_or_404(session)
question = clarification_engine.build_question_payload(session)
return _serialize_clarification_state(
ClarificationStateResult(clarification_session=cs, current_question=question, session=session, changed_findings=[])
ClarificationStateResult(
clarification_session=cs,
current_question=question,
session=session,
changed_findings=[],
)
)
@@ -467,21 +493,30 @@ async def record_clarification_answer(
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.record_clarification_answer"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
try:
result = clarification_engine.record_answer(
ClarificationAnswerCommand(
session=session, question_id=request.question_id,
answer_kind=request.answer_kind, answer_value=request.answer_value,
session=session,
question_id=request.question_id,
answer_kind=request.answer_kind,
answer_value=request.answer_value,
user=current_user,
)
)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)
) from exc
return ClarificationAnswerResultResponse(
clarification_state=_serialize_clarification_state(result),
session=_serialize_session_summary(result.session),
changed_findings=[ValidationFindingDto.model_validate(f, from_attributes=True) for f in result.changed_findings],
changed_findings=[
ValidationFindingDto.model_validate(f, from_attributes=True)
for f in result.changed_findings
],
)
@@ -500,18 +535,29 @@ async def record_clarification_answer(
],
)
async def update_field_semantic(
session_id: str, field_id: str, request: FieldSemanticUpdateRequest,
session_id: str,
field_id: str,
request: FieldSemanticUpdateRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.update_field_semantic"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
field = _get_owned_field_or_404(session, field_id)
_update_semantic_field_state(field, request, changed_by="user")
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=[field])
_record_session_event(repository, session, current_user, event_type="semantic_field_updated", event_summary="Semantic field decision persisted", event_details={"field_id": field.field_id, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="semantic_field_updated",
event_summary="Semantic field decision persisted",
event_details={"field_id": field.field_id, "version": sr.version},
)
return _serialize_semantic_field(field)
@@ -530,19 +576,29 @@ async def update_field_semantic(
],
)
async def lock_field_semantic(
session_id: str, field_id: str,
session_id: str,
field_id: str,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.lock_field_semantic"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
field = _get_owned_field_or_404(session, field_id)
field.is_locked = True
field.last_changed_by = "user"
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=[field])
_record_session_event(repository, session, current_user, event_type="semantic_field_locked", event_summary="Semantic field lock persisted", event_details={"field_id": field.field_id, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="semantic_field_locked",
event_summary="Semantic field lock persisted",
event_details={"field_id": field.field_id, "version": sr.version},
)
return _serialize_semantic_field(field)
@@ -561,13 +617,16 @@ async def lock_field_semantic(
],
)
async def unlock_field_semantic(
session_id: str, field_id: str,
session_id: str,
field_id: str,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.unlock_field_semantic"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
field = _get_owned_field_or_404(session, field_id)
field.is_locked = False
field.last_changed_by = "user"
@@ -576,7 +635,14 @@ async def unlock_field_semantic(
field.needs_review = True
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=[field])
_record_session_event(repository, session, current_user, event_type="semantic_field_unlocked", event_summary="Semantic field unlock persisted", event_details={"field_id": field.field_id, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="semantic_field_unlocked",
event_summary="Semantic field unlock persisted",
event_details={"field_id": field.field_id, "version": sr.version},
)
return _serialize_semantic_field(field)
@@ -595,21 +661,39 @@ async def unlock_field_semantic(
],
)
async def approve_batch_semantic_fields(
session_id: str, request: BatchApproveSemanticRequest,
session_id: str,
request: BatchApproveSemanticRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.approve_batch_semantic_fields"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
updated = []
for item in request.items:
field = _get_owned_field_or_404(session, item.field_id)
_update_semantic_field_state(field, FieldSemanticUpdateRequest(candidate_id=item.candidate_id, lock_field=item.lock_field), changed_by="user")
_update_semantic_field_state(
field,
FieldSemanticUpdateRequest(
candidate_id=item.candidate_id, lock_field=item.lock_field
),
changed_by="user",
)
updated.append(field)
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=list(updated))
_record_session_event(repository, session, current_user, event_type="semantic_fields_batch_approved", event_summary="Batch semantic approval persisted", event_details={"count": len(updated), "version": sr.version})
_commit_owned_session_mutation(
repository, session, refresh_targets=list(updated)
)
_record_session_event(
repository,
session,
current_user,
event_type="semantic_fields_batch_approved",
event_summary="Batch semantic approval persisted",
event_details={"count": len(updated), "version": sr.version},
)
return [_serialize_semantic_field(f) for f in updated]
@@ -635,7 +719,9 @@ async def list_execution_mappings(
):
with belief_scope("dataset_review.list_execution_mappings"):
session = _get_owned_session_or_404(repository, session_id, current_user)
return MappingCollectionResponse(items=[_serialize_execution_mapping(m) for m in session.execution_mappings])
return MappingCollectionResponse(
items=[_serialize_execution_mapping(m) for m in session.execution_mappings]
)
# [/DEF:list_execution_mappings:Function]
@@ -654,32 +740,53 @@ async def list_execution_mappings(
],
)
async def update_execution_mapping(
session_id: str, mapping_id: str, request: UpdateExecutionMappingRequest,
session_id: str,
mapping_id: str,
request: UpdateExecutionMappingRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.update_execution_mapping"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
mapping = _get_owned_mapping_or_404(session, mapping_id)
if request.effective_value is None:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="effective_value is required")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="effective_value is required",
)
mapping.effective_value = request.effective_value
mapping.mapping_method = MappingMethod(request.mapping_method or MappingMethod.MANUAL_OVERRIDE.value)
mapping.mapping_method = MappingMethod(
request.mapping_method or MappingMethod.MANUAL_OVERRIDE.value
)
mapping.transformation_note = request.transformation_note
mapping.approval_state = ApprovalState.APPROVED
mapping.approved_by_user_id = current_user.id
mapping.approved_at = datetime.utcnow()
session.last_activity_at = datetime.utcnow()
session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
if session.readiness_state in {ReadinessState.MAPPING_REVIEW_NEEDED, ReadinessState.COMPILED_PREVIEW_READY, ReadinessState.RUN_READY, ReadinessState.RUN_IN_PROGRESS}:
if session.readiness_state in {
ReadinessState.MAPPING_REVIEW_NEEDED,
ReadinessState.COMPILED_PREVIEW_READY,
ReadinessState.RUN_READY,
ReadinessState.RUN_IN_PROGRESS,
}:
session.readiness_state = ReadinessState.COMPILED_PREVIEW_READY
for preview in session.previews:
if preview.preview_status == PreviewStatus.READY:
preview.preview_status = PreviewStatus.STALE
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=[mapping])
_record_session_event(repository, session, current_user, event_type="execution_mapping_updated", event_summary="Mapping override persisted", event_details={"mapping_id": mapping.mapping_id, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="execution_mapping_updated",
event_summary="Mapping override persisted",
event_details={"mapping_id": mapping.mapping_id, "version": sr.version},
)
return _serialize_execution_mapping(mapping)
@@ -699,13 +806,17 @@ async def update_execution_mapping(
],
)
async def approve_execution_mapping(
session_id: str, mapping_id: str, request: ApproveMappingRequest,
session_id: str,
mapping_id: str,
request: ApproveMappingRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.approve_execution_mapping"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
mapping = _get_owned_mapping_or_404(session, mapping_id)
mapping.approval_state = ApprovalState.APPROVED
mapping.approved_by_user_id = current_user.id
@@ -717,7 +828,14 @@ async def approve_execution_mapping(
session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=[mapping])
_record_session_event(repository, session, current_user, event_type="execution_mapping_approved", event_summary="Mapping approval persisted", event_details={"mapping_id": mapping.mapping_id, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="execution_mapping_approved",
event_summary="Mapping approval persisted",
event_details={"mapping_id": mapping.mapping_id, "version": sr.version},
)
return _serialize_execution_mapping(mapping)
@@ -737,13 +855,16 @@ async def approve_execution_mapping(
],
)
async def approve_batch_execution_mappings(
session_id: str, request: BatchApproveMappingRequest,
session_id: str,
request: BatchApproveMappingRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.approve_batch_execution_mappings"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
updated = []
for mid in list(dict.fromkeys(request.mapping_ids)):
mapping = _get_owned_mapping_or_404(session, mid)
@@ -757,8 +878,17 @@ async def approve_batch_execution_mappings(
if session.readiness_state == ReadinessState.MAPPING_REVIEW_NEEDED:
session.recommended_action = RecommendedAction.GENERATE_SQL_PREVIEW
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session, refresh_targets=list(updated))
_record_session_event(repository, session, current_user, event_type="execution_mappings_batch_approved", event_summary="Batch mapping approval persisted", event_details={"count": len(updated), "version": sr.version})
_commit_owned_session_mutation(
repository, session, refresh_targets=list(updated)
)
_record_session_event(
repository,
session,
current_user,
event_type="execution_mappings_batch_approved",
event_summary="Batch mapping approval persisted",
event_details={"count": len(updated), "version": sr.version},
)
return [_serialize_execution_mapping(m) for m in updated]
@@ -778,33 +908,50 @@ async def approve_batch_execution_mappings(
],
)
async def trigger_preview_generation(
session_id: str, response: Response,
session_id: str,
response: Response,
orchestrator=Depends(_get_orchestrator),
repository=Depends(_get_repository),
session_version: int = Depends(_require_session_version_header),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.trigger_preview_generation"):
_prepare_owned_session_mutation(repository, session_id, current_user, session_version)
_prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
try:
result = orchestrator.prepare_launch_preview(
PreparePreviewCommand(user=current_user, session_id=session_id, expected_version=session_version)
PreparePreviewCommand(
user=current_user,
session_id=session_id,
expected_version=session_version,
)
)
except DatasetReviewSessionVersionConflictError as exc:
raise _build_session_version_conflict_http_exception(exc) from exc
except ValueError as exc:
detail = str(exc)
sc = status.HTTP_404_NOT_FOUND if detail in {"Session not found", "Environment not found"} else status.HTTP_409_CONFLICT if detail.startswith("Preview blocked:") else status.HTTP_400_BAD_REQUEST
sc = (
status.HTTP_404_NOT_FOUND
if detail in {"Session not found", "Environment not found"}
else status.HTTP_409_CONFLICT
if detail.startswith("Preview blocked:")
else status.HTTP_400_BAD_REQUEST
)
raise HTTPException(status_code=sc, detail=detail) from exc
if result.preview.preview_status == PreviewStatus.PENDING:
response.status_code = status.HTTP_202_ACCEPTED
return PreviewEnqueueResultResponse(
session_id=result.session.session_id,
session_version=int(getattr(result.session, "version", 0) or 0),
preview_status=result.preview.preview_status.value, task_id=None,
preview_status=result.preview.preview_status.value,
task_id=None,
)
response.status_code = status.HTTP_200_OK
return _serialize_preview(result.preview, session_version_fallback=int(getattr(result.session, "version", 0) or 0))
return _serialize_preview(
result.preview,
session_version_fallback=int(getattr(result.session, "version", 0) or 0),
)
# [/DEF:trigger_preview_generation:Function]
@@ -832,22 +979,37 @@ async def launch_dataset(
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.launch_dataset"):
_prepare_owned_session_mutation(repository, session_id, current_user, session_version)
_prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
try:
result = orchestrator.launch_dataset(
LaunchDatasetCommand(user=current_user, session_id=session_id, expected_version=session_version)
LaunchDatasetCommand(
user=current_user,
session_id=session_id,
expected_version=session_version,
)
)
except DatasetReviewSessionVersionConflictError as exc:
raise _build_session_version_conflict_http_exception(exc) from exc
except ValueError as exc:
detail = str(exc)
sc = status.HTTP_404_NOT_FOUND if detail in {"Session not found", "Environment not found"} else status.HTTP_409_CONFLICT if detail.startswith("Launch blocked:") else status.HTTP_400_BAD_REQUEST
sc = (
status.HTTP_404_NOT_FOUND
if detail in {"Session not found", "Environment not found"}
else status.HTTP_409_CONFLICT
if detail.startswith("Launch blocked:")
else status.HTTP_400_BAD_REQUEST
)
raise HTTPException(status_code=sc, detail=detail) from exc
environment = config_manager.get_environment(result.session.environment_id)
env_url = getattr(environment, "url", "") if environment is not None else ""
return LaunchDatasetResponse(
run_context=_serialize_run_context(result.run_context),
redirect_url=_build_sql_lab_redirect_url(environment_url=env_url, sql_lab_session_ref=result.run_context.sql_lab_session_ref),
redirect_url=_build_sql_lab_redirect_url(
environment_url=env_url,
sql_lab_session_ref=result.run_context.sql_lab_session_ref,
),
)
@@ -866,18 +1028,33 @@ async def launch_dataset(
],
)
async def record_field_feedback(
session_id: str, field_id: str, request: FeedbackRequest,
session_id: str,
field_id: str,
request: FeedbackRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.record_field_feedback"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
field = _get_owned_field_or_404(session, field_id)
field.user_feedback = request.feedback
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session)
_record_session_event(repository, session, current_user, event_type="semantic_field_feedback_recorded", event_summary="Feedback persisted", event_details={"field_id": field.field_id, "feedback": request.feedback, "version": sr.version})
_record_session_event(
repository,
session,
current_user,
event_type="semantic_field_feedback_recorded",
event_summary="Feedback persisted",
event_details={
"field_id": field.field_id,
"feedback": request.feedback,
"version": sr.version,
},
)
return FeedbackResponse(target_id=field.field_id, feedback=request.feedback)
@@ -897,24 +1074,47 @@ async def record_field_feedback(
],
)
async def record_clarification_feedback(
session_id: str, question_id: str, request: FeedbackRequest,
session_id: str,
question_id: str,
request: FeedbackRequest,
session_version: int = Depends(_require_session_version_header),
repository=Depends(_get_repository),
current_user: User = Depends(get_current_user),
):
with belief_scope("dataset_review.record_clarification_feedback"):
session = _prepare_owned_session_mutation(repository, session_id, current_user, session_version)
session = _prepare_owned_session_mutation(
repository, session_id, current_user, session_version
)
cs = _get_latest_clarification_session_or_404(session)
question = next((q for q in cs.questions if q.question_id == question_id), None)
if question is None:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Clarification question not found")
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Clarification question not found",
)
if question.answer is None:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Clarification answer not found")
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Clarification answer not found",
)
question.answer.user_feedback = request.feedback
sr = cast(Any, session)
_commit_owned_session_mutation(repository, session)
_record_session_event(repository, session, current_user, event_type="clarification_feedback_recorded", event_summary="Feedback persisted", event_details={"question_id": question.question_id, "feedback": request.feedback, "version": sr.version})
return FeedbackResponse(target_id=question.question_id, feedback=request.feedback)
_record_session_event(
repository,
session,
current_user,
event_type="clarification_feedback_recorded",
event_summary="Feedback persisted",
event_details={
"question_id": question.question_id,
"feedback": request.feedback,
"version": sr.version,
},
)
return FeedbackResponse(
target_id=question.question_id, feedback=request.feedback
)
# [/DEF:record_clarification_feedback:Function]

View File

@@ -31,6 +31,8 @@ async def get_health_summary(
@PURPOSE: Get aggregated health status for all dashboards.
@POST: Returns HealthSummaryResponse
"""
if not config_manager.get_config().settings.features.health_monitor:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Health monitor feature is disabled")
service = HealthService(db, config_manager=config_manager)
return await service.get_health_summary(environment_id=environment_id)
# [/DEF:get_health_summary:Function]
@@ -53,6 +55,8 @@ async def delete_health_report(
@PURPOSE: Delete a persisted dashboard validation report from health summary.
@POST: Validation record is removed; linked task/logs are deleted when present.
"""
if not config_manager.get_config().settings.features.health_monitor:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Health monitor feature is disabled")
service = HealthService(db, config_manager=config_manager)
if not service.delete_validation_report(record_id, task_manager=task_manager):
raise HTTPException(status_code=404, detail="Health report not found")

View File

@@ -111,6 +111,22 @@ async def get_settings(
# [/DEF:get_settings:Function]
# [DEF:get_features:Function]
# @COMPLEXITY: 1
# @PURPOSE: Public endpoint returning feature flags for frontend sidebar filtering.
# @RATIONALE: Unauthenticated because sidebar filtering must work for all users, not just admins.
# @PRE: Config manager is available.
# @POST: Returns dict with dataset_review and health_monitor booleans.
@router.get("/features")
async def get_features(
config_manager: ConfigManager = Depends(get_config_manager),
):
return config_manager.get_config().settings.features.model_dump()
# [/DEF:get_features:Function]
# [DEF:update_global_settings:Function]
# @COMPLEXITY: 2
# @PURPOSE: Updates global application settings.
@@ -403,6 +419,7 @@ class ConsolidatedSettingsResponse(BaseModel):
logging: dict
storage: dict
notifications: dict = {}
features: dict = {}
# [/DEF:ConsolidatedSettingsResponse:Class]
@@ -472,6 +489,7 @@ async def get_consolidated_settings(
logging=config.settings.logging.dict(),
storage=config.settings.storage.dict(),
notifications=notifications_payload,
features=config.settings.features.model_dump(),
)
logger.reflect(
"Consolidated settings payload assembled",
@@ -525,6 +543,12 @@ async def update_consolidated_settings(
raise HTTPException(status_code=400, detail=message)
current_settings.storage = new_storage
# Update Features if provided
if "features" in settings_patch:
from ...core.config_models import FeaturesConfig
current_settings.features = FeaturesConfig(**settings_patch["features"])
if "notifications" in settings_patch:
payload = config_manager.get_payload()
payload["notifications"] = settings_patch["notifications"]

View File

@@ -3,30 +3,7 @@
# @SEMANTICS: superset, async, client, httpx, dashboards, datasets
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @LAYER: Core
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @COMPLEXITY: 3
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# [SECTION: IMPORTS]
import asyncio

View File

@@ -69,12 +69,41 @@ class ConfigManager:
# [/DEF:__init__:Function]
# [DEF:_apply_features_from_env:Function]
# @PURPOSE: Read FEATURES__* env vars and apply them to a GlobalSettings features config.
# @SIDE_EFFECT: Reads os.environ; mutates settings.features in-place.
# @RATIONALE: Env vars seed the initial defaults. After first bootstrap, DB is source of truth.
@staticmethod
def _apply_features_from_env(settings: GlobalSettings) -> None:
with belief_scope("ConfigManager._apply_features_from_env"):
dataset_review_env = os.getenv("FEATURES__DATASET_REVIEW")
if dataset_review_env is not None:
parsed = dataset_review_env.strip().lower() in ("true", "1", "yes")
settings.features.dataset_review = parsed
logger.reason(
"Applied FEATURES__DATASET_REVIEW from env",
extra={"value": parsed, "raw": dataset_review_env},
)
health_monitor_env = os.getenv("FEATURES__HEALTH_MONITOR")
if health_monitor_env is not None:
parsed = health_monitor_env.strip().lower() in ("true", "1", "yes")
settings.features.health_monitor = parsed
logger.reason(
"Applied FEATURES__HEALTH_MONITOR from env",
extra={"value": parsed, "raw": health_monitor_env},
)
# [/DEF:_apply_features_from_env:Function]
# [DEF:_default_config:Function]
# @PURPOSE: Build default application configuration fallback.
def _default_config(self) -> AppConfig:
with belief_scope("ConfigManager._default_config"):
logger.reason("Building default AppConfig fallback")
return AppConfig(environments=[], settings=GlobalSettings())
config = AppConfig(environments=[], settings=GlobalSettings())
self._apply_features_from_env(config.settings)
return config
# [/DEF:_default_config:Function]
@@ -200,6 +229,7 @@ class ConfigManager:
"settings": self.raw_payload.get("settings", {}),
}
)
self._apply_features_from_env(config.settings)
logger.reason(
"Legacy payload validated; persisting migrated configuration to database",
extra={

View File

@@ -71,6 +71,19 @@ class CleanReleaseConfig(BaseModel):
# [/DEF:CleanReleaseConfig:DataClass]
# [DEF:FeaturesConfig:DataClass]
# @COMPLEXITY: 1
# @PURPOSE: Top-level feature flags that toggle entire project features on/off.
# @RATIONALE: Features are read from environment variables on bootstrap and persisted in DB.
# DB is source of truth after initial bootstrap; env vars only seed defaults.
class FeaturesConfig(BaseModel):
dataset_review: bool = True
health_monitor: bool = True
# [/DEF:FeaturesConfig:DataClass]
# [DEF:GlobalSettings:DataClass]
# @PURPOSE: Represents global application settings.
class GlobalSettings(BaseModel):
@@ -78,6 +91,7 @@ class GlobalSettings(BaseModel):
clean_release: CleanReleaseConfig = Field(default_factory=CleanReleaseConfig)
default_environment_id: Optional[str] = None
logging: LoggingConfig = Field(default_factory=LoggingConfig)
features: FeaturesConfig = Field(default_factory=FeaturesConfig)
connections: List[dict] = []
llm: dict = Field(
default_factory=lambda: {

View File

@@ -49,7 +49,7 @@ class SupersetClient:
with belief_scope("SupersetClientInit"):
app_logger.reason(
"Initializing Superset client for environment",
extra={"environment": getattr(env, "id", None), "name": env.name},
extra={"environment": getattr(env, "id", None), "env_name": env.name},
)
self.env = env
# Construct auth payload expected by Superset API

View File

@@ -0,0 +1,65 @@
# [DEF:SupersetClientModule:Module]
#
# @COMPLEXITY: 3
# @PURPOSE: Предоставляет высокоуровневый клиент для взаимодействия с Superset REST API, инкапсулируя логику запросов, обработку ошибок и пагинацию.
# @RELATION: DEPENDS_ON -> [ConfigModels]
# @RELATION: DEPENDS_ON -> [APIClient]
# @RELATION: DEPENDS_ON -> [SupersetAPIError]
# @RELATION: DEPENDS_ON -> [get_filename_from_headers]
#
# @PUBLIC_API: SupersetClient
#
# @RATIONALE: Decomposed from monolithic superset_client.py (2145 lines) into
# domain-scoped mixins to satisfy INV_7 (module < 400 lines). The composed class
# preserves the original public API surface — all consumers continue to import
# `from src.core.superset_client import SupersetClient` without changes.
# @REJECTED: Keeping a single 2145-line file — violates fractal limit INV_7.
from ._base import SupersetClientBase
from ._user_projection import SupersetUserProjectionMixin
from ._dashboards_list import SupersetDashboardsListMixin
from ._dashboards_filters import SupersetDashboardsFiltersMixin
from ._dashboards_crud import SupersetDashboardsCrudMixin
from ._charts import SupersetChartsMixin
from ._datasets import SupersetDatasetsMixin
from ._datasets_preview import SupersetDatasetsPreviewMixin
from ._databases import SupersetDatabasesMixin
# [DEF:SupersetClient:Class]
# @COMPLEXITY: 3
# @PURPOSE: Класс-обёртка над Superset REST API, предоставляющий методы для работы с дашбордами и датасетами.
# @RELATION: DEPENDS_ON -> [ConfigModels]
# @RELATION: DEPENDS_ON -> [APIClient]
# @RELATION: DEPENDS_ON -> [SupersetAPIError]
# @RELATION: INHERITS -> [SupersetClientBase]
# @RELATION: INHERITS -> [SupersetUserProjectionMixin]
# @RELATION: INHERITS -> [SupersetDashboardsListMixin]
# @RELATION: INHERITS -> [SupersetDashboardsFiltersMixin]
# @RELATION: INHERITS -> [SupersetDashboardsCrudMixin]
# @RELATION: INHERITS -> [SupersetChartsMixin]
# @RELATION: INHERITS -> [SupersetDatasetsMixin]
# @RELATION: INHERITS -> [SupersetDatasetsPreviewMixin]
# @RELATION: INHERITS -> [SupersetDatabasesMixin]
class SupersetClient(
    SupersetDatabasesMixin,
    SupersetDatasetsPreviewMixin,
    SupersetDatasetsMixin,
    SupersetChartsMixin,
    SupersetDashboardsCrudMixin,
    SupersetDashboardsFiltersMixin,
    SupersetDashboardsListMixin,
    SupersetUserProjectionMixin,
    SupersetClientBase,
):
    """Composed Superset REST API client.

    Aggregates the domain-scoped mixins (databases, datasets, charts,
    dashboards, user projection) on top of ``SupersetClientBase``, which
    supplies initialization, authentication, pagination, and import/export
    plumbing.  Every mixin precedes the base class in the MRO so a mixin
    override always wins over a base implementation.

    The class body is intentionally empty: its public API surface is the
    union of the inherited mixins, preserving the interface of the original
    monolithic module.  Consumers keep importing::

        from src.core.superset_client import SupersetClient
    """
    pass
# [/DEF:SupersetClient:Class]
# [/DEF:SupersetClientModule:Module]

View File

@@ -0,0 +1,313 @@
# [DEF:SupersetClientBase:Module]
# @COMPLEXITY: 3
# @PURPOSE: Base class for SupersetClient providing initialization, authentication, pagination, and import/export helpers.
# @RELATION: DEPENDS_ON -> [ConfigModels]
# @RELATION: DEPENDS_ON -> [APIClient]
# @RELATION: DEPENDS_ON -> [SupersetAPIError]
# @RELATION: DEPENDS_ON -> [get_filename_from_headers]
import json
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
from ..logger import logger as app_logger, belief_scope
from ..utils.network import APIClient, SupersetAPIError
from ..utils.fileio import get_filename_from_headers
from ..config_models import Environment
app_logger = cast(Any, app_logger)
# [DEF:SupersetClientBase:Class]
# @COMPLEXITY: 3
# @PURPOSE: Base class providing Superset client initialization, auth, pagination, and import/export plumbing.
# @RELATION: DEPENDS_ON -> [ConfigModels]
# @RELATION: DEPENDS_ON -> [APIClient]
# @RELATION: DEPENDS_ON -> [SupersetAPIError]
class SupersetClientBase:
    """Shared plumbing for the composed ``SupersetClient``.

    Provides construction and authentication against one Superset
    environment, pagination helpers, and the import/export/delete helpers
    the dashboard mixins build on.  Network access goes exclusively through
    ``self.network`` (an ``APIClient``).
    """
    # [DEF:SupersetClientInit:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Initializes the client, validates configuration, and creates the network client.
    # @RELATION: DEPENDS_ON -> [Environment]
    # @RELATION: DEPENDS_ON -> [APIClient]
    def __init__(self, env: Environment):
        """Bind the client to *env* and build the underlying ``APIClient``.

        Args:
            env: Environment providing ``url``, credentials, ``verify_ssl``
                and ``timeout`` for the target Superset instance.
        """
        with belief_scope("SupersetClientInit"):
            app_logger.reason(
                "Initializing Superset client for environment",
                extra={"environment": getattr(env, "id", None), "env_name": env.name},
            )
            self.env = env
            # Construct auth payload expected by Superset API
            auth_payload = {
                "username": env.username,
                "password": env.password,
                "provider": "db",
                "refresh": "true",
            }
            self.network = APIClient(
                config={"base_url": env.url, "auth": auth_payload},
                verify_ssl=env.verify_ssl,
                timeout=env.timeout,
            )
            # Opt-in flag read by import_dashboard (sibling mixin): when True
            # a failed import triggers delete-then-retry of the target.
            self.delete_before_reimport: bool = False
            app_logger.reflect(
                "Superset client initialized",
                extra={"environment": getattr(self.env, "id", None)},
            )
    # [/DEF:SupersetClientInit:Function]
    # [DEF:SupersetClientAuthenticate:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Authenticates the client using the configured credentials.
    # @RELATION: CALLS -> [APIClient]
    def authenticate(self) -> Dict[str, str]:
        """Authenticate against Superset and return the token mapping.

        Returns:
            Mapping of token names to values as produced by
            ``APIClient.authenticate``.
        """
        with belief_scope("SupersetClientAuthenticate"):
            app_logger.reason(
                "Authenticating Superset client",
                extra={"environment": getattr(self.env, "id", None)},
            )
            tokens = self.network.authenticate()
            app_logger.reflect(
                "Superset client authentication completed",
                extra={
                    "environment": getattr(self.env, "id", None),
                    "token_keys": sorted(tokens.keys()),
                },
            )
            return tokens
    # [/DEF:SupersetClientAuthenticate:Function]
    @property
    # [DEF:SupersetClientHeaders:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Returns the base HTTP headers used by the network client.
    def headers(self) -> dict:
        """Base HTTP headers, delegated to the underlying network client."""
        with belief_scope("headers"):
            return self.network.headers
    # [/DEF:SupersetClientHeaders:Function]
    # --- Pagination helpers ---
    # [DEF:SupersetClientValidateQueryParams:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Ensures query parameters have default page and page_size.
    def _validate_query_params(self, query: Optional[Dict]) -> Dict:
        """Merge *query* over the defaults ``{"page": 0, "page_size": 100}``."""
        with belief_scope("_validate_query_params"):
            # Superset list endpoints commonly cap page_size at 100.
            # Using 100 avoids partial fetches when larger values are silently truncated.
            base_query = {"page": 0, "page_size": 100}
            return {**base_query, **(query or {})}
    # [/DEF:SupersetClientValidateQueryParams:Function]
    # [DEF:SupersetClientFetchTotalObjectCount:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Fetches the total number of items for a given endpoint.
    # @RELATION: CALLS -> [APIClient]
    def _fetch_total_object_count(self, endpoint: str) -> int:
        """Return the total item count for *endpoint* (reads the "count" field)."""
        with belief_scope("_fetch_total_object_count"):
            return self.network.fetch_paginated_count(
                endpoint=endpoint,
                query_params={"page": 0, "page_size": 1},
                count_field="count",
            )
    # [/DEF:SupersetClientFetchTotalObjectCount:Function]
    # [DEF:SupersetClientFetchAllPages:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Iterates through all pages to collect all data items.
    # @RELATION: CALLS -> [APIClient]
    def _fetch_all_pages(self, endpoint: str, pagination_options: Dict) -> List[Dict]:
        """Collect every page of *endpoint* via ``APIClient.fetch_paginated_data``."""
        with belief_scope("_fetch_all_pages"):
            return self.network.fetch_paginated_data(
                endpoint=endpoint, pagination_options=pagination_options
            )
    # [/DEF:SupersetClientFetchAllPages:Function]
    # --- Import/Export helpers ---
    # [DEF:SupersetClientDoImport:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Performs the actual multipart upload for import.
    # @RELATION: CALLS -> [APIClient]
    def _do_import(self, file_name: Union[str, Path]) -> Dict:
        """Upload *file_name* to ``/dashboard/import/`` with overwrite enabled.

        Raises:
            FileNotFoundError: If *file_name* does not exist on disk.
        """
        with belief_scope("_do_import"):
            app_logger.debug(f"[_do_import][State] Uploading file: {file_name}")
            file_path = Path(file_name)
            if not file_path.exists():
                app_logger.error(
                    f"[_do_import][Failure] File does not exist: {file_name}"
                )
                raise FileNotFoundError(f"File does not exist: {file_name}")
            # Doubled timeout: archive uploads take longer than plain API calls.
            return self.network.upload_file(
                endpoint="/dashboard/import/",
                file_info={
                    "file_obj": file_path,
                    "file_name": file_path.name,
                    "form_field": "formData",
                },
                extra_data={"overwrite": "true"},
                timeout=self.env.timeout * 2,
            )
    # [/DEF:SupersetClientDoImport:Function]
    # [DEF:SupersetClientValidateExportResponse:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Validates that the export response is a non-empty ZIP archive.
    def _validate_export_response(self, response: Response, dashboard_id: int) -> None:
        """Raise ``SupersetAPIError`` unless *response* is a non-empty ZIP.

        Args:
            response: Raw HTTP response from the export endpoint.
            dashboard_id: Exported dashboard ID; not used in the body, kept
                for signature symmetry with the other export helpers.
        """
        with belief_scope("_validate_export_response"):
            content_type = response.headers.get("Content-Type", "")
            if "application/zip" not in content_type:
                raise SupersetAPIError(
                    f"Получен не ZIP-архив (Content-Type: {content_type})"
                )
            if not response.content:
                raise SupersetAPIError("Получены пустые данные при экспорте")
    # [/DEF:SupersetClientValidateExportResponse:Function]
    # [DEF:SupersetClientResolveExportFilename:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Determines the filename for an exported dashboard.
    def _resolve_export_filename(self, response: Response, dashboard_id: int) -> str:
        """Derive the export filename from response headers, or synthesize one.

        Falls back to ``dashboard_export_<id>_<timestamp>.zip`` when the
        headers do not carry a filename.
        """
        with belief_scope("_resolve_export_filename"):
            filename = get_filename_from_headers(dict(response.headers))
            if not filename:
                from datetime import datetime
                timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
                filename = f"dashboard_export_{dashboard_id}_{timestamp}.zip"
                app_logger.warning(
                    "[_resolve_export_filename][Warning] Generated filename: %s",
                    filename,
                )
            return filename
    # [/DEF:SupersetClientResolveExportFilename:Function]
    # [DEF:SupersetClientValidateImportFile:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Validates that the file to be imported is a valid ZIP with metadata.yaml.
    def _validate_import_file(self, zip_path: Union[str, Path]) -> None:
        """Validate an import archive before uploading.

        Raises:
            FileNotFoundError: If *zip_path* does not exist.
            SupersetAPIError: If the file is not a ZIP or lacks
                ``metadata.yaml``.
        """
        with belief_scope("_validate_import_file"):
            path = Path(zip_path)
            if not path.exists():
                raise FileNotFoundError(f"Файл {zip_path} не существует")
            if not zipfile.is_zipfile(path):
                raise SupersetAPIError(f"Файл {zip_path} не является ZIP-архивом")
            with zipfile.ZipFile(path, "r") as zf:
                if not any(n.endswith("metadata.yaml") for n in zf.namelist()):
                    raise SupersetAPIError(
                        f"Архив {zip_path} не содержит 'metadata.yaml'"
                    )
    # [/DEF:SupersetClientValidateImportFile:Function]
    # [DEF:SupersetClientResolveTargetIdForDelete:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Resolves a dashboard ID from either an ID or a slug.
    # @RELATION: CALLS -> [SupersetClientGetDashboards]
    def _resolve_target_id_for_delete(
        self, dash_id: Optional[int], dash_slug: Optional[str]
    ) -> Optional[int]:
        """Resolve which dashboard ID to delete.

        Prefers an explicit *dash_id*; otherwise looks the slug up through
        ``get_dashboards`` (provided by a sibling mixin).  Returns ``None``
        when neither resolves; lookup failures are logged, not raised.
        """
        with belief_scope("_resolve_target_id_for_delete"):
            if dash_id is not None:
                return dash_id
            if dash_slug is not None:
                app_logger.debug(
                    "[_resolve_target_id_for_delete][State] Resolving ID by slug '%s'.",
                    dash_slug,
                )
                try:
                    _, candidates = self.get_dashboards(
                        query={
                            "filters": [{"col": "slug", "op": "eq", "value": dash_slug}]
                        }
                    )
                    if candidates:
                        target_id = candidates[0]["id"]
                        app_logger.debug(
                            "[_resolve_target_id_for_delete][Success] Resolved slug to ID %s.",
                            target_id,
                        )
                        return target_id
                except Exception as e:
                    app_logger.warning(
                        "[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s",
                        dash_slug,
                        e,
                    )
            return None
    # [/DEF:SupersetClientResolveTargetIdForDelete:Function]
    # [DEF:SupersetClientGetAllResources:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches all resources of a given type with id, uuid, and name columns.
    # @RELATION: CALLS -> [SupersetClientFetchAllPages]
    # NOTE(review): "datetime" below is a string forward reference; the
    # datetime module is not imported at this module's top level — verify
    # tooling that resolves annotations (e.g. get_type_hints) is not used here.
    def get_all_resources(
        self, resource_type: str, since_dttm: Optional["datetime"] = None
    ) -> List[Dict]:
        """Fetch all resources of one type with an id/uuid/name projection.

        Args:
            resource_type: "chart", "dataset" or "dashboard"; anything else
                logs a warning and returns an empty list.
            since_dttm: When given, only resources with ``changed_on_dttm``
                strictly after this moment (epoch milliseconds) are returned.

        Returns:
            List of resource dicts limited to the per-type column set.
        """
        with belief_scope(
            "SupersetClient.get_all_resources",
            f"type={resource_type}, since={since_dttm}",
        ):
            column_map = {
                "chart": {
                    "endpoint": "/chart/",
                    "columns": ["id", "uuid", "slice_name"],
                },
                "dataset": {
                    "endpoint": "/dataset/",
                    "columns": ["id", "uuid", "table_name"],
                },
                "dashboard": {
                    "endpoint": "/dashboard/",
                    "columns": ["id", "uuid", "slug", "dashboard_title"],
                },
            }
            config = column_map.get(resource_type)
            if not config:
                app_logger.warning(
                    "[get_all_resources][Warning] Unknown resource type: %s",
                    resource_type,
                )
                return []
            query = {"columns": config["columns"]}
            if since_dttm:
                import math
                # Use int milliseconds to be safe
                timestamp_ms = math.floor(since_dttm.timestamp() * 1000)
                query["filters"] = [
                    {"col": "changed_on_dttm", "opr": "gt", "value": timestamp_ms}
                ]
            validated = self._validate_query_params(query)
            data = self._fetch_all_pages(
                endpoint=config["endpoint"],
                pagination_options={"base_query": validated, "results_field": "result"},
            )
            app_logger.info(
                "[get_all_resources][Exit] Fetched %d %s resources.",
                len(data),
                resource_type,
            )
            return data
    # [/DEF:SupersetClientGetAllResources:Function]
    # [/DEF:SupersetClientBase:Class]
# [/DEF:SupersetClientBase:Module]

View File

@@ -0,0 +1,88 @@
# [DEF:SupersetChartsMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Chart domain mixin for SupersetClient — list, get, extract IDs from layout.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
import json
import re
from typing import Any, Dict, List, Optional, Tuple, cast
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetChartsMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing all chart-related Superset API operations.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
class SupersetChartsMixin:
    """Chart-domain operations mixed into the composed ``SupersetClient``."""
    # [DEF:SupersetClientGetChart:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches a single chart by ID.
    # @RELATION: CALLS -> [APIClient]
    def get_chart(self, chart_id: int) -> Dict:
        """Return the raw API payload for chart *chart_id*."""
        with belief_scope("SupersetClient.get_chart", f"id={chart_id}"):
            payload = self.network.request(
                method="GET", endpoint=f"/chart/{chart_id}"
            )
            return cast(Dict, payload)
    # [/DEF:SupersetClientGetChart:Function]
    # [DEF:SupersetClientGetCharts:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches all charts with pagination support.
    # @RELATION: CALLS -> [SupersetClientFetchAllPages]
    def get_charts(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
        """Return ``(count, charts)`` collected across all result pages.

        A default column projection is applied when the caller did not
        request specific columns.
        """
        with belief_scope("get_charts"):
            effective_query = self._validate_query_params(query or {})
            effective_query.setdefault(
                "columns", ["id", "uuid", "slice_name", "viz_type"]
            )
            rows = self._fetch_all_pages(
                endpoint="/chart/",
                pagination_options={
                    "base_query": effective_query,
                    "results_field": "result",
                },
            )
            return len(rows), rows
    # [/DEF:SupersetClientGetCharts:Function]
    # [DEF:SupersetClientExtractChartIdsFromLayout:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Traverses dashboard layout metadata and extracts chart IDs from common keys.
    def _extract_chart_ids_from_layout(
        self, payload: Any
    ) -> set:
        """Collect chart IDs from arbitrarily nested layout metadata.

        Recognizes numeric values under ``chartId``/``chart_id``/``slice_id``/
        ``sliceId`` keys and string ``id`` values shaped like ``CHART-<n>``.
        Traversal is iterative (explicit work list) over dicts and lists;
        unparsable values are silently skipped.
        """
        with belief_scope("_extract_chart_ids_from_layout"):
            collected = set()
            pending = [payload]
            while pending:
                node = pending.pop()
                if isinstance(node, dict):
                    for field, content in node.items():
                        if field in ("chartId", "chart_id", "slice_id", "sliceId"):
                            try:
                                collected.add(int(content))
                            except (TypeError, ValueError):
                                pass
                        if field == "id" and isinstance(content, str):
                            hit = re.match(r"^CHART-(\d+)$", content)
                            if hit:
                                try:
                                    collected.add(int(hit.group(1)))
                                except ValueError:
                                    pass
                        pending.append(content)
                elif isinstance(node, list):
                    pending.extend(node)
            return collected
    # [/DEF:SupersetClientExtractChartIdsFromLayout:Function]
# [/DEF:SupersetChartsMixin:Class]
# [/DEF:SupersetChartsMixin:Module]

View File

@@ -0,0 +1,2 @@
# This file has been decomposed into _dashboards_list.py, _dashboards_filters.py, _dashboards_crud.py
# See __init__.py for the composed SupersetClient class.

View File

@@ -0,0 +1,374 @@
# [DEF:SupersetDashboardsCrudMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Dashboard CRUD mixin for SupersetClient — detail, export, import, delete.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
# @RELATION: DEPENDS_ON -> [SupersetDashboardsFiltersMixin]
# @RELATION: DEPENDS_ON -> [SupersetChartsMixin]
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetDashboardsCrudMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing dashboard detail resolution, export, import, and delete operations.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
# @RELATION: DEPENDS_ON -> [SupersetDashboardsFiltersMixin]
# @RELATION: DEPENDS_ON -> [SupersetChartsMixin]
class SupersetDashboardsCrudMixin:
    """Dashboard detail, export, import, and delete operations for ``SupersetClient``."""
    # [DEF:SupersetClientGetDashboardDetail:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches detailed dashboard information including related charts and datasets.
    # @RELATION: CALLS -> [SupersetClientGetDashboard]
    # @RELATION: CALLS -> [SupersetClientGetChart]
    def get_dashboard_detail(self, dashboard_ref: Union[int, str]) -> Dict:
        """Build a consolidated detail view of one dashboard.

        Resolution order:
        1. ``/dashboard/{ref}/charts`` and ``/dashboard/{ref}/datasets``.
        2. If step 1 yielded no charts, chart IDs are recovered from the
           dashboard's ``position_json``/``json_metadata`` layout and each
           chart is fetched individually via ``get_chart``.
        3. Datasets referenced by charts but missing from step 1 are
           backfilled via ``get_dataset`` (sibling mixin).
        Failures in each stage are logged and tolerated.

        Args:
            dashboard_ref: Numeric dashboard ID or slug string.

        Returns:
            Dict with keys ``id``, ``title``, ``slug``, ``url``,
            ``description``, ``last_modified``, ``published``, ``charts``,
            ``datasets``, ``chart_count`` and ``dataset_count``.
        """
        with belief_scope(
            "SupersetClient.get_dashboard_detail", f"ref={dashboard_ref}"
        ):
            dashboard_response = self.get_dashboard(dashboard_ref)
            dashboard_data = dashboard_response.get("result", dashboard_response)
            charts: List[Dict] = []
            datasets: List[Dict] = []
            # [DEF:extract_dataset_id_from_form_data:Function]
            def extract_dataset_id_from_form_data(
                form_data: Optional[Dict],
            ) -> Optional[int]:
                """Pull a dataset ID from chart form_data.

                Accepts a ``"<id>__<type>"`` datasource string, a datasource
                dict carrying ``id``, or a flat ``datasource_id`` field;
                returns ``None`` when nothing parses.
                """
                if not isinstance(form_data, dict):
                    return None
                datasource = form_data.get("datasource")
                if isinstance(datasource, str):
                    matched = re.match(r"^(\d+)__", datasource)
                    if matched:
                        try:
                            return int(matched.group(1))
                        except ValueError:
                            return None
                if isinstance(datasource, dict):
                    ds_id = datasource.get("id")
                    try:
                        return int(ds_id) if ds_id is not None else None
                    except (TypeError, ValueError):
                        return None
                ds_id = form_data.get("datasource_id")
                try:
                    return int(ds_id) if ds_id is not None else None
                except (TypeError, ValueError):
                    return None
            # [/DEF:extract_dataset_id_from_form_data:Function]
            # Stage 1a: charts directly attached to the dashboard.
            try:
                charts_response = self.network.request(
                    method="GET", endpoint=f"/dashboard/{dashboard_ref}/charts"
                )
                charts_payload = (
                    charts_response.get("result", [])
                    if isinstance(charts_response, dict)
                    else []
                )
                for chart_obj in charts_payload:
                    if not isinstance(chart_obj, dict):
                        continue
                    chart_id = chart_obj.get("id")
                    if chart_id is None:
                        continue
                    form_data = chart_obj.get("form_data")
                    if isinstance(form_data, str):
                        try:
                            form_data = json.loads(form_data)
                        except Exception:
                            form_data = {}
                    dataset_id = extract_dataset_id_from_form_data(
                        form_data
                    ) or chart_obj.get("datasource_id")
                    charts.append({
                        "id": int(chart_id),
                        "title": chart_obj.get("slice_name")
                        or chart_obj.get("name") or f"Chart {chart_id}",
                        "viz_type": (
                            form_data.get("viz_type")
                            if isinstance(form_data, dict) else None
                        ),
                        "dataset_id": int(dataset_id) if dataset_id is not None else None,
                        "last_modified": chart_obj.get("changed_on"),
                        "overview": chart_obj.get("description")
                        or (form_data.get("viz_type") if isinstance(form_data, dict) else None)
                        or "Chart",
                    })
            except Exception as e:
                app_logger.warning(
                    "[get_dashboard_detail][Warning] Failed to fetch dashboard charts: %s", e,
                )
            # Stage 1b: datasets directly attached to the dashboard.
            try:
                datasets_response = self.network.request(
                    method="GET", endpoint=f"/dashboard/{dashboard_ref}/datasets"
                )
                datasets_payload = (
                    datasets_response.get("result", [])
                    if isinstance(datasets_response, dict)
                    else []
                )
                for dataset_obj in datasets_payload:
                    if not isinstance(dataset_obj, dict):
                        continue
                    dataset_id = dataset_obj.get("id")
                    if dataset_id is None:
                        continue
                    db_payload = dataset_obj.get("database")
                    db_name = (
                        db_payload.get("database_name")
                        if isinstance(db_payload, dict) else None
                    )
                    table_name = (
                        dataset_obj.get("table_name")
                        or dataset_obj.get("datasource_name")
                        or dataset_obj.get("name") or f"Dataset {dataset_id}"
                    )
                    schema = dataset_obj.get("schema")
                    fq_name = f"{schema}.{table_name}" if schema else table_name
                    datasets.append({
                        "id": int(dataset_id),
                        "table_name": table_name,
                        "schema": schema,
                        "database": db_name or dataset_obj.get("database_name") or "Unknown",
                        "last_modified": dataset_obj.get("changed_on"),
                        "overview": fq_name,
                    })
            except Exception as e:
                app_logger.warning(
                    "[get_dashboard_detail][Warning] Failed to fetch dashboard datasets: %s", e,
                )
            # Fallback: derive chart IDs from layout metadata
            if not charts:
                raw_position_json = dashboard_data.get("position_json")
                chart_ids_from_position = set()
                if isinstance(raw_position_json, str) and raw_position_json:
                    try:
                        parsed_position = json.loads(raw_position_json)
                        chart_ids_from_position.update(
                            self._extract_chart_ids_from_layout(parsed_position)
                        )
                    except Exception:
                        pass
                elif isinstance(raw_position_json, dict):
                    chart_ids_from_position.update(
                        self._extract_chart_ids_from_layout(raw_position_json)
                    )
                raw_json_metadata = dashboard_data.get("json_metadata")
                if isinstance(raw_json_metadata, str) and raw_json_metadata:
                    try:
                        parsed_metadata = json.loads(raw_json_metadata)
                        chart_ids_from_position.update(
                            self._extract_chart_ids_from_layout(parsed_metadata)
                        )
                    except Exception:
                        pass
                elif isinstance(raw_json_metadata, dict):
                    chart_ids_from_position.update(
                        self._extract_chart_ids_from_layout(raw_json_metadata)
                    )
                app_logger.info(
                    "[get_dashboard_detail][State] Extracted %s fallback chart IDs from layout (dashboard_id=%s)",
                    len(chart_ids_from_position), dashboard_ref,
                )
                for chart_id in sorted(chart_ids_from_position):
                    try:
                        chart_response = self.get_chart(int(chart_id))
                        chart_data = chart_response.get("result", chart_response)
                        charts.append({
                            "id": int(chart_id),
                            "title": chart_data.get("slice_name")
                            or chart_data.get("name") or f"Chart {chart_id}",
                            "viz_type": chart_data.get("viz_type"),
                            "dataset_id": chart_data.get("datasource_id"),
                            "last_modified": chart_data.get("changed_on"),
                            "overview": chart_data.get("description")
                            or chart_data.get("viz_type") or "Chart",
                        })
                    except Exception as e:
                        app_logger.warning(
                            "[get_dashboard_detail][Warning] Failed to resolve fallback chart %s: %s",
                            chart_id, e,
                        )
            # Backfill datasets from chart datasource IDs.
            dataset_ids_from_charts = {
                c.get("dataset_id") for c in charts if c.get("dataset_id") is not None
            }
            known_dataset_ids = {
                d.get("id") for d in datasets if d.get("id") is not None
            }
            missing_dataset_ids: List[int] = []
            for raw_dataset_id in dataset_ids_from_charts:
                if raw_dataset_id is None or raw_dataset_id in known_dataset_ids:
                    continue
                try:
                    missing_dataset_ids.append(int(raw_dataset_id))
                except (TypeError, ValueError):
                    continue
            for dataset_id in missing_dataset_ids:
                try:
                    dataset_response = self.get_dataset(int(dataset_id))
                    dataset_data = dataset_response.get("result", dataset_response)
                    db_payload = dataset_data.get("database")
                    db_name = (
                        db_payload.get("database_name")
                        if isinstance(db_payload, dict) else None
                    )
                    table_name = (
                        dataset_data.get("table_name") or f"Dataset {dataset_id}"
                    )
                    schema = dataset_data.get("schema")
                    fq_name = f"{schema}.{table_name}" if schema else table_name
                    datasets.append({
                        "id": int(dataset_id),
                        "table_name": table_name,
                        "schema": schema,
                        "database": db_name or "Unknown",
                        "last_modified": dataset_data.get("changed_on_utc")
                        or dataset_data.get("changed_on"),
                        "overview": fq_name,
                    })
                except Exception as e:
                    app_logger.warning(
                        "[get_dashboard_detail][Warning] Failed to resolve dataset %s: %s",
                        dataset_id, e,
                    )
            # De-duplicate by ID; later entries win over earlier ones.
            unique_charts = {chart["id"]: chart for chart in charts}
            unique_datasets = {dataset["id"]: dataset for dataset in datasets}
            resolved_dashboard_id = dashboard_data.get("id", dashboard_ref)
            return {
                "id": resolved_dashboard_id,
                "title": dashboard_data.get("dashboard_title")
                or dashboard_data.get("title") or f"Dashboard {resolved_dashboard_id}",
                "slug": dashboard_data.get("slug"),
                "url": dashboard_data.get("url"),
                "description": dashboard_data.get("description") or "",
                "last_modified": dashboard_data.get("changed_on_utc")
                or dashboard_data.get("changed_on"),
                "published": dashboard_data.get("published"),
                "charts": list(unique_charts.values()),
                "datasets": list(unique_datasets.values()),
                "chart_count": len(unique_charts),
                "dataset_count": len(unique_datasets),
            }
    # [/DEF:SupersetClientGetDashboardDetail:Function]
    # [DEF:SupersetClientExportDashboard:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Exports a dashboard as a ZIP archive.
    # @SIDE_EFFECT: Performs network I/O to download archive.
    # @RELATION: CALLS -> [APIClient]
    def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
        """Export dashboard *dashboard_id* as a ZIP archive.

        Returns:
            ``(content, filename)`` — raw archive bytes and the resolved
            file name (from headers, or a generated fallback).

        Raises:
            SupersetAPIError: If the response is not a non-empty ZIP
                (via ``_validate_export_response``).
        """
        with belief_scope("export_dashboard"):
            app_logger.info(
                "[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id
            )
            response = self.network.request(
                method="GET",
                endpoint="/dashboard/export/",
                params={"q": json.dumps([dashboard_id])},
                stream=True,
                raw_response=True,
            )
            response = cast(Response, response)
            self._validate_export_response(response, dashboard_id)
            filename = self._resolve_export_filename(response, dashboard_id)
            app_logger.info(
                "[export_dashboard][Exit] Exported dashboard %s to %s.",
                dashboard_id, filename,
            )
            return response.content, filename
    # [/DEF:SupersetClientExportDashboard:Function]
    # [DEF:SupersetClientImportDashboard:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Imports a dashboard from a ZIP file.
    # @SIDE_EFFECT: Performs network I/O to upload archive.
    # @RELATION: CALLS -> [SupersetClientDoImport]
    # @RELATION: CALLS -> [APIClient]
    def import_dashboard(
        self,
        file_name: Union[str, Path],
        dash_id: Optional[int] = None,
        dash_slug: Optional[str] = None,
    ) -> Dict:
        """Import a dashboard archive, optionally deleting-then-retrying.

        When the first upload fails and ``self.delete_before_reimport`` is
        True, the target dashboard (resolved from *dash_id* or *dash_slug*)
        is deleted and the upload retried once.

        Args:
            file_name: Path to the ZIP archive to upload.
            dash_id: Optional target dashboard ID for the delete-retry path.
            dash_slug: Optional target slug, used when *dash_id* is absent.

        Raises:
            ValueError: If *file_name* is None.
            FileNotFoundError / SupersetAPIError: From archive validation.
            Exception: The original upload error when retry is disabled or
                no target ID could be resolved.
        """
        with belief_scope("import_dashboard"):
            if file_name is None:
                raise ValueError("file_name cannot be None")
            file_path = str(file_name)
            self._validate_import_file(file_path)
            try:
                return self._do_import(file_path)
            except Exception as exc:
                app_logger.error(
                    "[import_dashboard][Failure] First import attempt failed: %s",
                    exc, exc_info=True,
                )
                if not self.delete_before_reimport:
                    raise
                target_id = self._resolve_target_id_for_delete(dash_id, dash_slug)
                if target_id is None:
                    app_logger.error(
                        "[import_dashboard][Failure] No ID available for delete-retry."
                    )
                    raise
                self.delete_dashboard(target_id)
                app_logger.info(
                    "[import_dashboard][State] Deleted dashboard ID %s, retrying import.",
                    target_id,
                )
                return self._do_import(file_path)
    # [/DEF:SupersetClientImportDashboard:Function]
    # [DEF:SupersetClientDeleteDashboard:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Deletes a dashboard by its ID or slug.
    # @SIDE_EFFECT: Deletes resource from upstream Superset environment.
    # @RELATION: CALLS -> [APIClient]
    def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
        """Delete dashboard *dashboard_id* (ID or slug) from Superset.

        An unexpected response payload is logged as a warning, not raised.
        """
        with belief_scope("delete_dashboard"):
            app_logger.info(
                "[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id
            )
            response = self.network.request(
                method="DELETE", endpoint=f"/dashboard/{dashboard_id}"
            )
            response = cast(Dict, response)
            if response.get("result", True) is not False:
                app_logger.info(
                    "[delete_dashboard][Success] Dashboard %s deleted.", dashboard_id
                )
            else:
                app_logger.warning(
                    "[delete_dashboard][Warning] Unexpected response while deleting %s: %s",
                    dashboard_id, response,
                )
    # [/DEF:SupersetClientDeleteDashboard:Function]
# [/DEF:SupersetDashboardsCrudMixin:Class]
# [/DEF:SupersetDashboardsCrudMixin:Module]

View File

@@ -0,0 +1,266 @@
# [DEF:SupersetDashboardsFiltersMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Dashboard native filter extraction mixin for SupersetClient.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
import json
from typing import Any, Dict, Optional, Union, cast
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetDashboardsFiltersMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing dashboard native filter extraction from permalink and URL state.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
class SupersetDashboardsFiltersMixin:
    """Native-filter state extraction (permalink / URL key) for ``SupersetClient``."""
    # [DEF:SupersetClientGetDashboard:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches a single dashboard by ID or slug.
    # @RELATION: CALLS -> [APIClient]
    def get_dashboard(self, dashboard_ref: Union[int, str]) -> Dict:
        """Return the raw API payload for dashboard *dashboard_ref* (ID or slug)."""
        with belief_scope("SupersetClient.get_dashboard", f"ref={dashboard_ref}"):
            response = self.network.request(
                method="GET", endpoint=f"/dashboard/{dashboard_ref}"
            )
            return cast(Dict, response)
    # [/DEF:SupersetClientGetDashboard:Function]
    # [DEF:SupersetClientGetDashboardPermalinkState:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Fetches stored dashboard permalink state by permalink key.
    # @RELATION: CALLS -> [APIClient]
    def get_dashboard_permalink_state(self, permalink_key: str) -> Dict:
        """Return the stored permalink state payload for *permalink_key*."""
        with belief_scope(
            "SupersetClient.get_dashboard_permalink_state", f"key={permalink_key}"
        ):
            response = self.network.request(
                method="GET", endpoint=f"/dashboard/permalink/{permalink_key}"
            )
            return cast(Dict, response)
    # [/DEF:SupersetClientGetDashboardPermalinkState:Function]
    # [DEF:SupersetClientGetNativeFilterState:Function]
    # @COMPLEXITY: 2
    # @PURPOSE: Fetches stored native filter state by filter state key.
    # @RELATION: CALLS -> [APIClient]
    def get_native_filter_state(
        self, dashboard_id: Union[int, str], filter_state_key: str
    ) -> Dict:
        """Return the stored filter-state payload for one dashboard/key pair."""
        with belief_scope(
            "SupersetClient.get_native_filter_state",
            f"dashboard={dashboard_id}, key={filter_state_key}",
        ):
            response = self.network.request(
                method="GET",
                endpoint=f"/dashboard/{dashboard_id}/filter_state/{filter_state_key}",
            )
            return cast(Dict, response)
    # [/DEF:SupersetClientGetNativeFilterState:Function]
    # [DEF:SupersetClientExtractNativeFiltersFromPermalink:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Extract native filters dataMask from a permalink key.
    # @RELATION: CALLS -> [SupersetClientGetDashboardPermalinkState]
    def extract_native_filters_from_permalink(self, permalink_key: str) -> Dict:
        """Resolve a permalink key into a normalized native-filter snapshot.

        Returns:
            Dict with ``dataMask`` (per-filter extraFormData/filterState/
            ownState), ``activeTabs``, ``anchor``, ``chartStates`` and the
            echoing ``permalink_key``.
        """
        with belief_scope(
            "SupersetClient.extract_native_filters_from_permalink",
            f"key={permalink_key}",
        ):
            permalink_response = self.get_dashboard_permalink_state(permalink_key)
            # The API may wrap the payload in "result"/"state"; unwrap defensively.
            result = permalink_response.get("result", permalink_response)
            state = result.get("state", result)
            data_mask = state.get("dataMask", {})
            extracted_filters = {}
            for filter_id, filter_data in data_mask.items():
                if not isinstance(filter_data, dict):
                    continue
                extracted_filters[filter_id] = {
                    "extraFormData": filter_data.get("extraFormData", {}),
                    "filterState": filter_data.get("filterState", {}),
                    "ownState": filter_data.get("ownState", {}),
                }
            return {
                "dataMask": extracted_filters,
                "activeTabs": state.get("activeTabs", []),
                "anchor": state.get("anchor"),
                "chartStates": state.get("chartStates", {}),
                "permalink_key": permalink_key,
            }
    # [/DEF:SupersetClientExtractNativeFiltersFromPermalink:Function]
    # [DEF:SupersetClientExtractNativeFiltersFromKey:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Extract native filters from a native_filters_key URL parameter.
    # @RELATION: CALLS -> [SupersetClientGetNativeFilterState]
    def extract_native_filters_from_key(
        self, dashboard_id: Union[int, str], filter_state_key: str
    ) -> Dict:
        """Resolve a ``native_filters_key`` into a normalized filter snapshot.

        The stored value may be a JSON string or a dict, and may describe
        either a single filter (has ``id`` + ``extraFormData``) or a mapping
        of filter IDs; both shapes are normalized into ``dataMask``.
        Unparsable JSON is logged and treated as empty.
        """
        with belief_scope(
            "SupersetClient.extract_native_filters_from_key",
            f"dashboard={dashboard_id}, key={filter_state_key}",
        ):
            filter_response = self.get_native_filter_state(
                dashboard_id, filter_state_key
            )
            result = filter_response.get("result", filter_response)
            value = result.get("value")
            if isinstance(value, str):
                try:
                    parsed_value = json.loads(value)
                except json.JSONDecodeError as e:
                    app_logger.warning(
                        "[extract_native_filters_from_key][Warning] Failed to parse filter state JSON: %s",
                        e,
                    )
                    parsed_value = {}
            elif isinstance(value, dict):
                parsed_value = value
            else:
                parsed_value = {}
            extracted_filters = {}
            # Single-filter shape vs. mapping of filter_id -> filter_data.
            if "id" in parsed_value and "extraFormData" in parsed_value:
                filter_id = parsed_value.get("id", filter_state_key)
                extracted_filters[filter_id] = {
                    "extraFormData": parsed_value.get("extraFormData", {}),
                    "filterState": parsed_value.get("filterState", {}),
                    "ownState": parsed_value.get("ownState", {}),
                }
            else:
                for filter_id, filter_data in parsed_value.items():
                    if not isinstance(filter_data, dict):
                        continue
                    extracted_filters[filter_id] = {
                        "extraFormData": filter_data.get("extraFormData", {}),
                        "filterState": filter_data.get("filterState", {}),
                        "ownState": filter_data.get("ownState", {}),
                    }
            return {
                "dataMask": extracted_filters,
                "dashboard_id": dashboard_id,
                "filter_state_key": filter_state_key,
            }
    # [/DEF:SupersetClientExtractNativeFiltersFromKey:Function]
    # [DEF:SupersetClientParseDashboardUrlForFilters:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Parse a Superset dashboard URL and extract native filter state if present.
    # @RELATION: CALLS -> [SupersetClientExtractNativeFiltersFromPermalink]
    # @RELATION: CALLS -> [SupersetClientExtractNativeFiltersFromKey]
    def parse_dashboard_url_for_filters(self, url: str) -> Dict:
        """Parse a dashboard URL and extract any embedded filter state.

        Checks, in order: a permalink path segment (``/p/<key>``), a
        ``native_filters_key`` query parameter (which requires resolving the
        dashboard ID, fetching by slug when the path segment is not numeric),
        and an inline ``native_filters`` JSON query parameter.

        Returns:
            Dict with ``url``, ``dashboard_id`` (may stay ``None``),
            ``filter_type`` (``"permalink"`` / ``"native_filters_key"`` /
            ``"native_filters"`` / ``None``) and ``filters``.
        """
        with belief_scope(
            "SupersetClient.parse_dashboard_url_for_filters", f"url={url}"
        ):
            import urllib.parse
            parsed_url = urllib.parse.urlparse(url)
            query_params = urllib.parse.parse_qs(parsed_url.query)
            path_parts = parsed_url.path.rstrip("/").split("/")
            result = {
                "url": url,
                "dashboard_id": None,
                "filter_type": None,
                "filters": {},
            }
            # Check for permalink URL: /dashboard/p/{key}/ or /superset/dashboard/p/{key}/
            if "p" in path_parts:
                try:
                    p_index = path_parts.index("p")
                    if p_index + 1 < len(path_parts):
                        permalink_key = path_parts[p_index + 1]
                        filter_data = self.extract_native_filters_from_permalink(
                            permalink_key
                        )
                        result["filter_type"] = "permalink"
                        result["filters"] = filter_data
                        return result
                except ValueError:
                    pass
            # Check for native_filters_key in query params
            native_filters_key = query_params.get("native_filters_key", [None])[0]
            if native_filters_key:
                dashboard_ref = None
                if "dashboard" in path_parts:
                    try:
                        dash_index = path_parts.index("dashboard")
                        if dash_index + 1 < len(path_parts):
                            potential_id = path_parts[dash_index + 1]
                            # Skip non-identifier path segments.
                            if potential_id not in ("p", "list", "new"):
                                dashboard_ref = potential_id
                    except ValueError:
                        pass
                if dashboard_ref:
                    resolved_id = None
                    try:
                        resolved_id = int(dashboard_ref)
                    except (ValueError, TypeError):
                        # Not numeric — treat as slug and resolve via API.
                        try:
                            dash_resp = self.get_dashboard(dashboard_ref)
                            dash_data = (
                                dash_resp.get("result", dash_resp)
                                if isinstance(dash_resp, dict)
                                else {}
                            )
                            raw_id = dash_data.get("id")
                            if raw_id is not None:
                                resolved_id = int(raw_id)
                        except Exception as e:
                            app_logger.warning(
                                "[parse_dashboard_url_for_filters][Warning] Failed to resolve dashboard slug '%s' to ID: %s",
                                dashboard_ref,
                                e,
                            )
                    if resolved_id is not None:
                        filter_data = self.extract_native_filters_from_key(
                            resolved_id, native_filters_key
                        )
                        result["filter_type"] = "native_filters_key"
                        result["dashboard_id"] = resolved_id
                        result["filters"] = filter_data
                        return result
                else:
                    app_logger.warning(
                        "[parse_dashboard_url_for_filters][Warning] Could not resolve dashboard_id from URL for native_filters_key"
                    )
            # Check for native_filters in query params (direct filter values)
            native_filters = query_params.get("native_filters", [None])[0]
            if native_filters:
                try:
                    parsed_filters = json.loads(native_filters)
                    result["filter_type"] = "native_filters"
                    result["filters"] = {"dataMask": parsed_filters}
                    return result
                except json.JSONDecodeError as e:
                    app_logger.warning(
                        "[parse_dashboard_url_for_filters][Warning] Failed to parse native_filters JSON: %s",
                        e,
                    )
            return result
    # [/DEF:SupersetClientParseDashboardUrlForFilters:Function]
# [/DEF:SupersetDashboardsFiltersMixin:Class]
# [/DEF:SupersetDashboardsFiltersMixin:Module]

View File

@@ -0,0 +1,205 @@
# [DEF:SupersetDashboardsListMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Dashboard listing mixin for SupersetClient — paginated list, summary projection.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
# @RELATION: DEPENDS_ON -> [SupersetUserProjectionMixin]
import json
from typing import Any, Dict, List, Optional, Tuple, cast
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetDashboardsListMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing dashboard listing and summary projection operations.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
# @RELATION: DEPENDS_ON -> [SupersetUserProjectionMixin]
class SupersetDashboardsListMixin:
    """Dashboard listing and summary projection operations for SupersetClient.

    Expects the composing class to provide ``_validate_query_params``,
    ``_fetch_all_pages`` and ``network`` (request transport), plus the
    user-projection helpers from SupersetUserProjectionMixin
    (``_extract_owner_labels``, ``_extract_user_display``,
    ``_sanitize_user_text``) and an ``env`` attribute used in diagnostics.
    """

    # Default column projection for /dashboard/ list requests. Shared by the
    # full-list and single-page fetch paths so the two cannot drift apart.
    _DEFAULT_DASHBOARD_COLUMNS = (
        "slug", "id", "url", "changed_on_utc", "dashboard_title",
        "published", "created_by", "changed_by", "changed_by_name", "owners",
    )

    # [DEF:SupersetClientGetDashboards:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches the full dashboard list, transparently handling pagination.
    # @RELATION: CALLS -> [SupersetClientFetchAllPages]
    def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
        """Return ``(count, dashboards)`` for all dashboards across every page.

        ``count`` is the length of the fully materialized result list.
        """
        with belief_scope("get_dashboards"):
            app_logger.info("[get_dashboards][Enter] Fetching dashboards.")
            validated_query = self._validate_query_params(query or {})
            if "columns" not in validated_query:
                # Copy so downstream mutation never touches the class attribute.
                validated_query["columns"] = list(self._DEFAULT_DASHBOARD_COLUMNS)
            paginated_data = self._fetch_all_pages(
                endpoint="/dashboard/",
                pagination_options={
                    "base_query": validated_query,
                    "results_field": "result",
                },
            )
            total_count = len(paginated_data)
            app_logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
            return total_count, paginated_data
    # [/DEF:SupersetClientGetDashboards:Function]
    # [DEF:SupersetClientGetDashboardsPage:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches a single dashboards page from Superset without iterating all pages.
    # @RELATION: CALLS -> [APIClient]
    def get_dashboards_page(
        self, query: Optional[Dict] = None
    ) -> Tuple[int, List[Dict]]:
        """Return ``(total_count, page_rows)`` for one /dashboard/ page.

        Unlike :meth:`get_dashboards`, ``total_count`` is Superset's reported
        overall count (falling back to the page size if absent), not the
        number of rows returned.
        """
        with belief_scope("get_dashboards_page"):
            validated_query = self._validate_query_params(query or {})
            if "columns" not in validated_query:
                validated_query["columns"] = list(self._DEFAULT_DASHBOARD_COLUMNS)
            response_json = cast(
                Dict[str, Any],
                self.network.request(
                    method="GET",
                    endpoint="/dashboard/",
                    params={"q": json.dumps(validated_query)},
                ),
            )
            result = response_json.get("result", [])
            total_count = response_json.get("count", len(result))
            return total_count, result
    # [/DEF:SupersetClientGetDashboardsPage:Function]
    # [DEF:SupersetClientGetDashboardsSummary:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches dashboard metadata optimized for the grid.
    # @RELATION: CALLS -> [SupersetClientGetDashboards]
    def get_dashboards_summary(self, require_slug: bool = False) -> List[Dict]:
        """Return grid-ready dashboard rows with projected actor labels.

        When *require_slug* is True, only dashboards with a non-empty slug
        are requested. Emits reflect-level diagnostics for the first few
        rows to aid debugging of owner/actor projection.
        """
        with belief_scope("SupersetClient.get_dashboards_summary"):
            query: Dict[str, Any] = {}
            if require_slug:
                query["filters"] = [{"col": "slug", "opr": "neq", "value": ""}]
            _, dashboards = self.get_dashboards(query=query)
            result = []
            max_debug_samples = 12
            for index, dash in enumerate(dashboards):
                raw_owners = dash.get("owners")
                raw_created_by = dash.get("created_by")
                raw_changed_by = dash.get("changed_by")
                raw_changed_by_name = dash.get("changed_by_name")
                owners = self._extract_owner_labels(raw_owners)
                if not owners:
                    # Fall back to creator/modifier when no explicit owners exist.
                    owners = self._extract_owner_labels([raw_created_by, raw_changed_by])
                projected_created_by = self._extract_user_display(None, raw_created_by)
                projected_modified_by = self._extract_user_display(
                    raw_changed_by_name, raw_changed_by,
                )
                raw_owner_usernames: List[str] = []
                if isinstance(raw_owners, list):
                    for owner_payload in raw_owners:
                        if isinstance(owner_payload, dict):
                            owner_username = self._sanitize_user_text(
                                owner_payload.get("username")
                            )
                            if owner_username:
                                raw_owner_usernames.append(owner_username)
                result.append({
                    "id": dash.get("id"),
                    "slug": dash.get("slug"),
                    "title": dash.get("dashboard_title"),
                    "url": dash.get("url"),
                    "last_modified": dash.get("changed_on_utc"),
                    "status": "published" if dash.get("published") else "draft",
                    "created_by": projected_created_by,
                    "modified_by": projected_modified_by,
                    "owners": owners,
                })
                if index < max_debug_samples:
                    app_logger.reflect(
                        "[REFLECT] Dashboard actor projection sample "
                        f"(env={getattr(self.env, 'id', None)}, dashboard_id={dash.get('id')}, "
                        f"raw_owners={raw_owners!r}, raw_owner_usernames={raw_owner_usernames!r}, "
                        f"raw_created_by={raw_created_by!r}, raw_changed_by={raw_changed_by!r}, "
                        f"raw_changed_by_name={raw_changed_by_name!r}, projected_owners={owners!r}, "
                        f"projected_created_by={projected_created_by!r}, projected_modified_by={projected_modified_by!r})"
                    )
            app_logger.reflect(
                "[REFLECT] Dashboard actor projection summary "
                f"(env={getattr(self.env, 'id', None)}, dashboards={len(result)}, "
                f"sampled={min(len(result), max_debug_samples)})"
            )
            return result
    # [/DEF:SupersetClientGetDashboardsSummary:Function]
    # [DEF:SupersetClientGetDashboardsSummaryPage:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches one page of dashboard metadata optimized for the grid.
    # @RELATION: CALLS -> [SupersetClientGetDashboardsPage]
    def get_dashboards_summary_page(
        self,
        page: int,
        page_size: int,
        search: Optional[str] = None,
        require_slug: bool = False,
    ) -> Tuple[int, List[Dict]]:
        """Return ``(total_count, rows)`` for one grid page.

        *page* is 1-based from the caller's perspective; Superset pagination
        is 0-based, hence the ``page - 1`` clamp. *search* applies a
        case-style "contains" filter on the dashboard title.
        """
        with belief_scope("SupersetClient.get_dashboards_summary_page"):
            query: Dict[str, Any] = {
                "page": max(page - 1, 0),
                "page_size": page_size,
            }
            filters: List[Dict[str, Any]] = []
            if require_slug:
                filters.append({"col": "slug", "opr": "neq", "value": ""})
            normalized_search = (search or "").strip()
            if normalized_search:
                filters.append({
                    "col": "dashboard_title", "opr": "ct", "value": normalized_search,
                })
            if filters:
                query["filters"] = filters
            total_count, dashboards = self.get_dashboards_page(query=query)
            result = []
            for dash in dashboards:
                owners = self._extract_owner_labels(dash.get("owners"))
                if not owners:
                    owners = self._extract_owner_labels(
                        [dash.get("created_by"), dash.get("changed_by")],
                    )
                result.append({
                    "id": dash.get("id"),
                    "slug": dash.get("slug"),
                    "title": dash.get("dashboard_title"),
                    "url": dash.get("url"),
                    "last_modified": dash.get("changed_on_utc"),
                    "status": "published" if dash.get("published") else "draft",
                    "created_by": self._extract_user_display(
                        None, dash.get("created_by"),
                    ),
                    "modified_by": self._extract_user_display(
                        dash.get("changed_by_name"), dash.get("changed_by"),
                    ),
                    "owners": owners,
                })
            return total_count, result
    # [/DEF:SupersetClientGetDashboardsSummaryPage:Function]
# [/DEF:SupersetDashboardsListMixin:Class]
# [/DEF:SupersetDashboardsListMixin:Module]

View File

@@ -0,0 +1,89 @@
# [DEF:SupersetDatabasesMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Database domain mixin for SupersetClient — list, get, summary, by_uuid.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
from typing import Any, Dict, List, Optional, Tuple, cast
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetDatabasesMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing all database-related Superset API operations.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
class SupersetDatabasesMixin:
    """Database-related Superset API operations for SupersetClient.

    Expects the composing class to provide ``_validate_query_params``,
    ``_fetch_all_pages`` and ``network`` (request transport).
    """

    # [DEF:SupersetClientGetDatabases:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches the full database list, transparently handling pagination.
    # @RELATION: CALLS -> [SupersetClientFetchAllPages]
    def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
        """Return ``(count, databases)`` for all databases across every page."""
        with belief_scope("get_databases"):
            app_logger.info("[get_databases][Enter] Fetching databases.")
            validated_query = self._validate_query_params(query or {})
            if "columns" not in validated_query:
                # Empty column list: let Superset return its default projection.
                validated_query["columns"] = []
            paginated_data = self._fetch_all_pages(
                endpoint="/database/",
                pagination_options={
                    "base_query": validated_query,
                    "results_field": "result",
                },
            )
            total_count = len(paginated_data)
            app_logger.info("[get_databases][Exit] Found %d databases.", total_count)
            return total_count, paginated_data
    # [/DEF:SupersetClientGetDatabases:Function]
    # [DEF:SupersetClientGetDatabase:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches a single database record by its ID.
    # @RELATION: CALLS -> [APIClient]
    def get_database(self, database_id: int) -> Dict:
        """Return the raw API payload for database *database_id*."""
        with belief_scope("get_database"):
            app_logger.info("[get_database][Enter] Fetching database %s.", database_id)
            response = self.network.request(
                method="GET", endpoint=f"/database/{database_id}"
            )
            response = cast(Dict, response)
            app_logger.info("[get_database][Exit] Got database %s.", database_id)
            return response
    # [/DEF:SupersetClientGetDatabase:Function]
    # [DEF:SupersetClientGetDatabasesSummary:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetch a summary of databases including uuid, name, and engine.
    # @RELATION: CALLS -> [SupersetClientGetDatabases]
    def get_databases_summary(self) -> List[Dict]:
        """Return database rows projected to ``uuid``/``database_name``/``engine``."""
        with belief_scope("SupersetClient.get_databases_summary"):
            query = {"columns": ["uuid", "database_name", "backend"]}
            _, databases = self.get_databases(query=query)
            # Map 'backend' to 'engine' for consistency with contracts
            for db in databases:
                db["engine"] = db.pop("backend", None)
            return databases
    # [/DEF:SupersetClientGetDatabasesSummary:Function]
    # [DEF:SupersetClientGetDatabaseByUuid:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Find a database by its UUID.
    # @RELATION: CALLS -> [SupersetClientGetDatabases]
    def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
        """Return the first database matching *db_uuid*, or None if absent."""
        with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
            # Superset FAB list filters use the key "opr" (not "op") — the
            # same convention every other filter in this client follows.
            query = {"filters": [{"col": "uuid", "opr": "eq", "value": db_uuid}]}
            _, databases = self.get_databases(query=query)
            return databases[0] if databases else None
    # [/DEF:SupersetClientGetDatabaseByUuid:Function]
# [/DEF:SupersetDatabasesMixin:Class]
# [/DEF:SupersetDatabasesMixin:Module]

View File

@@ -0,0 +1,217 @@
# [DEF:SupersetDatasetsMixin:Module]
# @COMPLEXITY: 3
# @PURPOSE: Dataset domain mixin for SupersetClient — list, get, detail, update.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
import json
from typing import Any, Dict, List, Optional, Tuple, cast
from ..logger import logger as app_logger, belief_scope
app_logger = cast(Any, app_logger)
# [DEF:SupersetDatasetsMixin:Class]
# @COMPLEXITY: 3
# @PURPOSE: Mixin providing basic dataset CRUD operations (list, get, detail, update).
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
class SupersetDatasetsMixin:
    """Basic dataset CRUD operations (list, get, detail, update) for SupersetClient.

    Expects the composing class to provide ``_validate_query_params``,
    ``_fetch_all_pages`` and ``network`` (request transport).
    """

    # [DEF:SupersetClientGetDatasets:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches the full dataset list, transparently handling pagination.
    # @RELATION: CALLS -> [SupersetClientFetchAllPages]
    def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
        """Return ``(count, datasets)`` for all datasets across every page."""
        with belief_scope("get_datasets"):
            app_logger.info("[get_datasets][Enter] Fetching datasets.")
            validated_query = self._validate_query_params(query)
            paginated_data = self._fetch_all_pages(
                endpoint="/dataset/",
                pagination_options={
                    "base_query": validated_query,
                    "results_field": "result",
                },
            )
            total_count = len(paginated_data)
            app_logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
            return total_count, paginated_data
    # [/DEF:SupersetClientGetDatasets:Function]
    # [DEF:SupersetClientGetDatasetsSummary:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches dataset metadata optimized for the Dataset Hub grid.
    # @RELATION: CALLS -> [SupersetClientGetDatasets]
    def get_datasets_summary(self) -> List[Dict]:
        """Return lightweight dataset rows (id, table, schema, database name)."""
        with belief_scope("SupersetClient.get_datasets_summary"):
            query = {"columns": ["id", "table_name", "schema", "database"]}
            _, datasets = self.get_datasets(query=query)
            result = []
            for ds in datasets:
                database = ds.get("database")
                result.append(
                    {
                        "id": ds.get("id"),
                        "table_name": ds.get("table_name"),
                        "schema": ds.get("schema"),
                        # The API may return "database": null; dict.get's
                        # default does not apply then, so guard with
                        # isinstance like get_dataset_detail does.
                        "database": (
                            database.get("database_name", "Unknown")
                            if isinstance(database, dict)
                            else "Unknown"
                        ),
                    }
                )
            return result
    # [/DEF:SupersetClientGetDatasetsSummary:Function]
    # [DEF:SupersetClientGetDatasetDetail:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches detailed dataset information including columns and linked dashboards.
    # @RELATION: CALLS -> [SupersetClientGetDataset]
    # @RELATION: CALLS -> [APIClient]
    def get_dataset_detail(self, dataset_id: int) -> Dict:
        """Return a normalized detail payload for *dataset_id*.

        Includes typed column metadata, linked dashboards (best-effort via
        the related_objects endpoint; failures degrade to an empty list),
        SQL text, and basic flags/timestamps.
        """
        with belief_scope("SupersetClient.get_dataset_detail", f"id={dataset_id}"):
            def as_bool(value, default=False):
                # Normalize truthy API values ("true", 1, True, ...) to bool.
                if value is None:
                    return default
                if isinstance(value, bool):
                    return value
                if isinstance(value, str):
                    return value.strip().lower() in ("1", "true", "yes", "y", "on")
                return bool(value)
            response = self.get_dataset(dataset_id)
            if isinstance(response, dict) and "result" in response:
                dataset = response["result"]
            else:
                dataset = response
            columns = dataset.get("columns", [])
            column_info = []
            for col in columns:
                col_id = col.get("id")
                if col_id is None:
                    # Skip malformed column entries without an ID.
                    continue
                column_info.append(
                    {
                        "id": int(col_id),
                        "name": col.get("column_name"),
                        "type": col.get("type"),
                        "is_dttm": as_bool(col.get("is_dttm"), default=False),
                        "is_active": as_bool(col.get("is_active"), default=True),
                        "description": col.get("description", ""),
                    }
                )
            linked_dashboards = []
            try:
                related_objects = self.network.request(
                    method="GET", endpoint=f"/dataset/{dataset_id}/related_objects"
                )
                if isinstance(related_objects, dict):
                    # The endpoint shape differs across Superset versions:
                    # either top-level "dashboards" or nested under "result".
                    if "dashboards" in related_objects:
                        dashboards_data = related_objects["dashboards"]
                    elif "result" in related_objects and isinstance(
                        related_objects["result"], dict
                    ):
                        dashboards_data = related_objects["result"].get("dashboards", [])
                    else:
                        dashboards_data = []
                    for dash in dashboards_data:
                        if isinstance(dash, dict):
                            dash_id = dash.get("id")
                            if dash_id is None:
                                continue
                            linked_dashboards.append(
                                {
                                    "id": int(dash_id),
                                    "title": dash.get("dashboard_title")
                                    or dash.get("title", f"Dashboard {dash_id}"),
                                    "slug": dash.get("slug"),
                                }
                            )
                        else:
                            # Entry may be a bare numeric ID.
                            try:
                                dash_id = int(dash)
                            except (TypeError, ValueError):
                                continue
                            linked_dashboards.append(
                                {"id": dash_id, "title": f"Dashboard {dash_id}", "slug": None}
                            )
            except Exception as e:
                # Best-effort enrichment: detail payload remains useful
                # without linked dashboards.
                app_logger.warning(
                    f"[get_dataset_detail][Warning] Failed to fetch related dashboards: {e}"
                )
                linked_dashboards = []
            sql = dataset.get("sql", "")
            result = {
                "id": dataset.get("id"),
                "table_name": dataset.get("table_name"),
                "schema": dataset.get("schema"),
                "database": (
                    dataset.get("database", {}).get("database_name", "Unknown")
                    if isinstance(dataset.get("database"), dict)
                    else dataset.get("database_name") or "Unknown"
                ),
                "description": dataset.get("description", ""),
                "columns": column_info,
                "column_count": len(column_info),
                "sql": sql,
                "linked_dashboards": linked_dashboards,
                "linked_dashboard_count": len(linked_dashboards),
                "is_sqllab_view": as_bool(dataset.get("is_sqllab_view"), default=False),
                "created_on": dataset.get("created_on"),
                "changed_on": dataset.get("changed_on"),
            }
            app_logger.info(
                f"[get_dataset_detail][Exit] Got dataset {dataset_id} with {len(column_info)} columns and {len(linked_dashboards)} linked dashboards"
            )
            return result
    # [/DEF:SupersetClientGetDatasetDetail:Function]
    # [DEF:SupersetClientGetDataset:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Fetches a single dataset record by its ID.
    # @RELATION: CALLS -> [APIClient]
    def get_dataset(self, dataset_id: int) -> Dict:
        """Return the raw API payload for dataset *dataset_id*."""
        with belief_scope("SupersetClient.get_dataset", f"id={dataset_id}"):
            app_logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
            response = self.network.request(
                method="GET", endpoint=f"/dataset/{dataset_id}"
            )
            response = cast(Dict, response)
            app_logger.info("[get_dataset][Exit] Got dataset %s.", dataset_id)
            return response
    # [/DEF:SupersetClientGetDataset:Function]
    # [DEF:SupersetClientUpdateDataset:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Updates dataset fields by ID via PUT.
    # @SIDE_EFFECT: Modifies resource in upstream Superset environment.
    # @RELATION: CALLS -> [APIClient]
    def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
        """PUT *data* to dataset *dataset_id* and return the API response."""
        with belief_scope("SupersetClient.update_dataset", f"id={dataset_id}"):
            app_logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
            response = self.network.request(
                method="PUT",
                endpoint=f"/dataset/{dataset_id}",
                data=json.dumps(data),
                headers={"Content-Type": "application/json"},
            )
            response = cast(Dict, response)
            app_logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
            return response
    # [/DEF:SupersetClientUpdateDataset:Function]
# [/DEF:SupersetDatasetsMixin:Class]
# [/DEF:SupersetDatasetsMixin:Module]

View File

@@ -0,0 +1,397 @@
# [DEF:SupersetDatasetsPreviewMixin:Module]
# @PURPOSE: Dataset preview compilation mixin for SupersetClient — build query context, compile SQL.
import json
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple
from ..logger import logger as app_logger, belief_scope
from ..utils.network import SupersetAPIError
# [DEF:SupersetDatasetsPreviewMixin:Class]
# @PURPOSE: Mixin providing dataset preview compilation and query context building.
class SupersetDatasetsPreviewMixin:
    """Dataset preview compilation for SupersetClient.

    Compiles the SQL a dataset preview would execute by trying, in order,
    the legacy ``/explore_json/form_data`` endpoint, the legacy ``/data``
    endpoint, and the v1 ``/chart/data`` endpoint, then extracting compiled
    SQL from whichever response succeeds.  Relies on the composing class
    for ``get_dataset`` and ``network``.
    """
    # [DEF:SupersetClientCompileDatasetPreview:Function]
    # @COMPLEXITY: 4
    # @PURPOSE: Compile dataset preview SQL through the strongest supported Superset preview endpoint family and return normalized SQL output.
    def compile_dataset_preview(self, dataset_id: int, template_params: Optional[Dict[str, Any]] = None, effective_filters: Optional[List[Dict[str, Any]]] = None) -> Dict[str, Any]:
        """Return a normalized compiled-SQL payload for *dataset_id*.

        Builds both a v1 query context and a legacy form_data payload, then
        attempts each known endpoint strategy in order, returning on the
        first success.  The returned dict includes ``compiled_sql``, the
        payloads used, endpoint info and per-strategy diagnostics.

        Raises:
            SupersetAPIError: when every endpoint strategy fails.
        """
        with belief_scope('SupersetClientCompileDatasetPreview'):
            app_logger.reason('Belief protocol reasoning checkpoint for SupersetClientCompileDatasetPreview')
            dataset_response = self.get_dataset(dataset_id)
            dataset_record = dataset_response.get('result', dataset_response) if isinstance(dataset_response, dict) else {}
            query_context = self.build_dataset_preview_query_context(dataset_id=dataset_id, dataset_record=dataset_record, template_params=template_params or {}, effective_filters=effective_filters or [])
            legacy_form_data = self.build_dataset_preview_legacy_form_data(dataset_id=dataset_id, dataset_record=dataset_record, template_params=template_params or {}, effective_filters=effective_filters or [])
            legacy_form_data_payload = json.dumps(legacy_form_data, sort_keys=True, default=str)
            request_payload = json.dumps(query_context)
            strategy_attempts: List[Dict[str, Any]] = []
            # Strategy order: legacy explore_json first, legacy /data second,
            # v1 /chart/data last (per the observed-traffic inference below).
            strategy_candidates: List[Dict[str, Any]] = [{'endpoint_kind': 'legacy_explore_form_data', 'endpoint': '/explore_json/form_data', 'request_transport': 'query_param_form_data', 'params': {'form_data': legacy_form_data_payload}}, {'endpoint_kind': 'legacy_data_form_data', 'endpoint': '/data', 'request_transport': 'query_param_form_data', 'params': {'form_data': legacy_form_data_payload}}, {'endpoint_kind': 'v1_chart_data', 'endpoint': '/chart/data', 'request_transport': 'json_body', 'data': request_payload, 'headers': {'Content-Type': 'application/json'}}]
            for candidate in strategy_candidates:
                endpoint_kind = candidate['endpoint_kind']
                endpoint_path = candidate['endpoint']
                request_transport = candidate['request_transport']
                request_params = deepcopy(candidate.get('params') or {})
                request_body = candidate.get('data')
                request_headers = deepcopy(candidate.get('headers') or {})
                request_param_keys = sorted(request_params.keys())
                request_payload_keys: List[str] = []
                # Collect payload key names purely for diagnostics logging.
                if isinstance(request_body, str):
                    try:
                        decoded_request_body = json.loads(request_body)
                        if isinstance(decoded_request_body, dict):
                            request_payload_keys = sorted(decoded_request_body.keys())
                    except json.JSONDecodeError:
                        request_payload_keys = []
                elif isinstance(request_body, dict):
                    request_payload_keys = sorted(request_body.keys())
                strategy_diagnostics = {'endpoint': endpoint_path, 'endpoint_kind': endpoint_kind, 'request_transport': request_transport, 'contains_root_datasource': endpoint_kind == 'v1_chart_data' and 'datasource' in query_context, 'contains_form_datasource': endpoint_kind.startswith('legacy_') and 'datasource' in legacy_form_data, 'contains_query_object_datasource': bool(query_context.get('queries')) and isinstance(query_context['queries'][0], dict) and ('datasource' in query_context['queries'][0]), 'request_param_keys': request_param_keys, 'request_payload_keys': request_payload_keys}
                app_logger.reason('Attempting Superset dataset preview compilation strategy', extra={'dataset_id': dataset_id, **strategy_diagnostics, 'request_params': request_params, 'request_payload': request_body, 'legacy_form_data': legacy_form_data if endpoint_kind.startswith('legacy_') else None, 'query_context': query_context if endpoint_kind == 'v1_chart_data' else None, 'template_param_count': len(template_params or {}), 'filter_count': len(effective_filters or [])})
                try:
                    response = self.network.request(method='POST', endpoint=endpoint_path, params=request_params or None, data=request_body, headers=request_headers or None)
                    normalized = self._extract_compiled_sql_from_preview_response(response)
                    normalized['query_context'] = query_context
                    normalized['legacy_form_data'] = legacy_form_data
                    normalized['endpoint'] = endpoint_path
                    normalized['endpoint_kind'] = endpoint_kind
                    normalized['dataset_id'] = dataset_id
                    normalized['strategy_attempts'] = strategy_attempts + [{**strategy_diagnostics, 'success': True}]
                    app_logger.reflect('Dataset preview compilation returned normalized SQL payload', extra={'dataset_id': dataset_id, **strategy_diagnostics, 'success': True, 'compiled_sql_length': len(str(normalized.get('compiled_sql') or '')), 'response_diagnostics': normalized.get('response_diagnostics')})
                    app_logger.reflect('Belief protocol postcondition checkpoint for SupersetClientCompileDatasetPreview')
                    return normalized
                except Exception as exc:
                    # Record the failure and fall through to the next strategy.
                    failure_diagnostics = {**strategy_diagnostics, 'success': False, 'error': str(exc)}
                    strategy_attempts.append(failure_diagnostics)
                    app_logger.explore('Superset dataset preview compilation strategy failed', extra={'dataset_id': dataset_id, **failure_diagnostics, 'request_params': request_params, 'request_payload': request_body})
            raise SupersetAPIError(f'Superset preview compilation failed for all known strategies (attempts={strategy_attempts!r})')
    # [/DEF:SupersetClientCompileDatasetPreview:Function]
    # [DEF:SupersetClientBuildDatasetPreviewLegacyFormData:Function]
    # @COMPLEXITY: 4
    # @PURPOSE: Build browser-style legacy form_data payload for Superset preview endpoints inferred from observed deployment traffic.
    def build_dataset_preview_legacy_form_data(self, dataset_id: int, dataset_record: Dict[str, Any], template_params: Dict[str, Any], effective_filters: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Derive a legacy form_data payload from the v1 query context.

        Flattens the first query object of the v1 context into the flat
        form_data shape legacy endpoints expect (metrics, columns, orderby,
        row limits, extras, time_range), dropping the nested ``datasource``.
        """
        with belief_scope('SupersetClientBuildDatasetPreviewLegacyFormData'):
            app_logger.reason('Belief protocol reasoning checkpoint for SupersetClientBuildDatasetPreviewLegacyFormData')
            query_context = self.build_dataset_preview_query_context(dataset_id=dataset_id, dataset_record=dataset_record, template_params=template_params, effective_filters=effective_filters)
            query_object = deepcopy(query_context.get('queries', [{}])[0] if query_context.get('queries') else {})
            legacy_form_data = deepcopy(query_context.get('form_data', {}))
            legacy_form_data.pop('datasource', None)
            legacy_form_data['metrics'] = deepcopy(query_object.get('metrics', ['count']))
            legacy_form_data['columns'] = deepcopy(query_object.get('columns', []))
            legacy_form_data['orderby'] = deepcopy(query_object.get('orderby', []))
            legacy_form_data['annotation_layers'] = deepcopy(query_object.get('annotation_layers', []))
            legacy_form_data['row_limit'] = query_object.get('row_limit', 1000)
            legacy_form_data['series_limit'] = query_object.get('series_limit', 0)
            legacy_form_data['url_params'] = deepcopy(query_object.get('url_params', template_params))
            legacy_form_data['applied_time_extras'] = deepcopy(query_object.get('applied_time_extras', {}))
            legacy_form_data['result_format'] = query_context.get('result_format', 'json')
            legacy_form_data['result_type'] = query_context.get('result_type', 'query')
            legacy_form_data['force'] = bool(query_context.get('force', True))
            extras = query_object.get('extras')
            if isinstance(extras, dict):
                legacy_form_data['extras'] = deepcopy(extras)
            time_range = query_object.get('time_range')
            if time_range:
                legacy_form_data['time_range'] = time_range
            app_logger.reflect('Built Superset legacy preview form_data payload from browser-observed request shape', extra={'dataset_id': dataset_id, 'legacy_endpoint_inference': 'POST /explore_json/form_data?form_data=... primary, POST /data?form_data=... fallback, based on observed browser traffic', 'contains_form_datasource': 'datasource' in legacy_form_data, 'legacy_form_data_keys': sorted(legacy_form_data.keys()), 'legacy_extra_filters': legacy_form_data.get('extra_filters', []), 'legacy_extra_form_data': legacy_form_data.get('extra_form_data', {})})
            app_logger.reflect('Belief protocol postcondition checkpoint for SupersetClientBuildDatasetPreviewLegacyFormData')
            return legacy_form_data
    # [/DEF:SupersetClientBuildDatasetPreviewLegacyFormData:Function]
    # [DEF:SupersetClientBuildDatasetPreviewQueryContext:Function]
    # @COMPLEXITY: 4
    # @PURPOSE: Build a reduced-scope chart-data query context for deterministic dataset preview compilation.
    def build_dataset_preview_query_context(
        self,
        dataset_id: int,
        dataset_record: Dict[str, Any],
        template_params: Dict[str, Any],
        effective_filters: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Build the v1 ``/chart/data`` query-context payload for a preview.

        Merges caller-supplied *template_params* with the dataset's own
        stored ``template_params`` (caller wins via ``setdefault``), folds
        *effective_filters* into both the query object and
        ``extra_form_data``, and requests ``result_type='query'`` so the
        response contains compiled SQL rather than data.
        """
        with belief_scope("SupersetClientBuildDatasetPreviewQueryContext"):
            app_logger.reason(
                "Building Superset dataset preview query context",
                extra={"dataset_id": dataset_id, "filter_count": len(effective_filters or [])},
            )
            normalized_template_params = deepcopy(template_params or {})
            normalized_filter_payload = (
                self._normalize_effective_filters_for_query_context(
                    effective_filters or []
                )
            )
            normalized_filters = normalized_filter_payload["filters"]
            normalized_extra_form_data = normalized_filter_payload["extra_form_data"]
            datasource_payload: Dict[str, Any] = {
                "id": dataset_id,
                "type": "table",
            }
            # Prefer the record's own datasource identity when present.
            datasource = dataset_record.get("datasource")
            if isinstance(datasource, dict):
                datasource_id = datasource.get("id")
                datasource_type = datasource.get("type")
                if datasource_id is not None:
                    datasource_payload["id"] = datasource_id
                if datasource_type:
                    datasource_payload["type"] = datasource_type
            serialized_dataset_template_params = dataset_record.get("template_params")
            if (
                isinstance(serialized_dataset_template_params, str)
                and serialized_dataset_template_params.strip()
            ):
                try:
                    parsed_dataset_template_params = json.loads(
                        serialized_dataset_template_params
                    )
                    if isinstance(parsed_dataset_template_params, dict):
                        # setdefault: caller-supplied params take precedence.
                        for key, value in parsed_dataset_template_params.items():
                            normalized_template_params.setdefault(str(key), value)
                except json.JSONDecodeError:
                    app_logger.explore(
                        "Dataset template_params could not be parsed while building preview query context",
                        extra={"dataset_id": dataset_id},
                    )
            extra_form_data: Dict[str, Any] = deepcopy(normalized_extra_form_data)
            if normalized_filters:
                extra_form_data["filters"] = deepcopy(normalized_filters)
            query_object: Dict[str, Any] = {
                "filters": normalized_filters,
                "extras": {"where": ""},
                "columns": [],
                "metrics": ["count"],
                "orderby": [],
                "annotation_layers": [],
                "row_limit": 1000,
                "series_limit": 0,
                "url_params": normalized_template_params,
                "applied_time_extras": {},
                "result_type": "query",
            }
            schema = dataset_record.get("schema")
            if schema:
                query_object["schema"] = schema
            time_range = extra_form_data.get("time_range") or dataset_record.get(
                "default_time_range"
            )
            if time_range:
                query_object["time_range"] = time_range
                extra_form_data["time_range"] = time_range
            result_format = dataset_record.get("result_format") or "json"
            result_type = "query"
            form_data: Dict[str, Any] = {
                "datasource": f"{datasource_payload['id']}__{datasource_payload['type']}",
                "datasource_id": datasource_payload["id"],
                "datasource_type": datasource_payload["type"],
                "viz_type": "table",
                "slice_id": None,
                "query_mode": "raw",
                "url_params": normalized_template_params,
                "extra_filters": deepcopy(normalized_filters),
                "adhoc_filters": [],
            }
            if extra_form_data:
                form_data["extra_form_data"] = extra_form_data
            payload = {
                "datasource": datasource_payload,
                "queries": [query_object],
                "form_data": form_data,
                "result_format": result_format,
                "result_type": result_type,
                "force": True,
            }
            app_logger.reflect(
                "Built Superset dataset preview query context",
                extra={
                    "dataset_id": dataset_id,
                    "datasource": datasource_payload,
                    "normalized_effective_filters": normalized_filters,
                    "normalized_filter_diagnostics": normalized_filter_payload[
                        "diagnostics"
                    ],
                    "result_type": result_type,
                    "result_format": result_format,
                },
            )
            return payload
    # [/DEF:SupersetClientBuildDatasetPreviewQueryContext:Function]
    # [DEF:SupersetClientNormalizeEffectiveFiltersForQueryContext:Function]
    def _normalize_effective_filters_for_query_context(
        self,
        effective_filters: List[Dict[str, Any]],
    ) -> Dict[str, Any]:
        """Normalize effective filter records into query-context clauses.

        For each filter dict: prefer preserved ``filter_clauses`` from its
        ``normalized_filter_payload`` (filling missing ``val`` from
        ``effective_value``); if only ``extra_form_data`` was preserved,
        emit no clauses; otherwise reconstruct a single ``col/op/val``
        clause heuristically from the variable/filter name and value.
        Returns ``{"filters", "extra_form_data", "diagnostics"}``.
        """
        with belief_scope(
            "SupersetClient._normalize_effective_filters_for_query_context"
        ):
            normalized_filters: List[Dict[str, Any]] = []
            merged_extra_form_data: Dict[str, Any] = {}
            diagnostics: List[Dict[str, Any]] = []
            for item in effective_filters:
                if not isinstance(item, dict):
                    continue
                display_name = str(
                    item.get("display_name")
                    or item.get("filter_name")
                    or item.get("variable_name")
                    or "unresolved_filter"
                ).strip()
                value = item.get("effective_value")
                normalized_payload = item.get("normalized_filter_payload")
                preserved_clauses: List[Dict[str, Any]] = []
                preserved_extra_form_data: Dict[str, Any] = {}
                used_preserved_clauses = False
                if isinstance(normalized_payload, dict):
                    raw_clauses = normalized_payload.get("filter_clauses")
                    if isinstance(raw_clauses, list):
                        preserved_clauses = [
                            deepcopy(clause)
                            for clause in raw_clauses
                            if isinstance(clause, dict)
                        ]
                    raw_extra_form_data = normalized_payload.get("extra_form_data")
                    if isinstance(raw_extra_form_data, dict):
                        preserved_extra_form_data = deepcopy(raw_extra_form_data)
                if isinstance(preserved_extra_form_data, dict):
                    # Merge everything except "filters", which is handled
                    # through the clause path below.
                    for key, extra_value in preserved_extra_form_data.items():
                        if key == "filters":
                            continue
                        merged_extra_form_data[key] = deepcopy(extra_value)
                outgoing_clauses: List[Dict[str, Any]] = []
                if preserved_clauses:
                    for clause in preserved_clauses:
                        clause_copy = deepcopy(clause)
                        if "val" not in clause_copy and value is not None:
                            clause_copy["val"] = deepcopy(value)
                        outgoing_clauses.append(clause_copy)
                    used_preserved_clauses = True
                elif preserved_extra_form_data:
                    outgoing_clauses = []
                else:
                    # Heuristic reconstruction when no payload was preserved.
                    column = str(
                        item.get("variable_name") or item.get("filter_name") or ""
                    ).strip()
                    if column and value is not None:
                        operator = "IN" if isinstance(value, list) else "=="
                        outgoing_clauses.append(
                            {"col": column, "op": operator, "val": value}
                        )
                normalized_filters.extend(outgoing_clauses)
                diagnostics.append(
                    {
                        "filter_name": display_name,
                        "value_origin": (
                            normalized_payload.get("value_origin")
                            if isinstance(normalized_payload, dict)
                            else None
                        ),
                        "used_preserved_clauses": used_preserved_clauses,
                        "outgoing_clauses": deepcopy(outgoing_clauses),
                    }
                )
                app_logger.reason(
                    "Normalized effective preview filter for Superset query context",
                    extra={
                        "filter_name": display_name,
                        "used_preserved_clauses": used_preserved_clauses,
                        "outgoing_clauses": outgoing_clauses,
                        "value_origin": (
                            normalized_payload.get("value_origin")
                            if isinstance(normalized_payload, dict)
                            else "heuristic_reconstruction"
                        ),
                    },
                )
            return {
                "filters": normalized_filters,
                "extra_form_data": merged_extra_form_data,
                "diagnostics": diagnostics,
            }
    # [/DEF:SupersetClientNormalizeEffectiveFiltersForQueryContext:Function]
    # [DEF:SupersetClientExtractCompiledSqlFromPreviewResponse:Function]
    def _extract_compiled_sql_from_preview_response(
        self, response: Any
    ) -> Dict[str, Any]:
        """Extract compiled SQL from a preview response of any known shape.

        Scans ``result`` list items first (keys ``query``/``sql``/
        ``compiled_sql``), then top-level and ``result``-dict fields,
        accumulating diagnostics along the way.

        Raises:
            SupersetAPIError: if the response is not a dict or exposes no
                non-empty compiled SQL anywhere.
        """
        with belief_scope("SupersetClient._extract_compiled_sql_from_preview_response"):
            if not isinstance(response, dict):
                raise SupersetAPIError(
                    "Superset preview response was not a JSON object"
                )
            response_diagnostics: List[Dict[str, Any]] = []
            result_payload = response.get("result")
            if isinstance(result_payload, list):
                for index, item in enumerate(result_payload):
                    if not isinstance(item, dict):
                        continue
                    compiled_sql = str(
                        item.get("query")
                        or item.get("sql")
                        or item.get("compiled_sql")
                        or ""
                    ).strip()
                    response_diagnostics.append(
                        {
                            "index": index,
                            "status": item.get("status"),
                            "applied_filters": item.get("applied_filters"),
                            "rejected_filters": item.get("rejected_filters"),
                            "has_query": bool(compiled_sql),
                            "source": "result_list",
                        }
                    )
                    if compiled_sql:
                        return {
                            "compiled_sql": compiled_sql,
                            "raw_response": response,
                            "response_diagnostics": response_diagnostics,
                        }
            top_level_candidates: List[Tuple[str, Any]] = [
                ("query", response.get("query")),
                ("sql", response.get("sql")),
                ("compiled_sql", response.get("compiled_sql")),
            ]
            if isinstance(result_payload, dict):
                top_level_candidates.extend(
                    [
                        ("result.query", result_payload.get("query")),
                        ("result.sql", result_payload.get("sql")),
                        ("result.compiled_sql", result_payload.get("compiled_sql")),
                    ]
                )
            for source, candidate in top_level_candidates:
                compiled_sql = str(candidate or "").strip()
                response_diagnostics.append(
                    {"source": source, "has_query": bool(compiled_sql)}
                )
                if compiled_sql:
                    return {
                        "compiled_sql": compiled_sql,
                        "raw_response": response,
                        "response_diagnostics": response_diagnostics,
                    }
            raise SupersetAPIError(
                "Superset preview response did not expose compiled SQL "
                f"(diagnostics={response_diagnostics!r})"
            )
    # [/DEF:SupersetClientExtractCompiledSqlFromPreviewResponse:Function]
# [/DEF:SupersetDatasetsPreviewMixin:Class]
# [/DEF:SupersetDatasetsPreviewMixin:Module]

View File

@@ -0,0 +1,86 @@
# [DEF:SupersetUserProjection:Module]
# @COMPLEXITY: 2
# @PURPOSE: User/owner payload normalization helpers for Superset client responses.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
from typing import Any, Dict, List, Optional, Union
# [DEF:SupersetUserProjectionMixin:Class]
# @COMPLEXITY: 2
# @PURPOSE: Mixin providing user/owner payload normalization for Superset API responses.
# @RELATION: DEPENDS_ON -> [SupersetClientBase]
class SupersetUserProjectionMixin:
    # [DEF:SupersetClientExtractOwnerLabels:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Normalize dashboard owners payload to stable display labels.
    def _extract_owner_labels(self, owners_payload: Any) -> List[str]:
        """Return de-duplicated display labels for an owners payload.

        Accepts ``None`` (empty result), a single owner, or a list of
        owners.  Dict-shaped owners go through ``_extract_user_display``;
        scalars through ``_sanitize_user_text``.  First-appearance order
        is preserved.
        """
        if owners_payload is None:
            return []
        candidates = (
            owners_payload
            if isinstance(owners_payload, list)
            else [owners_payload]
        )
        labels: List[str] = []
        for candidate in candidates:
            if isinstance(candidate, dict):
                label = self._extract_user_display(None, candidate)
            else:
                label = self._sanitize_user_text(candidate)
            if label and label not in labels:
                labels.append(label)
        return labels
    # [/DEF:SupersetClientExtractOwnerLabels:Function]
    # [DEF:SupersetClientExtractUserDisplay:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Normalize user payload to a stable display name.
    def _extract_user_display(
        self, preferred_value: Optional[str], user_payload: Optional[Dict]
    ) -> Optional[str]:
        """Resolve a display name, preferring the explicit value.

        Fallback order within the payload: ``full_name``, then
        "first_name last_name", then ``username``, then ``email``.
        Returns ``None`` when nothing usable is found.
        """
        explicit = self._sanitize_user_text(preferred_value)
        if explicit:
            return explicit
        if not isinstance(user_payload, dict):
            return None
        full_name = self._sanitize_user_text(user_payload.get("full_name"))
        if full_name:
            return full_name
        name_parts = [
            self._sanitize_user_text(user_payload.get("first_name")),
            self._sanitize_user_text(user_payload.get("last_name")),
        ]
        combined = " ".join(part for part in name_parts if part).strip()
        if combined:
            return combined
        for field in ("username", "email"):
            candidate = self._sanitize_user_text(user_payload.get(field))
            if candidate:
                return candidate
        return None
    # [/DEF:SupersetClientExtractUserDisplay:Function]
    # [DEF:SupersetClientSanitizeUserText:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Convert scalar value to non-empty user-facing text.
    def _sanitize_user_text(self, value: Optional[Union[str, int]]) -> Optional[str]:
        """Coerce a scalar to stripped text; ``None`` for missing/blank input."""
        if value is None:
            return None
        text = str(value).strip()
        return text or None
    # [/DEF:SupersetClientSanitizeUserText:Function]
# [/DEF:SupersetUserProjectionMixin:Class]
# [/DEF:SupersetUserProjection:Module]

View File

@@ -10,7 +10,7 @@
},
"changed_by_name": "Superset Admin",
"changed_on": "2026-02-24T19:24:01.850617",
"changed_on_delta_humanized": "29 days ago",
"changed_on_delta_humanized": "2 months ago",
"charts": [
"TA-0001-001 test_chart"
],
@@ -19,7 +19,7 @@
"id": 1,
"last_name": "Admin"
},
"created_on_delta_humanized": "a month ago",
"created_on_delta_humanized": "2 months ago",
"css": null,
"dashboard_title": "TA-0001 Test dashboard",
"id": 13,
@@ -54,7 +54,7 @@
"last_name": "Admin"
},
"changed_on": "2026-02-18T14:56:04.863722",
"changed_on_humanized": "a month ago",
"changed_on_humanized": "2 months ago",
"column_formats": {},
"columns": [
{
@@ -424,7 +424,7 @@
"last_name": "Admin"
},
"created_on": "2026-02-18T14:56:04.317950",
"created_on_humanized": "a month ago",
"created_on_humanized": "2 months ago",
"database": {
"allow_multi_catalog": false,
"backend": "postgresql",