semantics

This commit is contained in:
2026-03-20 20:01:58 +03:00
parent 1149e8df1d
commit 80ce8fe150
12 changed files with 1734 additions and 6577 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
--- ---
description: Primary user-facing fast dispatcher that routes requests only to approved project subagents. description: Primary user-facing fast dispatcher that routes requests only to approved project subagents.
mode: all mode: all
model: github-copilot/gpt-5.1-codex-mini model: github-copilot/gpt-5-mini
temperature: 0.0 temperature: 0.0
permission: permission:
edit: deny edit: deny

View File

@@ -4,12 +4,12 @@
# @PURPOSE: HTTP contract layer for migration orchestration, settings, dry-run, and mapping sync endpoints. # @PURPOSE: HTTP contract layer for migration orchestration, settings, dry-run, and mapping sync endpoints.
# @LAYER: Infra # @LAYER: Infra
# @RELATION: DEPENDS_ON ->[AppDependencies] # @RELATION: DEPENDS_ON ->[AppDependencies]
# @RELATION: DEPENDS_ON ->[backend.src.core.database] # @RELATION: DEPENDS_ON ->[DatabaseModule]
# @RELATION: DEPENDS_ON ->[backend.src.core.superset_client.SupersetClient] # @RELATION: DEPENDS_ON ->[DashboardSelection]
# @RELATION: DEPENDS_ON ->[backend.src.core.migration.dry_run_orchestrator.MigrationDryRunService] # @RELATION: DEPENDS_ON ->[DashboardMetadata]
# @RELATION: DEPENDS_ON ->[backend.src.core.mapping_service.IdMappingService] # @RELATION: DEPENDS_ON ->[MigrationDryRunService]
# @RELATION: DEPENDS_ON ->[backend.src.models.dashboard] # @RELATION: DEPENDS_ON ->[IdMappingService]
# @RELATION: DEPENDS_ON ->[backend.src.models.mapping] # @RELATION: DEPENDS_ON ->[ResourceMapping]
# @INVARIANT: Migration endpoints never execute with invalid environment references and always return explicit HTTP errors on guard failures. # @INVARIANT: Migration endpoints never execute with invalid environment references and always return explicit HTTP errors on guard failures.
# @PRE: Backend core services initialized and Database session available. # @PRE: Backend core services initialized and Database session available.
# @POST: Migration tasks are enqueued or dry-run results are computed and returned. # @POST: Migration tasks are enqueued or dry-run results are computed and returned.
@@ -24,7 +24,7 @@
# @TEST_INVARIANT: [EnvironmentValidationBeforeAction] -> VERIFIED_BY: [invalid_environment, valid_execution] # @TEST_INVARIANT: [EnvironmentValidationBeforeAction] -> VERIFIED_BY: [invalid_environment, valid_execution]
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional, cast
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from ...dependencies import get_config_manager, get_task_manager, has_permission from ...dependencies import get_config_manager, get_task_manager, has_permission
from ...core.database import get_db from ...core.database import get_db
@@ -35,8 +35,11 @@ from ...core.migration.dry_run_orchestrator import MigrationDryRunService
from ...core.mapping_service import IdMappingService from ...core.mapping_service import IdMappingService
from ...models.mapping import ResourceMapping from ...models.mapping import ResourceMapping
logger = cast(Any, logger)
router = APIRouter(prefix="/api", tags=["migration"]) router = APIRouter(prefix="/api", tags=["migration"])
# [DEF:get_dashboards:Function] # [DEF:get_dashboards:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Fetch dashboard metadata from a requested environment for migration selection UI. # @PURPOSE: Fetch dashboard metadata from a requested environment for migration selection UI.
@@ -44,17 +47,18 @@ router = APIRouter(prefix="/api", tags=["migration"])
# @POST: Returns List[DashboardMetadata] for the resolved environment; emits HTTP_404 when environment is absent. # @POST: Returns List[DashboardMetadata] for the resolved environment; emits HTTP_404 when environment is absent.
# @SIDE_EFFECT: Reads environment configuration and performs remote Superset metadata retrieval over network. # @SIDE_EFFECT: Reads environment configuration and performs remote Superset metadata retrieval over network.
# @DATA_CONTRACT: Input[str env_id] -> Output[List[DashboardMetadata]] # @DATA_CONTRACT: Input[str env_id] -> Output[List[DashboardMetadata]]
# @RELATION: CALLS ->[SupersetClient.get_dashboards_summary]
@router.get("/environments/{env_id}/dashboards", response_model=List[DashboardMetadata]) @router.get("/environments/{env_id}/dashboards", response_model=List[DashboardMetadata])
async def get_dashboards( async def get_dashboards(
env_id: str, env_id: str,
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "EXECUTE")) _=Depends(has_permission("plugin:migration", "EXECUTE")),
): ):
with belief_scope("get_dashboards", f"env_id={env_id}"): with belief_scope("get_dashboards", f"env_id={env_id}"):
logger.reason(f"Fetching dashboards for environment: {env_id}") logger.reason(f"Fetching dashboards for environment: {env_id}")
environments = config_manager.get_environments() environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None) env = next((e for e in environments if e.id == env_id), None)
if not env: if not env:
logger.explore(f"Environment {env_id} not found in configuration") logger.explore(f"Environment {env_id} not found in configuration")
raise HTTPException(status_code=404, detail="Environment not found") raise HTTPException(status_code=404, detail="Environment not found")
@@ -63,8 +67,11 @@ async def get_dashboards(
dashboards = client.get_dashboards_summary() dashboards = client.get_dashboards_summary()
logger.reflect(f"Retrieved {len(dashboards)} dashboards from {env_id}") logger.reflect(f"Retrieved {len(dashboards)} dashboards from {env_id}")
return dashboards return dashboards
# [/DEF:get_dashboards:Function] # [/DEF:get_dashboards:Function]
# [DEF:execute_migration:Function] # [DEF:execute_migration:Function]
# @COMPLEXITY: 5 # @COMPLEXITY: 5
# @PURPOSE: Validate migration selection and enqueue asynchronous migration task execution. # @PURPOSE: Validate migration selection and enqueue asynchronous migration task execution.
@@ -72,38 +79,60 @@ async def get_dashboards(
# @POST: Returns {"task_id": str, "message": str} when task creation succeeds; emits HTTP_400/HTTP_500 on failure. # @POST: Returns {"task_id": str, "message": str} when task creation succeeds; emits HTTP_400/HTTP_500 on failure.
# @SIDE_EFFECT: Reads configuration, writes task record through task manager, and writes operational logs. # @SIDE_EFFECT: Reads configuration, writes task record through task manager, and writes operational logs.
# @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, str]] # @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, str]]
# @RELATION: CALLS ->[create_task]
# @RELATION: DEPENDS_ON ->[DashboardSelection]
# @INVARIANT: Migration task dispatch never occurs before source and target environment ids pass guard validation.
@router.post("/migration/execute") @router.post("/migration/execute")
async def execute_migration( async def execute_migration(
selection: DashboardSelection, selection: DashboardSelection,
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager), task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:migration", "EXECUTE")) _=Depends(has_permission("plugin:migration", "EXECUTE")),
): ):
with belief_scope("execute_migration"): with belief_scope("execute_migration"):
logger.reason(f"Initiating migration from {selection.source_env_id} to {selection.target_env_id}") logger.reason(
f"Initiating migration from {selection.source_env_id} to {selection.target_env_id}"
)
# Validate environments exist # Validate environments exist
environments = config_manager.get_environments() environments = config_manager.get_environments()
env_ids = {e.id for e in environments} env_ids = {e.id for e in environments}
if selection.source_env_id not in env_ids or selection.target_env_id not in env_ids: if (
logger.explore("Invalid environment selection", extra={"source": selection.source_env_id, "target": selection.target_env_id}) selection.source_env_id not in env_ids
raise HTTPException(status_code=400, detail="Invalid source or target environment") or selection.target_env_id not in env_ids
):
logger.explore(
"Invalid environment selection",
extra={
"source": selection.source_env_id,
"target": selection.target_env_id,
},
)
raise HTTPException(
status_code=400, detail="Invalid source or target environment"
)
# Include replace_db_config and fix_cross_filters in the task parameters # Include replace_db_config and fix_cross_filters in the task parameters
task_params = selection.dict() task_params = selection.dict()
task_params['replace_db_config'] = selection.replace_db_config task_params["replace_db_config"] = selection.replace_db_config
task_params['fix_cross_filters'] = selection.fix_cross_filters task_params["fix_cross_filters"] = selection.fix_cross_filters
logger.reason(f"Creating migration task with {len(selection.selected_ids)} dashboards") logger.reason(
f"Creating migration task with {len(selection.selected_ids)} dashboards"
)
try: try:
task = await task_manager.create_task("superset-migration", task_params) task = await task_manager.create_task("superset-migration", task_params)
logger.reflect(f"Migration task created: {task.id}") logger.reflect(f"Migration task created: {task.id}")
return {"task_id": task.id, "message": "Migration initiated"} return {"task_id": task.id, "message": "Migration initiated"}
except Exception as e: except Exception as e:
logger.explore(f"Task creation failed: {e}") logger.explore(f"Task creation failed: {e}")
raise HTTPException(status_code=500, detail=f"Failed to create migration task: {str(e)}") raise HTTPException(
status_code=500, detail=f"Failed to create migration task: {str(e)}"
)
# [/DEF:execute_migration:Function] # [/DEF:execute_migration:Function]
@@ -114,37 +143,49 @@ async def execute_migration(
# @POST: Returns deterministic dry-run payload; emits HTTP_400 for guard violations and HTTP_500 for orchestrator value errors. # @POST: Returns deterministic dry-run payload; emits HTTP_400 for guard violations and HTTP_500 for orchestrator value errors.
# @SIDE_EFFECT: Reads local mappings from DB and fetches source/target metadata via Superset API. # @SIDE_EFFECT: Reads local mappings from DB and fetches source/target metadata via Superset API.
# @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, Any]] # @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[DashboardSelection]
# @RELATION: DEPENDS_ON ->[MigrationDryRunService]
# @INVARIANT: Dry-run flow remains read-only and rejects identical source/target environments before service execution.
@router.post("/migration/dry-run", response_model=Dict[str, Any]) @router.post("/migration/dry-run", response_model=Dict[str, Any])
async def dry_run_migration( async def dry_run_migration(
selection: DashboardSelection, selection: DashboardSelection,
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
db: Session = Depends(get_db), db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "EXECUTE")) _=Depends(has_permission("plugin:migration", "EXECUTE")),
): ):
with belief_scope("dry_run_migration"): with belief_scope("dry_run_migration"):
logger.reason(f"Starting dry run: {selection.source_env_id} -> {selection.target_env_id}") logger.reason(
f"Starting dry run: {selection.source_env_id} -> {selection.target_env_id}"
)
environments = config_manager.get_environments() environments = config_manager.get_environments()
env_map = {env.id: env for env in environments} env_map = {env.id: env for env in environments}
source_env = env_map.get(selection.source_env_id) source_env = env_map.get(selection.source_env_id)
target_env = env_map.get(selection.target_env_id) target_env = env_map.get(selection.target_env_id)
if not source_env or not target_env: if not source_env or not target_env:
logger.explore("Invalid environment selection for dry run") logger.explore("Invalid environment selection for dry run")
raise HTTPException(status_code=400, detail="Invalid source or target environment") raise HTTPException(
status_code=400, detail="Invalid source or target environment"
)
if selection.source_env_id == selection.target_env_id: if selection.source_env_id == selection.target_env_id:
logger.explore("Source and target environments are identical") logger.explore("Source and target environments are identical")
raise HTTPException(status_code=400, detail="Source and target environments must be different") raise HTTPException(
status_code=400,
detail="Source and target environments must be different",
)
if not selection.selected_ids: if not selection.selected_ids:
logger.explore("No dashboards selected for dry run") logger.explore("No dashboards selected for dry run")
raise HTTPException(status_code=400, detail="No dashboards selected for dry run") raise HTTPException(
status_code=400, detail="No dashboards selected for dry run"
)
service = MigrationDryRunService() service = MigrationDryRunService()
source_client = SupersetClient(source_env) source_client = SupersetClient(source_env)
target_client = SupersetClient(target_env) target_client = SupersetClient(target_env)
try: try:
result = service.run( result = service.run(
selection=selection, selection=selection,
@@ -157,8 +198,11 @@ async def dry_run_migration(
except ValueError as exc: except ValueError as exc:
logger.explore(f"Dry run orchestrator failed: {exc}") logger.explore(f"Dry run orchestrator failed: {exc}")
raise HTTPException(status_code=500, detail=str(exc)) from exc raise HTTPException(status_code=500, detail=str(exc)) from exc
# [/DEF:dry_run_migration:Function] # [/DEF:dry_run_migration:Function]
# [DEF:get_migration_settings:Function] # [DEF:get_migration_settings:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Read and return configured migration synchronization cron expression. # @PURPOSE: Read and return configured migration synchronization cron expression.
@@ -166,17 +210,21 @@ async def dry_run_migration(
# @POST: Returns {"cron": str} reflecting current persisted settings value. # @POST: Returns {"cron": str} reflecting current persisted settings value.
# @SIDE_EFFECT: Reads configuration from config manager. # @SIDE_EFFECT: Reads configuration from config manager.
# @DATA_CONTRACT: Input[None] -> Output[Dict[str, str]] # @DATA_CONTRACT: Input[None] -> Output[Dict[str, str]]
# @RELATION: DEPENDS_ON ->[AppDependencies]
@router.get("/migration/settings", response_model=Dict[str, str]) @router.get("/migration/settings", response_model=Dict[str, str])
async def get_migration_settings( async def get_migration_settings(
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "READ")) _=Depends(has_permission("plugin:migration", "READ")),
): ):
with belief_scope("get_migration_settings"): with belief_scope("get_migration_settings"):
config = config_manager.get_config() config = config_manager.get_config()
cron = config.settings.migration_sync_cron cron = config.settings.migration_sync_cron
return {"cron": cron} return {"cron": cron}
# [/DEF:get_migration_settings:Function] # [/DEF:get_migration_settings:Function]
# [DEF:update_migration_settings:Function] # [DEF:update_migration_settings:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Validate and persist migration synchronization cron expression update. # @PURPOSE: Validate and persist migration synchronization cron expression update.
@@ -184,25 +232,31 @@ async def get_migration_settings(
# @POST: Returns {"cron": str, "status": "updated"} and persists updated cron value. # @POST: Returns {"cron": str, "status": "updated"} and persists updated cron value.
# @SIDE_EFFECT: Mutates configuration and writes persisted config through config manager. # @SIDE_EFFECT: Mutates configuration and writes persisted config through config manager.
# @DATA_CONTRACT: Input[Dict[str, str]] -> Output[Dict[str, str]] # @DATA_CONTRACT: Input[Dict[str, str]] -> Output[Dict[str, str]]
# @RELATION: DEPENDS_ON ->[AppDependencies]
@router.put("/migration/settings", response_model=Dict[str, str]) @router.put("/migration/settings", response_model=Dict[str, str])
async def update_migration_settings( async def update_migration_settings(
payload: Dict[str, str], payload: Dict[str, str],
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "WRITE")) _=Depends(has_permission("plugin:migration", "WRITE")),
): ):
with belief_scope("update_migration_settings"): with belief_scope("update_migration_settings"):
if "cron" not in payload: if "cron" not in payload:
raise HTTPException(status_code=400, detail="Missing 'cron' field in payload") raise HTTPException(
status_code=400, detail="Missing 'cron' field in payload"
)
cron_expr = payload["cron"] cron_expr = payload["cron"]
config = config_manager.get_config() config = config_manager.get_config()
config.settings.migration_sync_cron = cron_expr config.settings.migration_sync_cron = cron_expr
config_manager.save_config(config) config_manager.save_config(config)
return {"cron": cron_expr, "status": "updated"} return {"cron": cron_expr, "status": "updated"}
# [/DEF:update_migration_settings:Function] # [/DEF:update_migration_settings:Function]
# [DEF:get_resource_mappings:Function] # [DEF:get_resource_mappings:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Fetch synchronized resource mappings with optional filters and pagination for migration mappings view. # @PURPOSE: Fetch synchronized resource mappings with optional filters and pagination for migration mappings view.
@@ -210,6 +264,7 @@ async def update_migration_settings(
# @POST: Returns {"items": [...], "total": int} where items reflect applied filters and pagination. # @POST: Returns {"items": [...], "total": int} where items reflect applied filters and pagination.
# @SIDE_EFFECT: Executes database read queries against ResourceMapping table. # @SIDE_EFFECT: Executes database read queries against ResourceMapping table.
# @DATA_CONTRACT: Input[QueryParams] -> Output[Dict[str, Any]] # @DATA_CONTRACT: Input[QueryParams] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[ResourceMapping]
@router.get("/migration/mappings-data", response_model=Dict[str, Any]) @router.get("/migration/mappings-data", response_model=Dict[str, Any])
async def get_resource_mappings( async def get_resource_mappings(
skip: int = Query(0, ge=0), skip: int = Query(0, ge=0),
@@ -218,42 +273,63 @@ async def get_resource_mappings(
env_id: Optional[str] = Query(None, description="Filter by environment ID"), env_id: Optional[str] = Query(None, description="Filter by environment ID"),
resource_type: Optional[str] = Query(None, description="Filter by resource type"), resource_type: Optional[str] = Query(None, description="Filter by resource type"),
db: Session = Depends(get_db), db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "READ")) _=Depends(has_permission("plugin:migration", "READ")),
): ):
with belief_scope("get_resource_mappings"): with belief_scope("get_resource_mappings"):
query = db.query(ResourceMapping) query = db.query(ResourceMapping)
if env_id: if env_id:
query = query.filter(ResourceMapping.environment_id == env_id) query = query.filter(ResourceMapping.environment_id == env_id)
if resource_type: if resource_type:
query = query.filter(ResourceMapping.resource_type == resource_type.upper()) query = query.filter(ResourceMapping.resource_type == resource_type.upper())
if search: if search:
search_term = f"%{search}%" search_term = f"%{search}%"
query = query.filter( query = query.filter(
(ResourceMapping.resource_name.ilike(search_term)) | (ResourceMapping.resource_name.ilike(search_term))
(ResourceMapping.uuid.ilike(search_term)) | (ResourceMapping.uuid.ilike(search_term))
) )
total = query.count() total = query.count()
mappings = query.order_by(ResourceMapping.resource_type, ResourceMapping.resource_name).offset(skip).limit(limit).all() mappings = (
query.order_by(ResourceMapping.resource_type, ResourceMapping.resource_name)
.offset(skip)
.limit(limit)
.all()
)
items = [] items = []
for m in mappings: for m in mappings:
items.append({ mapping = cast(Any, m)
"id": m.id, resource_type_value = (
"environment_id": m.environment_id, mapping.resource_type.value
"resource_type": m.resource_type.value if m.resource_type else None, if mapping.resource_type is not None
"uuid": m.uuid, else None
"remote_id": m.remote_integer_id, )
"resource_name": m.resource_name, last_synced_at = (
"last_synced_at": m.last_synced_at.isoformat() if m.last_synced_at else None mapping.last_synced_at.isoformat()
}) if mapping.last_synced_at is not None
else None
)
items.append(
{
"id": mapping.id,
"environment_id": mapping.environment_id,
"resource_type": resource_type_value,
"uuid": mapping.uuid,
"remote_id": mapping.remote_integer_id,
"resource_name": mapping.resource_name,
"last_synced_at": last_synced_at,
}
)
return {"items": items, "total": total} return {"items": items, "total": total}
# [/DEF:get_resource_mappings:Function] # [/DEF:get_resource_mappings:Function]
# [DEF:trigger_sync_now:Function] # [DEF:trigger_sync_now:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Trigger immediate ID synchronization for every configured environment. # @PURPOSE: Trigger immediate ID synchronization for every configured environment.
@@ -261,22 +337,24 @@ async def get_resource_mappings(
# @POST: Returns sync summary with synced/failed counts after attempting all environments. # @POST: Returns sync summary with synced/failed counts after attempting all environments.
# @SIDE_EFFECT: Upserts Environment rows, commits DB transaction, performs network sync calls, and writes logs. # @SIDE_EFFECT: Upserts Environment rows, commits DB transaction, performs network sync calls, and writes logs.
# @DATA_CONTRACT: Input[None] -> Output[Dict[str, Any]] # @DATA_CONTRACT: Input[None] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[IdMappingService]
# @RELATION: CALLS ->[sync_environment]
@router.post("/migration/sync-now", response_model=Dict[str, Any]) @router.post("/migration/sync-now", response_model=Dict[str, Any])
async def trigger_sync_now( async def trigger_sync_now(
config_manager=Depends(get_config_manager), config_manager=Depends(get_config_manager),
db: Session = Depends(get_db), db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "EXECUTE")) _=Depends(has_permission("plugin:migration", "EXECUTE")),
): ):
with belief_scope("trigger_sync_now"): with belief_scope("trigger_sync_now"):
from ...core.logger import logger from ...core.logger import logger
from ...models.mapping import Environment as EnvironmentModel from ...models.mapping import Environment as EnvironmentModel
config = config_manager.get_config() config = config_manager.get_config()
environments = config.environments environments = config.environments
if not environments: if not environments:
raise HTTPException(status_code=400, detail="No environments configured") raise HTTPException(status_code=400, detail="No environments configured")
# Ensure each environment exists in DB (upsert) to satisfy FK constraints # Ensure each environment exists in DB (upsert) to satisfy FK constraints
for env in environments: for env in environments:
existing = db.query(EnvironmentModel).filter_by(id=env.id).first() existing = db.query(EnvironmentModel).filter_by(id=env.id).first()
@@ -288,15 +366,17 @@ async def trigger_sync_now(
credentials_id=env.id, # Use env.id as credentials reference credentials_id=env.id, # Use env.id as credentials reference
) )
db.add(db_env) db.add(db_env)
logger.info(f"[trigger_sync_now][Action] Created environment row for {env.id}") logger.info(
f"[trigger_sync_now][Action] Created environment row for {env.id}"
)
else: else:
existing.name = env.name existing.name = env.name
existing.url = env.url existing.url = env.url
db.commit() db.commit()
service = IdMappingService(db) service = IdMappingService(db)
results = {"synced": [], "failed": []} results = {"synced": [], "failed": []}
for env in environments: for env in environments:
try: try:
client = SupersetClient(env) client = SupersetClient(env)
@@ -306,13 +386,15 @@ async def trigger_sync_now(
except Exception as e: except Exception as e:
results["failed"].append({"env_id": env.id, "error": str(e)}) results["failed"].append({"env_id": env.id, "error": str(e)})
logger.error(f"[trigger_sync_now][Error] Failed to sync {env.id}: {e}") logger.error(f"[trigger_sync_now][Error] Failed to sync {env.id}: {e}")
return { return {
"status": "completed", "status": "completed",
"synced_count": len(results["synced"]), "synced_count": len(results["synced"]),
"failed_count": len(results["failed"]), "failed_count": len(results["failed"]),
"details": results "details": results,
} }
# [/DEF:trigger_sync_now:Function] # [/DEF:trigger_sync_now:Function]
# [/DEF:MigrationApi:Module] # [/DEF:MigrationApi:Module]

View File

@@ -3,9 +3,7 @@
# @SEMANTICS: tests, superset, preview, chart_data, network, 404-mapping # @SEMANTICS: tests, superset, preview, chart_data, network, 404-mapping
# @PURPOSE: Verify explicit chart-data preview compilation and ensure non-dashboard 404 errors remain generic across sync and async clients. # @PURPOSE: Verify explicit chart-data preview compilation and ensure non-dashboard 404 errors remain generic across sync and async clients.
# @LAYER: Domain # @LAYER: Domain
# @RELATION: [BINDS_TO] ->[SupersetClient] # @RELATION: [BINDS_TO] ->[AsyncNetworkModule]
# @RELATION: [BINDS_TO] ->[APIClient]
# @RELATION: [BINDS_TO] ->[AsyncAPIClient]
import json import json
from unittest.mock import MagicMock from unittest.mock import MagicMock
@@ -29,11 +27,15 @@ def _make_environment() -> Environment:
username="demo", username="demo",
password="secret", password="secret",
) )
# [/DEF:_make_environment:Function] # [/DEF:_make_environment:Function]
# [DEF:_make_requests_http_error:Function] # [DEF:_make_requests_http_error:Function]
def _make_requests_http_error(status_code: int, url: str) -> requests.exceptions.HTTPError: def _make_requests_http_error(
status_code: int, url: str
) -> requests.exceptions.HTTPError:
response = requests.Response() response = requests.Response()
response.status_code = status_code response.status_code = status_code
response.url = url response.url = url
@@ -41,14 +43,20 @@ def _make_requests_http_error(status_code: int, url: str) -> requests.exceptions
request = requests.Request("GET", url).prepare() request = requests.Request("GET", url).prepare()
response.request = request response.request = request
return requests.exceptions.HTTPError(response=response, request=request) return requests.exceptions.HTTPError(response=response, request=request)
# [/DEF:_make_requests_http_error:Function] # [/DEF:_make_requests_http_error:Function]
# [DEF:_make_httpx_status_error:Function] # [DEF:_make_httpx_status_error:Function]
def _make_httpx_status_error(status_code: int, url: str) -> httpx.HTTPStatusError: def _make_httpx_status_error(status_code: int, url: str) -> httpx.HTTPStatusError:
request = httpx.Request("GET", url) request = httpx.Request("GET", url)
response = httpx.Response(status_code=status_code, request=request, text='{"message":"not found"}') response = httpx.Response(
status_code=status_code, request=request, text='{"message":"not found"}'
)
return httpx.HTTPStatusError("upstream error", request=request, response=response) return httpx.HTTPStatusError("upstream error", request=request, response=response)
# [/DEF:_make_httpx_status_error:Function] # [/DEF:_make_httpx_status_error:Function]
@@ -80,7 +88,10 @@ def test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy():
effective_filters=[{"filter_name": "country", "effective_value": ["DE"]}], effective_filters=[{"filter_name": "country", "effective_value": ["DE"]}],
) )
assert result["compiled_sql"] == "SELECT count(*) FROM public.sales WHERE country IN ('DE')" assert (
result["compiled_sql"]
== "SELECT count(*) FROM public.sales WHERE country IN ('DE')"
)
client.network.request.assert_called_once() client.network.request.assert_called_once()
request_call = client.network.request.call_args request_call = client.network.request.call_args
assert request_call.kwargs["method"] == "POST" assert request_call.kwargs["method"] == "POST"
@@ -129,8 +140,11 @@ def test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy():
"success": True, "success": True,
} }
] ]
# [/DEF:test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy:Function] # [/DEF:test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy:Function]
# [DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function] # [DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function]
# @PURPOSE: Superset preview compilation should fall back to chart-data when legacy form_data strategies are rejected. # @PURPOSE: Superset preview compilation should fall back to chart-data when legacy form_data strategies are rejected.
def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(): def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures():
@@ -180,7 +194,9 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
assert len(result["strategy_attempts"]) == 3 assert len(result["strategy_attempts"]) == 3
assert result["strategy_attempts"][0]["endpoint"] == "/explore_json/form_data" assert result["strategy_attempts"][0]["endpoint"] == "/explore_json/form_data"
assert result["strategy_attempts"][0]["endpoint_kind"] == "legacy_explore_form_data" assert result["strategy_attempts"][0]["endpoint_kind"] == "legacy_explore_form_data"
assert result["strategy_attempts"][0]["request_transport"] == "query_param_form_data" assert (
result["strategy_attempts"][0]["request_transport"] == "query_param_form_data"
)
assert result["strategy_attempts"][0]["contains_root_datasource"] is False assert result["strategy_attempts"][0]["contains_root_datasource"] is False
assert result["strategy_attempts"][0]["contains_form_datasource"] is False assert result["strategy_attempts"][0]["contains_form_datasource"] is False
assert result["strategy_attempts"][0]["contains_query_object_datasource"] is False assert result["strategy_attempts"][0]["contains_query_object_datasource"] is False
@@ -191,7 +207,9 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
assert result["strategy_attempts"][1]["endpoint"] == "/data" assert result["strategy_attempts"][1]["endpoint"] == "/data"
assert result["strategy_attempts"][1]["endpoint_kind"] == "legacy_data_form_data" assert result["strategy_attempts"][1]["endpoint_kind"] == "legacy_data_form_data"
assert result["strategy_attempts"][1]["request_transport"] == "query_param_form_data" assert (
result["strategy_attempts"][1]["request_transport"] == "query_param_form_data"
)
assert result["strategy_attempts"][1]["contains_root_datasource"] is False assert result["strategy_attempts"][1]["contains_root_datasource"] is False
assert result["strategy_attempts"][1]["contains_form_datasource"] is False assert result["strategy_attempts"][1]["contains_form_datasource"] is False
assert result["strategy_attempts"][1]["contains_query_object_datasource"] is False assert result["strategy_attempts"][1]["contains_query_object_datasource"] is False
@@ -208,9 +226,18 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
"contains_form_datasource": False, "contains_form_datasource": False,
"contains_query_object_datasource": False, "contains_query_object_datasource": False,
"request_param_keys": [], "request_param_keys": [],
"request_payload_keys": ["datasource", "force", "form_data", "queries", "result_format", "result_type"], "request_payload_keys": [
"datasource",
"force",
"form_data",
"queries",
"result_format",
"result_type",
],
"success": True, "success": True,
} }
# [/DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function] # [/DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function]
@@ -234,8 +261,12 @@ def test_build_dataset_preview_query_context_places_recovered_filters_in_chart_s
"display_name": "Country", "display_name": "Country",
"effective_value": ["DE"], "effective_value": ["DE"],
"normalized_filter_payload": { "normalized_filter_payload": {
"filter_clauses": [{"col": "country_code", "op": "IN", "val": ["DE"]}], "filter_clauses": [
"extra_form_data": {"filters": [{"col": "country_code", "op": "IN", "val": ["DE"]}]}, {"col": "country_code", "op": "IN", "val": ["DE"]}
],
"extra_form_data": {
"filters": [{"col": "country_code", "op": "IN", "val": ["DE"]}]
},
"value_origin": "extra_form_data.filters", "value_origin": "extra_form_data.filters",
}, },
}, },
@@ -267,6 +298,8 @@ def test_build_dataset_preview_query_context_places_recovered_filters_in_chart_s
"time_range": "Last year", "time_range": "Last year",
} }
assert query_context["form_data"]["url_params"] == {"country": "DE"} assert query_context["form_data"]["url_params"] == {"country": "DE"}
# [/DEF:test_build_dataset_preview_query_context_places_recovered_filters_in_chart_style_form_data:Function] # [/DEF:test_build_dataset_preview_query_context_places_recovered_filters_in_chart_style_form_data:Function]
@@ -287,8 +320,16 @@ def test_build_dataset_preview_query_context_merges_dataset_template_params_and_
effective_filters=[], effective_filters=[],
) )
assert query_context["queries"][0]["url_params"] == {"region": "EMEA", "country": "DE"} assert query_context["queries"][0]["url_params"] == {
assert query_context["form_data"]["url_params"] == {"region": "EMEA", "country": "DE"} "region": "EMEA",
"country": "DE",
}
assert query_context["form_data"]["url_params"] == {
"region": "EMEA",
"country": "DE",
}
# [/DEF:test_build_dataset_preview_query_context_merges_dataset_template_params_and_preserves_user_values:Function] # [/DEF:test_build_dataset_preview_query_context_merges_dataset_template_params_and_preserves_user_values:Function]
@@ -325,6 +366,8 @@ def test_build_dataset_preview_query_context_preserves_time_range_from_native_fi
"time_range": "2020-01-01 : 2020-12-31" "time_range": "2020-01-01 : 2020-12-31"
} }
assert query_context["queries"][0]["filters"] == [] assert query_context["queries"][0]["filters"] == []
# [/DEF:test_build_dataset_preview_query_context_preserves_time_range_from_native_filter_payload:Function] # [/DEF:test_build_dataset_preview_query_context_preserves_time_range_from_native_filter_payload:Function]
@@ -348,9 +391,13 @@ def test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses(
"display_name": "Country", "display_name": "Country",
"effective_value": ["DE", "FR"], "effective_value": ["DE", "FR"],
"normalized_filter_payload": { "normalized_filter_payload": {
"filter_clauses": [{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}], "filter_clauses": [
{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}
],
"extra_form_data": { "extra_form_data": {
"filters": [{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}], "filters": [
{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}
],
"time_range": "Last quarter", "time_range": "Last quarter",
}, },
"value_origin": "extra_form_data.filters", "value_origin": "extra_form_data.filters",
@@ -372,6 +419,8 @@ def test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses(
assert legacy_form_data["time_range"] == "Last quarter" assert legacy_form_data["time_range"] == "Last quarter"
assert legacy_form_data["url_params"] == {"country": "DE"} assert legacy_form_data["url_params"] == {"country": "DE"}
assert legacy_form_data["result_type"] == "query" assert legacy_form_data["result_type"] == "query"
# [/DEF:test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses:Function] # [/DEF:test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses:Function]
@@ -393,6 +442,8 @@ def test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic():
assert not isinstance(exc_info.value, DashboardNotFoundError) assert not isinstance(exc_info.value, DashboardNotFoundError)
assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value) assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value)
# [/DEF:test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function] # [/DEF:test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
@@ -413,6 +464,8 @@ def test_sync_network_404_mapping_translates_dashboard_endpoints():
) )
assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value) assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value)
# [/DEF:test_sync_network_404_mapping_translates_dashboard_endpoints:Function] # [/DEF:test_sync_network_404_mapping_translates_dashboard_endpoints:Function]
@@ -430,7 +483,9 @@ async def test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic()
try: try:
with pytest.raises(SupersetAPIError) as exc_info: with pytest.raises(SupersetAPIError) as exc_info:
client._handle_http_error( client._handle_http_error(
_make_httpx_status_error(404, "http://superset.local/api/v1/chart/data"), _make_httpx_status_error(
404, "http://superset.local/api/v1/chart/data"
),
"/chart/data", "/chart/data",
) )
@@ -438,6 +493,8 @@ async def test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic()
assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value) assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value)
finally: finally:
await client.aclose() await client.aclose()
# [/DEF:test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function] # [/DEF:test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
@@ -455,14 +512,18 @@ async def test_async_network_404_mapping_translates_dashboard_endpoints():
try: try:
with pytest.raises(DashboardNotFoundError) as exc_info: with pytest.raises(DashboardNotFoundError) as exc_info:
client._handle_http_error( client._handle_http_error(
_make_httpx_status_error(404, "http://superset.local/api/v1/dashboard/10"), _make_httpx_status_error(
404, "http://superset.local/api/v1/dashboard/10"
),
"/dashboard/10", "/dashboard/10",
) )
assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value) assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value)
finally: finally:
await client.aclose() await client.aclose()
# [/DEF:test_async_network_404_mapping_translates_dashboard_endpoints:Function] # [/DEF:test_async_network_404_mapping_translates_dashboard_endpoints:Function]
# [/DEF:SupersetPreviewPipelineTests:Module] # [/DEF:SupersetPreviewPipelineTests:Module]

View File

@@ -32,12 +32,13 @@ from .utils.async_network import AsyncAPIClient
# @RELATION: [DEPENDS_ON] ->[backend.src.core.utils.async_network.AsyncAPIClient] # @RELATION: [DEPENDS_ON] ->[backend.src.core.utils.async_network.AsyncAPIClient]
# @RELATION: [CALLS] ->[backend.src.core.utils.async_network.AsyncAPIClient.request] # @RELATION: [CALLS] ->[backend.src.core.utils.async_network.AsyncAPIClient.request]
class AsyncSupersetClient(SupersetClient): class AsyncSupersetClient(SupersetClient):
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.__init__:Function] # [DEF:AsyncSupersetClientInit:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Initialize async Superset client with AsyncAPIClient transport. # @PURPOSE: Initialize async Superset client with AsyncAPIClient transport.
# @PRE: env is valid Environment instance. # @PRE: env is valid Environment instance.
# @POST: Client uses async network transport and inherited projection helpers. # @POST: Client uses async network transport and inherited projection helpers.
# @DATA_CONTRACT: Input[Environment] -> self.network[AsyncAPIClient] # @DATA_CONTRACT: Input[Environment] -> self.network[AsyncAPIClient]
# @RELATION: [DEPENDS_ON] ->[AsyncAPIClient]
def __init__(self, env: Environment): def __init__(self, env: Environment):
self.env = env self.env = env
auth_payload = { auth_payload = {
@@ -52,23 +53,28 @@ class AsyncSupersetClient(SupersetClient):
timeout=env.timeout, timeout=env.timeout,
) )
self.delete_before_reimport = False self.delete_before_reimport = False
# [/DEF:backend.src.core.async_superset_client.AsyncSupersetClient.__init__:Function]
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.aclose:Function] # [/DEF:AsyncSupersetClientInit:Function]
# [DEF:AsyncSupersetClientClose:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Close async transport resources. # @PURPOSE: Close async transport resources.
# @POST: Underlying AsyncAPIClient is closed. # @POST: Underlying AsyncAPIClient is closed.
# @SIDE_EFFECT: Closes network sockets. # @SIDE_EFFECT: Closes network sockets.
# @RELATION: [CALLS] ->[AsyncAPIClient.aclose]
async def aclose(self) -> None: async def aclose(self) -> None:
await self.network.aclose() await self.network.aclose()
# [/DEF:backend.src.core.async_superset_client.AsyncSupersetClient.aclose:Function]
# [/DEF:AsyncSupersetClientClose:Function]
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.get_dashboards_page_async:Function] # [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.get_dashboards_page_async:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Fetch one dashboards page asynchronously. # @PURPOSE: Fetch one dashboards page asynchronously.
# @POST: Returns total count and page result list. # @POST: Returns total count and page result list.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]] # @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
async def get_dashboards_page_async(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]: async def get_dashboards_page_async(
self, query: Optional[Dict] = None
) -> Tuple[int, List[Dict]]:
with belief_scope("AsyncSupersetClient.get_dashboards_page_async"): with belief_scope("AsyncSupersetClient.get_dashboards_page_async"):
validated_query = self._validate_query_params(query or {}) validated_query = self._validate_query_params(query or {})
if "columns" not in validated_query: if "columns" not in validated_query:
@@ -96,6 +102,7 @@ class AsyncSupersetClient(SupersetClient):
result = response_json.get("result", []) result = response_json.get("result", [])
total_count = response_json.get("count", len(result)) total_count = response_json.get("count", len(result))
return total_count, result return total_count, result
# [/DEF:get_dashboards_page_async:Function] # [/DEF:get_dashboards_page_async:Function]
# [DEF:get_dashboard_async:Function] # [DEF:get_dashboard_async:Function]
@@ -103,10 +110,16 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch one dashboard payload asynchronously. # @PURPOSE: Fetch one dashboard payload asynchronously.
# @POST: Returns raw dashboard payload from Superset API. # @POST: Returns raw dashboard payload from Superset API.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict] # @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[AsyncAPIClient.request]
async def get_dashboard_async(self, dashboard_id: int) -> Dict: async def get_dashboard_async(self, dashboard_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_async", f"id={dashboard_id}"): with belief_scope(
response = await self.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}") "AsyncSupersetClient.get_dashboard_async", f"id={dashboard_id}"
):
response = await self.network.request(
method="GET", endpoint=f"/dashboard/{dashboard_id}"
)
return cast(Dict, response) return cast(Dict, response)
# [/DEF:get_dashboard_async:Function] # [/DEF:get_dashboard_async:Function]
# [DEF:get_chart_async:Function] # [DEF:get_chart_async:Function]
@@ -114,10 +127,14 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch one chart payload asynchronously. # @PURPOSE: Fetch one chart payload asynchronously.
# @POST: Returns raw chart payload from Superset API. # @POST: Returns raw chart payload from Superset API.
# @DATA_CONTRACT: Input[chart_id: int] -> Output[Dict] # @DATA_CONTRACT: Input[chart_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[AsyncAPIClient.request]
async def get_chart_async(self, chart_id: int) -> Dict: async def get_chart_async(self, chart_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_chart_async", f"id={chart_id}"): with belief_scope("AsyncSupersetClient.get_chart_async", f"id={chart_id}"):
response = await self.network.request(method="GET", endpoint=f"/chart/{chart_id}") response = await self.network.request(
method="GET", endpoint=f"/chart/{chart_id}"
)
return cast(Dict, response) return cast(Dict, response)
# [/DEF:get_chart_async:Function] # [/DEF:get_chart_async:Function]
# [DEF:get_dashboard_detail_async:Function] # [DEF:get_dashboard_detail_async:Function]
@@ -125,17 +142,21 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch dashboard detail asynchronously with concurrent charts/datasets requests. # @PURPOSE: Fetch dashboard detail asynchronously with concurrent charts/datasets requests.
# @POST: Returns dashboard detail payload for overview page. # @POST: Returns dashboard detail payload for overview page.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict] # @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_dashboard_async] # @RELATION: [CALLS] ->[get_dashboard_async]
# @RELATION: [CALLS] ->[self.get_chart_async] # @RELATION: [CALLS] ->[get_chart_async]
async def get_dashboard_detail_async(self, dashboard_id: int) -> Dict: async def get_dashboard_detail_async(self, dashboard_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_detail_async", f"id={dashboard_id}"): with belief_scope(
"AsyncSupersetClient.get_dashboard_detail_async", f"id={dashboard_id}"
):
dashboard_response = await self.get_dashboard_async(dashboard_id) dashboard_response = await self.get_dashboard_async(dashboard_id)
dashboard_data = dashboard_response.get("result", dashboard_response) dashboard_data = dashboard_response.get("result", dashboard_response)
charts: List[Dict] = [] charts: List[Dict] = []
datasets: List[Dict] = [] datasets: List[Dict] = []
def extract_dataset_id_from_form_data(form_data: Optional[Dict]) -> Optional[int]: def extract_dataset_id_from_form_data(
form_data: Optional[Dict],
) -> Optional[int]:
if not isinstance(form_data, dict): if not isinstance(form_data, dict):
return None return None
datasource = form_data.get("datasource") datasource = form_data.get("datasource")
@@ -173,7 +194,11 @@ class AsyncSupersetClient(SupersetClient):
) )
if not isinstance(charts_response, Exception): if not isinstance(charts_response, Exception):
charts_payload = charts_response.get("result", []) if isinstance(charts_response, dict) else [] charts_payload = (
charts_response.get("result", [])
if isinstance(charts_response, dict)
else []
)
for chart_obj in charts_payload: for chart_obj in charts_payload:
if not isinstance(chart_obj, dict): if not isinstance(chart_obj, dict):
continue continue
@@ -186,20 +211,45 @@ class AsyncSupersetClient(SupersetClient):
form_data = json.loads(form_data) form_data = json.loads(form_data)
except Exception: except Exception:
form_data = {} form_data = {}
dataset_id = extract_dataset_id_from_form_data(form_data) or chart_obj.get("datasource_id") dataset_id = extract_dataset_id_from_form_data(
charts.append({ form_data
"id": int(chart_id), ) or chart_obj.get("datasource_id")
"title": chart_obj.get("slice_name") or chart_obj.get("name") or f"Chart {chart_id}", charts.append(
"viz_type": (form_data.get("viz_type") if isinstance(form_data, dict) else None), {
"dataset_id": int(dataset_id) if dataset_id is not None else None, "id": int(chart_id),
"last_modified": chart_obj.get("changed_on"), "title": chart_obj.get("slice_name")
"overview": chart_obj.get("description") or (form_data.get("viz_type") if isinstance(form_data, dict) else None) or "Chart", or chart_obj.get("name")
}) or f"Chart {chart_id}",
"viz_type": (
form_data.get("viz_type")
if isinstance(form_data, dict)
else None
),
"dataset_id": int(dataset_id)
if dataset_id is not None
else None,
"last_modified": chart_obj.get("changed_on"),
"overview": chart_obj.get("description")
or (
form_data.get("viz_type")
if isinstance(form_data, dict)
else None
)
or "Chart",
}
)
else: else:
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to fetch dashboard charts: %s", charts_response) app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to fetch dashboard charts: %s",
charts_response,
)
if not isinstance(datasets_response, Exception): if not isinstance(datasets_response, Exception):
datasets_payload = datasets_response.get("result", []) if isinstance(datasets_response, dict) else [] datasets_payload = (
datasets_response.get("result", [])
if isinstance(datasets_response, dict)
else []
)
for dataset_obj in datasets_payload: for dataset_obj in datasets_payload:
if not isinstance(dataset_obj, dict): if not isinstance(dataset_obj, dict):
continue continue
@@ -207,20 +257,36 @@ class AsyncSupersetClient(SupersetClient):
if dataset_id is None: if dataset_id is None:
continue continue
db_payload = dataset_obj.get("database") db_payload = dataset_obj.get("database")
db_name = db_payload.get("database_name") if isinstance(db_payload, dict) else None db_name = (
table_name = dataset_obj.get("table_name") or dataset_obj.get("datasource_name") or dataset_obj.get("name") or f"Dataset {dataset_id}" db_payload.get("database_name")
if isinstance(db_payload, dict)
else None
)
table_name = (
dataset_obj.get("table_name")
or dataset_obj.get("datasource_name")
or dataset_obj.get("name")
or f"Dataset {dataset_id}"
)
schema = dataset_obj.get("schema") schema = dataset_obj.get("schema")
fq_name = f"{schema}.{table_name}" if schema else table_name fq_name = f"{schema}.{table_name}" if schema else table_name
datasets.append({ datasets.append(
"id": int(dataset_id), {
"table_name": table_name, "id": int(dataset_id),
"schema": schema, "table_name": table_name,
"database": db_name or dataset_obj.get("database_name") or "Unknown", "schema": schema,
"last_modified": dataset_obj.get("changed_on"), "database": db_name
"overview": fq_name, or dataset_obj.get("database_name")
}) or "Unknown",
"last_modified": dataset_obj.get("changed_on"),
"overview": fq_name,
}
)
else: else:
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to fetch dashboard datasets: %s", datasets_response) app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to fetch dashboard datasets: %s",
datasets_response,
)
if not charts: if not charts:
raw_position_json = dashboard_data.get("position_json") raw_position_json = dashboard_data.get("position_json")
@@ -228,21 +294,29 @@ class AsyncSupersetClient(SupersetClient):
if isinstance(raw_position_json, str) and raw_position_json: if isinstance(raw_position_json, str) and raw_position_json:
try: try:
parsed_position = json.loads(raw_position_json) parsed_position = json.loads(raw_position_json)
chart_ids_from_position.update(self._extract_chart_ids_from_layout(parsed_position)) chart_ids_from_position.update(
self._extract_chart_ids_from_layout(parsed_position)
)
except Exception: except Exception:
pass pass
elif isinstance(raw_position_json, dict): elif isinstance(raw_position_json, dict):
chart_ids_from_position.update(self._extract_chart_ids_from_layout(raw_position_json)) chart_ids_from_position.update(
self._extract_chart_ids_from_layout(raw_position_json)
)
raw_json_metadata = dashboard_data.get("json_metadata") raw_json_metadata = dashboard_data.get("json_metadata")
if isinstance(raw_json_metadata, str) and raw_json_metadata: if isinstance(raw_json_metadata, str) and raw_json_metadata:
try: try:
parsed_metadata = json.loads(raw_json_metadata) parsed_metadata = json.loads(raw_json_metadata)
chart_ids_from_position.update(self._extract_chart_ids_from_layout(parsed_metadata)) chart_ids_from_position.update(
self._extract_chart_ids_from_layout(parsed_metadata)
)
except Exception: except Exception:
pass pass
elif isinstance(raw_json_metadata, dict): elif isinstance(raw_json_metadata, dict):
chart_ids_from_position.update(self._extract_chart_ids_from_layout(raw_json_metadata)) chart_ids_from_position.update(
self._extract_chart_ids_from_layout(raw_json_metadata)
)
fallback_chart_tasks = [ fallback_chart_tasks = [
self.get_chart_async(int(chart_id)) self.get_chart_async(int(chart_id))
@@ -252,68 +326,113 @@ class AsyncSupersetClient(SupersetClient):
*fallback_chart_tasks, *fallback_chart_tasks,
return_exceptions=True, return_exceptions=True,
) )
for chart_id, chart_response in zip(sorted(chart_ids_from_position), fallback_chart_responses): for chart_id, chart_response in zip(
sorted(chart_ids_from_position), fallback_chart_responses
):
if isinstance(chart_response, Exception): if isinstance(chart_response, Exception):
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to resolve fallback chart %s: %s", chart_id, chart_response) app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to resolve fallback chart %s: %s",
chart_id,
chart_response,
)
continue continue
chart_data = chart_response.get("result", chart_response) chart_data = chart_response.get("result", chart_response)
charts.append({ charts.append(
"id": int(chart_id), {
"title": chart_data.get("slice_name") or chart_data.get("name") or f"Chart {chart_id}", "id": int(chart_id),
"viz_type": chart_data.get("viz_type"), "title": chart_data.get("slice_name")
"dataset_id": chart_data.get("datasource_id"), or chart_data.get("name")
"last_modified": chart_data.get("changed_on"), or f"Chart {chart_id}",
"overview": chart_data.get("description") or chart_data.get("viz_type") or "Chart", "viz_type": chart_data.get("viz_type"),
}) "dataset_id": chart_data.get("datasource_id"),
"last_modified": chart_data.get("changed_on"),
"overview": chart_data.get("description")
or chart_data.get("viz_type")
or "Chart",
}
)
dataset_ids_from_charts = { dataset_ids_from_charts = {
c.get("dataset_id") c.get("dataset_id") for c in charts if c.get("dataset_id") is not None
for c in charts
if c.get("dataset_id") is not None
} }
known_dataset_ids = {d.get("id") for d in datasets if d.get("id") is not None} known_dataset_ids = {
missing_dataset_ids = sorted(int(item) for item in dataset_ids_from_charts if item not in known_dataset_ids) d.get("id") for d in datasets if d.get("id") is not None
}
missing_dataset_ids = sorted(
int(item)
for item in dataset_ids_from_charts
if item not in known_dataset_ids
)
if missing_dataset_ids: if missing_dataset_ids:
dataset_fetch_tasks = [ dataset_fetch_tasks = [
self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}") self.network.request(
method="GET", endpoint=f"/dataset/{dataset_id}"
)
for dataset_id in missing_dataset_ids for dataset_id in missing_dataset_ids
] ]
dataset_fetch_responses = await asyncio.gather( dataset_fetch_responses = await asyncio.gather(
*dataset_fetch_tasks, *dataset_fetch_tasks,
return_exceptions=True, return_exceptions=True,
) )
for dataset_id, dataset_response in zip(missing_dataset_ids, dataset_fetch_responses): for dataset_id, dataset_response in zip(
missing_dataset_ids, dataset_fetch_responses
):
if isinstance(dataset_response, Exception): if isinstance(dataset_response, Exception):
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to backfill dataset %s: %s", dataset_id, dataset_response) app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to backfill dataset %s: %s",
dataset_id,
dataset_response,
)
continue continue
dataset_data = dataset_response.get("result", dataset_response) if isinstance(dataset_response, dict) else {} dataset_data = (
dataset_response.get("result", dataset_response)
if isinstance(dataset_response, dict)
else {}
)
db_payload = dataset_data.get("database") db_payload = dataset_data.get("database")
db_name = db_payload.get("database_name") if isinstance(db_payload, dict) else None db_name = (
table_name = dataset_data.get("table_name") or dataset_data.get("datasource_name") or dataset_data.get("name") or f"Dataset {dataset_id}" db_payload.get("database_name")
if isinstance(db_payload, dict)
else None
)
table_name = (
dataset_data.get("table_name")
or dataset_data.get("datasource_name")
or dataset_data.get("name")
or f"Dataset {dataset_id}"
)
schema = dataset_data.get("schema") schema = dataset_data.get("schema")
fq_name = f" {schema}.{table_name}" if schema else table_name fq_name = f" {schema}.{table_name}" if schema else table_name
datasets.append({ datasets.append(
"id": int(dataset_id), {
"table_name": table_name, "id": int(dataset_id),
"schema": schema, "table_name": table_name,
"database": db_name or dataset_data.get("database_name") or "Unknown", "schema": schema,
"last_modified": dataset_data.get("changed_on"), "database": db_name
"overview": fq_name, or dataset_data.get("database_name")
}) or "Unknown",
"last_modified": dataset_data.get("changed_on"),
"overview": fq_name,
}
)
return { return {
"id": int(dashboard_data.get("id") or dashboard_id), "id": int(dashboard_data.get("id") or dashboard_id),
"title": dashboard_data.get("dashboard_title") or dashboard_data.get("title") or f"Dashboard {dashboard_id}", "title": dashboard_data.get("dashboard_title")
or dashboard_data.get("title")
or f"Dashboard {dashboard_id}",
"slug": dashboard_data.get("slug"), "slug": dashboard_data.get("slug"),
"url": dashboard_data.get("url"), "url": dashboard_data.get("url"),
"description": dashboard_data.get("description"), "description": dashboard_data.get("description"),
"last_modified": dashboard_data.get("changed_on_utc") or dashboard_data.get("changed_on"), "last_modified": dashboard_data.get("changed_on_utc")
or dashboard_data.get("changed_on"),
"published": dashboard_data.get("published"), "published": dashboard_data.get("published"),
"charts": charts, "charts": charts,
"datasets": datasets, "datasets": datasets,
"chart_count": len(charts), "chart_count": len(charts),
"dataset_count": len(datasets), "dataset_count": len(datasets),
} }
# [/DEF:get_dashboard_detail_async:Function] # [/DEF:get_dashboard_detail_async:Function]
# [DEF:get_dashboard_permalink_state_async:Function] # [DEF:get_dashboard_permalink_state_async:Function]
@@ -322,12 +441,15 @@ class AsyncSupersetClient(SupersetClient):
# @POST: Returns dashboard permalink state payload from Superset API. # @POST: Returns dashboard permalink state payload from Superset API.
# @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict] # @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict]
async def get_dashboard_permalink_state_async(self, permalink_key: str) -> Dict: async def get_dashboard_permalink_state_async(self, permalink_key: str) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_permalink_state_async", f"key={permalink_key}"): with belief_scope(
"AsyncSupersetClient.get_dashboard_permalink_state_async",
f"key={permalink_key}",
):
response = await self.network.request( response = await self.network.request(
method="GET", method="GET", endpoint=f"/dashboard/permalink/{permalink_key}"
endpoint=f"/dashboard/permalink/{permalink_key}"
) )
return cast(Dict, response) return cast(Dict, response)
# [/DEF:get_dashboard_permalink_state_async:Function] # [/DEF:get_dashboard_permalink_state_async:Function]
# [DEF:get_native_filter_state_async:Function] # [DEF:get_native_filter_state_async:Function]
@@ -335,13 +457,19 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch stored native filter state asynchronously. # @PURPOSE: Fetch stored native filter state asynchronously.
# @POST: Returns native filter state payload from Superset API. # @POST: Returns native filter state payload from Superset API.
# @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict] # @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict]
async def get_native_filter_state_async(self, dashboard_id: int, filter_state_key: str) -> Dict: async def get_native_filter_state_async(
with belief_scope("AsyncSupersetClient.get_native_filter_state_async", f"dashboard={dashboard_id}, key={filter_state_key}"): self, dashboard_id: int, filter_state_key: str
) -> Dict:
with belief_scope(
"AsyncSupersetClient.get_native_filter_state_async",
f"dashboard={dashboard_id}, key={filter_state_key}",
):
response = await self.network.request( response = await self.network.request(
method="GET", method="GET",
endpoint=f"/dashboard/{dashboard_id}/filter_state/{filter_state_key}" endpoint=f"/dashboard/{dashboard_id}/filter_state/{filter_state_key}",
) )
return cast(Dict, response) return cast(Dict, response)
# [/DEF:get_native_filter_state_async:Function] # [/DEF:get_native_filter_state_async:Function]
# [DEF:extract_native_filters_from_permalink_async:Function] # [DEF:extract_native_filters_from_permalink_async:Function]
@@ -349,15 +477,22 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Extract native filters dataMask from a permalink key asynchronously. # @PURPOSE: Extract native filters dataMask from a permalink key asynchronously.
# @POST: Returns extracted dataMask with filter states. # @POST: Returns extracted dataMask with filter states.
# @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict] # @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_dashboard_permalink_state_async] # @RELATION: [CALLS] ->[get_dashboard_permalink_state_async]
async def extract_native_filters_from_permalink_async(self, permalink_key: str) -> Dict: async def extract_native_filters_from_permalink_async(
with belief_scope("AsyncSupersetClient.extract_native_filters_from_permalink_async", f"key={permalink_key}"): self, permalink_key: str
permalink_response = await self.get_dashboard_permalink_state_async(permalink_key) ) -> Dict:
with belief_scope(
"AsyncSupersetClient.extract_native_filters_from_permalink_async",
f"key={permalink_key}",
):
permalink_response = await self.get_dashboard_permalink_state_async(
permalink_key
)
result = permalink_response.get("result", permalink_response) result = permalink_response.get("result", permalink_response)
state = result.get("state", result) state = result.get("state", result)
data_mask = state.get("dataMask", {}) data_mask = state.get("dataMask", {})
extracted_filters = {} extracted_filters = {}
for filter_id, filter_data in data_mask.items(): for filter_id, filter_data in data_mask.items():
if not isinstance(filter_data, dict): if not isinstance(filter_data, dict):
@@ -367,7 +502,7 @@ class AsyncSupersetClient(SupersetClient):
"filterState": filter_data.get("filterState", {}), "filterState": filter_data.get("filterState", {}),
"ownState": filter_data.get("ownState", {}), "ownState": filter_data.get("ownState", {}),
} }
return { return {
"dataMask": extracted_filters, "dataMask": extracted_filters,
"activeTabs": state.get("activeTabs", []), "activeTabs": state.get("activeTabs", []),
@@ -375,6 +510,7 @@ class AsyncSupersetClient(SupersetClient):
"chartStates": state.get("chartStates", {}), "chartStates": state.get("chartStates", {}),
"permalink_key": permalink_key, "permalink_key": permalink_key,
} }
# [/DEF:extract_native_filters_from_permalink_async:Function] # [/DEF:extract_native_filters_from_permalink_async:Function]
# [DEF:extract_native_filters_from_key_async:Function] # [DEF:extract_native_filters_from_key_async:Function]
@@ -382,27 +518,37 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Extract native filters from a native_filters_key URL parameter asynchronously. # @PURPOSE: Extract native filters from a native_filters_key URL parameter asynchronously.
# @POST: Returns extracted filter state with extraFormData. # @POST: Returns extracted filter state with extraFormData.
# @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict] # @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_native_filter_state_async] # @RELATION: [CALLS] ->[get_native_filter_state_async]
async def extract_native_filters_from_key_async(self, dashboard_id: int, filter_state_key: str) -> Dict: async def extract_native_filters_from_key_async(
with belief_scope("AsyncSupersetClient.extract_native_filters_from_key_async", f"dashboard={dashboard_id}, key={filter_state_key}"): self, dashboard_id: int, filter_state_key: str
filter_response = await self.get_native_filter_state_async(dashboard_id, filter_state_key) ) -> Dict:
with belief_scope(
"AsyncSupersetClient.extract_native_filters_from_key_async",
f"dashboard={dashboard_id}, key={filter_state_key}",
):
filter_response = await self.get_native_filter_state_async(
dashboard_id, filter_state_key
)
result = filter_response.get("result", filter_response) result = filter_response.get("result", filter_response)
value = result.get("value") value = result.get("value")
if isinstance(value, str): if isinstance(value, str):
try: try:
parsed_value = json.loads(value) parsed_value = json.loads(value)
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
app_logger.warning("[extract_native_filters_from_key_async][Warning] Failed to parse filter state JSON: %s", e) app_logger.warning(
"[extract_native_filters_from_key_async][Warning] Failed to parse filter state JSON: %s",
e,
)
parsed_value = {} parsed_value = {}
elif isinstance(value, dict): elif isinstance(value, dict):
parsed_value = value parsed_value = value
else: else:
parsed_value = {} parsed_value = {}
extracted_filters = {} extracted_filters = {}
if "id" in parsed_value and "extraFormData" in parsed_value: if "id" in parsed_value and "extraFormData" in parsed_value:
filter_id = parsed_value.get("id", filter_state_key) filter_id = parsed_value.get("id", filter_state_key)
extracted_filters[filter_id] = { extracted_filters[filter_id] = {
@@ -419,12 +565,13 @@ class AsyncSupersetClient(SupersetClient):
"filterState": filter_data.get("filterState", {}), "filterState": filter_data.get("filterState", {}),
"ownState": filter_data.get("ownState", {}), "ownState": filter_data.get("ownState", {}),
} }
return { return {
"dataMask": extracted_filters, "dataMask": extracted_filters,
"dashboard_id": dashboard_id, "dashboard_id": dashboard_id,
"filter_state_key": filter_state_key, "filter_state_key": filter_state_key,
} }
# [/DEF:extract_native_filters_from_key_async:Function] # [/DEF:extract_native_filters_from_key_async:Function]
# [DEF:parse_dashboard_url_for_filters_async:Function] # [DEF:parse_dashboard_url_for_filters_async:Function]
@@ -432,36 +579,42 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously. # @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @POST: Returns extracted filter state or empty dict if no filters found. # @POST: Returns extracted filter state or empty dict if no filters found.
# @DATA_CONTRACT: Input[url: str] -> Output[Dict] # @DATA_CONTRACT: Input[url: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.extract_native_filters_from_permalink_async] # @RELATION: [CALLS] ->[extract_native_filters_from_permalink_async]
# @RELATION: [CALLS] ->[self.extract_native_filters_from_key_async] # @RELATION: [CALLS] ->[extract_native_filters_from_key_async]
async def parse_dashboard_url_for_filters_async(self, url: str) -> Dict: async def parse_dashboard_url_for_filters_async(self, url: str) -> Dict:
with belief_scope("AsyncSupersetClient.parse_dashboard_url_for_filters_async", f"url={url}"): with belief_scope(
"AsyncSupersetClient.parse_dashboard_url_for_filters_async", f"url={url}"
):
import urllib.parse import urllib.parse
parsed_url = urllib.parse.urlparse(url) parsed_url = urllib.parse.urlparse(url)
query_params = urllib.parse.parse_qs(parsed_url.query) query_params = urllib.parse.parse_qs(parsed_url.query)
path_parts = parsed_url.path.rstrip("/").split("/") path_parts = parsed_url.path.rstrip("/").split("/")
result = { result = {
"url": url, "url": url,
"dashboard_id": None, "dashboard_id": None,
"filter_type": None, "filter_type": None,
"filters": {}, "filters": {},
} }
# Check for permalink URL: /dashboard/p/{key}/ # Check for permalink URL: /dashboard/p/{key}/
if "p" in path_parts: if "p" in path_parts:
try: try:
p_index = path_parts.index("p") p_index = path_parts.index("p")
if p_index + 1 < len(path_parts): if p_index + 1 < len(path_parts):
permalink_key = path_parts[p_index + 1] permalink_key = path_parts[p_index + 1]
filter_data = await self.extract_native_filters_from_permalink_async(permalink_key) filter_data = (
await self.extract_native_filters_from_permalink_async(
permalink_key
)
)
result["filter_type"] = "permalink" result["filter_type"] = "permalink"
result["filters"] = filter_data result["filters"] = filter_data
return result return result
except ValueError: except ValueError:
pass pass
# Check for native_filters_key in query params # Check for native_filters_key in query params
native_filters_key = query_params.get("native_filters_key", [None])[0] native_filters_key = query_params.get("native_filters_key", [None])[0]
if native_filters_key: if native_filters_key:
@@ -475,7 +628,7 @@ class AsyncSupersetClient(SupersetClient):
dashboard_ref = potential_id dashboard_ref = potential_id
except ValueError: except ValueError:
pass pass
if dashboard_ref: if dashboard_ref:
# Resolve slug to numeric ID — the filter_state API requires a numeric ID # Resolve slug to numeric ID — the filter_state API requires a numeric ID
resolved_id = None resolved_id = None
@@ -484,23 +637,35 @@ class AsyncSupersetClient(SupersetClient):
except (ValueError, TypeError): except (ValueError, TypeError):
try: try:
dash_resp = await self.get_dashboard_async(dashboard_ref) dash_resp = await self.get_dashboard_async(dashboard_ref)
dash_data = dash_resp.get("result", dash_resp) if isinstance(dash_resp, dict) else {} dash_data = (
dash_resp.get("result", dash_resp)
if isinstance(dash_resp, dict)
else {}
)
raw_id = dash_data.get("id") raw_id = dash_data.get("id")
if raw_id is not None: if raw_id is not None:
resolved_id = int(raw_id) resolved_id = int(raw_id)
except Exception as e: except Exception as e:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Failed to resolve dashboard slug '%s' to ID: %s", dashboard_ref, e) app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Failed to resolve dashboard slug '%s' to ID: %s",
dashboard_ref,
e,
)
if resolved_id is not None: if resolved_id is not None:
filter_data = await self.extract_native_filters_from_key_async(resolved_id, native_filters_key) filter_data = await self.extract_native_filters_from_key_async(
resolved_id, native_filters_key
)
result["filter_type"] = "native_filters_key" result["filter_type"] = "native_filters_key"
result["dashboard_id"] = resolved_id result["dashboard_id"] = resolved_id
result["filters"] = filter_data result["filters"] = filter_data
return result return result
else: else:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Could not resolve dashboard_id from URL for native_filters_key") app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Could not resolve dashboard_id from URL for native_filters_key"
)
return result return result
# Check for native_filters in query params (direct filter values) # Check for native_filters in query params (direct filter values)
native_filters = query_params.get("native_filters", [None])[0] native_filters = query_params.get("native_filters", [None])[0]
if native_filters: if native_filters:
@@ -510,10 +675,16 @@ class AsyncSupersetClient(SupersetClient):
result["filters"] = {"dataMask": parsed_filters} result["filters"] = {"dataMask": parsed_filters}
return result return result
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Failed to parse native_filters JSON: %s", e) app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Failed to parse native_filters JSON: %s",
e,
)
return result return result
# [/DEF:parse_dashboard_url_for_filters_async:Function] # [/DEF:parse_dashboard_url_for_filters_async:Function]
# [/DEF:AsyncSupersetClient:Class] # [/DEF:AsyncSupersetClient:Class]
# [/DEF:backend.src.core.async_superset_client:Module] # [/DEF:backend.src.core.async_superset_client:Module]

View File

@@ -1,12 +1,12 @@
# [DEF:backend.src.core.database:Module] # [DEF:DatabaseModule:Module]
# #
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @SEMANTICS: database, postgresql, sqlalchemy, session, persistence # @SEMANTICS: database, postgresql, sqlalchemy, session, persistence
# @PURPOSE: Configures database connection and session management (PostgreSQL-first). # @PURPOSE: Configures database connection and session management (PostgreSQL-first).
# @LAYER: Core # @LAYER: Core
# @RELATION: DEPENDS_ON ->[sqlalchemy] # @RELATION: [DEPENDS_ON] ->[MappingModels]
# @RELATION: DEPENDS_ON ->[backend.src.models.mapping] # @RELATION: [DEPENDS_ON] ->[auth_config]
# @RELATION: DEPENDS_ON ->[backend.src.core.auth.config] # @RELATION: [DEPENDS_ON] ->[ConnectionConfig]
# #
# @INVARIANT: A single engine instance is used for the entire application. # @INVARIANT: A single engine instance is used for the entire application.
@@ -15,6 +15,7 @@ from sqlalchemy import create_engine, inspect, text
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from ..models.mapping import Base from ..models.mapping import Base
from ..models.connection import ConnectionConfig from ..models.connection import ConnectionConfig
# Import models to ensure they're registered with Base # Import models to ensure they're registered with Base
from ..models import task as _task_models # noqa: F401 from ..models import task as _task_models # noqa: F401
from ..models import auth as _auth_models # noqa: F401 from ..models import auth as _auth_models # noqa: F401
@@ -60,6 +61,7 @@ TASKS_DATABASE_URL = os.getenv("TASKS_DATABASE_URL", DATABASE_URL)
AUTH_DATABASE_URL = os.getenv("AUTH_DATABASE_URL", auth_config.AUTH_DATABASE_URL) AUTH_DATABASE_URL = os.getenv("AUTH_DATABASE_URL", auth_config.AUTH_DATABASE_URL)
# [/DEF:AUTH_DATABASE_URL:Constant] # [/DEF:AUTH_DATABASE_URL:Constant]
# [DEF:engine:Variable] # [DEF:engine:Variable]
# @COMPLEXITY: 1 # @COMPLEXITY: 1
# @PURPOSE: SQLAlchemy engine for mappings database. # @PURPOSE: SQLAlchemy engine for mappings database.
@@ -70,6 +72,7 @@ def _build_engine(db_url: str):
return create_engine(db_url, connect_args={"check_same_thread": False}) return create_engine(db_url, connect_args={"check_same_thread": False})
return create_engine(db_url, pool_pre_ping=True) return create_engine(db_url, pool_pre_ping=True)
engine = _build_engine(DATABASE_URL) engine = _build_engine(DATABASE_URL)
# [/DEF:engine:Variable] # [/DEF:engine:Variable]
@@ -106,11 +109,13 @@ TasksSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=tasks_e
AuthSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=auth_engine) AuthSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=auth_engine)
# [/DEF:AuthSessionLocal:Class] # [/DEF:AuthSessionLocal:Class]
# [DEF:_ensure_user_dashboard_preferences_columns:Function] # [DEF:_ensure_user_dashboard_preferences_columns:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table. # @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table.
# @PRE: bind_engine points to application database where profile table is stored. # @PRE: bind_engine points to application database where profile table is stored.
# @POST: Missing columns are added without data loss. # @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_user_dashboard_preferences_columns(bind_engine): def _ensure_user_dashboard_preferences_columns(bind_engine):
with belief_scope("_ensure_user_dashboard_preferences_columns"): with belief_scope("_ensure_user_dashboard_preferences_columns"):
table_name = "user_dashboard_preferences" table_name = "user_dashboard_preferences"
@@ -170,12 +175,15 @@ def _ensure_user_dashboard_preferences_columns(bind_engine):
"[database][EXPLORE] Profile preference additive migration failed: %s", "[database][EXPLORE] Profile preference additive migration failed: %s",
migration_error, migration_error,
) )
# [/DEF:_ensure_user_dashboard_preferences_columns:Function] # [/DEF:_ensure_user_dashboard_preferences_columns:Function]
# [DEF:_ensure_user_dashboard_preferences_health_columns:Function] # [DEF:_ensure_user_dashboard_preferences_health_columns:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table (health fields). # @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table (health fields).
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_user_dashboard_preferences_health_columns(bind_engine): def _ensure_user_dashboard_preferences_health_columns(bind_engine):
with belief_scope("_ensure_user_dashboard_preferences_health_columns"): with belief_scope("_ensure_user_dashboard_preferences_health_columns"):
table_name = "user_dashboard_preferences" table_name = "user_dashboard_preferences"
@@ -214,12 +222,15 @@ def _ensure_user_dashboard_preferences_health_columns(bind_engine):
"[database][EXPLORE] Profile health preference additive migration failed: %s", "[database][EXPLORE] Profile health preference additive migration failed: %s",
migration_error, migration_error,
) )
# [/DEF:_ensure_user_dashboard_preferences_health_columns:Function] # [/DEF:_ensure_user_dashboard_preferences_health_columns:Function]
# [DEF:_ensure_llm_validation_results_columns:Function] # [DEF:_ensure_llm_validation_results_columns:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for llm_validation_results table. # @PURPOSE: Applies additive schema upgrades for llm_validation_results table.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_llm_validation_results_columns(bind_engine): def _ensure_llm_validation_results_columns(bind_engine):
with belief_scope("_ensure_llm_validation_results_columns"): with belief_scope("_ensure_llm_validation_results_columns"):
table_name = "llm_validation_results" table_name = "llm_validation_results"
@@ -254,6 +265,8 @@ def _ensure_llm_validation_results_columns(bind_engine):
"[database][EXPLORE] ValidationRecord additive migration failed: %s", "[database][EXPLORE] ValidationRecord additive migration failed: %s",
migration_error, migration_error,
) )
# [/DEF:_ensure_llm_validation_results_columns:Function] # [/DEF:_ensure_llm_validation_results_columns:Function]
@@ -262,6 +275,7 @@ def _ensure_llm_validation_results_columns(bind_engine):
# @PURPOSE: Applies additive schema upgrades for git_server_configs table. # @PURPOSE: Applies additive schema upgrades for git_server_configs table.
# @PRE: bind_engine points to application database. # @PRE: bind_engine points to application database.
# @POST: Missing columns are added without data loss. # @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_git_server_configs_columns(bind_engine): def _ensure_git_server_configs_columns(bind_engine):
with belief_scope("_ensure_git_server_configs_columns"): with belief_scope("_ensure_git_server_configs_columns"):
table_name = "git_server_configs" table_name = "git_server_configs"
@@ -292,6 +306,8 @@ def _ensure_git_server_configs_columns(bind_engine):
"[database][EXPLORE] GitServerConfig preference additive migration failed: %s", "[database][EXPLORE] GitServerConfig preference additive migration failed: %s",
migration_error, migration_error,
) )
# [/DEF:_ensure_git_server_configs_columns:Function] # [/DEF:_ensure_git_server_configs_columns:Function]
@@ -300,6 +316,7 @@ def _ensure_git_server_configs_columns(bind_engine):
# @PURPOSE: Applies additive schema upgrades for auth users table. # @PURPOSE: Applies additive schema upgrades for auth users table.
# @PRE: bind_engine points to authentication database. # @PRE: bind_engine points to authentication database.
# @POST: Missing columns are added without data loss. # @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[auth_engine]
def _ensure_auth_users_columns(bind_engine): def _ensure_auth_users_columns(bind_engine):
with belief_scope("_ensure_auth_users_columns"): with belief_scope("_ensure_auth_users_columns"):
table_name = "users" table_name = "users"
@@ -314,9 +331,7 @@ def _ensure_auth_users_columns(bind_engine):
alter_statements = [] alter_statements = []
if "full_name" not in existing_columns: if "full_name" not in existing_columns:
alter_statements.append( alter_statements.append("ALTER TABLE users ADD COLUMN full_name VARCHAR")
"ALTER TABLE users ADD COLUMN full_name VARCHAR"
)
if "is_ad_user" not in existing_columns: if "is_ad_user" not in existing_columns:
alter_statements.append( alter_statements.append(
"ALTER TABLE users ADD COLUMN is_ad_user BOOLEAN NOT NULL DEFAULT FALSE" "ALTER TABLE users ADD COLUMN is_ad_user BOOLEAN NOT NULL DEFAULT FALSE"
@@ -340,7 +355,13 @@ def _ensure_auth_users_columns(bind_engine):
connection.execute(text(statement)) connection.execute(text(statement))
logger.reason( logger.reason(
"Auth users schema migration completed", "Auth users schema migration completed",
extra={"table": table_name, "added_columns": [stmt.split(" ADD COLUMN ", 1)[1].split()[0] for stmt in alter_statements]}, extra={
"table": table_name,
"added_columns": [
stmt.split(" ADD COLUMN ", 1)[1].split()[0]
for stmt in alter_statements
],
},
) )
except Exception as migration_error: except Exception as migration_error:
logger.warning( logger.warning(
@@ -348,6 +369,8 @@ def _ensure_auth_users_columns(bind_engine):
migration_error, migration_error,
) )
raise raise
# [/DEF:_ensure_auth_users_columns:Function] # [/DEF:_ensure_auth_users_columns:Function]
@@ -356,6 +379,7 @@ def _ensure_auth_users_columns(bind_engine):
# @PURPOSE: Ensures the external connection registry table exists in the main database. # @PURPOSE: Ensures the external connection registry table exists in the main database.
# @PRE: bind_engine points to the application database. # @PRE: bind_engine points to the application database.
# @POST: connection_configs table exists without dropping existing data. # @POST: connection_configs table exists without dropping existing data.
# @RELATION: [DEPENDS_ON] ->[ConnectionConfig]
def ensure_connection_configs_table(bind_engine): def ensure_connection_configs_table(bind_engine):
with belief_scope("ensure_connection_configs_table"): with belief_scope("ensure_connection_configs_table"):
try: try:
@@ -366,6 +390,8 @@ def ensure_connection_configs_table(bind_engine):
migration_error, migration_error,
) )
raise raise
# [/DEF:ensure_connection_configs_table:Function] # [/DEF:ensure_connection_configs_table:Function]
@@ -374,6 +400,7 @@ def ensure_connection_configs_table(bind_engine):
# @PURPOSE: Adds missing FilterSource enum values to the PostgreSQL native filtersource type. # @PURPOSE: Adds missing FilterSource enum values to the PostgreSQL native filtersource type.
# @PRE: bind_engine points to application database with imported_filters table. # @PRE: bind_engine points to application database with imported_filters table.
# @POST: New enum values are available without data loss. # @POST: New enum values are available without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_filter_source_enum_values(bind_engine): def _ensure_filter_source_enum_values(bind_engine):
with belief_scope("_ensure_filter_source_enum_values"): with belief_scope("_ensure_filter_source_enum_values"):
try: try:
@@ -387,7 +414,9 @@ def _ensure_filter_source_enum_values(bind_engine):
) )
) )
if result.fetchone() is None: if result.fetchone() is None:
logger.reason("filtersource enum type does not exist yet; skipping migration") logger.reason(
"filtersource enum type does not exist yet; skipping migration"
)
return return
# Get existing enum values # Get existing enum values
@@ -402,7 +431,9 @@ def _ensure_filter_source_enum_values(bind_engine):
existing_values = {row[0] for row in result.fetchall()} existing_values = {row[0] for row in result.fetchall()}
required_values = ["SUPERSET_PERMALINK", "SUPERSET_NATIVE_FILTERS_KEY"] required_values = ["SUPERSET_PERMALINK", "SUPERSET_NATIVE_FILTERS_KEY"]
missing_values = [v for v in required_values if v not in existing_values] missing_values = [
v for v in required_values if v not in existing_values
]
if not missing_values: if not missing_values:
logger.reason( logger.reason(
@@ -417,7 +448,9 @@ def _ensure_filter_source_enum_values(bind_engine):
) )
for value in missing_values: for value in missing_values:
connection.execute( connection.execute(
text(f"ALTER TYPE filtersource ADD VALUE IF NOT EXISTS '{value}'") text(
f"ALTER TYPE filtersource ADD VALUE IF NOT EXISTS '{value}'"
)
) )
connection.commit() connection.commit()
logger.reason( logger.reason(
@@ -429,6 +462,8 @@ def _ensure_filter_source_enum_values(bind_engine):
"[database][EXPLORE] FilterSource enum additive migration failed: %s", "[database][EXPLORE] FilterSource enum additive migration failed: %s",
migration_error, migration_error,
) )
# [/DEF:_ensure_filter_source_enum_values:Function] # [/DEF:_ensure_filter_source_enum_values:Function]
@@ -438,6 +473,8 @@ def _ensure_filter_source_enum_values(bind_engine):
# @PRE: engine, tasks_engine and auth_engine are initialized. # @PRE: engine, tasks_engine and auth_engine are initialized.
# @POST: Database tables created in all databases. # @POST: Database tables created in all databases.
# @SIDE_EFFECT: Creates physical database files if they don't exist. # @SIDE_EFFECT: Creates physical database files if they don't exist.
# @RELATION: [CALLS] ->[ensure_connection_configs_table]
# @RELATION: [CALLS] ->[_ensure_filter_source_enum_values]
def init_db(): def init_db():
with belief_scope("init_db"): with belief_scope("init_db"):
Base.metadata.create_all(bind=engine) Base.metadata.create_all(bind=engine)
@@ -450,14 +487,18 @@ def init_db():
_ensure_auth_users_columns(auth_engine) _ensure_auth_users_columns(auth_engine)
ensure_connection_configs_table(engine) ensure_connection_configs_table(engine)
_ensure_filter_source_enum_values(engine) _ensure_filter_source_enum_values(engine)
# [/DEF:init_db:Function] # [/DEF:init_db:Function]
# [DEF:get_db:Function] # [DEF:get_db:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Dependency for getting a database session. # @PURPOSE: Dependency for getting a database session.
# @PRE: SessionLocal is initialized. # @PRE: SessionLocal is initialized.
# @POST: Session is closed after use. # @POST: Session is closed after use.
# @RETURN: Generator[Session, None, None] # @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[SessionLocal]
def get_db(): def get_db():
with belief_scope("get_db"): with belief_scope("get_db"):
db = SessionLocal() db = SessionLocal()
@@ -465,14 +506,18 @@ def get_db():
yield db yield db
finally: finally:
db.close() db.close()
# [/DEF:get_db:Function] # [/DEF:get_db:Function]
# [DEF:get_tasks_db:Function] # [DEF:get_tasks_db:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Dependency for getting a tasks database session. # @PURPOSE: Dependency for getting a tasks database session.
# @PRE: TasksSessionLocal is initialized. # @PRE: TasksSessionLocal is initialized.
# @POST: Session is closed after use. # @POST: Session is closed after use.
# @RETURN: Generator[Session, None, None] # @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[TasksSessionLocal]
def get_tasks_db(): def get_tasks_db():
with belief_scope("get_tasks_db"): with belief_scope("get_tasks_db"):
db = TasksSessionLocal() db = TasksSessionLocal()
@@ -480,8 +525,11 @@ def get_tasks_db():
yield db yield db
finally: finally:
db.close() db.close()
# [/DEF:get_tasks_db:Function] # [/DEF:get_tasks_db:Function]
# [DEF:get_auth_db:Function] # [DEF:get_auth_db:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Dependency for getting an authentication database session. # @PURPOSE: Dependency for getting an authentication database session.
@@ -489,6 +537,7 @@ def get_tasks_db():
# @POST: Session is closed after use. # @POST: Session is closed after use.
# @DATA_CONTRACT: None -> Output[sqlalchemy.orm.Session] # @DATA_CONTRACT: None -> Output[sqlalchemy.orm.Session]
# @RETURN: Generator[Session, None, None] # @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[AuthSessionLocal]
def get_auth_db(): def get_auth_db():
with belief_scope("get_auth_db"): with belief_scope("get_auth_db"):
db = AuthSessionLocal() db = AuthSessionLocal()
@@ -496,6 +545,8 @@ def get_auth_db():
yield db yield db
finally: finally:
db.close() db.close()
# [/DEF:get_auth_db:Function] # [/DEF:get_auth_db:Function]
# [/DEF:backend.src.core.database:Module] # [/DEF:DatabaseModule:Module]

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
# @SEMANTICS: dataset_review, superset, link_parsing, context_recovery, partial_recovery # @SEMANTICS: dataset_review, superset, link_parsing, context_recovery, partial_recovery
# @PURPOSE: Recover dataset and dashboard context from Superset links while preserving explicit partial-recovery markers. # @PURPOSE: Recover dataset and dashboard context from Superset links while preserving explicit partial-recovery markers.
# @LAYER: Infra # @LAYER: Infra
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient:Class] # @RELATION: [DEPENDS_ON] ->[ImportedFilter]
# @RELATION: [DEPENDS_ON] ->[ImportedFilter] # @RELATION: [DEPENDS_ON] ->[ImportedFilter]
# @RELATION: [DEPENDS_ON] ->[TemplateVariable] # @RELATION: [DEPENDS_ON] ->[TemplateVariable]
# @PRE: Superset link or dataset reference must be parseable enough to resolve an environment-scoped target resource. # @PRE: Superset link or dataset reference must be parseable enough to resolve an environment-scoped target resource.
@@ -18,7 +18,7 @@ import json
import re import re
from copy import deepcopy from copy import deepcopy
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set from typing import Any, Dict, List, Optional, Set, cast
from urllib.parse import parse_qs, unquote, urlparse from urllib.parse import parse_qs, unquote, urlparse
from src.core.config_models import Environment from src.core.config_models import Environment
@@ -26,6 +26,8 @@ from src.core.logger import belief_scope, logger
from src.core.superset_client import SupersetClient from src.core.superset_client import SupersetClient
# [/DEF:SupersetContextExtractor.imports:Block] # [/DEF:SupersetContextExtractor.imports:Block]
logger = cast(Any, logger)
# [DEF:SupersetParsedContext:Class] # [DEF:SupersetParsedContext:Class]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
@@ -42,13 +44,15 @@ class SupersetParsedContext:
imported_filters: List[Dict[str, Any]] = field(default_factory=list) imported_filters: List[Dict[str, Any]] = field(default_factory=list)
unresolved_references: List[str] = field(default_factory=list) unresolved_references: List[str] = field(default_factory=list)
partial_recovery: bool = False partial_recovery: bool = False
# [/DEF:SupersetParsedContext:Class] # [/DEF:SupersetParsedContext:Class]
# [DEF:SupersetContextExtractor:Class] # [DEF:SupersetContextExtractor:Class]
# @COMPLEXITY: 4 # @COMPLEXITY: 4
# @PURPOSE: Parse supported Superset URLs and recover canonical dataset/dashboard references for review-session intake. # @PURPOSE: Parse supported Superset URLs and recover canonical dataset/dashboard references for review-session intake.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient] # @RELATION: [DEPENDS_ON] ->[Environment]
# @PRE: constructor receives a configured environment with a usable Superset base URL. # @PRE: constructor receives a configured environment with a usable Superset base URL.
# @POST: extractor instance is ready to parse links against one Superset environment. # @POST: extractor instance is ready to parse links against one Superset environment.
# @SIDE_EFFECT: downstream parse operations may call Superset APIs through SupersetClient. # @SIDE_EFFECT: downstream parse operations may call Superset APIs through SupersetClient.
@@ -56,15 +60,18 @@ class SupersetContextExtractor:
# [DEF:SupersetContextExtractor.__init__:Function] # [DEF:SupersetContextExtractor.__init__:Function]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @PURPOSE: Bind extractor to one Superset environment and client instance. # @PURPOSE: Bind extractor to one Superset environment and client instance.
def __init__(self, environment: Environment, client: Optional[SupersetClient] = None) -> None: def __init__(
self, environment: Environment, client: Optional[SupersetClient] = None
) -> None:
self.environment = environment self.environment = environment
self.client = client or SupersetClient(environment) self.client = client or SupersetClient(environment)
# [/DEF:SupersetContextExtractor.__init__:Function] # [/DEF:SupersetContextExtractor.__init__:Function]
# [DEF:SupersetContextExtractor.parse_superset_link:Function] # [DEF:SupersetContextExtractor.parse_superset_link:Function]
# @COMPLEXITY: 4 # @COMPLEXITY: 4
# @PURPOSE: Extract candidate identifiers and query state from supported Superset URLs. # @PURPOSE: Extract candidate identifiers and query state from supported Superset URLs.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient] # @RELATION: [CALLS] ->[SupersetClient.get_dashboard_detail]
# @PRE: link is a non-empty Superset URL compatible with the configured environment. # @PRE: link is a non-empty Superset URL compatible with the configured environment.
# @POST: returns resolved dataset/dashboard context, preserving explicit partial-recovery state if some identifiers cannot be confirmed. # @POST: returns resolved dataset/dashboard context, preserving explicit partial-recovery state if some identifiers cannot be confirmed.
# @SIDE_EFFECT: may issue Superset API reads to resolve dataset references from dashboard or chart URLs. # @SIDE_EFFECT: may issue Superset API reads to resolve dataset references from dashboard or chart URLs.
@@ -115,12 +122,16 @@ class SupersetContextExtractor:
resource_type = "dashboard" resource_type = "dashboard"
partial_recovery = True partial_recovery = True
dataset_ref = f"dashboard_permalink:{dashboard_permalink_key}" dataset_ref = f"dashboard_permalink:{dashboard_permalink_key}"
unresolved_references.append("dashboard_permalink_dataset_binding_unresolved") unresolved_references.append(
"dashboard_permalink_dataset_binding_unresolved"
)
logger.reason( logger.reason(
"Resolving dashboard permalink state from Superset", "Resolving dashboard permalink state from Superset",
extra={"permalink_key": dashboard_permalink_key}, extra={"permalink_key": dashboard_permalink_key},
) )
permalink_payload = self.client.get_dashboard_permalink_state(dashboard_permalink_key) permalink_payload = self.client.get_dashboard_permalink_state(
dashboard_permalink_key
)
permalink_state = ( permalink_state = (
permalink_payload.get("state", permalink_payload) permalink_payload.get("state", permalink_payload)
if isinstance(permalink_payload, dict) if isinstance(permalink_payload, dict)
@@ -137,8 +148,12 @@ class SupersetContextExtractor:
"Extracted native filters from permalink dataMask", "Extracted native filters from permalink dataMask",
extra={"filter_count": len(data_mask)}, extra={"filter_count": len(data_mask)},
) )
resolved_dashboard_id = self._extract_dashboard_id_from_state(permalink_state) resolved_dashboard_id = self._extract_dashboard_id_from_state(
resolved_chart_id = self._extract_chart_id_from_state(permalink_state) permalink_state
)
resolved_chart_id = self._extract_chart_id_from_state(
permalink_state
)
if resolved_dashboard_id is not None: if resolved_dashboard_id is not None:
dashboard_id = resolved_dashboard_id dashboard_id = resolved_dashboard_id
unresolved_references = [ unresolved_references = [
@@ -146,10 +161,12 @@ class SupersetContextExtractor:
for item in unresolved_references for item in unresolved_references
if item != "dashboard_permalink_dataset_binding_unresolved" if item != "dashboard_permalink_dataset_binding_unresolved"
] ]
dataset_id, unresolved_references = self._recover_dataset_binding_from_dashboard( dataset_id, unresolved_references = (
dashboard_id=dashboard_id, self._recover_dataset_binding_from_dashboard(
dataset_ref=dataset_ref, dashboard_id=dashboard_id,
unresolved_references=unresolved_references, dataset_ref=dataset_ref,
unresolved_references=unresolved_references,
)
) )
if dataset_id is not None: if dataset_id is not None:
dataset_ref = f"dataset:{dataset_id}" dataset_ref = f"dataset:{dataset_id}"
@@ -162,19 +179,30 @@ class SupersetContextExtractor:
] ]
try: try:
chart_payload = self.client.get_chart(chart_id) chart_payload = self.client.get_chart(chart_id)
chart_data = chart_payload.get("result", chart_payload) if isinstance(chart_payload, dict) else {} chart_data = (
chart_payload.get("result", chart_payload)
if isinstance(chart_payload, dict)
else {}
)
datasource_id = chart_data.get("datasource_id") datasource_id = chart_data.get("datasource_id")
if datasource_id is not None: if datasource_id is not None:
dataset_id = int(datasource_id) dataset_id = int(datasource_id)
dataset_ref = f"dataset:{dataset_id}" dataset_ref = f"dataset:{dataset_id}"
logger.reason( logger.reason(
"Recovered dataset reference from permalink chart context", "Recovered dataset reference from permalink chart context",
extra={"chart_id": chart_id, "dataset_id": dataset_id}, extra={
"chart_id": chart_id,
"dataset_id": dataset_id,
},
) )
else: else:
unresolved_references.append("chart_dataset_binding_unresolved") unresolved_references.append(
"chart_dataset_binding_unresolved"
)
except Exception as exc: except Exception as exc:
unresolved_references.append("chart_dataset_binding_unresolved") unresolved_references.append(
"chart_dataset_binding_unresolved"
)
logger.explore( logger.explore(
"Chart lookup failed during permalink recovery", "Chart lookup failed during permalink recovery",
extra={"chart_id": chart_id, "error": str(exc)}, extra={"chart_id": chart_id, "error": str(exc)},
@@ -186,19 +214,25 @@ class SupersetContextExtractor:
) )
elif dashboard_id is not None or dashboard_ref is not None: elif dashboard_id is not None or dashboard_ref is not None:
resource_type = "dashboard" resource_type = "dashboard"
resolved_dashboard_ref = dashboard_id if dashboard_id is not None else dashboard_ref resolved_dashboard_ref = (
dashboard_id if dashboard_id is not None else dashboard_ref
)
if resolved_dashboard_ref is None:
raise ValueError("Dashboard reference could not be resolved")
logger.reason( logger.reason(
"Resolving dashboard-bound dataset from Superset", "Resolving dashboard-bound dataset from Superset",
extra={"dashboard_ref": resolved_dashboard_ref}, extra={"dashboard_ref": resolved_dashboard_ref},
) )
# Resolve dashboard detail first — handles both numeric ID and slug, # Resolve dashboard detail first — handles both numeric ID and slug,
# ensuring dashboard_id is available for the native_filters_key fetch below. # ensuring dashboard_id is available for the native_filters_key fetch below.
dashboard_detail = self.client.get_dashboard_detail(resolved_dashboard_ref) dashboard_detail = self.client.get_dashboard_detail(
resolved_dashboard_ref
)
resolved_dashboard_id = dashboard_detail.get("id") resolved_dashboard_id = dashboard_detail.get("id")
if resolved_dashboard_id is not None: if resolved_dashboard_id is not None:
dashboard_id = int(resolved_dashboard_id) dashboard_id = int(resolved_dashboard_id)
# Check for native_filters_key in query params and fetch filter state. # Check for native_filters_key in query params and fetch filter state.
# This must run AFTER dashboard_id is resolved from slug above. # This must run AFTER dashboard_id is resolved from slug above.
native_filters_key = query_params.get("native_filters_key", [None])[0] native_filters_key = query_params.get("native_filters_key", [None])[0]
@@ -206,7 +240,10 @@ class SupersetContextExtractor:
try: try:
logger.reason( logger.reason(
"Fetching native filter state from Superset", "Fetching native filter state from Superset",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key}, extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
},
) )
extracted = self.client.extract_native_filters_from_key( extracted = self.client.extract_native_filters_from_key(
dashboard_id, native_filters_key dashboard_id, native_filters_key
@@ -221,14 +258,21 @@ class SupersetContextExtractor:
else: else:
logger.explore( logger.explore(
"Native filter state returned empty dataMask", "Native filter state returned empty dataMask",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key}, extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
},
) )
except Exception as exc: except Exception as exc:
logger.explore( logger.explore(
"Failed to fetch native filter state from Superset", "Failed to fetch native filter state from Superset",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key, "error": str(exc)}, extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
"error": str(exc),
},
) )
datasets = dashboard_detail.get("datasets") or [] datasets = dashboard_detail.get("datasets") or []
if datasets: if datasets:
first_dataset = datasets[0] first_dataset = datasets[0]
@@ -280,7 +324,10 @@ class SupersetContextExtractor:
) )
logger.reason( logger.reason(
"Canonicalized dataset reference from dataset detail", "Canonicalized dataset reference from dataset detail",
extra={"dataset_ref": dataset_ref, "dataset_id": dataset_id}, extra={
"dataset_ref": dataset_ref,
"dataset_id": dataset_id,
},
) )
except Exception as exc: except Exception as exc:
partial_recovery = True partial_recovery = True
@@ -316,17 +363,20 @@ class SupersetContextExtractor:
}, },
) )
return result return result
# [/DEF:SupersetContextExtractor.parse_superset_link:Function] # [/DEF:SupersetContextExtractor.parse_superset_link:Function]
# [DEF:SupersetContextExtractor.recover_imported_filters:Function] # [DEF:SupersetContextExtractor.recover_imported_filters:Function]
# @COMPLEXITY: 4 # @COMPLEXITY: 4
# @PURPOSE: Build imported filter entries from URL state and Superset-side saved context. # @PURPOSE: Build imported filter entries from URL state and Superset-side saved context.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient] # @RELATION: [CALLS] ->[SupersetClient.get_dashboard]
# @PRE: parsed_context comes from a successful Superset link parse for one environment. # @PRE: parsed_context comes from a successful Superset link parse for one environment.
# @POST: returns explicit recovered and partial filter entries with preserved provenance and confirmation requirements. # @POST: returns explicit recovered and partial filter entries with preserved provenance and confirmation requirements.
# @SIDE_EFFECT: may issue Superset reads for dashboard metadata enrichment. # @SIDE_EFFECT: may issue Superset reads for dashboard metadata enrichment.
# @DATA_CONTRACT: Input[SupersetParsedContext] -> Output[List[Dict[str,Any]]] # @DATA_CONTRACT: Input[SupersetParsedContext] -> Output[List[Dict[str,Any]]]
def recover_imported_filters(self, parsed_context: SupersetParsedContext) -> List[Dict[str, Any]]: def recover_imported_filters(
self, parsed_context: SupersetParsedContext
) -> List[Dict[str, Any]]:
with belief_scope("SupersetContextExtractor.recover_imported_filters"): with belief_scope("SupersetContextExtractor.recover_imported_filters"):
recovered_filters: List[Dict[str, Any]] = [] recovered_filters: List[Dict[str, Any]] = []
seen_filter_keys: Set[str] = set() seen_filter_keys: Set[str] = set()
@@ -349,22 +399,46 @@ class SupersetContextExtractor:
return return
existing = recovered_filters[existing_index] existing = recovered_filters[existing_index]
if existing.get("display_name") in {None, "", existing.get("filter_name")} and candidate.get("display_name"): if existing.get("display_name") in {
None,
"",
existing.get("filter_name"),
} and candidate.get("display_name"):
existing["display_name"] = candidate["display_name"] existing["display_name"] = candidate["display_name"]
if existing.get("raw_value") is None and candidate.get("raw_value") is not None: if (
existing.get("raw_value") is None
and candidate.get("raw_value") is not None
):
existing["raw_value"] = candidate["raw_value"] existing["raw_value"] = candidate["raw_value"]
existing["confidence_state"] = candidate.get("confidence_state", "imported") existing["confidence_state"] = candidate.get(
existing["requires_confirmation"] = candidate.get("requires_confirmation", False) "confidence_state", "imported"
existing["recovery_status"] = candidate.get("recovery_status", "recovered") )
existing["requires_confirmation"] = candidate.get(
"requires_confirmation", False
)
existing["recovery_status"] = candidate.get(
"recovery_status", "recovered"
)
existing["source"] = candidate.get("source", existing.get("source")) existing["source"] = candidate.get("source", existing.get("source"))
if existing.get("normalized_value") is None and candidate.get("normalized_value") is not None: if (
existing["normalized_value"] = deepcopy(candidate["normalized_value"]) existing.get("normalized_value") is None
if existing.get("notes") and candidate.get("notes") and candidate["notes"] not in existing["notes"]: and candidate.get("normalized_value") is not None
existing["notes"] = f'{existing["notes"]}; {candidate["notes"]}' ):
existing["normalized_value"] = deepcopy(
candidate["normalized_value"]
)
if (
existing.get("notes")
and candidate.get("notes")
and candidate["notes"] not in existing["notes"]
):
existing["notes"] = f"{existing['notes']}; {candidate['notes']}"
if parsed_context.dashboard_id is not None: if parsed_context.dashboard_id is not None:
try: try:
dashboard_payload = self.client.get_dashboard(parsed_context.dashboard_id) dashboard_payload = self.client.get_dashboard(
parsed_context.dashboard_id
)
dashboard_record = ( dashboard_record = (
dashboard_payload.get("result", dashboard_payload) dashboard_payload.get("result", dashboard_payload)
if isinstance(dashboard_payload, dict) if isinstance(dashboard_payload, dict)
@@ -376,7 +450,9 @@ class SupersetContextExtractor:
if not isinstance(json_metadata, dict): if not isinstance(json_metadata, dict):
json_metadata = {} json_metadata = {}
native_filter_configuration = json_metadata.get("native_filter_configuration") or [] native_filter_configuration = (
json_metadata.get("native_filter_configuration") or []
)
default_filters = json_metadata.get("default_filters") or {} default_filters = json_metadata.get("default_filters") or {}
if isinstance(default_filters, str) and default_filters.strip(): if isinstance(default_filters, str) and default_filters.strip():
try: try:
@@ -400,7 +476,9 @@ class SupersetContextExtractor:
if not filter_name: if not filter_name:
continue continue
display_name = item.get("label") or item.get("name") or filter_name display_name = (
item.get("label") or item.get("name") or filter_name
)
filter_id = str(item.get("id") or "").strip() filter_id = str(item.get("id") or "").strip()
default_value = None default_value = None
@@ -413,7 +491,9 @@ class SupersetContextExtractor:
"display_name": display_name, "display_name": display_name,
"raw_value": default_value, "raw_value": default_value,
"source": "superset_native", "source": "superset_native",
"recovery_status": "recovered" if default_value is not None else "partial", "recovery_status": "recovered"
if default_value is not None
else "partial",
"requires_confirmation": default_value is None, "requires_confirmation": default_value is None,
"notes": "Recovered from Superset dashboard native filter configuration", "notes": "Recovered from Superset dashboard native filter configuration",
}, },
@@ -445,7 +525,9 @@ class SupersetContextExtractor:
default_source="superset_url", default_source="superset_url",
default_note="Recovered from Superset URL state", default_note="Recovered from Superset URL state",
) )
metadata_match = metadata_filters_by_id.get(normalized["filter_name"].strip().lower()) metadata_match = metadata_filters_by_id.get(
normalized["filter_name"].strip().lower()
)
if metadata_match is not None: if metadata_match is not None:
normalized["filter_name"] = metadata_match["filter_name"] normalized["filter_name"] = metadata_match["filter_name"]
normalized["display_name"] = metadata_match["display_name"] normalized["display_name"] = metadata_match["display_name"]
@@ -517,6 +599,7 @@ class SupersetContextExtractor:
}, },
) )
return recovered_filters return recovered_filters
# [/DEF:SupersetContextExtractor.recover_imported_filters:Function] # [/DEF:SupersetContextExtractor.recover_imported_filters:Function]
# [DEF:SupersetContextExtractor.discover_template_variables:Function] # [DEF:SupersetContextExtractor.discover_template_variables:Function]
@@ -527,12 +610,16 @@ class SupersetContextExtractor:
# @POST: returns deduplicated explicit variable records without executing Jinja or fabricating runtime values. # @POST: returns deduplicated explicit variable records without executing Jinja or fabricating runtime values.
# @SIDE_EFFECT: none. # @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[dataset_payload:Dict[str,Any]] -> Output[List[Dict[str,Any]]] # @DATA_CONTRACT: Input[dataset_payload:Dict[str,Any]] -> Output[List[Dict[str,Any]]]
def discover_template_variables(self, dataset_payload: Dict[str, Any]) -> List[Dict[str, Any]]: def discover_template_variables(
self, dataset_payload: Dict[str, Any]
) -> List[Dict[str, Any]]:
with belief_scope("SupersetContextExtractor.discover_template_variables"): with belief_scope("SupersetContextExtractor.discover_template_variables"):
discovered: List[Dict[str, Any]] = [] discovered: List[Dict[str, Any]] = []
seen_variable_names: Set[str] = set() seen_variable_names: Set[str] = set()
for expression_source in self._collect_query_bearing_expressions(dataset_payload): for expression_source in self._collect_query_bearing_expressions(
dataset_payload
):
for filter_match in re.finditer( for filter_match in re.finditer(
r"filter_values\(\s*['\"]([^'\"]+)['\"]\s*\)", r"filter_values\(\s*['\"]([^'\"]+)['\"]\s*\)",
expression_source, expression_source,
@@ -570,11 +657,16 @@ class SupersetContextExtractor:
default_value=self._normalize_default_literal(default_literal), default_value=self._normalize_default_literal(default_literal),
) )
for jinja_match in re.finditer(r"\{\{\s*(.*?)\s*\}\}", expression_source, flags=re.DOTALL): for jinja_match in re.finditer(
r"\{\{\s*(.*?)\s*\}\}", expression_source, flags=re.DOTALL
):
expression = str(jinja_match.group(1) or "").strip() expression = str(jinja_match.group(1) or "").strip()
if not expression: if not expression:
continue continue
if any(token in expression for token in ("filter_values(", "url_param(", "get_filters(")): if any(
token in expression
for token in ("filter_values(", "url_param(", "get_filters(")
):
continue continue
variable_name = self._extract_primary_jinja_identifier(expression) variable_name = self._extract_primary_jinja_identifier(expression)
if not variable_name: if not variable_name:
@@ -584,7 +676,9 @@ class SupersetContextExtractor:
seen_variable_names=seen_variable_names, seen_variable_names=seen_variable_names,
variable_name=variable_name, variable_name=variable_name,
expression_source=expression_source, expression_source=expression_source,
variable_kind="derived" if "." in expression or "|" in expression else "parameter", variable_kind="derived"
if "." in expression or "|" in expression
else "parameter",
is_required=True, is_required=True,
default_value=None, default_value=None,
) )
@@ -598,12 +692,15 @@ class SupersetContextExtractor:
}, },
) )
return discovered return discovered
# [/DEF:SupersetContextExtractor.discover_template_variables:Function] # [/DEF:SupersetContextExtractor.discover_template_variables:Function]
# [DEF:SupersetContextExtractor.build_recovery_summary:Function] # [DEF:SupersetContextExtractor.build_recovery_summary:Function]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @PURPOSE: Summarize recovered, partial, and unresolved context for session state and UX. # @PURPOSE: Summarize recovered, partial, and unresolved context for session state and UX.
def build_recovery_summary(self, parsed_context: SupersetParsedContext) -> Dict[str, Any]: def build_recovery_summary(
self, parsed_context: SupersetParsedContext
) -> Dict[str, Any]:
return { return {
"dataset_ref": parsed_context.dataset_ref, "dataset_ref": parsed_context.dataset_ref,
"dataset_id": parsed_context.dataset_id, "dataset_id": parsed_context.dataset_id,
@@ -613,12 +710,15 @@ class SupersetContextExtractor:
"unresolved_references": list(parsed_context.unresolved_references), "unresolved_references": list(parsed_context.unresolved_references),
"imported_filter_count": len(parsed_context.imported_filters), "imported_filter_count": len(parsed_context.imported_filters),
} }
# [/DEF:SupersetContextExtractor.build_recovery_summary:Function] # [/DEF:SupersetContextExtractor.build_recovery_summary:Function]
# [DEF:SupersetContextExtractor._extract_numeric_identifier:Function] # [DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @PURPOSE: Extract a numeric identifier from a REST-like Superset URL path. # @PURPOSE: Extract a numeric identifier from a REST-like Superset URL path.
def _extract_numeric_identifier(self, path_parts: List[str], resource_name: str) -> Optional[int]: def _extract_numeric_identifier(
self, path_parts: List[str], resource_name: str
) -> Optional[int]:
if resource_name not in path_parts: if resource_name not in path_parts:
return None return None
try: try:
@@ -633,6 +733,7 @@ class SupersetContextExtractor:
if not candidate.isdigit(): if not candidate.isdigit():
return None return None
return int(candidate) return int(candidate)
# [/DEF:SupersetContextExtractor._extract_numeric_identifier:Function] # [/DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_reference:Function] # [DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
@@ -653,6 +754,7 @@ class SupersetContextExtractor:
if not candidate or candidate == "p": if not candidate or candidate == "p":
return None return None
return candidate return candidate
# [/DEF:SupersetContextExtractor._extract_dashboard_reference:Function] # [/DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function] # [DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
@@ -674,6 +776,7 @@ class SupersetContextExtractor:
if permalink_marker != "p" or not permalink_key: if permalink_marker != "p" or not permalink_key:
return None return None
return permalink_key return permalink_key
# [/DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function] # [/DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function] # [DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
@@ -684,6 +787,7 @@ class SupersetContextExtractor:
payload=state, payload=state,
candidate_keys={"dashboardId", "dashboard_id", "dashboard_id_value"}, candidate_keys={"dashboardId", "dashboard_id", "dashboard_id_value"},
) )
# [/DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function] # [/DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
# [DEF:SupersetContextExtractor._extract_chart_id_from_state:Function] # [DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
@@ -694,12 +798,16 @@ class SupersetContextExtractor:
payload=state, payload=state,
candidate_keys={"slice_id", "sliceId", "chartId", "chart_id"}, candidate_keys={"slice_id", "sliceId", "chartId", "chart_id"},
) )
# [/DEF:SupersetContextExtractor._extract_chart_id_from_state:Function] # [/DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
# [DEF:SupersetContextExtractor._search_nested_numeric_key:Function] # [DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Recursively search nested dict/list payloads for the first numeric value under a candidate key set. # @PURPOSE: Recursively search nested dict/list payloads for the first numeric value under a candidate key set.
def _search_nested_numeric_key(self, payload: Any, candidate_keys: Set[str]) -> Optional[int]: # @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor.parse_superset_link]
def _search_nested_numeric_key(
self, payload: Any, candidate_keys: Set[str]
) -> Optional[int]:
if isinstance(payload, dict): if isinstance(payload, dict):
for key, value in payload.items(): for key, value in payload.items():
if key in candidate_keys: if key in candidate_keys:
@@ -717,11 +825,13 @@ class SupersetContextExtractor:
if found is not None: if found is not None:
return found return found
return None return None
# [/DEF:SupersetContextExtractor._search_nested_numeric_key:Function] # [/DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# [DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function] # [DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Recover a dataset binding from resolved dashboard context while preserving explicit unresolved markers. # @PURPOSE: Recover a dataset binding from resolved dashboard context while preserving explicit unresolved markers.
# @RELATION: [CALLS] ->[SupersetClient.get_dashboard_detail]
def _recover_dataset_binding_from_dashboard( def _recover_dataset_binding_from_dashboard(
self, self,
dashboard_id: int, dashboard_id: int,
@@ -744,7 +854,10 @@ class SupersetContextExtractor:
"dataset_ref": dataset_ref, "dataset_ref": dataset_ref,
}, },
) )
if len(datasets) > 1 and "multiple_dashboard_datasets" not in unresolved_references: if (
len(datasets) > 1
and "multiple_dashboard_datasets" not in unresolved_references
):
unresolved_references.append("multiple_dashboard_datasets") unresolved_references.append("multiple_dashboard_datasets")
return resolved_dataset, unresolved_references return resolved_dataset, unresolved_references
if "dashboard_dataset_id_missing" not in unresolved_references: if "dashboard_dataset_id_missing" not in unresolved_references:
@@ -754,6 +867,7 @@ class SupersetContextExtractor:
if "dashboard_dataset_binding_missing" not in unresolved_references: if "dashboard_dataset_binding_missing" not in unresolved_references:
unresolved_references.append("dashboard_dataset_binding_missing") unresolved_references.append("dashboard_dataset_binding_missing")
return None, unresolved_references return None, unresolved_references
# [/DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function] # [/DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# [DEF:SupersetContextExtractor._decode_query_state:Function] # [DEF:SupersetContextExtractor._decode_query_state:Function]
@@ -777,12 +891,15 @@ class SupersetContextExtractor:
) )
query_state[key] = decoded_value query_state[key] = decoded_value
return query_state return query_state
# [/DEF:SupersetContextExtractor._decode_query_state:Function] # [/DEF:SupersetContextExtractor._decode_query_state:Function]
# [DEF:SupersetContextExtractor._extract_imported_filters:Function] # [DEF:SupersetContextExtractor._extract_imported_filters:Function]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @PURPOSE: Normalize imported filters from decoded query state without fabricating missing values. # @PURPOSE: Normalize imported filters from decoded query state without fabricating missing values.
def _extract_imported_filters(self, query_state: Dict[str, Any]) -> List[Dict[str, Any]]: def _extract_imported_filters(
self, query_state: Dict[str, Any]
) -> List[Dict[str, Any]]:
imported_filters: List[Dict[str, Any]] = [] imported_filters: List[Dict[str, Any]] = []
native_filters_payload = query_state.get("native_filters") native_filters_payload = query_state.get("native_filters")
@@ -800,7 +917,8 @@ class SupersetContextExtractor:
if item.get("column") and ("value" in item or "val" in item): if item.get("column") and ("value" in item or "val" in item):
direct_clause = { direct_clause = {
"col": item.get("column"), "col": item.get("column"),
"op": item.get("op") or ("IN" if isinstance(item.get("value"), list) else "=="), "op": item.get("op")
or ("IN" if isinstance(item.get("value"), list) else "=="),
"val": item.get("val", item.get("value")), "val": item.get("val", item.get("value")),
} }
imported_filters.append( imported_filters.append(
@@ -809,7 +927,9 @@ class SupersetContextExtractor:
"raw_value": item.get("value"), "raw_value": item.get("value"),
"display_name": item.get("label") or item.get("name"), "display_name": item.get("label") or item.get("name"),
"normalized_value": { "normalized_value": {
"filter_clauses": [direct_clause] if isinstance(direct_clause, dict) else [], "filter_clauses": [direct_clause]
if isinstance(direct_clause, dict)
else [],
"extra_form_data": {}, "extra_form_data": {},
"value_origin": "native_filters", "value_origin": "native_filters",
}, },
@@ -834,7 +954,9 @@ class SupersetContextExtractor:
raw_value = None raw_value = None
normalized_value = { normalized_value = {
"filter_clauses": [], "filter_clauses": [],
"extra_form_data": deepcopy(extra_form_data) if isinstance(extra_form_data, dict) else {}, "extra_form_data": deepcopy(extra_form_data)
if isinstance(extra_form_data, dict)
else {},
"value_origin": "unresolved", "value_origin": "unresolved",
} }
@@ -868,10 +990,17 @@ class SupersetContextExtractor:
# If still no value, try extraFormData directly for time_range, time_grain, etc. # If still no value, try extraFormData directly for time_range, time_grain, etc.
if raw_value is None and isinstance(extra_form_data, dict): if raw_value is None and isinstance(extra_form_data, dict):
# Common Superset filter fields # Common Superset filter fields
for field in ["time_range", "time_grain_sqla", "time_column", "granularity"]: for field in [
"time_range",
"time_grain_sqla",
"time_column",
"granularity",
]:
if field in extra_form_data: if field in extra_form_data:
raw_value = extra_form_data[field] raw_value = extra_form_data[field]
normalized_value["value_origin"] = f"extra_form_data.{field}" normalized_value["value_origin"] = (
f"extra_form_data.{field}"
)
break break
imported_filters.append( imported_filters.append(
@@ -881,7 +1010,9 @@ class SupersetContextExtractor:
"display_name": display_name, "display_name": display_name,
"normalized_value": normalized_value, "normalized_value": normalized_value,
"source": "superset_permalink", "source": "superset_permalink",
"recovery_status": "recovered" if raw_value is not None else "partial", "recovery_status": "recovered"
if raw_value is not None
else "partial",
"requires_confirmation": raw_value is None, "requires_confirmation": raw_value is None,
"notes": "Recovered from Superset dashboard permalink state", "notes": "Recovered from Superset dashboard permalink state",
} }
@@ -901,7 +1032,9 @@ class SupersetContextExtractor:
raw_value = None raw_value = None
normalized_value = { normalized_value = {
"filter_clauses": [], "filter_clauses": [],
"extra_form_data": deepcopy(extra_form_data) if isinstance(extra_form_data, dict) else {}, "extra_form_data": deepcopy(extra_form_data)
if isinstance(extra_form_data, dict)
else {},
"value_origin": "unresolved", "value_origin": "unresolved",
} }
@@ -935,10 +1068,17 @@ class SupersetContextExtractor:
# If still no value, try extraFormData directly for time_range, time_grain, etc. # If still no value, try extraFormData directly for time_range, time_grain, etc.
if raw_value is None and isinstance(extra_form_data, dict): if raw_value is None and isinstance(extra_form_data, dict):
# Common Superset filter fields # Common Superset filter fields
for field in ["time_range", "time_grain_sqla", "time_column", "granularity"]: for field in [
"time_range",
"time_grain_sqla",
"time_column",
"granularity",
]:
if field in extra_form_data: if field in extra_form_data:
raw_value = extra_form_data[field] raw_value = extra_form_data[field]
normalized_value["value_origin"] = f"extra_form_data.{field}" normalized_value["value_origin"] = (
f"extra_form_data.{field}"
)
break break
imported_filters.append( imported_filters.append(
@@ -948,7 +1088,9 @@ class SupersetContextExtractor:
"display_name": display_name, "display_name": display_name,
"normalized_value": normalized_value, "normalized_value": normalized_value,
"source": "superset_native_filters_key", "source": "superset_native_filters_key",
"recovery_status": "recovered" if raw_value is not None else "partial", "recovery_status": "recovered"
if raw_value is not None
else "partial",
"requires_confirmation": raw_value is None, "requires_confirmation": raw_value is None,
"notes": "Recovered from Superset native_filters_key state", "notes": "Recovered from Superset native_filters_key state",
} }
@@ -960,7 +1102,9 @@ class SupersetContextExtractor:
for index, item in enumerate(extra_filters): for index, item in enumerate(extra_filters):
if not isinstance(item, dict): if not isinstance(item, dict):
continue continue
filter_name = item.get("col") or item.get("column") or f"extra_filter_{index}" filter_name = (
item.get("col") or item.get("column") or f"extra_filter_{index}"
)
imported_filters.append( imported_filters.append(
{ {
"filter_name": str(filter_name), "filter_name": str(filter_name),
@@ -981,6 +1125,7 @@ class SupersetContextExtractor:
) )
return imported_filters return imported_filters
# [/DEF:SupersetContextExtractor._extract_imported_filters:Function] # [/DEF:SupersetContextExtractor._extract_imported_filters:Function]
# [DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function] # [DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function]
@@ -996,15 +1141,24 @@ class SupersetContextExtractor:
if "raw_value" not in payload and "value" in payload: if "raw_value" not in payload and "value" in payload:
raw_value = payload.get("value") raw_value = payload.get("value")
recovery_status = str( recovery_status = (
payload.get("recovery_status") str(
or ("recovered" if raw_value is not None else "partial") payload.get("recovery_status")
).strip().lower() or ("recovered" if raw_value is not None else "partial")
)
.strip()
.lower()
)
requires_confirmation = bool( requires_confirmation = bool(
payload.get("requires_confirmation", raw_value is None or recovery_status != "recovered") payload.get(
"requires_confirmation",
raw_value is None or recovery_status != "recovered",
)
) )
return { return {
"filter_name": str(payload.get("filter_name") or "unresolved_filter").strip(), "filter_name": str(
payload.get("filter_name") or "unresolved_filter"
).strip(),
"display_name": payload.get("display_name"), "display_name": payload.get("display_name"),
"raw_value": raw_value, "raw_value": raw_value,
"normalized_value": payload.get("normalized_value"), "normalized_value": payload.get("normalized_value"),
@@ -1014,13 +1168,16 @@ class SupersetContextExtractor:
"recovery_status": recovery_status, "recovery_status": recovery_status,
"notes": str(payload.get("notes") or default_note), "notes": str(payload.get("notes") or default_note),
} }
# [/DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function] # [/DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function]
# [DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function] # [DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function]
# @COMPLEXITY: 3 # @COMPLEXITY: 3
# @PURPOSE: Collect SQL and expression-bearing dataset fields for deterministic template-variable discovery. # @PURPOSE: Collect SQL and expression-bearing dataset fields for deterministic template-variable discovery.
# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor.discover_template_variables] # @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor.discover_template_variables]
def _collect_query_bearing_expressions(self, dataset_payload: Dict[str, Any]) -> List[str]: def _collect_query_bearing_expressions(
self, dataset_payload: Dict[str, Any]
) -> List[str]:
expressions: List[str] = [] expressions: List[str] = []
def append_expression(candidate: Any) -> None: def append_expression(candidate: Any) -> None:
@@ -1055,6 +1212,7 @@ class SupersetContextExtractor:
append_expression(column.get("expression")) append_expression(column.get("expression"))
return expressions return expressions
# [/DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function] # [/DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function]
# [DEF:SupersetContextExtractor._append_template_variable:Function] # [DEF:SupersetContextExtractor._append_template_variable:Function]
@@ -1087,6 +1245,7 @@ class SupersetContextExtractor:
"mapping_status": "unmapped", "mapping_status": "unmapped",
} }
) )
# [/DEF:SupersetContextExtractor._append_template_variable:Function] # [/DEF:SupersetContextExtractor._append_template_variable:Function]
# [DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function] # [DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function]
@@ -1100,6 +1259,7 @@ class SupersetContextExtractor:
if candidate in {"if", "else", "for", "set", "True", "False", "none", "None"}: if candidate in {"if", "else", "for", "set", "True", "False", "none", "None"}:
return None return None
return candidate return candidate
# [/DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function] # [/DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function]
# [DEF:SupersetContextExtractor._normalize_default_literal:Function] # [DEF:SupersetContextExtractor._normalize_default_literal:Function]
@@ -1110,9 +1270,8 @@ class SupersetContextExtractor:
if not normalized_literal: if not normalized_literal:
return None return None
if ( if (
(normalized_literal.startswith("'") and normalized_literal.endswith("'")) normalized_literal.startswith("'") and normalized_literal.endswith("'")
or (normalized_literal.startswith('"') and normalized_literal.endswith('"')) ) or (normalized_literal.startswith('"') and normalized_literal.endswith('"')):
):
return normalized_literal[1:-1] return normalized_literal[1:-1]
lowered = normalized_literal.lower() lowered = normalized_literal.lower()
if lowered in {"true", "false"}: if lowered in {"true", "false"}:
@@ -1126,7 +1285,10 @@ class SupersetContextExtractor:
return float(normalized_literal) return float(normalized_literal)
except ValueError: except ValueError:
return normalized_literal return normalized_literal
# [/DEF:SupersetContextExtractor._normalize_default_literal:Function] # [/DEF:SupersetContextExtractor._normalize_default_literal:Function]
# [/DEF:SupersetContextExtractor:Class] # [/DEF:SupersetContextExtractor:Class]
# [/DEF:SupersetContextExtractor:Module] # [/DEF:SupersetContextExtractor:Module]

View File

@@ -1,4 +1,4 @@
# [DEF:backend.src.models.filter_state:Module] # [DEF:FilterStateModels:Module]
# #
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @SEMANTICS: superset, native, filters, pydantic, models, dataclasses # @SEMANTICS: superset, native, filters, pydantic, models, dataclasses
@@ -148,4 +148,4 @@ class ExtraFormDataMerge(BaseModel):
# [/DEF:ExtraFormDataMerge:Model] # [/DEF:ExtraFormDataMerge:Model]
# [/DEF:backend.src.models.filter_state:Module] # [/DEF:FilterStateModels:Module]

View File

@@ -22,7 +22,7 @@ from dataclasses import dataclass, field
from datetime import datetime from datetime import datetime
import hashlib import hashlib
import json import json
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional, cast
from src.core.config_manager import ConfigManager from src.core.config_manager import ConfigManager
from src.core.logger import belief_scope, logger from src.core.logger import belief_scope, logger
@@ -72,6 +72,8 @@ from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
from src.services.dataset_review.event_logger import SessionEventPayload from src.services.dataset_review.event_logger import SessionEventPayload
# [/DEF:DatasetReviewOrchestrator.imports:Block] # [/DEF:DatasetReviewOrchestrator.imports:Block]
logger = cast(Any, logger)
# [DEF:StartSessionCommand:Class] # [DEF:StartSessionCommand:Class]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
@@ -82,6 +84,8 @@ class StartSessionCommand:
environment_id: str environment_id: str
source_kind: str source_kind: str
source_input: str source_input: str
# [/DEF:StartSessionCommand:Class] # [/DEF:StartSessionCommand:Class]
@@ -93,6 +97,8 @@ class StartSessionResult:
session: DatasetReviewSession session: DatasetReviewSession
parsed_context: Optional[SupersetParsedContext] = None parsed_context: Optional[SupersetParsedContext] = None
findings: List[ValidationFinding] = field(default_factory=list) findings: List[ValidationFinding] = field(default_factory=list)
# [/DEF:StartSessionResult:Class] # [/DEF:StartSessionResult:Class]
@@ -103,6 +109,8 @@ class StartSessionResult:
class PreparePreviewCommand: class PreparePreviewCommand:
user: User user: User
session_id: str session_id: str
# [/DEF:PreparePreviewCommand:Class] # [/DEF:PreparePreviewCommand:Class]
@@ -114,6 +122,8 @@ class PreparePreviewResult:
session: DatasetReviewSession session: DatasetReviewSession
preview: CompiledPreview preview: CompiledPreview
blocked_reasons: List[str] = field(default_factory=list) blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:PreparePreviewResult:Class] # [/DEF:PreparePreviewResult:Class]
@@ -124,6 +134,8 @@ class PreparePreviewResult:
class LaunchDatasetCommand: class LaunchDatasetCommand:
user: User user: User
session_id: str session_id: str
# [/DEF:LaunchDatasetCommand:Class] # [/DEF:LaunchDatasetCommand:Class]
@@ -135,6 +147,8 @@ class LaunchDatasetResult:
session: DatasetReviewSession session: DatasetReviewSession
run_context: DatasetRunContext run_context: DatasetRunContext
blocked_reasons: List[str] = field(default_factory=list) blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:LaunchDatasetResult:Class] # [/DEF:LaunchDatasetResult:Class]
@@ -168,6 +182,7 @@ class DatasetReviewOrchestrator:
self.config_manager = config_manager self.config_manager = config_manager
self.task_manager = task_manager self.task_manager = task_manager
self.semantic_resolver = semantic_resolver or SemanticSourceResolver() self.semantic_resolver = semantic_resolver or SemanticSourceResolver()
# [/DEF:DatasetReviewOrchestrator.__init__:Function] # [/DEF:DatasetReviewOrchestrator.__init__:Function]
# [DEF:DatasetReviewOrchestrator.start_session:Function] # [DEF:DatasetReviewOrchestrator.start_session:Function]
@@ -188,7 +203,9 @@ class DatasetReviewOrchestrator:
normalized_environment_id = str(command.environment_id or "").strip() normalized_environment_id = str(command.environment_id or "").strip()
if not normalized_source_input: if not normalized_source_input:
logger.explore("Blocked dataset review session start due to empty source input") logger.explore(
"Blocked dataset review session start due to empty source input"
)
raise ValueError("source_input must be non-empty") raise ValueError("source_input must be non-empty")
if normalized_source_kind not in {"superset_link", "dataset_selection"}: if normalized_source_kind not in {"superset_link", "dataset_selection"}:
@@ -196,7 +213,9 @@ class DatasetReviewOrchestrator:
"Blocked dataset review session start due to unsupported source kind", "Blocked dataset review session start due to unsupported source kind",
extra={"source_kind": normalized_source_kind}, extra={"source_kind": normalized_source_kind},
) )
raise ValueError("source_kind must be 'superset_link' or 'dataset_selection'") raise ValueError(
"source_kind must be 'superset_link' or 'dataset_selection'"
)
environment = self.config_manager.get_environment(normalized_environment_id) environment = self.config_manager.get_environment(normalized_environment_id)
if environment is None: if environment is None:
@@ -234,11 +253,15 @@ class DatasetReviewOrchestrator:
if parsed_context.partial_recovery: if parsed_context.partial_recovery:
readiness_state = ReadinessState.RECOVERY_REQUIRED readiness_state = ReadinessState.RECOVERY_REQUIRED
recommended_action = RecommendedAction.REVIEW_DOCUMENTATION recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
findings.extend(self._build_partial_recovery_findings(parsed_context)) findings.extend(
self._build_partial_recovery_findings(parsed_context)
)
else: else:
readiness_state = ReadinessState.REVIEW_READY readiness_state = ReadinessState.REVIEW_READY
else: else:
dataset_ref, dataset_id = self._parse_dataset_selection(normalized_source_input) dataset_ref, dataset_id = self._parse_dataset_selection(
normalized_source_input
)
readiness_state = ReadinessState.REVIEW_READY readiness_state = ReadinessState.REVIEW_READY
current_phase = SessionPhase.REVIEW current_phase = SessionPhase.REVIEW
@@ -255,17 +278,19 @@ class DatasetReviewOrchestrator:
status=SessionStatus.ACTIVE, status=SessionStatus.ACTIVE,
current_phase=current_phase, current_phase=current_phase,
) )
persisted_session = self.repository.create_session(session) persisted_session = cast(Any, self.repository.create_session(session))
recovered_filters: List[ImportedFilter] = [] recovered_filters: List[ImportedFilter] = []
template_variables: List[TemplateVariable] = [] template_variables: List[TemplateVariable] = []
execution_mappings: List[ExecutionMapping] = [] execution_mappings: List[ExecutionMapping] = []
if normalized_source_kind == "superset_link" and parsed_context is not None: if normalized_source_kind == "superset_link" and parsed_context is not None:
recovered_filters, template_variables, execution_mappings, findings = self._build_recovery_bootstrap( recovered_filters, template_variables, execution_mappings, findings = (
environment=environment, self._build_recovery_bootstrap(
session=persisted_session, environment=environment,
parsed_context=parsed_context, session=persisted_session,
findings=findings, parsed_context=parsed_context,
findings=findings,
)
) )
profile = self._build_initial_profile( profile = self._build_initial_profile(
@@ -286,7 +311,9 @@ class DatasetReviewOrchestrator:
"dataset_ref": persisted_session.dataset_ref, "dataset_ref": persisted_session.dataset_ref,
"dataset_id": persisted_session.dataset_id, "dataset_id": persisted_session.dataset_id,
"dashboard_id": persisted_session.dashboard_id, "dashboard_id": persisted_session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery), "partial_recovery": bool(
parsed_context and parsed_context.partial_recovery
),
}, },
) )
) )
@@ -327,7 +354,10 @@ class DatasetReviewOrchestrator:
) )
logger.reason( logger.reason(
"Linked recovery task to started dataset review session", "Linked recovery task to started dataset review session",
extra={"session_id": persisted_session.session_id, "task_id": active_task_id}, extra={
"session_id": persisted_session.session_id,
"task_id": active_task_id,
},
) )
logger.reflect( logger.reflect(
@@ -347,6 +377,7 @@ class DatasetReviewOrchestrator:
parsed_context=parsed_context, parsed_context=parsed_context,
findings=findings, findings=findings,
) )
# [/DEF:DatasetReviewOrchestrator.start_session:Function] # [/DEF:DatasetReviewOrchestrator.start_session:Function]
# [DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function] # [DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
@@ -357,13 +388,20 @@ class DatasetReviewOrchestrator:
# @POST: returns preview artifact in pending, ready, failed, or stale state. # @POST: returns preview artifact in pending, ready, failed, or stale state.
# @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics. # @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics.
# @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult] # @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult]
def prepare_launch_preview(self, command: PreparePreviewCommand) -> PreparePreviewResult: def prepare_launch_preview(
self, command: PreparePreviewCommand
) -> PreparePreviewResult:
with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"): with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"):
session = self.repository.load_session_detail(command.session_id, command.user.id) session = self.repository.load_session_detail(
command.session_id, command.user.id
)
if session is None or session.user_id != command.user.id: if session is None or session.user_id != command.user.id:
logger.explore( logger.explore(
"Preview preparation rejected because owned session was not found", "Preview preparation rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id}, extra={
"session_id": command.session_id,
"user_id": command.user.id,
},
) )
raise ValueError("Session not found") raise ValueError("Session not found")
@@ -451,6 +489,7 @@ class DatasetReviewOrchestrator:
preview=persisted_preview, preview=persisted_preview,
blocked_reasons=[], blocked_reasons=[],
) )
# [/DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function] # [/DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# [DEF:DatasetReviewOrchestrator.launch_dataset:Function] # [DEF:DatasetReviewOrchestrator.launch_dataset:Function]
@@ -464,11 +503,16 @@ class DatasetReviewOrchestrator:
# @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs. # @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs.
def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult: def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult:
with belief_scope("DatasetReviewOrchestrator.launch_dataset"): with belief_scope("DatasetReviewOrchestrator.launch_dataset"):
session = self.repository.load_session_detail(command.session_id, command.user.id) session = self.repository.load_session_detail(
command.session_id, command.user.id
)
if session is None or session.user_id != command.user.id: if session is None or session.user_id != command.user.id:
logger.explore( logger.explore(
"Launch rejected because owned session was not found", "Launch rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id}, extra={
"session_id": command.session_id,
"user_id": command.user.id,
},
) )
raise ValueError("Session not found") raise ValueError("Session not found")
@@ -579,6 +623,7 @@ class DatasetReviewOrchestrator:
run_context=persisted_run_context, run_context=persisted_run_context,
blocked_reasons=[], blocked_reasons=[],
) )
# [/DEF:DatasetReviewOrchestrator.launch_dataset:Function] # [/DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function] # [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
@@ -601,6 +646,7 @@ class DatasetReviewOrchestrator:
return normalized, None return normalized, None
return normalized, None return normalized, None
# [/DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function] # [/DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# [DEF:DatasetReviewOrchestrator._build_initial_profile:Function] # [DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
@@ -613,7 +659,9 @@ class DatasetReviewOrchestrator:
parsed_context: Optional[SupersetParsedContext], parsed_context: Optional[SupersetParsedContext],
dataset_ref: str, dataset_ref: str,
) -> DatasetProfile: ) -> DatasetProfile:
dataset_name = dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset" dataset_name = (
dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset"
)
business_summary = ( business_summary = (
f"Review session initialized for {dataset_ref}." f"Review session initialized for {dataset_ref}."
if dataset_ref if dataset_ref
@@ -636,9 +684,12 @@ class DatasetReviewOrchestrator:
completeness_score=0.25, completeness_score=0.25,
confidence_state=confidence_state, confidence_state=confidence_state,
has_blocking_findings=False, has_blocking_findings=False,
has_warning_findings=bool(parsed_context and parsed_context.partial_recovery), has_warning_findings=bool(
parsed_context and parsed_context.partial_recovery
),
manual_summary_locked=False, manual_summary_locked=False,
) )
# [/DEF:DatasetReviewOrchestrator._build_initial_profile:Function] # [/DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
# [DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function] # [DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
@@ -670,36 +721,57 @@ class DatasetReviewOrchestrator:
) )
) )
return findings return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function] # [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function] # [DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# @COMPLEXITY: 4 # @COMPLEXITY: 4
# @PURPOSE: Recover and materialize initial imported filters, template variables, and draft execution mappings after session creation. # @PURPOSE: Recover and materialize initial imported filters, template variables, and draft execution mappings after session creation.
# @RELATION: [CALLS] ->[SupersetContextExtractor.recover_imported_filters]
# @RELATION: [CALLS] ->[SupersetContextExtractor.discover_template_variables]
# @PRE: session belongs to the just-created review aggregate and parsed_context was produced for the same environment scope.
# @POST: Returns bootstrap imported filters, template variables, execution mappings, and updated findings without persisting them directly.
# @SIDE_EFFECT: Performs Superset reads through the extractor and may append warning findings for incomplete recovery.
# @DATA_CONTRACT: Input[Environment, DatasetReviewSession, SupersetParsedContext, List[ValidationFinding]] -> Output[Tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]]
def _build_recovery_bootstrap( def _build_recovery_bootstrap(
self, self,
environment, environment,
session: DatasetReviewSession, session: DatasetReviewSession,
parsed_context: SupersetParsedContext, parsed_context: SupersetParsedContext,
findings: List[ValidationFinding], findings: List[ValidationFinding],
) -> tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]: ) -> tuple[
List[ImportedFilter],
List[TemplateVariable],
List[ExecutionMapping],
List[ValidationFinding],
]:
session_record = cast(Any, session)
extractor = SupersetContextExtractor(environment) extractor = SupersetContextExtractor(environment)
imported_filters_payload = extractor.recover_imported_filters(parsed_context) imported_filters_payload = extractor.recover_imported_filters(parsed_context)
if imported_filters_payload is None: if imported_filters_payload is None:
imported_filters_payload = [] imported_filters_payload = []
imported_filters = [ imported_filters = [
ImportedFilter( ImportedFilter(
session_id=session.session_id, session_id=session_record.session_id,
filter_name=str(item.get("filter_name") or f"imported_filter_{index}"), filter_name=str(item.get("filter_name") or f"imported_filter_{index}"),
display_name=item.get("display_name"), display_name=item.get("display_name"),
raw_value=item.get("raw_value"), raw_value=item.get("raw_value"),
normalized_value=item.get("normalized_value"), normalized_value=item.get("normalized_value"),
source=FilterSource(str(item.get("source") or FilterSource.SUPERSET_URL.value)), source=FilterSource(
str(item.get("source") or FilterSource.SUPERSET_URL.value)
),
confidence_state=FilterConfidenceState( confidence_state=FilterConfidenceState(
str(item.get("confidence_state") or FilterConfidenceState.UNRESOLVED.value) str(
item.get("confidence_state")
or FilterConfidenceState.UNRESOLVED.value
)
), ),
requires_confirmation=bool(item.get("requires_confirmation", False)), requires_confirmation=bool(item.get("requires_confirmation", False)),
recovery_status=FilterRecoveryStatus( recovery_status=FilterRecoveryStatus(
str(item.get("recovery_status") or FilterRecoveryStatus.PARTIAL.value) str(
item.get("recovery_status")
or FilterRecoveryStatus.PARTIAL.value
)
), ),
notes=item.get("notes"), notes=item.get("notes"),
) )
@@ -711,25 +783,44 @@ class DatasetReviewOrchestrator:
if session.dataset_id is not None: if session.dataset_id is not None:
try: try:
dataset_payload = extractor.client.get_dataset_detail(session.dataset_id) dataset_payload = extractor.client.get_dataset_detail(
discovered_variables = extractor.discover_template_variables(dataset_payload) session_record.dataset_id
)
discovered_variables = extractor.discover_template_variables(
dataset_payload
)
template_variables = [ template_variables = [
TemplateVariable( TemplateVariable(
session_id=session.session_id, session_id=session_record.session_id,
variable_name=str(item.get("variable_name") or f"variable_{index}"), variable_name=str(
item.get("variable_name") or f"variable_{index}"
),
expression_source=str(item.get("expression_source") or ""), expression_source=str(item.get("expression_source") or ""),
variable_kind=VariableKind(str(item.get("variable_kind") or VariableKind.UNKNOWN.value)), variable_kind=VariableKind(
str(item.get("variable_kind") or VariableKind.UNKNOWN.value)
),
is_required=bool(item.get("is_required", True)), is_required=bool(item.get("is_required", True)),
default_value=item.get("default_value"), default_value=item.get("default_value"),
mapping_status=MappingStatus(str(item.get("mapping_status") or MappingStatus.UNMAPPED.value)), mapping_status=MappingStatus(
str(
item.get("mapping_status")
or MappingStatus.UNMAPPED.value
)
),
) )
for index, item in enumerate(discovered_variables) for index, item in enumerate(discovered_variables)
] ]
except Exception as exc: except Exception as exc:
if "dataset_template_variable_discovery_failed" not in parsed_context.unresolved_references: if (
parsed_context.unresolved_references.append("dataset_template_variable_discovery_failed") "dataset_template_variable_discovery_failed"
not in parsed_context.unresolved_references
):
parsed_context.unresolved_references.append(
"dataset_template_variable_discovery_failed"
)
if not any( if not any(
finding.caused_by_ref == "dataset_template_variable_discovery_failed" finding.caused_by_ref
== "dataset_template_variable_discovery_failed"
for finding in findings for finding in findings
): ):
findings.append( findings.append(
@@ -745,7 +836,11 @@ class DatasetReviewOrchestrator:
) )
logger.explore( logger.explore(
"Template variable discovery failed during session bootstrap", "Template variable discovery failed during session bootstrap",
extra={"session_id": session.session_id, "dataset_id": session.dataset_id, "error": str(exc)}, extra={
"session_id": session_record.session_id,
"dataset_id": session_record.dataset_id,
"error": str(exc),
},
) )
filter_lookup = { filter_lookup = {
@@ -754,7 +849,9 @@ class DatasetReviewOrchestrator:
if str(imported_filter.filter_name or "").strip() if str(imported_filter.filter_name or "").strip()
} }
for template_variable in template_variables: for template_variable in template_variables:
matched_filter = filter_lookup.get(str(template_variable.variable_name or "").strip().lower()) matched_filter = filter_lookup.get(
str(template_variable.variable_name or "").strip().lower()
)
if matched_filter is None: if matched_filter is None:
continue continue
requires_explicit_approval = bool( requires_explicit_approval = bool(
@@ -763,22 +860,27 @@ class DatasetReviewOrchestrator:
) )
execution_mappings.append( execution_mappings.append(
ExecutionMapping( ExecutionMapping(
session_id=session.session_id, session_id=session_record.session_id,
filter_id=matched_filter.filter_id, filter_id=matched_filter.filter_id,
variable_id=template_variable.variable_id, variable_id=template_variable.variable_id,
mapping_method=MappingMethod.DIRECT_MATCH, mapping_method=MappingMethod.DIRECT_MATCH,
raw_input_value=matched_filter.raw_value, raw_input_value=matched_filter.raw_value,
effective_value=matched_filter.normalized_value if matched_filter.normalized_value is not None else matched_filter.raw_value, effective_value=matched_filter.normalized_value
if matched_filter.normalized_value is not None
else matched_filter.raw_value,
transformation_note="Bootstrapped from Superset recovery context", transformation_note="Bootstrapped from Superset recovery context",
warning_level=None if not requires_explicit_approval else None, warning_level=None if not requires_explicit_approval else None,
requires_explicit_approval=requires_explicit_approval, requires_explicit_approval=requires_explicit_approval,
approval_state=ApprovalState.PENDING if requires_explicit_approval else ApprovalState.NOT_REQUIRED, approval_state=ApprovalState.PENDING
if requires_explicit_approval
else ApprovalState.NOT_REQUIRED,
approved_by_user_id=None, approved_by_user_id=None,
approved_at=None, approved_at=None,
) )
) )
return imported_filters, template_variables, execution_mappings, findings return imported_filters, template_variables, execution_mappings, findings
# [/DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function] # [/DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function] # [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
@@ -789,9 +891,16 @@ class DatasetReviewOrchestrator:
# @POST: returns deterministic execution snapshot for current session state without mutating persistence. # @POST: returns deterministic execution snapshot for current session state without mutating persistence.
# @SIDE_EFFECT: none. # @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]] # @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]]
def _build_execution_snapshot(self, session: DatasetReviewSession) -> Dict[str, Any]: def _build_execution_snapshot(
filter_lookup = {item.filter_id: item for item in session.imported_filters} self, session: DatasetReviewSession
variable_lookup = {item.variable_id: item for item in session.template_variables} ) -> Dict[str, Any]:
session_record = cast(Any, session)
filter_lookup = {
item.filter_id: item for item in session_record.imported_filters
}
variable_lookup = {
item.variable_id: item for item in session_record.template_variables
}
effective_filters: List[Dict[str, Any]] = [] effective_filters: List[Dict[str, Any]] = []
template_params: Dict[str, Any] = {} template_params: Dict[str, Any] = {}
@@ -800,14 +909,16 @@ class DatasetReviewOrchestrator:
preview_blockers: List[str] = [] preview_blockers: List[str] = []
mapped_filter_ids: set[str] = set() mapped_filter_ids: set[str] = set()
for mapping in session.execution_mappings: for mapping in session_record.execution_mappings:
imported_filter = filter_lookup.get(mapping.filter_id) imported_filter = filter_lookup.get(mapping.filter_id)
template_variable = variable_lookup.get(mapping.variable_id) template_variable = variable_lookup.get(mapping.variable_id)
if imported_filter is None: if imported_filter is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter") preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter")
continue continue
if template_variable is None: if template_variable is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_variable") preview_blockers.append(
f"mapping:{mapping.mapping_id}:missing_variable"
)
continue continue
effective_value = mapping.effective_value effective_value = mapping.effective_value
@@ -819,7 +930,9 @@ class DatasetReviewOrchestrator:
effective_value = template_variable.default_value effective_value = template_variable.default_value
if effective_value is None and template_variable.is_required: if effective_value is None and template_variable.is_required:
preview_blockers.append(f"variable:{template_variable.variable_name}:missing_required_value") preview_blockers.append(
f"variable:{template_variable.variable_name}:missing_required_value"
)
continue continue
mapped_filter_ids.add(imported_filter.filter_id) mapped_filter_ids.add(imported_filter.filter_id)
@@ -840,10 +953,13 @@ class DatasetReviewOrchestrator:
template_params[template_variable.variable_name] = effective_value template_params[template_variable.variable_name] = effective_value
if mapping.approval_state == ApprovalState.APPROVED: if mapping.approval_state == ApprovalState.APPROVED:
approved_mapping_ids.append(mapping.mapping_id) approved_mapping_ids.append(mapping.mapping_id)
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED: if (
mapping.requires_explicit_approval
and mapping.approval_state != ApprovalState.APPROVED
):
open_warning_refs.append(mapping.mapping_id) open_warning_refs.append(mapping.mapping_id)
for imported_filter in session.imported_filters: for imported_filter in session_record.imported_filters:
if imported_filter.filter_id in mapped_filter_ids: if imported_filter.filter_id in mapped_filter_ids:
continue continue
effective_value = imported_filter.normalized_value effective_value = imported_filter.normalized_value
@@ -862,8 +978,10 @@ class DatasetReviewOrchestrator:
} }
) )
mapped_variable_ids = {mapping.variable_id for mapping in session.execution_mappings} mapped_variable_ids = {
for variable in session.template_variables: mapping.variable_id for mapping in session_record.execution_mappings
}
for variable in session_record.template_variables:
if variable.variable_id in mapped_variable_ids: if variable.variable_id in mapped_variable_ids:
continue continue
if variable.default_value is not None: if variable.default_value is not None:
@@ -875,11 +993,13 @@ class DatasetReviewOrchestrator:
semantic_decision_refs = [ semantic_decision_refs = [
field.field_id field.field_id
for field in session.semantic_fields for field in session.semantic_fields
if field.is_locked or not field.needs_review or field.provenance.value != "unresolved" if field.is_locked
or not field.needs_review
or field.provenance.value != "unresolved"
] ]
preview_fingerprint = self._compute_preview_fingerprint( preview_fingerprint = self._compute_preview_fingerprint(
{ {
"dataset_id": session.dataset_id, "dataset_id": session_record.dataset_id,
"template_params": template_params, "template_params": template_params,
"effective_filters": effective_filters, "effective_filters": effective_filters,
} }
@@ -893,6 +1013,7 @@ class DatasetReviewOrchestrator:
"preview_blockers": sorted(set(preview_blockers)), "preview_blockers": sorted(set(preview_blockers)),
"preview_fingerprint": preview_fingerprint, "preview_fingerprint": preview_fingerprint,
} }
# [/DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function] # [/DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# [DEF:DatasetReviewOrchestrator._build_launch_blockers:Function] # [DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
@@ -909,16 +1030,21 @@ class DatasetReviewOrchestrator:
execution_snapshot: Dict[str, Any], execution_snapshot: Dict[str, Any],
preview: Optional[CompiledPreview], preview: Optional[CompiledPreview],
) -> List[str]: ) -> List[str]:
session_record = cast(Any, session)
blockers = list(execution_snapshot["preview_blockers"]) blockers = list(execution_snapshot["preview_blockers"])
for finding in session.findings: for finding in session_record.findings:
if ( if (
finding.severity == FindingSeverity.BLOCKING finding.severity == FindingSeverity.BLOCKING
and finding.resolution_state not in {ResolutionState.RESOLVED, ResolutionState.APPROVED} and finding.resolution_state
not in {ResolutionState.RESOLVED, ResolutionState.APPROVED}
): ):
blockers.append(f"finding:{finding.code}:blocking") blockers.append(f"finding:{finding.code}:blocking")
for mapping in session.execution_mappings: for mapping in session_record.execution_mappings:
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED: if (
mapping.requires_explicit_approval
and mapping.approval_state != ApprovalState.APPROVED
):
blockers.append(f"mapping:{mapping.mapping_id}:approval_required") blockers.append(f"mapping:{mapping.mapping_id}:approval_required")
if preview is None: if preview is None:
@@ -930,23 +1056,28 @@ class DatasetReviewOrchestrator:
blockers.append("preview:fingerprint_mismatch") blockers.append("preview:fingerprint_mismatch")
return sorted(set(blockers)) return sorted(set(blockers))
# [/DEF:DatasetReviewOrchestrator._build_launch_blockers:Function] # [/DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# [DEF:DatasetReviewOrchestrator._get_latest_preview:Function] # [DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# @COMPLEXITY: 2 # @COMPLEXITY: 2
# @PURPOSE: Resolve the current latest preview snapshot for one session aggregate. # @PURPOSE: Resolve the current latest preview snapshot for one session aggregate.
def _get_latest_preview(self, session: DatasetReviewSession) -> Optional[CompiledPreview]: def _get_latest_preview(
if not session.previews: self, session: DatasetReviewSession
) -> Optional[CompiledPreview]:
session_record = cast(Any, session)
if not session_record.previews:
return None return None
if session.last_preview_id: if session_record.last_preview_id:
for preview in session.previews: for preview in session_record.previews:
if preview.preview_id == session.last_preview_id: if preview.preview_id == session_record.last_preview_id:
return preview return preview
return sorted( return sorted(
session.previews, session_record.previews,
key=lambda item: (item.created_at or datetime.min, item.preview_id), key=lambda item: (item.created_at or datetime.min, item.preview_id),
reverse=True, reverse=True,
)[0] )[0]
# [/DEF:DatasetReviewOrchestrator._get_latest_preview:Function] # [/DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# [DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function] # [DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
@@ -955,6 +1086,7 @@ class DatasetReviewOrchestrator:
def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str: def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str:
serialized = json.dumps(payload, sort_keys=True, default=str) serialized = json.dumps(payload, sort_keys=True, default=str)
return hashlib.sha256(serialized.encode("utf-8")).hexdigest() return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
# [/DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function] # [/DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function] # [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
@@ -971,28 +1103,33 @@ class DatasetReviewOrchestrator:
session: DatasetReviewSession, session: DatasetReviewSession,
parsed_context: Optional[SupersetParsedContext], parsed_context: Optional[SupersetParsedContext],
) -> Optional[str]: ) -> Optional[str]:
session_record = cast(Any, session)
if self.task_manager is None: if self.task_manager is None:
logger.reason( logger.reason(
"Dataset review session started without task manager; continuing synchronously", "Dataset review session started without task manager; continuing synchronously",
extra={"session_id": session.session_id}, extra={"session_id": session_record.session_id},
) )
return None return None
task_params: Dict[str, Any] = { task_params: Dict[str, Any] = {
"session_id": session.session_id, "session_id": session_record.session_id,
"user_id": command.user.id, "user_id": command.user.id,
"environment_id": session.environment_id, "environment_id": session_record.environment_id,
"source_kind": session.source_kind, "source_kind": session_record.source_kind,
"source_input": session.source_input, "source_input": session_record.source_input,
"dataset_ref": session.dataset_ref, "dataset_ref": session_record.dataset_ref,
"dataset_id": session.dataset_id, "dataset_id": session_record.dataset_id,
"dashboard_id": session.dashboard_id, "dashboard_id": session_record.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery), "partial_recovery": bool(
parsed_context and parsed_context.partial_recovery
),
} }
create_task = getattr(self.task_manager, "create_task", None) create_task = getattr(self.task_manager, "create_task", None)
if create_task is None: if create_task is None:
logger.explore("Task manager has no create_task method; skipping recovery enqueue") logger.explore(
"Task manager has no create_task method; skipping recovery enqueue"
)
return None return None
try: try:
@@ -1003,13 +1140,16 @@ class DatasetReviewOrchestrator:
except TypeError: except TypeError:
logger.explore( logger.explore(
"Recovery task enqueue skipped because task manager create_task contract is incompatible", "Recovery task enqueue skipped because task manager create_task contract is incompatible",
extra={"session_id": session.session_id}, extra={"session_id": session_record.session_id},
) )
return None return None
task_id = getattr(task_object, "id", None) task_id = getattr(task_object, "id", None)
return str(task_id) if task_id else None return str(task_id) if task_id else None
# [/DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function] # [/DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# [/DEF:DatasetReviewOrchestrator:Class] # [/DEF:DatasetReviewOrchestrator:Class]
# [/DEF:DatasetReviewOrchestrator:Module] # [/DEF:DatasetReviewOrchestrator:Module]

View File

@@ -3,6 +3,7 @@
// @SEMANTICS: api, client, fetch, rest // @SEMANTICS: api, client, fetch, rest
// @PURPOSE: Handles all communication with the backend API. // @PURPOSE: Handles all communication with the backend API.
// @LAYER: Infra-API // @LAYER: Infra-API
// @RELATION: [DEPENDS_ON] ->[toasts_module]
import { addToast } from './toasts.js'; import { addToast } from './toasts.js';
import { PUBLIC_WS_URL } from '$env/static/public'; import { PUBLIC_WS_URL } from '$env/static/public';