semantics

This commit is contained in:
2026-03-20 20:01:58 +03:00
parent 1149e8df1d
commit 80ce8fe150
12 changed files with 1734 additions and 6577 deletions

View File

@@ -4,12 +4,12 @@
# @PURPOSE: HTTP contract layer for migration orchestration, settings, dry-run, and mapping sync endpoints.
# @LAYER: Infra
# @RELATION: DEPENDS_ON ->[AppDependencies]
# @RELATION: DEPENDS_ON ->[backend.src.core.database]
# @RELATION: DEPENDS_ON ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: DEPENDS_ON ->[backend.src.core.migration.dry_run_orchestrator.MigrationDryRunService]
# @RELATION: DEPENDS_ON ->[backend.src.core.mapping_service.IdMappingService]
# @RELATION: DEPENDS_ON ->[backend.src.models.dashboard]
# @RELATION: DEPENDS_ON ->[backend.src.models.mapping]
# @RELATION: DEPENDS_ON ->[DatabaseModule]
# @RELATION: DEPENDS_ON ->[DashboardSelection]
# @RELATION: DEPENDS_ON ->[DashboardMetadata]
# @RELATION: DEPENDS_ON ->[MigrationDryRunService]
# @RELATION: DEPENDS_ON ->[IdMappingService]
# @RELATION: DEPENDS_ON ->[ResourceMapping]
# @INVARIANT: Migration endpoints never execute with invalid environment references and always return explicit HTTP errors on guard failures.
# @PRE: Backend core services initialized and Database session available.
# @POST: Migration tasks are enqueued or dry-run results are computed and returned.
@@ -24,7 +24,7 @@
# @TEST_INVARIANT: [EnvironmentValidationBeforeAction] -> VERIFIED_BY: [invalid_environment, valid_execution]
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, cast
from sqlalchemy.orm import Session
from ...dependencies import get_config_manager, get_task_manager, has_permission
from ...core.database import get_db
@@ -35,8 +35,11 @@ from ...core.migration.dry_run_orchestrator import MigrationDryRunService
from ...core.mapping_service import IdMappingService
from ...models.mapping import ResourceMapping
logger = cast(Any, logger)
router = APIRouter(prefix="/api", tags=["migration"])
# [DEF:get_dashboards:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetch dashboard metadata from a requested environment for migration selection UI.
@@ -44,17 +47,18 @@ router = APIRouter(prefix="/api", tags=["migration"])
# @POST: Returns List[DashboardMetadata] for the resolved environment; emits HTTP_404 when environment is absent.
# @SIDE_EFFECT: Reads environment configuration and performs remote Superset metadata retrieval over network.
# @DATA_CONTRACT: Input[str env_id] -> Output[List[DashboardMetadata]]
# @RELATION: CALLS ->[SupersetClient.get_dashboards_summary]
@router.get("/environments/{env_id}/dashboards", response_model=List[DashboardMetadata])
async def get_dashboards(
env_id: str,
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "EXECUTE"))
_=Depends(has_permission("plugin:migration", "EXECUTE")),
):
with belief_scope("get_dashboards", f"env_id={env_id}"):
logger.reason(f"Fetching dashboards for environment: {env_id}")
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
if not env:
logger.explore(f"Environment {env_id} not found in configuration")
raise HTTPException(status_code=404, detail="Environment not found")
@@ -63,8 +67,11 @@ async def get_dashboards(
dashboards = client.get_dashboards_summary()
logger.reflect(f"Retrieved {len(dashboards)} dashboards from {env_id}")
return dashboards
# [/DEF:get_dashboards:Function]
# [DEF:execute_migration:Function]
# @COMPLEXITY: 5
# @PURPOSE: Validate migration selection and enqueue asynchronous migration task execution.
@@ -72,38 +79,60 @@ async def get_dashboards(
# @POST: Returns {"task_id": str, "message": str} when task creation succeeds; emits HTTP_400/HTTP_500 on failure.
# @SIDE_EFFECT: Reads configuration, writes task record through task manager, and writes operational logs.
# @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, str]]
# @RELATION: CALLS ->[create_task]
# @RELATION: DEPENDS_ON ->[DashboardSelection]
# @INVARIANT: Migration task dispatch never occurs before source and target environment ids pass guard validation.
@router.post("/migration/execute")
async def execute_migration(
selection: DashboardSelection,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:migration", "EXECUTE"))
_=Depends(has_permission("plugin:migration", "EXECUTE")),
):
with belief_scope("execute_migration"):
logger.reason(f"Initiating migration from {selection.source_env_id} to {selection.target_env_id}")
logger.reason(
f"Initiating migration from {selection.source_env_id} to {selection.target_env_id}"
)
# Validate environments exist
environments = config_manager.get_environments()
env_ids = {e.id for e in environments}
if selection.source_env_id not in env_ids or selection.target_env_id not in env_ids:
logger.explore("Invalid environment selection", extra={"source": selection.source_env_id, "target": selection.target_env_id})
raise HTTPException(status_code=400, detail="Invalid source or target environment")
if (
selection.source_env_id not in env_ids
or selection.target_env_id not in env_ids
):
logger.explore(
"Invalid environment selection",
extra={
"source": selection.source_env_id,
"target": selection.target_env_id,
},
)
raise HTTPException(
status_code=400, detail="Invalid source or target environment"
)
# Include replace_db_config and fix_cross_filters in the task parameters
task_params = selection.dict()
task_params['replace_db_config'] = selection.replace_db_config
task_params['fix_cross_filters'] = selection.fix_cross_filters
logger.reason(f"Creating migration task with {len(selection.selected_ids)} dashboards")
task_params["replace_db_config"] = selection.replace_db_config
task_params["fix_cross_filters"] = selection.fix_cross_filters
logger.reason(
f"Creating migration task with {len(selection.selected_ids)} dashboards"
)
try:
task = await task_manager.create_task("superset-migration", task_params)
logger.reflect(f"Migration task created: {task.id}")
return {"task_id": task.id, "message": "Migration initiated"}
except Exception as e:
logger.explore(f"Task creation failed: {e}")
raise HTTPException(status_code=500, detail=f"Failed to create migration task: {str(e)}")
raise HTTPException(
status_code=500, detail=f"Failed to create migration task: {str(e)}"
)
# [/DEF:execute_migration:Function]
@@ -114,37 +143,49 @@ async def execute_migration(
# @POST: Returns deterministic dry-run payload; emits HTTP_400 for guard violations and HTTP_500 for orchestrator value errors.
# @SIDE_EFFECT: Reads local mappings from DB and fetches source/target metadata via Superset API.
# @DATA_CONTRACT: Input[DashboardSelection] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[DashboardSelection]
# @RELATION: DEPENDS_ON ->[MigrationDryRunService]
# @INVARIANT: Dry-run flow remains read-only and rejects identical source/target environments before service execution.
@router.post("/migration/dry-run", response_model=Dict[str, Any])
async def dry_run_migration(
selection: DashboardSelection,
config_manager=Depends(get_config_manager),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "EXECUTE"))
_=Depends(has_permission("plugin:migration", "EXECUTE")),
):
with belief_scope("dry_run_migration"):
logger.reason(f"Starting dry run: {selection.source_env_id} -> {selection.target_env_id}")
logger.reason(
f"Starting dry run: {selection.source_env_id} -> {selection.target_env_id}"
)
environments = config_manager.get_environments()
env_map = {env.id: env for env in environments}
source_env = env_map.get(selection.source_env_id)
target_env = env_map.get(selection.target_env_id)
if not source_env or not target_env:
logger.explore("Invalid environment selection for dry run")
raise HTTPException(status_code=400, detail="Invalid source or target environment")
raise HTTPException(
status_code=400, detail="Invalid source or target environment"
)
if selection.source_env_id == selection.target_env_id:
logger.explore("Source and target environments are identical")
raise HTTPException(status_code=400, detail="Source and target environments must be different")
raise HTTPException(
status_code=400,
detail="Source and target environments must be different",
)
if not selection.selected_ids:
logger.explore("No dashboards selected for dry run")
raise HTTPException(status_code=400, detail="No dashboards selected for dry run")
raise HTTPException(
status_code=400, detail="No dashboards selected for dry run"
)
service = MigrationDryRunService()
source_client = SupersetClient(source_env)
target_client = SupersetClient(target_env)
try:
result = service.run(
selection=selection,
@@ -157,8 +198,11 @@ async def dry_run_migration(
except ValueError as exc:
logger.explore(f"Dry run orchestrator failed: {exc}")
raise HTTPException(status_code=500, detail=str(exc)) from exc
# [/DEF:dry_run_migration:Function]
# [DEF:get_migration_settings:Function]
# @COMPLEXITY: 3
# @PURPOSE: Read and return configured migration synchronization cron expression.
@@ -166,17 +210,21 @@ async def dry_run_migration(
# @POST: Returns {"cron": str} reflecting current persisted settings value.
# @SIDE_EFFECT: Reads configuration from config manager.
# @DATA_CONTRACT: Input[None] -> Output[Dict[str, str]]
# @RELATION: DEPENDS_ON ->[AppDependencies]
@router.get("/migration/settings", response_model=Dict[str, str])
async def get_migration_settings(
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "READ"))
_=Depends(has_permission("plugin:migration", "READ")),
):
with belief_scope("get_migration_settings"):
config = config_manager.get_config()
cron = config.settings.migration_sync_cron
return {"cron": cron}
# [/DEF:get_migration_settings:Function]
# [DEF:update_migration_settings:Function]
# @COMPLEXITY: 3
# @PURPOSE: Validate and persist migration synchronization cron expression update.
@@ -184,25 +232,31 @@ async def get_migration_settings(
# @POST: Returns {"cron": str, "status": "updated"} and persists updated cron value.
# @SIDE_EFFECT: Mutates configuration and writes persisted config through config manager.
# @DATA_CONTRACT: Input[Dict[str, str]] -> Output[Dict[str, str]]
# @RELATION: DEPENDS_ON ->[AppDependencies]
@router.put("/migration/settings", response_model=Dict[str, str])
async def update_migration_settings(
payload: Dict[str, str],
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "WRITE"))
_=Depends(has_permission("plugin:migration", "WRITE")),
):
with belief_scope("update_migration_settings"):
if "cron" not in payload:
raise HTTPException(status_code=400, detail="Missing 'cron' field in payload")
raise HTTPException(
status_code=400, detail="Missing 'cron' field in payload"
)
cron_expr = payload["cron"]
config = config_manager.get_config()
config.settings.migration_sync_cron = cron_expr
config_manager.save_config(config)
return {"cron": cron_expr, "status": "updated"}
# [/DEF:update_migration_settings:Function]
# [DEF:get_resource_mappings:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetch synchronized resource mappings with optional filters and pagination for migration mappings view.
@@ -210,6 +264,7 @@ async def update_migration_settings(
# @POST: Returns {"items": [...], "total": int} where items reflect applied filters and pagination.
# @SIDE_EFFECT: Executes database read queries against ResourceMapping table.
# @DATA_CONTRACT: Input[QueryParams] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[ResourceMapping]
@router.get("/migration/mappings-data", response_model=Dict[str, Any])
async def get_resource_mappings(
skip: int = Query(0, ge=0),
@@ -218,42 +273,63 @@ async def get_resource_mappings(
env_id: Optional[str] = Query(None, description="Filter by environment ID"),
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "READ"))
_=Depends(has_permission("plugin:migration", "READ")),
):
with belief_scope("get_resource_mappings"):
query = db.query(ResourceMapping)
if env_id:
query = query.filter(ResourceMapping.environment_id == env_id)
if resource_type:
query = query.filter(ResourceMapping.resource_type == resource_type.upper())
if search:
search_term = f"%{search}%"
query = query.filter(
(ResourceMapping.resource_name.ilike(search_term)) |
(ResourceMapping.uuid.ilike(search_term))
(ResourceMapping.resource_name.ilike(search_term))
| (ResourceMapping.uuid.ilike(search_term))
)
total = query.count()
mappings = query.order_by(ResourceMapping.resource_type, ResourceMapping.resource_name).offset(skip).limit(limit).all()
mappings = (
query.order_by(ResourceMapping.resource_type, ResourceMapping.resource_name)
.offset(skip)
.limit(limit)
.all()
)
items = []
for m in mappings:
items.append({
"id": m.id,
"environment_id": m.environment_id,
"resource_type": m.resource_type.value if m.resource_type else None,
"uuid": m.uuid,
"remote_id": m.remote_integer_id,
"resource_name": m.resource_name,
"last_synced_at": m.last_synced_at.isoformat() if m.last_synced_at else None
})
mapping = cast(Any, m)
resource_type_value = (
mapping.resource_type.value
if mapping.resource_type is not None
else None
)
last_synced_at = (
mapping.last_synced_at.isoformat()
if mapping.last_synced_at is not None
else None
)
items.append(
{
"id": mapping.id,
"environment_id": mapping.environment_id,
"resource_type": resource_type_value,
"uuid": mapping.uuid,
"remote_id": mapping.remote_integer_id,
"resource_name": mapping.resource_name,
"last_synced_at": last_synced_at,
}
)
return {"items": items, "total": total}
# [/DEF:get_resource_mappings:Function]
# [DEF:trigger_sync_now:Function]
# @COMPLEXITY: 3
# @PURPOSE: Trigger immediate ID synchronization for every configured environment.
@@ -261,22 +337,24 @@ async def get_resource_mappings(
# @POST: Returns sync summary with synced/failed counts after attempting all environments.
# @SIDE_EFFECT: Upserts Environment rows, commits DB transaction, performs network sync calls, and writes logs.
# @DATA_CONTRACT: Input[None] -> Output[Dict[str, Any]]
# @RELATION: DEPENDS_ON ->[IdMappingService]
# @RELATION: CALLS ->[sync_environment]
@router.post("/migration/sync-now", response_model=Dict[str, Any])
async def trigger_sync_now(
config_manager=Depends(get_config_manager),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "EXECUTE"))
_=Depends(has_permission("plugin:migration", "EXECUTE")),
):
with belief_scope("trigger_sync_now"):
from ...core.logger import logger
from ...models.mapping import Environment as EnvironmentModel
config = config_manager.get_config()
environments = config.environments
if not environments:
raise HTTPException(status_code=400, detail="No environments configured")
# Ensure each environment exists in DB (upsert) to satisfy FK constraints
for env in environments:
existing = db.query(EnvironmentModel).filter_by(id=env.id).first()
@@ -288,15 +366,17 @@ async def trigger_sync_now(
credentials_id=env.id, # Use env.id as credentials reference
)
db.add(db_env)
logger.info(f"[trigger_sync_now][Action] Created environment row for {env.id}")
logger.info(
f"[trigger_sync_now][Action] Created environment row for {env.id}"
)
else:
existing.name = env.name
existing.url = env.url
db.commit()
service = IdMappingService(db)
results = {"synced": [], "failed": []}
for env in environments:
try:
client = SupersetClient(env)
@@ -306,13 +386,15 @@ async def trigger_sync_now(
except Exception as e:
results["failed"].append({"env_id": env.id, "error": str(e)})
logger.error(f"[trigger_sync_now][Error] Failed to sync {env.id}: {e}")
return {
"status": "completed",
"synced_count": len(results["synced"]),
"failed_count": len(results["failed"]),
"details": results
"details": results,
}
# [/DEF:trigger_sync_now:Function]
# [/DEF:MigrationApi:Module]

View File

@@ -3,9 +3,7 @@
# @SEMANTICS: tests, superset, preview, chart_data, network, 404-mapping
# @PURPOSE: Verify explicit chart-data preview compilation and ensure non-dashboard 404 errors remain generic across sync and async clients.
# @LAYER: Domain
# @RELATION: [BINDS_TO] ->[SupersetClient]
# @RELATION: [BINDS_TO] ->[APIClient]
# @RELATION: [BINDS_TO] ->[AsyncAPIClient]
# @RELATION: [BINDS_TO] ->[AsyncNetworkModule]
import json
from unittest.mock import MagicMock
@@ -29,11 +27,15 @@ def _make_environment() -> Environment:
username="demo",
password="secret",
)
# [/DEF:_make_environment:Function]
# [DEF:_make_requests_http_error:Function]
def _make_requests_http_error(status_code: int, url: str) -> requests.exceptions.HTTPError:
def _make_requests_http_error(
status_code: int, url: str
) -> requests.exceptions.HTTPError:
response = requests.Response()
response.status_code = status_code
response.url = url
@@ -41,14 +43,20 @@ def _make_requests_http_error(status_code: int, url: str) -> requests.exceptions
request = requests.Request("GET", url).prepare()
response.request = request
return requests.exceptions.HTTPError(response=response, request=request)
# [/DEF:_make_requests_http_error:Function]
# [DEF:_make_httpx_status_error:Function]
def _make_httpx_status_error(status_code: int, url: str) -> httpx.HTTPStatusError:
request = httpx.Request("GET", url)
response = httpx.Response(status_code=status_code, request=request, text='{"message":"not found"}')
response = httpx.Response(
status_code=status_code, request=request, text='{"message":"not found"}'
)
return httpx.HTTPStatusError("upstream error", request=request, response=response)
# [/DEF:_make_httpx_status_error:Function]
@@ -80,7 +88,10 @@ def test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy():
effective_filters=[{"filter_name": "country", "effective_value": ["DE"]}],
)
assert result["compiled_sql"] == "SELECT count(*) FROM public.sales WHERE country IN ('DE')"
assert (
result["compiled_sql"]
== "SELECT count(*) FROM public.sales WHERE country IN ('DE')"
)
client.network.request.assert_called_once()
request_call = client.network.request.call_args
assert request_call.kwargs["method"] == "POST"
@@ -129,8 +140,11 @@ def test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy():
"success": True,
}
]
# [/DEF:test_compile_dataset_preview_prefers_legacy_explore_form_data_strategy:Function]
# [DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function]
# @PURPOSE: Superset preview compilation should fall back to chart-data when legacy form_data strategies are rejected.
def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures():
@@ -180,7 +194,9 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
assert len(result["strategy_attempts"]) == 3
assert result["strategy_attempts"][0]["endpoint"] == "/explore_json/form_data"
assert result["strategy_attempts"][0]["endpoint_kind"] == "legacy_explore_form_data"
assert result["strategy_attempts"][0]["request_transport"] == "query_param_form_data"
assert (
result["strategy_attempts"][0]["request_transport"] == "query_param_form_data"
)
assert result["strategy_attempts"][0]["contains_root_datasource"] is False
assert result["strategy_attempts"][0]["contains_form_datasource"] is False
assert result["strategy_attempts"][0]["contains_query_object_datasource"] is False
@@ -191,7 +207,9 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
assert result["strategy_attempts"][1]["endpoint"] == "/data"
assert result["strategy_attempts"][1]["endpoint_kind"] == "legacy_data_form_data"
assert result["strategy_attempts"][1]["request_transport"] == "query_param_form_data"
assert (
result["strategy_attempts"][1]["request_transport"] == "query_param_form_data"
)
assert result["strategy_attempts"][1]["contains_root_datasource"] is False
assert result["strategy_attempts"][1]["contains_form_datasource"] is False
assert result["strategy_attempts"][1]["contains_query_object_datasource"] is False
@@ -208,9 +226,18 @@ def test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures(
"contains_form_datasource": False,
"contains_query_object_datasource": False,
"request_param_keys": [],
"request_payload_keys": ["datasource", "force", "form_data", "queries", "result_format", "result_type"],
"request_payload_keys": [
"datasource",
"force",
"form_data",
"queries",
"result_format",
"result_type",
],
"success": True,
}
# [/DEF:test_compile_dataset_preview_falls_back_to_chart_data_after_legacy_failures:Function]
@@ -234,8 +261,12 @@ def test_build_dataset_preview_query_context_places_recovered_filters_in_chart_s
"display_name": "Country",
"effective_value": ["DE"],
"normalized_filter_payload": {
"filter_clauses": [{"col": "country_code", "op": "IN", "val": ["DE"]}],
"extra_form_data": {"filters": [{"col": "country_code", "op": "IN", "val": ["DE"]}]},
"filter_clauses": [
{"col": "country_code", "op": "IN", "val": ["DE"]}
],
"extra_form_data": {
"filters": [{"col": "country_code", "op": "IN", "val": ["DE"]}]
},
"value_origin": "extra_form_data.filters",
},
},
@@ -267,6 +298,8 @@ def test_build_dataset_preview_query_context_places_recovered_filters_in_chart_s
"time_range": "Last year",
}
assert query_context["form_data"]["url_params"] == {"country": "DE"}
# [/DEF:test_build_dataset_preview_query_context_places_recovered_filters_in_chart_style_form_data:Function]
@@ -287,8 +320,16 @@ def test_build_dataset_preview_query_context_merges_dataset_template_params_and_
effective_filters=[],
)
assert query_context["queries"][0]["url_params"] == {"region": "EMEA", "country": "DE"}
assert query_context["form_data"]["url_params"] == {"region": "EMEA", "country": "DE"}
assert query_context["queries"][0]["url_params"] == {
"region": "EMEA",
"country": "DE",
}
assert query_context["form_data"]["url_params"] == {
"region": "EMEA",
"country": "DE",
}
# [/DEF:test_build_dataset_preview_query_context_merges_dataset_template_params_and_preserves_user_values:Function]
@@ -325,6 +366,8 @@ def test_build_dataset_preview_query_context_preserves_time_range_from_native_fi
"time_range": "2020-01-01 : 2020-12-31"
}
assert query_context["queries"][0]["filters"] == []
# [/DEF:test_build_dataset_preview_query_context_preserves_time_range_from_native_filter_payload:Function]
@@ -348,9 +391,13 @@ def test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses(
"display_name": "Country",
"effective_value": ["DE", "FR"],
"normalized_filter_payload": {
"filter_clauses": [{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}],
"filter_clauses": [
{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}
],
"extra_form_data": {
"filters": [{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}],
"filters": [
{"col": "country_code", "op": "IN", "val": ["DE", "FR"]}
],
"time_range": "Last quarter",
},
"value_origin": "extra_form_data.filters",
@@ -372,6 +419,8 @@ def test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses(
assert legacy_form_data["time_range"] == "Last quarter"
assert legacy_form_data["url_params"] == {"country": "DE"}
assert legacy_form_data["result_type"] == "query"
# [/DEF:test_build_dataset_preview_legacy_form_data_preserves_native_filter_clauses:Function]
@@ -393,6 +442,8 @@ def test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic():
assert not isinstance(exc_info.value, DashboardNotFoundError)
assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value)
# [/DEF:test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
@@ -413,6 +464,8 @@ def test_sync_network_404_mapping_translates_dashboard_endpoints():
)
assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value)
# [/DEF:test_sync_network_404_mapping_translates_dashboard_endpoints:Function]
@@ -430,7 +483,9 @@ async def test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic()
try:
with pytest.raises(SupersetAPIError) as exc_info:
client._handle_http_error(
_make_httpx_status_error(404, "http://superset.local/api/v1/chart/data"),
_make_httpx_status_error(
404, "http://superset.local/api/v1/chart/data"
),
"/chart/data",
)
@@ -438,6 +493,8 @@ async def test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic()
assert "API resource not found at endpoint '/chart/data'" in str(exc_info.value)
finally:
await client.aclose()
# [/DEF:test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
@@ -455,14 +512,18 @@ async def test_async_network_404_mapping_translates_dashboard_endpoints():
try:
with pytest.raises(DashboardNotFoundError) as exc_info:
client._handle_http_error(
_make_httpx_status_error(404, "http://superset.local/api/v1/dashboard/10"),
_make_httpx_status_error(
404, "http://superset.local/api/v1/dashboard/10"
),
"/dashboard/10",
)
assert "Dashboard '/dashboard/10' Dashboard not found" in str(exc_info.value)
finally:
await client.aclose()
# [/DEF:test_async_network_404_mapping_translates_dashboard_endpoints:Function]
# [/DEF:SupersetPreviewPipelineTests:Module]
# [/DEF:SupersetPreviewPipelineTests:Module]

View File

@@ -32,12 +32,13 @@ from .utils.async_network import AsyncAPIClient
# @RELATION: [DEPENDS_ON] ->[backend.src.core.utils.async_network.AsyncAPIClient]
# @RELATION: [CALLS] ->[backend.src.core.utils.async_network.AsyncAPIClient.request]
class AsyncSupersetClient(SupersetClient):
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.__init__:Function]
# [DEF:AsyncSupersetClientInit:Function]
# @COMPLEXITY: 3
# @PURPOSE: Initialize async Superset client with AsyncAPIClient transport.
# @PRE: env is valid Environment instance.
# @POST: Client uses async network transport and inherited projection helpers.
# @DATA_CONTRACT: Input[Environment] -> self.network[AsyncAPIClient]
# @RELATION: [DEPENDS_ON] ->[AsyncAPIClient]
def __init__(self, env: Environment):
self.env = env
auth_payload = {
@@ -52,23 +53,28 @@ class AsyncSupersetClient(SupersetClient):
timeout=env.timeout,
)
self.delete_before_reimport = False
# [/DEF:backend.src.core.async_superset_client.AsyncSupersetClient.__init__:Function]
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.aclose:Function]
# [/DEF:AsyncSupersetClientInit:Function]
# [DEF:AsyncSupersetClientClose:Function]
# @COMPLEXITY: 3
# @PURPOSE: Close async transport resources.
# @POST: Underlying AsyncAPIClient is closed.
# @SIDE_EFFECT: Closes network sockets.
# @RELATION: [CALLS] ->[AsyncAPIClient.aclose]
async def aclose(self) -> None:
await self.network.aclose()
# [/DEF:backend.src.core.async_superset_client.AsyncSupersetClient.aclose:Function]
# [/DEF:AsyncSupersetClientClose:Function]
# [DEF:backend.src.core.async_superset_client.AsyncSupersetClient.get_dashboards_page_async:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetch one dashboards page asynchronously.
# @POST: Returns total count and page result list.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
async def get_dashboards_page_async(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
async def get_dashboards_page_async(
self, query: Optional[Dict] = None
) -> Tuple[int, List[Dict]]:
with belief_scope("AsyncSupersetClient.get_dashboards_page_async"):
validated_query = self._validate_query_params(query or {})
if "columns" not in validated_query:
@@ -96,6 +102,7 @@ class AsyncSupersetClient(SupersetClient):
result = response_json.get("result", [])
total_count = response_json.get("count", len(result))
return total_count, result
# [/DEF:get_dashboards_page_async:Function]
# [DEF:get_dashboard_async:Function]
@@ -103,10 +110,16 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch one dashboard payload asynchronously.
# @POST: Returns raw dashboard payload from Superset API.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[AsyncAPIClient.request]
async def get_dashboard_async(self, dashboard_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_async", f"id={dashboard_id}"):
response = await self.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}")
with belief_scope(
"AsyncSupersetClient.get_dashboard_async", f"id={dashboard_id}"
):
response = await self.network.request(
method="GET", endpoint=f"/dashboard/{dashboard_id}"
)
return cast(Dict, response)
# [/DEF:get_dashboard_async:Function]
# [DEF:get_chart_async:Function]
@@ -114,10 +127,14 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch one chart payload asynchronously.
# @POST: Returns raw chart payload from Superset API.
# @DATA_CONTRACT: Input[chart_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[AsyncAPIClient.request]
async def get_chart_async(self, chart_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_chart_async", f"id={chart_id}"):
response = await self.network.request(method="GET", endpoint=f"/chart/{chart_id}")
response = await self.network.request(
method="GET", endpoint=f"/chart/{chart_id}"
)
return cast(Dict, response)
# [/DEF:get_chart_async:Function]
# [DEF:get_dashboard_detail_async:Function]
@@ -125,17 +142,21 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch dashboard detail asynchronously with concurrent charts/datasets requests.
# @POST: Returns dashboard detail payload for overview page.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_dashboard_async]
# @RELATION: [CALLS] ->[self.get_chart_async]
# @RELATION: [CALLS] ->[get_dashboard_async]
# @RELATION: [CALLS] ->[get_chart_async]
async def get_dashboard_detail_async(self, dashboard_id: int) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_detail_async", f"id={dashboard_id}"):
with belief_scope(
"AsyncSupersetClient.get_dashboard_detail_async", f"id={dashboard_id}"
):
dashboard_response = await self.get_dashboard_async(dashboard_id)
dashboard_data = dashboard_response.get("result", dashboard_response)
charts: List[Dict] = []
datasets: List[Dict] = []
def extract_dataset_id_from_form_data(form_data: Optional[Dict]) -> Optional[int]:
def extract_dataset_id_from_form_data(
form_data: Optional[Dict],
) -> Optional[int]:
if not isinstance(form_data, dict):
return None
datasource = form_data.get("datasource")
@@ -173,7 +194,11 @@ class AsyncSupersetClient(SupersetClient):
)
if not isinstance(charts_response, Exception):
charts_payload = charts_response.get("result", []) if isinstance(charts_response, dict) else []
charts_payload = (
charts_response.get("result", [])
if isinstance(charts_response, dict)
else []
)
for chart_obj in charts_payload:
if not isinstance(chart_obj, dict):
continue
@@ -186,20 +211,45 @@ class AsyncSupersetClient(SupersetClient):
form_data = json.loads(form_data)
except Exception:
form_data = {}
dataset_id = extract_dataset_id_from_form_data(form_data) or chart_obj.get("datasource_id")
charts.append({
"id": int(chart_id),
"title": chart_obj.get("slice_name") or chart_obj.get("name") or f"Chart {chart_id}",
"viz_type": (form_data.get("viz_type") if isinstance(form_data, dict) else None),
"dataset_id": int(dataset_id) if dataset_id is not None else None,
"last_modified": chart_obj.get("changed_on"),
"overview": chart_obj.get("description") or (form_data.get("viz_type") if isinstance(form_data, dict) else None) or "Chart",
})
dataset_id = extract_dataset_id_from_form_data(
form_data
) or chart_obj.get("datasource_id")
charts.append(
{
"id": int(chart_id),
"title": chart_obj.get("slice_name")
or chart_obj.get("name")
or f"Chart {chart_id}",
"viz_type": (
form_data.get("viz_type")
if isinstance(form_data, dict)
else None
),
"dataset_id": int(dataset_id)
if dataset_id is not None
else None,
"last_modified": chart_obj.get("changed_on"),
"overview": chart_obj.get("description")
or (
form_data.get("viz_type")
if isinstance(form_data, dict)
else None
)
or "Chart",
}
)
else:
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to fetch dashboard charts: %s", charts_response)
app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to fetch dashboard charts: %s",
charts_response,
)
if not isinstance(datasets_response, Exception):
datasets_payload = datasets_response.get("result", []) if isinstance(datasets_response, dict) else []
datasets_payload = (
datasets_response.get("result", [])
if isinstance(datasets_response, dict)
else []
)
for dataset_obj in datasets_payload:
if not isinstance(dataset_obj, dict):
continue
@@ -207,20 +257,36 @@ class AsyncSupersetClient(SupersetClient):
if dataset_id is None:
continue
db_payload = dataset_obj.get("database")
db_name = db_payload.get("database_name") if isinstance(db_payload, dict) else None
table_name = dataset_obj.get("table_name") or dataset_obj.get("datasource_name") or dataset_obj.get("name") or f"Dataset {dataset_id}"
db_name = (
db_payload.get("database_name")
if isinstance(db_payload, dict)
else None
)
table_name = (
dataset_obj.get("table_name")
or dataset_obj.get("datasource_name")
or dataset_obj.get("name")
or f"Dataset {dataset_id}"
)
schema = dataset_obj.get("schema")
fq_name = f"{schema}.{table_name}" if schema else table_name
datasets.append({
"id": int(dataset_id),
"table_name": table_name,
"schema": schema,
"database": db_name or dataset_obj.get("database_name") or "Unknown",
"last_modified": dataset_obj.get("changed_on"),
"overview": fq_name,
})
datasets.append(
{
"id": int(dataset_id),
"table_name": table_name,
"schema": schema,
"database": db_name
or dataset_obj.get("database_name")
or "Unknown",
"last_modified": dataset_obj.get("changed_on"),
"overview": fq_name,
}
)
else:
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to fetch dashboard datasets: %s", datasets_response)
app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to fetch dashboard datasets: %s",
datasets_response,
)
if not charts:
raw_position_json = dashboard_data.get("position_json")
@@ -228,21 +294,29 @@ class AsyncSupersetClient(SupersetClient):
if isinstance(raw_position_json, str) and raw_position_json:
try:
parsed_position = json.loads(raw_position_json)
chart_ids_from_position.update(self._extract_chart_ids_from_layout(parsed_position))
chart_ids_from_position.update(
self._extract_chart_ids_from_layout(parsed_position)
)
except Exception:
pass
elif isinstance(raw_position_json, dict):
chart_ids_from_position.update(self._extract_chart_ids_from_layout(raw_position_json))
chart_ids_from_position.update(
self._extract_chart_ids_from_layout(raw_position_json)
)
raw_json_metadata = dashboard_data.get("json_metadata")
if isinstance(raw_json_metadata, str) and raw_json_metadata:
try:
parsed_metadata = json.loads(raw_json_metadata)
chart_ids_from_position.update(self._extract_chart_ids_from_layout(parsed_metadata))
chart_ids_from_position.update(
self._extract_chart_ids_from_layout(parsed_metadata)
)
except Exception:
pass
elif isinstance(raw_json_metadata, dict):
chart_ids_from_position.update(self._extract_chart_ids_from_layout(raw_json_metadata))
chart_ids_from_position.update(
self._extract_chart_ids_from_layout(raw_json_metadata)
)
fallback_chart_tasks = [
self.get_chart_async(int(chart_id))
@@ -252,68 +326,113 @@ class AsyncSupersetClient(SupersetClient):
*fallback_chart_tasks,
return_exceptions=True,
)
for chart_id, chart_response in zip(sorted(chart_ids_from_position), fallback_chart_responses):
for chart_id, chart_response in zip(
sorted(chart_ids_from_position), fallback_chart_responses
):
if isinstance(chart_response, Exception):
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to resolve fallback chart %s: %s", chart_id, chart_response)
app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to resolve fallback chart %s: %s",
chart_id,
chart_response,
)
continue
chart_data = chart_response.get("result", chart_response)
charts.append({
"id": int(chart_id),
"title": chart_data.get("slice_name") or chart_data.get("name") or f"Chart {chart_id}",
"viz_type": chart_data.get("viz_type"),
"dataset_id": chart_data.get("datasource_id"),
"last_modified": chart_data.get("changed_on"),
"overview": chart_data.get("description") or chart_data.get("viz_type") or "Chart",
})
charts.append(
{
"id": int(chart_id),
"title": chart_data.get("slice_name")
or chart_data.get("name")
or f"Chart {chart_id}",
"viz_type": chart_data.get("viz_type"),
"dataset_id": chart_data.get("datasource_id"),
"last_modified": chart_data.get("changed_on"),
"overview": chart_data.get("description")
or chart_data.get("viz_type")
or "Chart",
}
)
dataset_ids_from_charts = {
c.get("dataset_id")
for c in charts
if c.get("dataset_id") is not None
c.get("dataset_id") for c in charts if c.get("dataset_id") is not None
}
known_dataset_ids = {d.get("id") for d in datasets if d.get("id") is not None}
missing_dataset_ids = sorted(int(item) for item in dataset_ids_from_charts if item not in known_dataset_ids)
known_dataset_ids = {
d.get("id") for d in datasets if d.get("id") is not None
}
missing_dataset_ids = sorted(
int(item)
for item in dataset_ids_from_charts
if item not in known_dataset_ids
)
if missing_dataset_ids:
dataset_fetch_tasks = [
self.network.request(method="GET", endpoint=f"/dataset/{dataset_id}")
self.network.request(
method="GET", endpoint=f"/dataset/{dataset_id}"
)
for dataset_id in missing_dataset_ids
]
dataset_fetch_responses = await asyncio.gather(
*dataset_fetch_tasks,
return_exceptions=True,
)
for dataset_id, dataset_response in zip(missing_dataset_ids, dataset_fetch_responses):
for dataset_id, dataset_response in zip(
missing_dataset_ids, dataset_fetch_responses
):
if isinstance(dataset_response, Exception):
app_logger.warning("[get_dashboard_detail_async][Warning] Failed to backfill dataset %s: %s", dataset_id, dataset_response)
app_logger.warning(
"[get_dashboard_detail_async][Warning] Failed to backfill dataset %s: %s",
dataset_id,
dataset_response,
)
continue
dataset_data = dataset_response.get("result", dataset_response) if isinstance(dataset_response, dict) else {}
dataset_data = (
dataset_response.get("result", dataset_response)
if isinstance(dataset_response, dict)
else {}
)
db_payload = dataset_data.get("database")
db_name = db_payload.get("database_name") if isinstance(db_payload, dict) else None
table_name = dataset_data.get("table_name") or dataset_data.get("datasource_name") or dataset_data.get("name") or f"Dataset {dataset_id}"
db_name = (
db_payload.get("database_name")
if isinstance(db_payload, dict)
else None
)
table_name = (
dataset_data.get("table_name")
or dataset_data.get("datasource_name")
or dataset_data.get("name")
or f"Dataset {dataset_id}"
)
schema = dataset_data.get("schema")
fq_name = f" {schema}.{table_name}" if schema else table_name
datasets.append({
"id": int(dataset_id),
"table_name": table_name,
"schema": schema,
"database": db_name or dataset_data.get("database_name") or "Unknown",
"last_modified": dataset_data.get("changed_on"),
"overview": fq_name,
})
datasets.append(
{
"id": int(dataset_id),
"table_name": table_name,
"schema": schema,
"database": db_name
or dataset_data.get("database_name")
or "Unknown",
"last_modified": dataset_data.get("changed_on"),
"overview": fq_name,
}
)
return {
"id": int(dashboard_data.get("id") or dashboard_id),
"title": dashboard_data.get("dashboard_title") or dashboard_data.get("title") or f"Dashboard {dashboard_id}",
"title": dashboard_data.get("dashboard_title")
or dashboard_data.get("title")
or f"Dashboard {dashboard_id}",
"slug": dashboard_data.get("slug"),
"url": dashboard_data.get("url"),
"description": dashboard_data.get("description"),
"last_modified": dashboard_data.get("changed_on_utc") or dashboard_data.get("changed_on"),
"last_modified": dashboard_data.get("changed_on_utc")
or dashboard_data.get("changed_on"),
"published": dashboard_data.get("published"),
"charts": charts,
"datasets": datasets,
"chart_count": len(charts),
"dataset_count": len(datasets),
}
# [/DEF:get_dashboard_detail_async:Function]
# [DEF:get_dashboard_permalink_state_async:Function]
@@ -322,12 +441,15 @@ class AsyncSupersetClient(SupersetClient):
# @POST: Returns dashboard permalink state payload from Superset API.
# @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict]
async def get_dashboard_permalink_state_async(self, permalink_key: str) -> Dict:
with belief_scope("AsyncSupersetClient.get_dashboard_permalink_state_async", f"key={permalink_key}"):
with belief_scope(
"AsyncSupersetClient.get_dashboard_permalink_state_async",
f"key={permalink_key}",
):
response = await self.network.request(
method="GET",
endpoint=f"/dashboard/permalink/{permalink_key}"
method="GET", endpoint=f"/dashboard/permalink/{permalink_key}"
)
return cast(Dict, response)
# [/DEF:get_dashboard_permalink_state_async:Function]
# [DEF:get_native_filter_state_async:Function]
@@ -335,13 +457,19 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Fetch stored native filter state asynchronously.
# @POST: Returns native filter state payload from Superset API.
# @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict]
async def get_native_filter_state_async(self, dashboard_id: int, filter_state_key: str) -> Dict:
with belief_scope("AsyncSupersetClient.get_native_filter_state_async", f"dashboard={dashboard_id}, key={filter_state_key}"):
async def get_native_filter_state_async(
self, dashboard_id: int, filter_state_key: str
) -> Dict:
with belief_scope(
"AsyncSupersetClient.get_native_filter_state_async",
f"dashboard={dashboard_id}, key={filter_state_key}",
):
response = await self.network.request(
method="GET",
endpoint=f"/dashboard/{dashboard_id}/filter_state/{filter_state_key}"
endpoint=f"/dashboard/{dashboard_id}/filter_state/{filter_state_key}",
)
return cast(Dict, response)
# [/DEF:get_native_filter_state_async:Function]
# [DEF:extract_native_filters_from_permalink_async:Function]
@@ -349,15 +477,22 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Extract native filters dataMask from a permalink key asynchronously.
# @POST: Returns extracted dataMask with filter states.
# @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_dashboard_permalink_state_async]
async def extract_native_filters_from_permalink_async(self, permalink_key: str) -> Dict:
with belief_scope("AsyncSupersetClient.extract_native_filters_from_permalink_async", f"key={permalink_key}"):
permalink_response = await self.get_dashboard_permalink_state_async(permalink_key)
# @RELATION: [CALLS] ->[get_dashboard_permalink_state_async]
async def extract_native_filters_from_permalink_async(
self, permalink_key: str
) -> Dict:
with belief_scope(
"AsyncSupersetClient.extract_native_filters_from_permalink_async",
f"key={permalink_key}",
):
permalink_response = await self.get_dashboard_permalink_state_async(
permalink_key
)
result = permalink_response.get("result", permalink_response)
state = result.get("state", result)
data_mask = state.get("dataMask", {})
extracted_filters = {}
for filter_id, filter_data in data_mask.items():
if not isinstance(filter_data, dict):
@@ -367,7 +502,7 @@ class AsyncSupersetClient(SupersetClient):
"filterState": filter_data.get("filterState", {}),
"ownState": filter_data.get("ownState", {}),
}
return {
"dataMask": extracted_filters,
"activeTabs": state.get("activeTabs", []),
@@ -375,6 +510,7 @@ class AsyncSupersetClient(SupersetClient):
"chartStates": state.get("chartStates", {}),
"permalink_key": permalink_key,
}
# [/DEF:extract_native_filters_from_permalink_async:Function]
# [DEF:extract_native_filters_from_key_async:Function]
@@ -382,27 +518,37 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Extract native filters from a native_filters_key URL parameter asynchronously.
# @POST: Returns extracted filter state with extraFormData.
# @DATA_CONTRACT: Input[dashboard_id: Union[int, str], filter_state_key: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_native_filter_state_async]
async def extract_native_filters_from_key_async(self, dashboard_id: int, filter_state_key: str) -> Dict:
with belief_scope("AsyncSupersetClient.extract_native_filters_from_key_async", f"dashboard={dashboard_id}, key={filter_state_key}"):
filter_response = await self.get_native_filter_state_async(dashboard_id, filter_state_key)
# @RELATION: [CALLS] ->[get_native_filter_state_async]
async def extract_native_filters_from_key_async(
self, dashboard_id: int, filter_state_key: str
) -> Dict:
with belief_scope(
"AsyncSupersetClient.extract_native_filters_from_key_async",
f"dashboard={dashboard_id}, key={filter_state_key}",
):
filter_response = await self.get_native_filter_state_async(
dashboard_id, filter_state_key
)
result = filter_response.get("result", filter_response)
value = result.get("value")
if isinstance(value, str):
try:
parsed_value = json.loads(value)
except json.JSONDecodeError as e:
app_logger.warning("[extract_native_filters_from_key_async][Warning] Failed to parse filter state JSON: %s", e)
app_logger.warning(
"[extract_native_filters_from_key_async][Warning] Failed to parse filter state JSON: %s",
e,
)
parsed_value = {}
elif isinstance(value, dict):
parsed_value = value
else:
parsed_value = {}
extracted_filters = {}
if "id" in parsed_value and "extraFormData" in parsed_value:
filter_id = parsed_value.get("id", filter_state_key)
extracted_filters[filter_id] = {
@@ -419,12 +565,13 @@ class AsyncSupersetClient(SupersetClient):
"filterState": filter_data.get("filterState", {}),
"ownState": filter_data.get("ownState", {}),
}
return {
"dataMask": extracted_filters,
"dashboard_id": dashboard_id,
"filter_state_key": filter_state_key,
}
# [/DEF:extract_native_filters_from_key_async:Function]
# [DEF:parse_dashboard_url_for_filters_async:Function]
@@ -432,36 +579,42 @@ class AsyncSupersetClient(SupersetClient):
# @PURPOSE: Parse a Superset dashboard URL and extract native filter state asynchronously.
# @POST: Returns extracted filter state or empty dict if no filters found.
# @DATA_CONTRACT: Input[url: str] -> Output[Dict]
# @RELATION: [CALLS] ->[self.extract_native_filters_from_permalink_async]
# @RELATION: [CALLS] ->[self.extract_native_filters_from_key_async]
# @RELATION: [CALLS] ->[extract_native_filters_from_permalink_async]
# @RELATION: [CALLS] ->[extract_native_filters_from_key_async]
async def parse_dashboard_url_for_filters_async(self, url: str) -> Dict:
with belief_scope("AsyncSupersetClient.parse_dashboard_url_for_filters_async", f"url={url}"):
with belief_scope(
"AsyncSupersetClient.parse_dashboard_url_for_filters_async", f"url={url}"
):
import urllib.parse
parsed_url = urllib.parse.urlparse(url)
query_params = urllib.parse.parse_qs(parsed_url.query)
path_parts = parsed_url.path.rstrip("/").split("/")
result = {
"url": url,
"dashboard_id": None,
"filter_type": None,
"filters": {},
}
# Check for permalink URL: /dashboard/p/{key}/
if "p" in path_parts:
try:
p_index = path_parts.index("p")
if p_index + 1 < len(path_parts):
permalink_key = path_parts[p_index + 1]
filter_data = await self.extract_native_filters_from_permalink_async(permalink_key)
filter_data = (
await self.extract_native_filters_from_permalink_async(
permalink_key
)
)
result["filter_type"] = "permalink"
result["filters"] = filter_data
return result
except ValueError:
pass
# Check for native_filters_key in query params
native_filters_key = query_params.get("native_filters_key", [None])[0]
if native_filters_key:
@@ -475,7 +628,7 @@ class AsyncSupersetClient(SupersetClient):
dashboard_ref = potential_id
except ValueError:
pass
if dashboard_ref:
# Resolve slug to numeric ID — the filter_state API requires a numeric ID
resolved_id = None
@@ -484,23 +637,35 @@ class AsyncSupersetClient(SupersetClient):
except (ValueError, TypeError):
try:
dash_resp = await self.get_dashboard_async(dashboard_ref)
dash_data = dash_resp.get("result", dash_resp) if isinstance(dash_resp, dict) else {}
dash_data = (
dash_resp.get("result", dash_resp)
if isinstance(dash_resp, dict)
else {}
)
raw_id = dash_data.get("id")
if raw_id is not None:
resolved_id = int(raw_id)
except Exception as e:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Failed to resolve dashboard slug '%s' to ID: %s", dashboard_ref, e)
app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Failed to resolve dashboard slug '%s' to ID: %s",
dashboard_ref,
e,
)
if resolved_id is not None:
filter_data = await self.extract_native_filters_from_key_async(resolved_id, native_filters_key)
filter_data = await self.extract_native_filters_from_key_async(
resolved_id, native_filters_key
)
result["filter_type"] = "native_filters_key"
result["dashboard_id"] = resolved_id
result["filters"] = filter_data
return result
else:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Could not resolve dashboard_id from URL for native_filters_key")
app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Could not resolve dashboard_id from URL for native_filters_key"
)
return result
# Check for native_filters in query params (direct filter values)
native_filters = query_params.get("native_filters", [None])[0]
if native_filters:
@@ -510,10 +675,16 @@ class AsyncSupersetClient(SupersetClient):
result["filters"] = {"dataMask": parsed_filters}
return result
except json.JSONDecodeError as e:
app_logger.warning("[parse_dashboard_url_for_filters_async][Warning] Failed to parse native_filters JSON: %s", e)
app_logger.warning(
"[parse_dashboard_url_for_filters_async][Warning] Failed to parse native_filters JSON: %s",
e,
)
return result
# [/DEF:parse_dashboard_url_for_filters_async:Function]
# [/DEF:AsyncSupersetClient:Class]
# [/DEF:backend.src.core.async_superset_client:Module]

View File

@@ -1,12 +1,12 @@
# [DEF:backend.src.core.database:Module]
# [DEF:DatabaseModule:Module]
#
# @COMPLEXITY: 3
# @SEMANTICS: database, postgresql, sqlalchemy, session, persistence
# @PURPOSE: Configures database connection and session management (PostgreSQL-first).
# @LAYER: Core
# @RELATION: DEPENDS_ON ->[sqlalchemy]
# @RELATION: DEPENDS_ON ->[backend.src.models.mapping]
# @RELATION: DEPENDS_ON ->[backend.src.core.auth.config]
# @RELATION: [DEPENDS_ON] ->[MappingModels]
# @RELATION: [DEPENDS_ON] ->[auth_config]
# @RELATION: [DEPENDS_ON] ->[ConnectionConfig]
#
# @INVARIANT: A single engine instance is used for the entire application.
@@ -15,6 +15,7 @@ from sqlalchemy import create_engine, inspect, text
from sqlalchemy.orm import sessionmaker
from ..models.mapping import Base
from ..models.connection import ConnectionConfig
# Import models to ensure they're registered with Base
from ..models import task as _task_models # noqa: F401
from ..models import auth as _auth_models # noqa: F401
@@ -60,6 +61,7 @@ TASKS_DATABASE_URL = os.getenv("TASKS_DATABASE_URL", DATABASE_URL)
AUTH_DATABASE_URL = os.getenv("AUTH_DATABASE_URL", auth_config.AUTH_DATABASE_URL)
# [/DEF:AUTH_DATABASE_URL:Constant]
# [DEF:engine:Variable]
# @COMPLEXITY: 1
# @PURPOSE: SQLAlchemy engine for mappings database.
@@ -70,6 +72,7 @@ def _build_engine(db_url: str):
return create_engine(db_url, connect_args={"check_same_thread": False})
return create_engine(db_url, pool_pre_ping=True)
engine = _build_engine(DATABASE_URL)
# [/DEF:engine:Variable]
@@ -106,11 +109,13 @@ TasksSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=tasks_e
AuthSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=auth_engine)
# [/DEF:AuthSessionLocal:Class]
# [DEF:_ensure_user_dashboard_preferences_columns:Function]
# @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table.
# @PRE: bind_engine points to application database where profile table is stored.
# @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_user_dashboard_preferences_columns(bind_engine):
with belief_scope("_ensure_user_dashboard_preferences_columns"):
table_name = "user_dashboard_preferences"
@@ -170,12 +175,15 @@ def _ensure_user_dashboard_preferences_columns(bind_engine):
"[database][EXPLORE] Profile preference additive migration failed: %s",
migration_error,
)
# [/DEF:_ensure_user_dashboard_preferences_columns:Function]
# [DEF:_ensure_user_dashboard_preferences_health_columns:Function]
# @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for user_dashboard_preferences table (health fields).
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_user_dashboard_preferences_health_columns(bind_engine):
with belief_scope("_ensure_user_dashboard_preferences_health_columns"):
table_name = "user_dashboard_preferences"
@@ -214,12 +222,15 @@ def _ensure_user_dashboard_preferences_health_columns(bind_engine):
"[database][EXPLORE] Profile health preference additive migration failed: %s",
migration_error,
)
# [/DEF:_ensure_user_dashboard_preferences_health_columns:Function]
# [DEF:_ensure_llm_validation_results_columns:Function]
# @COMPLEXITY: 3
# @PURPOSE: Applies additive schema upgrades for llm_validation_results table.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_llm_validation_results_columns(bind_engine):
with belief_scope("_ensure_llm_validation_results_columns"):
table_name = "llm_validation_results"
@@ -254,6 +265,8 @@ def _ensure_llm_validation_results_columns(bind_engine):
"[database][EXPLORE] ValidationRecord additive migration failed: %s",
migration_error,
)
# [/DEF:_ensure_llm_validation_results_columns:Function]
@@ -262,6 +275,7 @@ def _ensure_llm_validation_results_columns(bind_engine):
# @PURPOSE: Applies additive schema upgrades for git_server_configs table.
# @PRE: bind_engine points to application database.
# @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_git_server_configs_columns(bind_engine):
with belief_scope("_ensure_git_server_configs_columns"):
table_name = "git_server_configs"
@@ -292,6 +306,8 @@ def _ensure_git_server_configs_columns(bind_engine):
"[database][EXPLORE] GitServerConfig preference additive migration failed: %s",
migration_error,
)
# [/DEF:_ensure_git_server_configs_columns:Function]
@@ -300,6 +316,7 @@ def _ensure_git_server_configs_columns(bind_engine):
# @PURPOSE: Applies additive schema upgrades for auth users table.
# @PRE: bind_engine points to authentication database.
# @POST: Missing columns are added without data loss.
# @RELATION: [DEPENDS_ON] ->[auth_engine]
def _ensure_auth_users_columns(bind_engine):
with belief_scope("_ensure_auth_users_columns"):
table_name = "users"
@@ -314,9 +331,7 @@ def _ensure_auth_users_columns(bind_engine):
alter_statements = []
if "full_name" not in existing_columns:
alter_statements.append(
"ALTER TABLE users ADD COLUMN full_name VARCHAR"
)
alter_statements.append("ALTER TABLE users ADD COLUMN full_name VARCHAR")
if "is_ad_user" not in existing_columns:
alter_statements.append(
"ALTER TABLE users ADD COLUMN is_ad_user BOOLEAN NOT NULL DEFAULT FALSE"
@@ -340,7 +355,13 @@ def _ensure_auth_users_columns(bind_engine):
connection.execute(text(statement))
logger.reason(
"Auth users schema migration completed",
extra={"table": table_name, "added_columns": [stmt.split(" ADD COLUMN ", 1)[1].split()[0] for stmt in alter_statements]},
extra={
"table": table_name,
"added_columns": [
stmt.split(" ADD COLUMN ", 1)[1].split()[0]
for stmt in alter_statements
],
},
)
except Exception as migration_error:
logger.warning(
@@ -348,6 +369,8 @@ def _ensure_auth_users_columns(bind_engine):
migration_error,
)
raise
# [/DEF:_ensure_auth_users_columns:Function]
@@ -356,6 +379,7 @@ def _ensure_auth_users_columns(bind_engine):
# @PURPOSE: Ensures the external connection registry table exists in the main database.
# @PRE: bind_engine points to the application database.
# @POST: connection_configs table exists without dropping existing data.
# @RELATION: [DEPENDS_ON] ->[ConnectionConfig]
def ensure_connection_configs_table(bind_engine):
with belief_scope("ensure_connection_configs_table"):
try:
@@ -366,6 +390,8 @@ def ensure_connection_configs_table(bind_engine):
migration_error,
)
raise
# [/DEF:ensure_connection_configs_table:Function]
@@ -374,6 +400,7 @@ def ensure_connection_configs_table(bind_engine):
# @PURPOSE: Adds missing FilterSource enum values to the PostgreSQL native filtersource type.
# @PRE: bind_engine points to application database with imported_filters table.
# @POST: New enum values are available without data loss.
# @RELATION: [DEPENDS_ON] ->[engine]
def _ensure_filter_source_enum_values(bind_engine):
with belief_scope("_ensure_filter_source_enum_values"):
try:
@@ -387,7 +414,9 @@ def _ensure_filter_source_enum_values(bind_engine):
)
)
if result.fetchone() is None:
logger.reason("filtersource enum type does not exist yet; skipping migration")
logger.reason(
"filtersource enum type does not exist yet; skipping migration"
)
return
# Get existing enum values
@@ -402,7 +431,9 @@ def _ensure_filter_source_enum_values(bind_engine):
existing_values = {row[0] for row in result.fetchall()}
required_values = ["SUPERSET_PERMALINK", "SUPERSET_NATIVE_FILTERS_KEY"]
missing_values = [v for v in required_values if v not in existing_values]
missing_values = [
v for v in required_values if v not in existing_values
]
if not missing_values:
logger.reason(
@@ -417,7 +448,9 @@ def _ensure_filter_source_enum_values(bind_engine):
)
for value in missing_values:
connection.execute(
text(f"ALTER TYPE filtersource ADD VALUE IF NOT EXISTS '{value}'")
text(
f"ALTER TYPE filtersource ADD VALUE IF NOT EXISTS '{value}'"
)
)
connection.commit()
logger.reason(
@@ -429,6 +462,8 @@ def _ensure_filter_source_enum_values(bind_engine):
"[database][EXPLORE] FilterSource enum additive migration failed: %s",
migration_error,
)
# [/DEF:_ensure_filter_source_enum_values:Function]
@@ -438,6 +473,8 @@ def _ensure_filter_source_enum_values(bind_engine):
# @PRE: engine, tasks_engine and auth_engine are initialized.
# @POST: Database tables created in all databases.
# @SIDE_EFFECT: Creates physical database files if they don't exist.
# @RELATION: [CALLS] ->[ensure_connection_configs_table]
# @RELATION: [CALLS] ->[_ensure_filter_source_enum_values]
def init_db():
with belief_scope("init_db"):
Base.metadata.create_all(bind=engine)
@@ -450,14 +487,18 @@ def init_db():
_ensure_auth_users_columns(auth_engine)
ensure_connection_configs_table(engine)
_ensure_filter_source_enum_values(engine)
# [/DEF:init_db:Function]
# [DEF:get_db:Function]
# @COMPLEXITY: 3
# @PURPOSE: Dependency for getting a database session.
# @PRE: SessionLocal is initialized.
# @POST: Session is closed after use.
# @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[SessionLocal]
def get_db():
with belief_scope("get_db"):
db = SessionLocal()
@@ -465,14 +506,18 @@ def get_db():
yield db
finally:
db.close()
# [/DEF:get_db:Function]
# [DEF:get_tasks_db:Function]
# @COMPLEXITY: 3
# @PURPOSE: Dependency for getting a tasks database session.
# @PRE: TasksSessionLocal is initialized.
# @POST: Session is closed after use.
# @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[TasksSessionLocal]
def get_tasks_db():
with belief_scope("get_tasks_db"):
db = TasksSessionLocal()
@@ -480,8 +525,11 @@ def get_tasks_db():
yield db
finally:
db.close()
# [/DEF:get_tasks_db:Function]
# [DEF:get_auth_db:Function]
# @COMPLEXITY: 3
# @PURPOSE: Dependency for getting an authentication database session.
@@ -489,6 +537,7 @@ def get_tasks_db():
# @POST: Session is closed after use.
# @DATA_CONTRACT: None -> Output[sqlalchemy.orm.Session]
# @RETURN: Generator[Session, None, None]
# @RELATION: [DEPENDS_ON] ->[AuthSessionLocal]
def get_auth_db():
with belief_scope("get_auth_db"):
db = AuthSessionLocal()
@@ -496,6 +545,8 @@ def get_auth_db():
yield db
finally:
db.close()
# [/DEF:get_auth_db:Function]
# [/DEF:backend.src.core.database:Module]
# [/DEF:DatabaseModule:Module]

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
# @SEMANTICS: dataset_review, superset, link_parsing, context_recovery, partial_recovery
# @PURPOSE: Recover dataset and dashboard context from Superset links while preserving explicit partial-recovery markers.
# @LAYER: Infra
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient:Class]
# @RELATION: [DEPENDS_ON] ->[ImportedFilter]
# @RELATION: [DEPENDS_ON] ->[ImportedFilter]
# @RELATION: [DEPENDS_ON] ->[TemplateVariable]
# @PRE: Superset link or dataset reference must be parseable enough to resolve an environment-scoped target resource.
@@ -18,7 +18,7 @@ import json
import re
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set
from typing import Any, Dict, List, Optional, Set, cast
from urllib.parse import parse_qs, unquote, urlparse
from src.core.config_models import Environment
@@ -26,6 +26,8 @@ from src.core.logger import belief_scope, logger
from src.core.superset_client import SupersetClient
# [/DEF:SupersetContextExtractor.imports:Block]
logger = cast(Any, logger)
# [DEF:SupersetParsedContext:Class]
# @COMPLEXITY: 2
@@ -42,13 +44,15 @@ class SupersetParsedContext:
imported_filters: List[Dict[str, Any]] = field(default_factory=list)
unresolved_references: List[str] = field(default_factory=list)
partial_recovery: bool = False
# [/DEF:SupersetParsedContext:Class]
# [DEF:SupersetContextExtractor:Class]
# @COMPLEXITY: 4
# @PURPOSE: Parse supported Superset URLs and recover canonical dataset/dashboard references for review-session intake.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [DEPENDS_ON] ->[Environment]
# @PRE: constructor receives a configured environment with a usable Superset base URL.
# @POST: extractor instance is ready to parse links against one Superset environment.
# @SIDE_EFFECT: downstream parse operations may call Superset APIs through SupersetClient.
@@ -56,15 +60,18 @@ class SupersetContextExtractor:
# [DEF:SupersetContextExtractor.__init__:Function]
# @COMPLEXITY: 2
# @PURPOSE: Bind extractor to one Superset environment and client instance.
def __init__(self, environment: Environment, client: Optional[SupersetClient] = None) -> None:
def __init__(
self, environment: Environment, client: Optional[SupersetClient] = None
) -> None:
self.environment = environment
self.client = client or SupersetClient(environment)
# [/DEF:SupersetContextExtractor.__init__:Function]
# [DEF:SupersetContextExtractor.parse_superset_link:Function]
# @COMPLEXITY: 4
# @PURPOSE: Extract candidate identifiers and query state from supported Superset URLs.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[SupersetClient.get_dashboard_detail]
# @PRE: link is a non-empty Superset URL compatible with the configured environment.
# @POST: returns resolved dataset/dashboard context, preserving explicit partial-recovery state if some identifiers cannot be confirmed.
# @SIDE_EFFECT: may issue Superset API reads to resolve dataset references from dashboard or chart URLs.
@@ -115,12 +122,16 @@ class SupersetContextExtractor:
resource_type = "dashboard"
partial_recovery = True
dataset_ref = f"dashboard_permalink:{dashboard_permalink_key}"
unresolved_references.append("dashboard_permalink_dataset_binding_unresolved")
unresolved_references.append(
"dashboard_permalink_dataset_binding_unresolved"
)
logger.reason(
"Resolving dashboard permalink state from Superset",
extra={"permalink_key": dashboard_permalink_key},
)
permalink_payload = self.client.get_dashboard_permalink_state(dashboard_permalink_key)
permalink_payload = self.client.get_dashboard_permalink_state(
dashboard_permalink_key
)
permalink_state = (
permalink_payload.get("state", permalink_payload)
if isinstance(permalink_payload, dict)
@@ -137,8 +148,12 @@ class SupersetContextExtractor:
"Extracted native filters from permalink dataMask",
extra={"filter_count": len(data_mask)},
)
resolved_dashboard_id = self._extract_dashboard_id_from_state(permalink_state)
resolved_chart_id = self._extract_chart_id_from_state(permalink_state)
resolved_dashboard_id = self._extract_dashboard_id_from_state(
permalink_state
)
resolved_chart_id = self._extract_chart_id_from_state(
permalink_state
)
if resolved_dashboard_id is not None:
dashboard_id = resolved_dashboard_id
unresolved_references = [
@@ -146,10 +161,12 @@ class SupersetContextExtractor:
for item in unresolved_references
if item != "dashboard_permalink_dataset_binding_unresolved"
]
dataset_id, unresolved_references = self._recover_dataset_binding_from_dashboard(
dashboard_id=dashboard_id,
dataset_ref=dataset_ref,
unresolved_references=unresolved_references,
dataset_id, unresolved_references = (
self._recover_dataset_binding_from_dashboard(
dashboard_id=dashboard_id,
dataset_ref=dataset_ref,
unresolved_references=unresolved_references,
)
)
if dataset_id is not None:
dataset_ref = f"dataset:{dataset_id}"
@@ -162,19 +179,30 @@ class SupersetContextExtractor:
]
try:
chart_payload = self.client.get_chart(chart_id)
chart_data = chart_payload.get("result", chart_payload) if isinstance(chart_payload, dict) else {}
chart_data = (
chart_payload.get("result", chart_payload)
if isinstance(chart_payload, dict)
else {}
)
datasource_id = chart_data.get("datasource_id")
if datasource_id is not None:
dataset_id = int(datasource_id)
dataset_ref = f"dataset:{dataset_id}"
logger.reason(
"Recovered dataset reference from permalink chart context",
extra={"chart_id": chart_id, "dataset_id": dataset_id},
extra={
"chart_id": chart_id,
"dataset_id": dataset_id,
},
)
else:
unresolved_references.append("chart_dataset_binding_unresolved")
unresolved_references.append(
"chart_dataset_binding_unresolved"
)
except Exception as exc:
unresolved_references.append("chart_dataset_binding_unresolved")
unresolved_references.append(
"chart_dataset_binding_unresolved"
)
logger.explore(
"Chart lookup failed during permalink recovery",
extra={"chart_id": chart_id, "error": str(exc)},
@@ -186,19 +214,25 @@ class SupersetContextExtractor:
)
elif dashboard_id is not None or dashboard_ref is not None:
resource_type = "dashboard"
resolved_dashboard_ref = dashboard_id if dashboard_id is not None else dashboard_ref
resolved_dashboard_ref = (
dashboard_id if dashboard_id is not None else dashboard_ref
)
if resolved_dashboard_ref is None:
raise ValueError("Dashboard reference could not be resolved")
logger.reason(
"Resolving dashboard-bound dataset from Superset",
extra={"dashboard_ref": resolved_dashboard_ref},
)
# Resolve dashboard detail first — handles both numeric ID and slug,
# ensuring dashboard_id is available for the native_filters_key fetch below.
dashboard_detail = self.client.get_dashboard_detail(resolved_dashboard_ref)
dashboard_detail = self.client.get_dashboard_detail(
resolved_dashboard_ref
)
resolved_dashboard_id = dashboard_detail.get("id")
if resolved_dashboard_id is not None:
dashboard_id = int(resolved_dashboard_id)
# Check for native_filters_key in query params and fetch filter state.
# This must run AFTER dashboard_id is resolved from slug above.
native_filters_key = query_params.get("native_filters_key", [None])[0]
@@ -206,7 +240,10 @@ class SupersetContextExtractor:
try:
logger.reason(
"Fetching native filter state from Superset",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key},
extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
},
)
extracted = self.client.extract_native_filters_from_key(
dashboard_id, native_filters_key
@@ -221,14 +258,21 @@ class SupersetContextExtractor:
else:
logger.explore(
"Native filter state returned empty dataMask",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key},
extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
},
)
except Exception as exc:
logger.explore(
"Failed to fetch native filter state from Superset",
extra={"dashboard_id": dashboard_id, "filter_key": native_filters_key, "error": str(exc)},
extra={
"dashboard_id": dashboard_id,
"filter_key": native_filters_key,
"error": str(exc),
},
)
datasets = dashboard_detail.get("datasets") or []
if datasets:
first_dataset = datasets[0]
@@ -280,7 +324,10 @@ class SupersetContextExtractor:
)
logger.reason(
"Canonicalized dataset reference from dataset detail",
extra={"dataset_ref": dataset_ref, "dataset_id": dataset_id},
extra={
"dataset_ref": dataset_ref,
"dataset_id": dataset_id,
},
)
except Exception as exc:
partial_recovery = True
@@ -316,17 +363,20 @@ class SupersetContextExtractor:
},
)
return result
# [/DEF:SupersetContextExtractor.parse_superset_link:Function]
# [DEF:SupersetContextExtractor.recover_imported_filters:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build imported filter entries from URL state and Superset-side saved context.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[SupersetClient.get_dashboard]
# @PRE: parsed_context comes from a successful Superset link parse for one environment.
# @POST: returns explicit recovered and partial filter entries with preserved provenance and confirmation requirements.
# @SIDE_EFFECT: may issue Superset reads for dashboard metadata enrichment.
# @DATA_CONTRACT: Input[SupersetParsedContext] -> Output[List[Dict[str,Any]]]
def recover_imported_filters(self, parsed_context: SupersetParsedContext) -> List[Dict[str, Any]]:
def recover_imported_filters(
self, parsed_context: SupersetParsedContext
) -> List[Dict[str, Any]]:
with belief_scope("SupersetContextExtractor.recover_imported_filters"):
recovered_filters: List[Dict[str, Any]] = []
seen_filter_keys: Set[str] = set()
@@ -349,22 +399,46 @@ class SupersetContextExtractor:
return
existing = recovered_filters[existing_index]
if existing.get("display_name") in {None, "", existing.get("filter_name")} and candidate.get("display_name"):
if existing.get("display_name") in {
None,
"",
existing.get("filter_name"),
} and candidate.get("display_name"):
existing["display_name"] = candidate["display_name"]
if existing.get("raw_value") is None and candidate.get("raw_value") is not None:
if (
existing.get("raw_value") is None
and candidate.get("raw_value") is not None
):
existing["raw_value"] = candidate["raw_value"]
existing["confidence_state"] = candidate.get("confidence_state", "imported")
existing["requires_confirmation"] = candidate.get("requires_confirmation", False)
existing["recovery_status"] = candidate.get("recovery_status", "recovered")
existing["confidence_state"] = candidate.get(
"confidence_state", "imported"
)
existing["requires_confirmation"] = candidate.get(
"requires_confirmation", False
)
existing["recovery_status"] = candidate.get(
"recovery_status", "recovered"
)
existing["source"] = candidate.get("source", existing.get("source"))
if existing.get("normalized_value") is None and candidate.get("normalized_value") is not None:
existing["normalized_value"] = deepcopy(candidate["normalized_value"])
if existing.get("notes") and candidate.get("notes") and candidate["notes"] not in existing["notes"]:
existing["notes"] = f'{existing["notes"]}; {candidate["notes"]}'
if (
existing.get("normalized_value") is None
and candidate.get("normalized_value") is not None
):
existing["normalized_value"] = deepcopy(
candidate["normalized_value"]
)
if (
existing.get("notes")
and candidate.get("notes")
and candidate["notes"] not in existing["notes"]
):
existing["notes"] = f"{existing['notes']}; {candidate['notes']}"
if parsed_context.dashboard_id is not None:
try:
dashboard_payload = self.client.get_dashboard(parsed_context.dashboard_id)
dashboard_payload = self.client.get_dashboard(
parsed_context.dashboard_id
)
dashboard_record = (
dashboard_payload.get("result", dashboard_payload)
if isinstance(dashboard_payload, dict)
@@ -376,7 +450,9 @@ class SupersetContextExtractor:
if not isinstance(json_metadata, dict):
json_metadata = {}
native_filter_configuration = json_metadata.get("native_filter_configuration") or []
native_filter_configuration = (
json_metadata.get("native_filter_configuration") or []
)
default_filters = json_metadata.get("default_filters") or {}
if isinstance(default_filters, str) and default_filters.strip():
try:
@@ -400,7 +476,9 @@ class SupersetContextExtractor:
if not filter_name:
continue
display_name = item.get("label") or item.get("name") or filter_name
display_name = (
item.get("label") or item.get("name") or filter_name
)
filter_id = str(item.get("id") or "").strip()
default_value = None
@@ -413,7 +491,9 @@ class SupersetContextExtractor:
"display_name": display_name,
"raw_value": default_value,
"source": "superset_native",
"recovery_status": "recovered" if default_value is not None else "partial",
"recovery_status": "recovered"
if default_value is not None
else "partial",
"requires_confirmation": default_value is None,
"notes": "Recovered from Superset dashboard native filter configuration",
},
@@ -445,7 +525,9 @@ class SupersetContextExtractor:
default_source="superset_url",
default_note="Recovered from Superset URL state",
)
metadata_match = metadata_filters_by_id.get(normalized["filter_name"].strip().lower())
metadata_match = metadata_filters_by_id.get(
normalized["filter_name"].strip().lower()
)
if metadata_match is not None:
normalized["filter_name"] = metadata_match["filter_name"]
normalized["display_name"] = metadata_match["display_name"]
@@ -517,6 +599,7 @@ class SupersetContextExtractor:
},
)
return recovered_filters
# [/DEF:SupersetContextExtractor.recover_imported_filters:Function]
# [DEF:SupersetContextExtractor.discover_template_variables:Function]
@@ -527,12 +610,16 @@ class SupersetContextExtractor:
# @POST: returns deduplicated explicit variable records without executing Jinja or fabricating runtime values.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[dataset_payload:Dict[str,Any]] -> Output[List[Dict[str,Any]]]
def discover_template_variables(self, dataset_payload: Dict[str, Any]) -> List[Dict[str, Any]]:
def discover_template_variables(
self, dataset_payload: Dict[str, Any]
) -> List[Dict[str, Any]]:
with belief_scope("SupersetContextExtractor.discover_template_variables"):
discovered: List[Dict[str, Any]] = []
seen_variable_names: Set[str] = set()
for expression_source in self._collect_query_bearing_expressions(dataset_payload):
for expression_source in self._collect_query_bearing_expressions(
dataset_payload
):
for filter_match in re.finditer(
r"filter_values\(\s*['\"]([^'\"]+)['\"]\s*\)",
expression_source,
@@ -570,11 +657,16 @@ class SupersetContextExtractor:
default_value=self._normalize_default_literal(default_literal),
)
for jinja_match in re.finditer(r"\{\{\s*(.*?)\s*\}\}", expression_source, flags=re.DOTALL):
for jinja_match in re.finditer(
r"\{\{\s*(.*?)\s*\}\}", expression_source, flags=re.DOTALL
):
expression = str(jinja_match.group(1) or "").strip()
if not expression:
continue
if any(token in expression for token in ("filter_values(", "url_param(", "get_filters(")):
if any(
token in expression
for token in ("filter_values(", "url_param(", "get_filters(")
):
continue
variable_name = self._extract_primary_jinja_identifier(expression)
if not variable_name:
@@ -584,7 +676,9 @@ class SupersetContextExtractor:
seen_variable_names=seen_variable_names,
variable_name=variable_name,
expression_source=expression_source,
variable_kind="derived" if "." in expression or "|" in expression else "parameter",
variable_kind="derived"
if "." in expression or "|" in expression
else "parameter",
is_required=True,
default_value=None,
)
@@ -598,12 +692,15 @@ class SupersetContextExtractor:
},
)
return discovered
# [/DEF:SupersetContextExtractor.discover_template_variables:Function]
# [DEF:SupersetContextExtractor.build_recovery_summary:Function]
# @COMPLEXITY: 2
# @PURPOSE: Summarize recovered, partial, and unresolved context for session state and UX.
def build_recovery_summary(self, parsed_context: SupersetParsedContext) -> Dict[str, Any]:
def build_recovery_summary(
self, parsed_context: SupersetParsedContext
) -> Dict[str, Any]:
return {
"dataset_ref": parsed_context.dataset_ref,
"dataset_id": parsed_context.dataset_id,
@@ -613,12 +710,15 @@ class SupersetContextExtractor:
"unresolved_references": list(parsed_context.unresolved_references),
"imported_filter_count": len(parsed_context.imported_filters),
}
# [/DEF:SupersetContextExtractor.build_recovery_summary:Function]
# [DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract a numeric identifier from a REST-like Superset URL path.
def _extract_numeric_identifier(self, path_parts: List[str], resource_name: str) -> Optional[int]:
def _extract_numeric_identifier(
self, path_parts: List[str], resource_name: str
) -> Optional[int]:
if resource_name not in path_parts:
return None
try:
@@ -633,6 +733,7 @@ class SupersetContextExtractor:
if not candidate.isdigit():
return None
return int(candidate)
# [/DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
@@ -653,6 +754,7 @@ class SupersetContextExtractor:
if not candidate or candidate == "p":
return None
return candidate
# [/DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
@@ -674,6 +776,7 @@ class SupersetContextExtractor:
if permalink_marker != "p" or not permalink_key:
return None
return permalink_key
# [/DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
@@ -684,6 +787,7 @@ class SupersetContextExtractor:
payload=state,
candidate_keys={"dashboardId", "dashboard_id", "dashboard_id_value"},
)
# [/DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
# [DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
@@ -694,12 +798,16 @@ class SupersetContextExtractor:
payload=state,
candidate_keys={"slice_id", "sliceId", "chartId", "chart_id"},
)
# [/DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
# [DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# @COMPLEXITY: 3
# @PURPOSE: Recursively search nested dict/list payloads for the first numeric value under a candidate key set.
def _search_nested_numeric_key(self, payload: Any, candidate_keys: Set[str]) -> Optional[int]:
# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor.parse_superset_link]
def _search_nested_numeric_key(
self, payload: Any, candidate_keys: Set[str]
) -> Optional[int]:
if isinstance(payload, dict):
for key, value in payload.items():
if key in candidate_keys:
@@ -717,11 +825,13 @@ class SupersetContextExtractor:
if found is not None:
return found
return None
# [/DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# [DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# @COMPLEXITY: 3
# @PURPOSE: Recover a dataset binding from resolved dashboard context while preserving explicit unresolved markers.
# @RELATION: [CALLS] ->[SupersetClient.get_dashboard_detail]
def _recover_dataset_binding_from_dashboard(
self,
dashboard_id: int,
@@ -744,7 +854,10 @@ class SupersetContextExtractor:
"dataset_ref": dataset_ref,
},
)
if len(datasets) > 1 and "multiple_dashboard_datasets" not in unresolved_references:
if (
len(datasets) > 1
and "multiple_dashboard_datasets" not in unresolved_references
):
unresolved_references.append("multiple_dashboard_datasets")
return resolved_dataset, unresolved_references
if "dashboard_dataset_id_missing" not in unresolved_references:
@@ -754,6 +867,7 @@ class SupersetContextExtractor:
if "dashboard_dataset_binding_missing" not in unresolved_references:
unresolved_references.append("dashboard_dataset_binding_missing")
return None, unresolved_references
# [/DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# [DEF:SupersetContextExtractor._decode_query_state:Function]
@@ -777,12 +891,15 @@ class SupersetContextExtractor:
)
query_state[key] = decoded_value
return query_state
# [/DEF:SupersetContextExtractor._decode_query_state:Function]
# [DEF:SupersetContextExtractor._extract_imported_filters:Function]
# @COMPLEXITY: 2
# @PURPOSE: Normalize imported filters from decoded query state without fabricating missing values.
def _extract_imported_filters(self, query_state: Dict[str, Any]) -> List[Dict[str, Any]]:
def _extract_imported_filters(
self, query_state: Dict[str, Any]
) -> List[Dict[str, Any]]:
imported_filters: List[Dict[str, Any]] = []
native_filters_payload = query_state.get("native_filters")
@@ -800,7 +917,8 @@ class SupersetContextExtractor:
if item.get("column") and ("value" in item or "val" in item):
direct_clause = {
"col": item.get("column"),
"op": item.get("op") or ("IN" if isinstance(item.get("value"), list) else "=="),
"op": item.get("op")
or ("IN" if isinstance(item.get("value"), list) else "=="),
"val": item.get("val", item.get("value")),
}
imported_filters.append(
@@ -809,7 +927,9 @@ class SupersetContextExtractor:
"raw_value": item.get("value"),
"display_name": item.get("label") or item.get("name"),
"normalized_value": {
"filter_clauses": [direct_clause] if isinstance(direct_clause, dict) else [],
"filter_clauses": [direct_clause]
if isinstance(direct_clause, dict)
else [],
"extra_form_data": {},
"value_origin": "native_filters",
},
@@ -834,7 +954,9 @@ class SupersetContextExtractor:
raw_value = None
normalized_value = {
"filter_clauses": [],
"extra_form_data": deepcopy(extra_form_data) if isinstance(extra_form_data, dict) else {},
"extra_form_data": deepcopy(extra_form_data)
if isinstance(extra_form_data, dict)
else {},
"value_origin": "unresolved",
}
@@ -868,10 +990,17 @@ class SupersetContextExtractor:
# If still no value, try extraFormData directly for time_range, time_grain, etc.
if raw_value is None and isinstance(extra_form_data, dict):
# Common Superset filter fields
for field in ["time_range", "time_grain_sqla", "time_column", "granularity"]:
for field in [
"time_range",
"time_grain_sqla",
"time_column",
"granularity",
]:
if field in extra_form_data:
raw_value = extra_form_data[field]
normalized_value["value_origin"] = f"extra_form_data.{field}"
normalized_value["value_origin"] = (
f"extra_form_data.{field}"
)
break
imported_filters.append(
@@ -881,7 +1010,9 @@ class SupersetContextExtractor:
"display_name": display_name,
"normalized_value": normalized_value,
"source": "superset_permalink",
"recovery_status": "recovered" if raw_value is not None else "partial",
"recovery_status": "recovered"
if raw_value is not None
else "partial",
"requires_confirmation": raw_value is None,
"notes": "Recovered from Superset dashboard permalink state",
}
@@ -901,7 +1032,9 @@ class SupersetContextExtractor:
raw_value = None
normalized_value = {
"filter_clauses": [],
"extra_form_data": deepcopy(extra_form_data) if isinstance(extra_form_data, dict) else {},
"extra_form_data": deepcopy(extra_form_data)
if isinstance(extra_form_data, dict)
else {},
"value_origin": "unresolved",
}
@@ -935,10 +1068,17 @@ class SupersetContextExtractor:
# If still no value, try extraFormData directly for time_range, time_grain, etc.
if raw_value is None and isinstance(extra_form_data, dict):
# Common Superset filter fields
for field in ["time_range", "time_grain_sqla", "time_column", "granularity"]:
for field in [
"time_range",
"time_grain_sqla",
"time_column",
"granularity",
]:
if field in extra_form_data:
raw_value = extra_form_data[field]
normalized_value["value_origin"] = f"extra_form_data.{field}"
normalized_value["value_origin"] = (
f"extra_form_data.{field}"
)
break
imported_filters.append(
@@ -948,7 +1088,9 @@ class SupersetContextExtractor:
"display_name": display_name,
"normalized_value": normalized_value,
"source": "superset_native_filters_key",
"recovery_status": "recovered" if raw_value is not None else "partial",
"recovery_status": "recovered"
if raw_value is not None
else "partial",
"requires_confirmation": raw_value is None,
"notes": "Recovered from Superset native_filters_key state",
}
@@ -960,7 +1102,9 @@ class SupersetContextExtractor:
for index, item in enumerate(extra_filters):
if not isinstance(item, dict):
continue
filter_name = item.get("col") or item.get("column") or f"extra_filter_{index}"
filter_name = (
item.get("col") or item.get("column") or f"extra_filter_{index}"
)
imported_filters.append(
{
"filter_name": str(filter_name),
@@ -981,6 +1125,7 @@ class SupersetContextExtractor:
)
return imported_filters
# [/DEF:SupersetContextExtractor._extract_imported_filters:Function]
# [DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function]
@@ -996,15 +1141,24 @@ class SupersetContextExtractor:
if "raw_value" not in payload and "value" in payload:
raw_value = payload.get("value")
recovery_status = str(
payload.get("recovery_status")
or ("recovered" if raw_value is not None else "partial")
).strip().lower()
recovery_status = (
str(
payload.get("recovery_status")
or ("recovered" if raw_value is not None else "partial")
)
.strip()
.lower()
)
requires_confirmation = bool(
payload.get("requires_confirmation", raw_value is None or recovery_status != "recovered")
payload.get(
"requires_confirmation",
raw_value is None or recovery_status != "recovered",
)
)
return {
"filter_name": str(payload.get("filter_name") or "unresolved_filter").strip(),
"filter_name": str(
payload.get("filter_name") or "unresolved_filter"
).strip(),
"display_name": payload.get("display_name"),
"raw_value": raw_value,
"normalized_value": payload.get("normalized_value"),
@@ -1014,13 +1168,16 @@ class SupersetContextExtractor:
"recovery_status": recovery_status,
"notes": str(payload.get("notes") or default_note),
}
# [/DEF:SupersetContextExtractor._normalize_imported_filter_payload:Function]
# [DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function]
# @COMPLEXITY: 3
# @PURPOSE: Collect SQL and expression-bearing dataset fields for deterministic template-variable discovery.
# @RELATION: [DEPENDS_ON] ->[SupersetContextExtractor.discover_template_variables]
def _collect_query_bearing_expressions(self, dataset_payload: Dict[str, Any]) -> List[str]:
def _collect_query_bearing_expressions(
self, dataset_payload: Dict[str, Any]
) -> List[str]:
expressions: List[str] = []
def append_expression(candidate: Any) -> None:
@@ -1055,6 +1212,7 @@ class SupersetContextExtractor:
append_expression(column.get("expression"))
return expressions
# [/DEF:SupersetContextExtractor._collect_query_bearing_expressions:Function]
# [DEF:SupersetContextExtractor._append_template_variable:Function]
@@ -1087,6 +1245,7 @@ class SupersetContextExtractor:
"mapping_status": "unmapped",
}
)
# [/DEF:SupersetContextExtractor._append_template_variable:Function]
# [DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function]
@@ -1100,6 +1259,7 @@ class SupersetContextExtractor:
if candidate in {"if", "else", "for", "set", "True", "False", "none", "None"}:
return None
return candidate
# [/DEF:SupersetContextExtractor._extract_primary_jinja_identifier:Function]
# [DEF:SupersetContextExtractor._normalize_default_literal:Function]
@@ -1110,9 +1270,8 @@ class SupersetContextExtractor:
if not normalized_literal:
return None
if (
(normalized_literal.startswith("'") and normalized_literal.endswith("'"))
or (normalized_literal.startswith('"') and normalized_literal.endswith('"'))
):
normalized_literal.startswith("'") and normalized_literal.endswith("'")
) or (normalized_literal.startswith('"') and normalized_literal.endswith('"')):
return normalized_literal[1:-1]
lowered = normalized_literal.lower()
if lowered in {"true", "false"}:
@@ -1126,7 +1285,10 @@ class SupersetContextExtractor:
return float(normalized_literal)
except ValueError:
return normalized_literal
# [/DEF:SupersetContextExtractor._normalize_default_literal:Function]
# [/DEF:SupersetContextExtractor:Class]
# [/DEF:SupersetContextExtractor:Module]
# [/DEF:SupersetContextExtractor:Module]

View File

@@ -1,4 +1,4 @@
# [DEF:backend.src.models.filter_state:Module]
# [DEF:FilterStateModels:Module]
#
# @COMPLEXITY: 2
# @SEMANTICS: superset, native, filters, pydantic, models, dataclasses
@@ -148,4 +148,4 @@ class ExtraFormDataMerge(BaseModel):
# [/DEF:ExtraFormDataMerge:Model]
# [/DEF:backend.src.models.filter_state:Module]
# [/DEF:FilterStateModels:Module]

View File

@@ -22,7 +22,7 @@ from dataclasses import dataclass, field
from datetime import datetime
import hashlib
import json
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, cast
from src.core.config_manager import ConfigManager
from src.core.logger import belief_scope, logger
@@ -72,6 +72,8 @@ from src.services.dataset_review.semantic_resolver import SemanticSourceResolver
from src.services.dataset_review.event_logger import SessionEventPayload
# [/DEF:DatasetReviewOrchestrator.imports:Block]
logger = cast(Any, logger)
# [DEF:StartSessionCommand:Class]
# @COMPLEXITY: 2
@@ -82,6 +84,8 @@ class StartSessionCommand:
environment_id: str
source_kind: str
source_input: str
# [/DEF:StartSessionCommand:Class]
@@ -93,6 +97,8 @@ class StartSessionResult:
session: DatasetReviewSession
parsed_context: Optional[SupersetParsedContext] = None
findings: List[ValidationFinding] = field(default_factory=list)
# [/DEF:StartSessionResult:Class]
@@ -103,6 +109,8 @@ class StartSessionResult:
class PreparePreviewCommand:
user: User
session_id: str
# [/DEF:PreparePreviewCommand:Class]
@@ -114,6 +122,8 @@ class PreparePreviewResult:
session: DatasetReviewSession
preview: CompiledPreview
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:PreparePreviewResult:Class]
@@ -124,6 +134,8 @@ class PreparePreviewResult:
class LaunchDatasetCommand:
user: User
session_id: str
# [/DEF:LaunchDatasetCommand:Class]
@@ -135,6 +147,8 @@ class LaunchDatasetResult:
session: DatasetReviewSession
run_context: DatasetRunContext
blocked_reasons: List[str] = field(default_factory=list)
# [/DEF:LaunchDatasetResult:Class]
@@ -168,6 +182,7 @@ class DatasetReviewOrchestrator:
self.config_manager = config_manager
self.task_manager = task_manager
self.semantic_resolver = semantic_resolver or SemanticSourceResolver()
# [/DEF:DatasetReviewOrchestrator.__init__:Function]
# [DEF:DatasetReviewOrchestrator.start_session:Function]
@@ -188,7 +203,9 @@ class DatasetReviewOrchestrator:
normalized_environment_id = str(command.environment_id or "").strip()
if not normalized_source_input:
logger.explore("Blocked dataset review session start due to empty source input")
logger.explore(
"Blocked dataset review session start due to empty source input"
)
raise ValueError("source_input must be non-empty")
if normalized_source_kind not in {"superset_link", "dataset_selection"}:
@@ -196,7 +213,9 @@ class DatasetReviewOrchestrator:
"Blocked dataset review session start due to unsupported source kind",
extra={"source_kind": normalized_source_kind},
)
raise ValueError("source_kind must be 'superset_link' or 'dataset_selection'")
raise ValueError(
"source_kind must be 'superset_link' or 'dataset_selection'"
)
environment = self.config_manager.get_environment(normalized_environment_id)
if environment is None:
@@ -234,11 +253,15 @@ class DatasetReviewOrchestrator:
if parsed_context.partial_recovery:
readiness_state = ReadinessState.RECOVERY_REQUIRED
recommended_action = RecommendedAction.REVIEW_DOCUMENTATION
findings.extend(self._build_partial_recovery_findings(parsed_context))
findings.extend(
self._build_partial_recovery_findings(parsed_context)
)
else:
readiness_state = ReadinessState.REVIEW_READY
else:
dataset_ref, dataset_id = self._parse_dataset_selection(normalized_source_input)
dataset_ref, dataset_id = self._parse_dataset_selection(
normalized_source_input
)
readiness_state = ReadinessState.REVIEW_READY
current_phase = SessionPhase.REVIEW
@@ -255,17 +278,19 @@ class DatasetReviewOrchestrator:
status=SessionStatus.ACTIVE,
current_phase=current_phase,
)
persisted_session = self.repository.create_session(session)
persisted_session = cast(Any, self.repository.create_session(session))
recovered_filters: List[ImportedFilter] = []
template_variables: List[TemplateVariable] = []
execution_mappings: List[ExecutionMapping] = []
if normalized_source_kind == "superset_link" and parsed_context is not None:
recovered_filters, template_variables, execution_mappings, findings = self._build_recovery_bootstrap(
environment=environment,
session=persisted_session,
parsed_context=parsed_context,
findings=findings,
recovered_filters, template_variables, execution_mappings, findings = (
self._build_recovery_bootstrap(
environment=environment,
session=persisted_session,
parsed_context=parsed_context,
findings=findings,
)
)
profile = self._build_initial_profile(
@@ -286,7 +311,9 @@ class DatasetReviewOrchestrator:
"dataset_ref": persisted_session.dataset_ref,
"dataset_id": persisted_session.dataset_id,
"dashboard_id": persisted_session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery),
"partial_recovery": bool(
parsed_context and parsed_context.partial_recovery
),
},
)
)
@@ -327,7 +354,10 @@ class DatasetReviewOrchestrator:
)
logger.reason(
"Linked recovery task to started dataset review session",
extra={"session_id": persisted_session.session_id, "task_id": active_task_id},
extra={
"session_id": persisted_session.session_id,
"task_id": active_task_id,
},
)
logger.reflect(
@@ -347,6 +377,7 @@ class DatasetReviewOrchestrator:
parsed_context=parsed_context,
findings=findings,
)
# [/DEF:DatasetReviewOrchestrator.start_session:Function]
# [DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
@@ -357,13 +388,20 @@ class DatasetReviewOrchestrator:
# @POST: returns preview artifact in pending, ready, failed, or stale state.
# @SIDE_EFFECT: persists preview attempt and upstream compilation diagnostics.
# @DATA_CONTRACT: Input[PreparePreviewCommand] -> Output[PreparePreviewResult]
def prepare_launch_preview(self, command: PreparePreviewCommand) -> PreparePreviewResult:
def prepare_launch_preview(
self, command: PreparePreviewCommand
) -> PreparePreviewResult:
with belief_scope("DatasetReviewOrchestrator.prepare_launch_preview"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
session = self.repository.load_session_detail(
command.session_id, command.user.id
)
if session is None or session.user_id != command.user.id:
logger.explore(
"Preview preparation rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
extra={
"session_id": command.session_id,
"user_id": command.user.id,
},
)
raise ValueError("Session not found")
@@ -451,6 +489,7 @@ class DatasetReviewOrchestrator:
preview=persisted_preview,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.prepare_launch_preview:Function]
# [DEF:DatasetReviewOrchestrator.launch_dataset:Function]
@@ -464,11 +503,16 @@ class DatasetReviewOrchestrator:
# @INVARIANT: launch remains blocked unless blocking findings are closed, approvals are satisfied, and the latest Superset preview fingerprint matches current execution inputs.
def launch_dataset(self, command: LaunchDatasetCommand) -> LaunchDatasetResult:
with belief_scope("DatasetReviewOrchestrator.launch_dataset"):
session = self.repository.load_session_detail(command.session_id, command.user.id)
session = self.repository.load_session_detail(
command.session_id, command.user.id
)
if session is None or session.user_id != command.user.id:
logger.explore(
"Launch rejected because owned session was not found",
extra={"session_id": command.session_id, "user_id": command.user.id},
extra={
"session_id": command.session_id,
"user_id": command.user.id,
},
)
raise ValueError("Session not found")
@@ -579,6 +623,7 @@ class DatasetReviewOrchestrator:
run_context=persisted_run_context,
blocked_reasons=[],
)
# [/DEF:DatasetReviewOrchestrator.launch_dataset:Function]
# [DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
@@ -601,6 +646,7 @@ class DatasetReviewOrchestrator:
return normalized, None
return normalized, None
# [/DEF:DatasetReviewOrchestrator._parse_dataset_selection:Function]
# [DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
@@ -613,7 +659,9 @@ class DatasetReviewOrchestrator:
parsed_context: Optional[SupersetParsedContext],
dataset_ref: str,
) -> DatasetProfile:
dataset_name = dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset"
dataset_name = (
dataset_ref.split(".")[-1] if dataset_ref else "Unresolved dataset"
)
business_summary = (
f"Review session initialized for {dataset_ref}."
if dataset_ref
@@ -636,9 +684,12 @@ class DatasetReviewOrchestrator:
completeness_score=0.25,
confidence_state=confidence_state,
has_blocking_findings=False,
has_warning_findings=bool(parsed_context and parsed_context.partial_recovery),
has_warning_findings=bool(
parsed_context and parsed_context.partial_recovery
),
manual_summary_locked=False,
)
# [/DEF:DatasetReviewOrchestrator._build_initial_profile:Function]
# [DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
@@ -670,36 +721,57 @@ class DatasetReviewOrchestrator:
)
)
return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# @COMPLEXITY: 4
# @PURPOSE: Recover and materialize initial imported filters, template variables, and draft execution mappings after session creation.
# @RELATION: [CALLS] ->[SupersetContextExtractor.recover_imported_filters]
# @RELATION: [CALLS] ->[SupersetContextExtractor.discover_template_variables]
# @PRE: session belongs to the just-created review aggregate and parsed_context was produced for the same environment scope.
# @POST: Returns bootstrap imported filters, template variables, execution mappings, and updated findings without persisting them directly.
# @SIDE_EFFECT: Performs Superset reads through the extractor and may append warning findings for incomplete recovery.
# @DATA_CONTRACT: Input[Environment, DatasetReviewSession, SupersetParsedContext, List[ValidationFinding]] -> Output[Tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]]
def _build_recovery_bootstrap(
self,
environment,
session: DatasetReviewSession,
parsed_context: SupersetParsedContext,
findings: List[ValidationFinding],
) -> tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]:
) -> tuple[
List[ImportedFilter],
List[TemplateVariable],
List[ExecutionMapping],
List[ValidationFinding],
]:
session_record = cast(Any, session)
extractor = SupersetContextExtractor(environment)
imported_filters_payload = extractor.recover_imported_filters(parsed_context)
if imported_filters_payload is None:
imported_filters_payload = []
imported_filters = [
ImportedFilter(
session_id=session.session_id,
session_id=session_record.session_id,
filter_name=str(item.get("filter_name") or f"imported_filter_{index}"),
display_name=item.get("display_name"),
raw_value=item.get("raw_value"),
normalized_value=item.get("normalized_value"),
source=FilterSource(str(item.get("source") or FilterSource.SUPERSET_URL.value)),
source=FilterSource(
str(item.get("source") or FilterSource.SUPERSET_URL.value)
),
confidence_state=FilterConfidenceState(
str(item.get("confidence_state") or FilterConfidenceState.UNRESOLVED.value)
str(
item.get("confidence_state")
or FilterConfidenceState.UNRESOLVED.value
)
),
requires_confirmation=bool(item.get("requires_confirmation", False)),
recovery_status=FilterRecoveryStatus(
str(item.get("recovery_status") or FilterRecoveryStatus.PARTIAL.value)
str(
item.get("recovery_status")
or FilterRecoveryStatus.PARTIAL.value
)
),
notes=item.get("notes"),
)
@@ -711,25 +783,44 @@ class DatasetReviewOrchestrator:
if session.dataset_id is not None:
try:
dataset_payload = extractor.client.get_dataset_detail(session.dataset_id)
discovered_variables = extractor.discover_template_variables(dataset_payload)
dataset_payload = extractor.client.get_dataset_detail(
session_record.dataset_id
)
discovered_variables = extractor.discover_template_variables(
dataset_payload
)
template_variables = [
TemplateVariable(
session_id=session.session_id,
variable_name=str(item.get("variable_name") or f"variable_{index}"),
session_id=session_record.session_id,
variable_name=str(
item.get("variable_name") or f"variable_{index}"
),
expression_source=str(item.get("expression_source") or ""),
variable_kind=VariableKind(str(item.get("variable_kind") or VariableKind.UNKNOWN.value)),
variable_kind=VariableKind(
str(item.get("variable_kind") or VariableKind.UNKNOWN.value)
),
is_required=bool(item.get("is_required", True)),
default_value=item.get("default_value"),
mapping_status=MappingStatus(str(item.get("mapping_status") or MappingStatus.UNMAPPED.value)),
mapping_status=MappingStatus(
str(
item.get("mapping_status")
or MappingStatus.UNMAPPED.value
)
),
)
for index, item in enumerate(discovered_variables)
]
except Exception as exc:
if "dataset_template_variable_discovery_failed" not in parsed_context.unresolved_references:
parsed_context.unresolved_references.append("dataset_template_variable_discovery_failed")
if (
"dataset_template_variable_discovery_failed"
not in parsed_context.unresolved_references
):
parsed_context.unresolved_references.append(
"dataset_template_variable_discovery_failed"
)
if not any(
finding.caused_by_ref == "dataset_template_variable_discovery_failed"
finding.caused_by_ref
== "dataset_template_variable_discovery_failed"
for finding in findings
):
findings.append(
@@ -745,7 +836,11 @@ class DatasetReviewOrchestrator:
)
logger.explore(
"Template variable discovery failed during session bootstrap",
extra={"session_id": session.session_id, "dataset_id": session.dataset_id, "error": str(exc)},
extra={
"session_id": session_record.session_id,
"dataset_id": session_record.dataset_id,
"error": str(exc),
},
)
filter_lookup = {
@@ -754,7 +849,9 @@ class DatasetReviewOrchestrator:
if str(imported_filter.filter_name or "").strip()
}
for template_variable in template_variables:
matched_filter = filter_lookup.get(str(template_variable.variable_name or "").strip().lower())
matched_filter = filter_lookup.get(
str(template_variable.variable_name or "").strip().lower()
)
if matched_filter is None:
continue
requires_explicit_approval = bool(
@@ -763,22 +860,27 @@ class DatasetReviewOrchestrator:
)
execution_mappings.append(
ExecutionMapping(
session_id=session.session_id,
session_id=session_record.session_id,
filter_id=matched_filter.filter_id,
variable_id=template_variable.variable_id,
mapping_method=MappingMethod.DIRECT_MATCH,
raw_input_value=matched_filter.raw_value,
effective_value=matched_filter.normalized_value if matched_filter.normalized_value is not None else matched_filter.raw_value,
effective_value=matched_filter.normalized_value
if matched_filter.normalized_value is not None
else matched_filter.raw_value,
transformation_note="Bootstrapped from Superset recovery context",
warning_level=None if not requires_explicit_approval else None,
requires_explicit_approval=requires_explicit_approval,
approval_state=ApprovalState.PENDING if requires_explicit_approval else ApprovalState.NOT_REQUIRED,
approval_state=ApprovalState.PENDING
if requires_explicit_approval
else ApprovalState.NOT_REQUIRED,
approved_by_user_id=None,
approved_at=None,
)
)
return imported_filters, template_variables, execution_mappings, findings
# [/DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
@@ -789,9 +891,16 @@ class DatasetReviewOrchestrator:
# @POST: returns deterministic execution snapshot for current session state without mutating persistence.
# @SIDE_EFFECT: none.
# @DATA_CONTRACT: Input[DatasetReviewSession] -> Output[Dict[str,Any]]
def _build_execution_snapshot(self, session: DatasetReviewSession) -> Dict[str, Any]:
filter_lookup = {item.filter_id: item for item in session.imported_filters}
variable_lookup = {item.variable_id: item for item in session.template_variables}
def _build_execution_snapshot(
self, session: DatasetReviewSession
) -> Dict[str, Any]:
session_record = cast(Any, session)
filter_lookup = {
item.filter_id: item for item in session_record.imported_filters
}
variable_lookup = {
item.variable_id: item for item in session_record.template_variables
}
effective_filters: List[Dict[str, Any]] = []
template_params: Dict[str, Any] = {}
@@ -800,14 +909,16 @@ class DatasetReviewOrchestrator:
preview_blockers: List[str] = []
mapped_filter_ids: set[str] = set()
for mapping in session.execution_mappings:
for mapping in session_record.execution_mappings:
imported_filter = filter_lookup.get(mapping.filter_id)
template_variable = variable_lookup.get(mapping.variable_id)
if imported_filter is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_filter")
continue
if template_variable is None:
preview_blockers.append(f"mapping:{mapping.mapping_id}:missing_variable")
preview_blockers.append(
f"mapping:{mapping.mapping_id}:missing_variable"
)
continue
effective_value = mapping.effective_value
@@ -819,7 +930,9 @@ class DatasetReviewOrchestrator:
effective_value = template_variable.default_value
if effective_value is None and template_variable.is_required:
preview_blockers.append(f"variable:{template_variable.variable_name}:missing_required_value")
preview_blockers.append(
f"variable:{template_variable.variable_name}:missing_required_value"
)
continue
mapped_filter_ids.add(imported_filter.filter_id)
@@ -840,10 +953,13 @@ class DatasetReviewOrchestrator:
template_params[template_variable.variable_name] = effective_value
if mapping.approval_state == ApprovalState.APPROVED:
approved_mapping_ids.append(mapping.mapping_id)
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
if (
mapping.requires_explicit_approval
and mapping.approval_state != ApprovalState.APPROVED
):
open_warning_refs.append(mapping.mapping_id)
for imported_filter in session.imported_filters:
for imported_filter in session_record.imported_filters:
if imported_filter.filter_id in mapped_filter_ids:
continue
effective_value = imported_filter.normalized_value
@@ -862,8 +978,10 @@ class DatasetReviewOrchestrator:
}
)
mapped_variable_ids = {mapping.variable_id for mapping in session.execution_mappings}
for variable in session.template_variables:
mapped_variable_ids = {
mapping.variable_id for mapping in session_record.execution_mappings
}
for variable in session_record.template_variables:
if variable.variable_id in mapped_variable_ids:
continue
if variable.default_value is not None:
@@ -875,11 +993,13 @@ class DatasetReviewOrchestrator:
semantic_decision_refs = [
field.field_id
for field in session.semantic_fields
if field.is_locked or not field.needs_review or field.provenance.value != "unresolved"
if field.is_locked
or not field.needs_review
or field.provenance.value != "unresolved"
]
preview_fingerprint = self._compute_preview_fingerprint(
{
"dataset_id": session.dataset_id,
"dataset_id": session_record.dataset_id,
"template_params": template_params,
"effective_filters": effective_filters,
}
@@ -893,6 +1013,7 @@ class DatasetReviewOrchestrator:
"preview_blockers": sorted(set(preview_blockers)),
"preview_fingerprint": preview_fingerprint,
}
# [/DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# [DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
@@ -909,16 +1030,21 @@ class DatasetReviewOrchestrator:
execution_snapshot: Dict[str, Any],
preview: Optional[CompiledPreview],
) -> List[str]:
session_record = cast(Any, session)
blockers = list(execution_snapshot["preview_blockers"])
for finding in session.findings:
for finding in session_record.findings:
if (
finding.severity == FindingSeverity.BLOCKING
and finding.resolution_state not in {ResolutionState.RESOLVED, ResolutionState.APPROVED}
and finding.resolution_state
not in {ResolutionState.RESOLVED, ResolutionState.APPROVED}
):
blockers.append(f"finding:{finding.code}:blocking")
for mapping in session.execution_mappings:
if mapping.requires_explicit_approval and mapping.approval_state != ApprovalState.APPROVED:
for mapping in session_record.execution_mappings:
if (
mapping.requires_explicit_approval
and mapping.approval_state != ApprovalState.APPROVED
):
blockers.append(f"mapping:{mapping.mapping_id}:approval_required")
if preview is None:
@@ -930,23 +1056,28 @@ class DatasetReviewOrchestrator:
blockers.append("preview:fingerprint_mismatch")
return sorted(set(blockers))
# [/DEF:DatasetReviewOrchestrator._build_launch_blockers:Function]
# [DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve the current latest preview snapshot for one session aggregate.
def _get_latest_preview(self, session: DatasetReviewSession) -> Optional[CompiledPreview]:
if not session.previews:
def _get_latest_preview(
self, session: DatasetReviewSession
) -> Optional[CompiledPreview]:
session_record = cast(Any, session)
if not session_record.previews:
return None
if session.last_preview_id:
for preview in session.previews:
if preview.preview_id == session.last_preview_id:
if session_record.last_preview_id:
for preview in session_record.previews:
if preview.preview_id == session_record.last_preview_id:
return preview
return sorted(
session.previews,
session_record.previews,
key=lambda item: (item.created_at or datetime.min, item.preview_id),
reverse=True,
)[0]
# [/DEF:DatasetReviewOrchestrator._get_latest_preview:Function]
# [DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
@@ -955,6 +1086,7 @@ class DatasetReviewOrchestrator:
def _compute_preview_fingerprint(self, payload: Dict[str, Any]) -> str:
serialized = json.dumps(payload, sort_keys=True, default=str)
return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
# [/DEF:DatasetReviewOrchestrator._compute_preview_fingerprint:Function]
# [DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
@@ -971,28 +1103,33 @@ class DatasetReviewOrchestrator:
session: DatasetReviewSession,
parsed_context: Optional[SupersetParsedContext],
) -> Optional[str]:
session_record = cast(Any, session)
if self.task_manager is None:
logger.reason(
"Dataset review session started without task manager; continuing synchronously",
extra={"session_id": session.session_id},
extra={"session_id": session_record.session_id},
)
return None
task_params: Dict[str, Any] = {
"session_id": session.session_id,
"session_id": session_record.session_id,
"user_id": command.user.id,
"environment_id": session.environment_id,
"source_kind": session.source_kind,
"source_input": session.source_input,
"dataset_ref": session.dataset_ref,
"dataset_id": session.dataset_id,
"dashboard_id": session.dashboard_id,
"partial_recovery": bool(parsed_context and parsed_context.partial_recovery),
"environment_id": session_record.environment_id,
"source_kind": session_record.source_kind,
"source_input": session_record.source_input,
"dataset_ref": session_record.dataset_ref,
"dataset_id": session_record.dataset_id,
"dashboard_id": session_record.dashboard_id,
"partial_recovery": bool(
parsed_context and parsed_context.partial_recovery
),
}
create_task = getattr(self.task_manager, "create_task", None)
if create_task is None:
logger.explore("Task manager has no create_task method; skipping recovery enqueue")
logger.explore(
"Task manager has no create_task method; skipping recovery enqueue"
)
return None
try:
@@ -1003,13 +1140,16 @@ class DatasetReviewOrchestrator:
except TypeError:
logger.explore(
"Recovery task enqueue skipped because task manager create_task contract is incompatible",
extra={"session_id": session.session_id},
extra={"session_id": session_record.session_id},
)
return None
task_id = getattr(task_object, "id", None)
return str(task_id) if task_id else None
# [/DEF:DatasetReviewOrchestrator._enqueue_recovery_task:Function]
# [/DEF:DatasetReviewOrchestrator:Class]
# [/DEF:DatasetReviewOrchestrator:Module]