This commit is contained in:
2026-03-18 08:45:15 +03:00
parent 3094a2b58b
commit 6d64124e88
17 changed files with 1563 additions and 31159 deletions

View File

@@ -331,6 +331,7 @@ def test_parse_superset_link_dashboard_partial_recovery():
)
fake_client = MagicMock()
fake_client.get_dashboard_detail.return_value = {
"id": 10,
"datasets": [{"id": 42}, {"id": 77}],
}
fake_client.get_dataset_detail.return_value = {
@@ -352,6 +353,123 @@ def test_parse_superset_link_dashboard_partial_recovery():
# [/DEF:test_parse_superset_link_dashboard_partial_recovery:Function]
# [DEF:test_parse_superset_link_dashboard_slug_recovery:Function]
# @PURPOSE: Verify dashboard slug links resolve through dashboard detail endpoints and recover dataset context.
def test_parse_superset_link_dashboard_slug_recovery():
    # Minimal environment fixture; credentials are never used because the client is mocked.
    dev_env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    superset_api = MagicMock()
    superset_api.get_dashboard_detail.return_value = {"id": 15, "datasets": [{"id": 42}]}
    superset_api.get_dataset_detail.return_value = {"schema": "public", "table_name": "sales"}
    # Slug-based URL: the extractor must resolve "slack" via the dashboard detail endpoint.
    parsed = SupersetContextExtractor(environment=dev_env, client=superset_api).parse_superset_link(
        "https://ss-dev.bebesh.ru/superset/dashboard/slack/?native_filters_key=8ZLV4M-UXOM"
    )
    assert parsed.dataset_id == 42
    assert parsed.dashboard_id == 15
    assert parsed.dataset_ref == "public.sales"
    assert parsed.partial_recovery is False
    assert parsed.query_state["native_filters_key"] == "8ZLV4M-UXOM"
    superset_api.get_dashboard_detail.assert_called_once_with("slack")
# [/DEF:test_parse_superset_link_dashboard_slug_recovery:Function]
# [DEF:test_parse_superset_link_dashboard_permalink_partial_recovery:Function]
# @PURPOSE: Verify dashboard permalink links no longer fail parsing and preserve permalink filter state for partial recovery.
def test_parse_superset_link_dashboard_permalink_partial_recovery():
    env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    fake_client = MagicMock()
    # Permalink state carries only the dataMask; no dashboard id is recoverable here,
    # so the extractor must fall back to partial recovery.
    fake_client.get_dashboard_permalink_state.return_value = {
        "state": {
            "dataMask": {
                "NATIVE_FILTER-1": {
                    "id": "country",
                    "filterState": {
                        "label": "Country",
                        "value": ["DE"],
                    },
                    "extraFormData": {
                        "filters": [{"col": "country", "op": "IN", "val": ["DE"]}],
                    },
                }
            }
        }
    }
    extractor = SupersetContextExtractor(environment=env, client=fake_client)
    result = extractor.parse_superset_link(
        "http://ss-dev.bebesh.ru/superset/dashboard/p/QabXy6wG30Z/"
    )
    assert result.resource_type == "dashboard"
    assert result.dataset_id is None
    assert result.dashboard_id is None
    assert result.dataset_ref == "dashboard_permalink:QabXy6wG30Z"
    assert result.partial_recovery is True
    assert "dashboard_permalink_dataset_binding_unresolved" in result.unresolved_references
    assert result.imported_filters[0]["filter_name"] == "country"
    assert result.imported_filters[0]["raw_value"] == ["DE"]
    fake_client.get_dashboard_permalink_state.assert_called_once_with("QabXy6wG30Z")
# [/DEF:test_parse_superset_link_dashboard_permalink_partial_recovery:Function]
# [DEF:test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state:Function]
# @PURPOSE: Verify permalink state with nested dashboard id recovers dataset binding and keeps imported filters.
def test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state():
    env = Environment(
        id="env-1",
        name="DEV",
        url="http://superset.local",
        username="demo",
        password="secret",
    )
    fake_client = MagicMock()
    # Unlike the partial-recovery case above, form_data.dashboardId lets the
    # extractor chase the dashboard detail endpoint and bind a dataset.
    fake_client.get_dashboard_permalink_state.return_value = {
        "state": {
            "form_data": {"dashboardId": 22},
            "dataMask": {
                "NATIVE_FILTER-1": {
                    "id": "country",
                    "filterState": {"label": "Country", "value": ["DE"]},
                }
            },
        }
    }
    fake_client.get_dashboard_detail.return_value = {"id": 22, "datasets": [{"id": 42}]}
    fake_client.get_dataset_detail.return_value = {"table_name": "sales", "schema": "public"}
    extractor = SupersetContextExtractor(environment=env, client=fake_client)
    result = extractor.parse_superset_link(
        "http://ss-dev.bebesh.ru/superset/dashboard/p/QabXy6wG30Z/"
    )
    assert result.dashboard_id == 22
    assert result.dataset_id == 42
    assert result.dataset_ref == "public.sales"
    assert "dashboard_permalink_dataset_binding_unresolved" not in result.unresolved_references
    assert result.imported_filters[0]["filter_name"] == "country"
# [/DEF:test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state:Function]
# [DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
# @PURPOSE: Verify trusted dictionary exact matches outrank fuzzy candidates and unresolved fields stay explicit.
def test_resolve_from_dictionary_prefers_exact_match():
@@ -400,6 +518,7 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap
repository.create_session.return_value = created_session
repository.save_profile_and_findings.return_value = created_session
repository.save_recovery_state.return_value = created_session
repository.db = MagicMock()
orchestrator = DatasetReviewOrchestrator(
@@ -415,11 +534,23 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap
chart_id=None,
partial_recovery=True,
unresolved_references=["dashboard_dataset_binding_missing"],
imported_filters=[],
)
fake_extractor = MagicMock()
fake_extractor.parse_superset_link.return_value = parsed_context
fake_extractor.recover_imported_filters.return_value = []
fake_extractor.client.get_dataset_detail.return_value = {
"id": 42,
"sql": "",
"columns": [],
"metrics": [],
}
fake_extractor.discover_template_variables.return_value = []
with patch(
"src.services.dataset_review.orchestrator.SupersetContextExtractor.parse_superset_link",
return_value=parsed_context,
"src.services.dataset_review.orchestrator.SupersetContextExtractor",
side_effect=[fake_extractor, fake_extractor],
):
result = orchestrator.start_session(
StartSessionCommand(
@@ -438,6 +569,94 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap
# [/DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]
# [DEF:test_orchestrator_start_session_bootstraps_recovery_state:Function]
# @PURPOSE: Verify session start persists recovered filters, template variables, and initial execution mappings for review workspace bootstrap.
def test_orchestrator_start_session_bootstraps_recovery_state(dataset_review_api_dependencies):
    repo = MagicMock()
    session_record = _make_session()
    session_record.readiness_state = ReadinessState.RECOVERY_REQUIRED
    session_record.current_phase = SessionPhase.RECOVERY
    repo.create_session.return_value = session_record
    repo.save_profile_and_findings.return_value = session_record
    repo.save_recovery_state.return_value = session_record
    repo.db = MagicMock()
    orchestrator = DatasetReviewOrchestrator(
        repository=repo,
        config_manager=dataset_review_api_dependencies["config_manager"],
        task_manager=None,
    )
    # Parsed link context: partial recovery with a single imported filter.
    context = SimpleNamespace(
        dataset_ref="public.sales",
        dataset_id=42,
        dashboard_id=10,
        chart_id=None,
        partial_recovery=True,
        unresolved_references=["dashboard_dataset_binding_missing"],
        imported_filters=[{"filter_name": "country", "raw_value": ["DE"]}],
    )
    extractor_stub = MagicMock()
    extractor_stub.parse_superset_link.return_value = context
    extractor_stub.recover_imported_filters.return_value = [
        {
            "filter_name": "country",
            "display_name": "Country",
            "raw_value": ["DE"],
            "normalized_value": ["DE"],
            "source": "superset_url",
            "confidence_state": "imported",
            "requires_confirmation": False,
            "recovery_status": "recovered",
            "notes": "Recovered from permalink state",
        }
    ]
    extractor_stub.client.get_dataset_detail.return_value = {
        "id": 42,
        "sql": "select * from sales where country in {{ filter_values('country') }}",
        "columns": [],
        "metrics": [],
    }
    extractor_stub.discover_template_variables.return_value = [
        {
            "variable_name": "country",
            "expression_source": "{{ filter_values('country') }}",
            "variable_kind": "native_filter",
            "is_required": True,
            "default_value": None,
            "mapping_status": "unmapped",
        }
    ]
    # The orchestrator constructs the extractor twice, hence two side effects.
    with patch(
        "src.services.dataset_review.orchestrator.SupersetContextExtractor",
        side_effect=[extractor_stub, extractor_stub],
    ):
        outcome = orchestrator.start_session(
            StartSessionCommand(
                user=dataset_review_api_dependencies["user"],
                environment_id="env-1",
                source_kind="superset_link",
                source_input="http://superset.local/dashboard/10",
            )
        )
    assert outcome.session.readiness_state == ReadinessState.RECOVERY_REQUIRED
    repo.save_recovery_state.assert_called_once()
    recovery_args = repo.save_recovery_state.call_args.args
    saved_filters = recovery_args[2]
    saved_variables = recovery_args[3]
    saved_mappings = recovery_args[4]
    assert len(saved_filters) == 1
    assert saved_filters[0].filter_name == "country"
    assert len(saved_variables) == 1
    assert saved_variables[0].variable_name == "country"
    assert len(saved_mappings) == 1
    assert saved_mappings[0].raw_input_value == ["DE"]
# [/DEF:test_orchestrator_start_session_bootstraps_recovery_state:Function]
# [DEF:test_start_session_endpoint_returns_created_summary:Function]
# @PURPOSE: Verify POST session lifecycle endpoint returns a persisted ownership-scoped summary.
def test_start_session_endpoint_returns_created_summary(dataset_review_api_dependencies):
@@ -828,6 +1047,54 @@ def test_us3_mapping_patch_approval_preview_and_launch_endpoints(dataset_review_
# [/DEF:test_us3_mapping_patch_approval_preview_and_launch_endpoints:Function]
# [DEF:test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift:Function]
# @PURPOSE: Preview endpoint should preserve API contract and surface generic upstream preview failures without fabricating dashboard-not-found semantics for non-dashboard 404s.
def test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift(
    dataset_review_api_dependencies,
):
    us3_session = _make_us3_session()
    repo = MagicMock()
    repo.load_session_detail.return_value = us3_session
    repo.db = MagicMock()
    repo.event_logger = MagicMock(spec=SessionEventLogger)
    # A failed preview whose upstream error was a 404 on /chart/data — a
    # non-dashboard endpoint that must NOT be rewritten as "Dashboard not found".
    preview_stub = SimpleNamespace(
        preview_id="preview-failed",
        session_id="sess-1",
        preview_status=PreviewStatus.FAILED,
        compiled_sql=None,
        preview_fingerprint="fingerprint-failed",
        compiled_by="superset",
        error_code="superset_preview_failed",
        error_details="RuntimeError: [API_FAILURE] API resource not found at endpoint '/chart/data' | Context: {'status_code': 404, 'endpoint': '/chart/data', 'subtype': 'not_found'}",
        compiled_at=None,
        created_at=datetime.now(timezone.utc),
    )
    orchestrator_stub = MagicMock()
    orchestrator_stub.prepare_launch_preview.return_value = PreparePreviewResult(
        session=us3_session,
        preview=preview_stub,
        blocked_reasons=[],
    )
    app.dependency_overrides[_get_repository] = lambda: repo
    app.dependency_overrides[_get_orchestrator] = lambda: orchestrator_stub
    resp = client.post("/api/dataset-orchestration/sessions/sess-1/preview")
    assert resp.status_code == 200
    body = resp.json()
    assert body["preview_id"] == "preview-failed"
    assert body["preview_status"] == "failed"
    assert body["compiled_sql"] is None
    assert body["compiled_by"] == "superset"
    assert body["error_code"] == "superset_preview_failed"
    assert "/chart/data" in body["error_details"]
    assert "API resource not found" in body["error_details"]
    assert "Dashboard not found" not in body["error_details"]
# [/DEF:test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift:Function]
# [DEF:test_us3_launch_endpoint_requires_launch_permission:Function]
# @PURPOSE: Launch endpoint should enforce the contract RBAC permission instead of the generic session-manage permission.
def test_us3_launch_endpoint_requires_launch_permission(dataset_review_api_dependencies):

View File

@@ -5,8 +5,10 @@
# @LAYER: Domain
# @RELATION: VERIFIES -> ConfigManager
from types import SimpleNamespace
from src.core.config_manager import ConfigManager
from src.core.config_models import AppConfig, GlobalSettings
from src.core.config_models import AppConfig, Environment, GlobalSettings
# [DEF:test_get_payload_preserves_legacy_sections:Function]
@@ -48,6 +50,115 @@ def test_save_config_accepts_raw_payload_and_keeps_extras(monkeypatch):
assert manager.raw_payload["notifications"]["telegram"]["bot_token"] == "secret"
assert manager.config.settings.migration_sync_cron == "0 2 * * *"
assert persisted["payload"]["notifications"]["telegram"]["bot_token"] == "secret"
# [/DEF:test_save_config_accepts_raw_payload_and_keeps_extras:Function]
# [DEF:test_save_config_syncs_environment_records_for_fk_backed_flows:Function]
# @PURPOSE: Ensure saving config mirrors typed environments into relational records required by FK-backed session persistence.
def test_save_config_syncs_environment_records_for_fk_backed_flows():
    manager = ConfigManager.__new__(ConfigManager)
    manager.raw_payload = {}
    manager.config = AppConfig(environments=[], settings=GlobalSettings())
    created_rows = []
    removed_rows = []
    # A persisted row whose id is absent from typed config — must be deleted.
    legacy_row = SimpleNamespace(
        id="legacy-env",
        name="Legacy",
        url="http://legacy.local",
        credentials_id="legacy-user",
    )

    class _QueryStub:
        def all(self):
            return [legacy_row]

    class _SessionStub:
        def query(self, model):
            return _QueryStub()

        def add(self, value):
            created_rows.append(value)

        def delete(self, value):
            removed_rows.append(value)

    typed_config = AppConfig(
        environments=[
            Environment(
                id="dev",
                name="DEV",
                url="http://superset.local",
                username="demo",
                password="secret",
            )
        ],
        settings=GlobalSettings(),
    )
    manager._sync_environment_records(_SessionStub(), typed_config)
    assert len(created_rows) == 1
    created = created_rows[0]
    assert created.id == "dev"
    assert created.name == "DEV"
    assert created.url == "http://superset.local"
    assert created.credentials_id == "demo"
    assert removed_rows == [legacy_row]
# [/DEF:test_save_config_syncs_environment_records_for_fk_backed_flows:Function]
# [DEF:test_load_config_syncs_environment_records_from_existing_db_payload:Function]
# @PURPOSE: Ensure loading an existing DB-backed config also mirrors environment rows required by FK-backed runtime flows.
def test_load_config_syncs_environment_records_from_existing_db_payload(monkeypatch):
    manager = ConfigManager.__new__(ConfigManager)
    manager.config_path = None
    manager.raw_payload = {}
    manager.config = AppConfig(environments=[], settings=GlobalSettings())
    mirror_calls = []
    lifecycle = {"committed": False, "closed": False}

    class _SessionStub:
        def commit(self):
            lifecycle["committed"] = True

        def close(self):
            lifecycle["closed"] = True

    db_session = _SessionStub()
    db_record = SimpleNamespace(
        id="global",
        payload={
            "environments": [
                {
                    "id": "dev",
                    "name": "DEV",
                    "url": "http://superset.local",
                    "username": "demo",
                    "password": "secret",
                }
            ],
            "settings": GlobalSettings().model_dump(),
        },
    )
    monkeypatch.setattr("src.core.config_manager.SessionLocal", lambda: db_session)
    monkeypatch.setattr(manager, "_get_record", lambda session: db_record)
    monkeypatch.setattr(
        manager,
        "_sync_environment_records",
        lambda session, config: mirror_calls.append((session, config)),
    )
    loaded = manager._load_config()
    assert loaded.environments[0].id == "dev"
    assert len(mirror_calls) == 1
    mirrored_session, mirrored_config = mirror_calls[0]
    assert mirrored_session is db_session
    assert mirrored_config.environments[0].id == "dev"
    assert lifecycle["committed"] is True
    assert lifecycle["closed"] is True
# [/DEF:test_load_config_syncs_environment_records_from_existing_db_payload:Function]
# [/DEF:backend.src.core.__tests__.test_config_manager_compat:Module]

View File

@@ -0,0 +1,196 @@
# [DEF:SupersetPreviewPipelineTests:Module]
# @COMPLEXITY: 3
# @SEMANTICS: tests, superset, preview, chart_data, network, 404-mapping
# @PURPOSE: Verify explicit chart-data preview compilation and ensure non-dashboard 404 errors remain generic across sync and async clients.
# @LAYER: Domain
# @RELATION: [BINDS_TO] ->[SupersetClient]
# @RELATION: [BINDS_TO] ->[APIClient]
# @RELATION: [BINDS_TO] ->[AsyncAPIClient]
import json
from unittest.mock import MagicMock
import httpx
import pytest
import requests
from src.core.config_models import Environment
from src.core.superset_client import SupersetClient
from src.core.utils.async_network import AsyncAPIClient
from src.core.utils.network import APIClient, DashboardNotFoundError, SupersetAPIError
# [DEF:_make_environment:Function]
# Shared fixture: a minimal DEV environment for the preview-pipeline tests.
def _make_environment() -> Environment:
    env_kwargs = {
        "id": "env-1",
        "name": "DEV",
        "url": "http://superset.local",
        "username": "demo",
        "password": "secret",
    }
    return Environment(**env_kwargs)
# [/DEF:_make_environment:Function]
# [DEF:_make_requests_http_error:Function]
# Build a requests HTTPError carrying a prepared request and a JSON-bodied response.
def _make_requests_http_error(status_code: int, url: str) -> requests.exceptions.HTTPError:
    prepared = requests.Request("GET", url).prepare()
    response = requests.Response()
    response.status_code = status_code
    response.url = url
    response._content = b'{"message":"not found"}'
    response.request = prepared
    return requests.exceptions.HTTPError(response=response, request=prepared)
# [/DEF:_make_requests_http_error:Function]
# [DEF:_make_httpx_status_error:Function]
# Build an httpx HTTPStatusError with a JSON body for the given status and URL.
def _make_httpx_status_error(status_code: int, url: str) -> httpx.HTTPStatusError:
    req = httpx.Request("GET", url)
    resp = httpx.Response(status_code=status_code, request=req, text='{"message":"not found"}')
    return httpx.HTTPStatusError("upstream error", request=req, response=resp)
# [/DEF:_make_httpx_status_error:Function]
# [DEF:test_compile_dataset_preview_uses_chart_data_and_result_query_sql:Function]
# @PURPOSE: Superset preview compilation should call the real chart-data endpoint and extract SQL from result[].query.
def test_compile_dataset_preview_uses_chart_data_and_result_query_sql():
    superset = SupersetClient(_make_environment())
    superset.get_dataset = MagicMock(
        return_value={
            "result": {
                "id": 42,
                "schema": "public",
                "datasource": {"id": 42, "type": "table"},
                "result_format": "json",
                "result_type": "full",
            }
        }
    )
    superset.network = MagicMock()
    superset.network.request.return_value = {
        "result": [
            {"query": "SELECT count(*) FROM public.sales WHERE country IN ('DE')"}
        ]
    }
    compiled = superset.compile_dataset_preview(
        dataset_id=42,
        template_params={"country": "DE"},
        effective_filters=[{"filter_name": "country", "effective_value": ["DE"]}],
    )
    assert compiled["compiled_sql"] == "SELECT count(*) FROM public.sales WHERE country IN ('DE')"
    superset.network.request.assert_called_once()
    call = superset.network.request.call_args
    assert call.kwargs["method"] == "POST"
    assert call.kwargs["endpoint"] == "/chart/data"
    assert call.kwargs["headers"] == {"Content-Type": "application/json"}
    # The serialized query context must carry the datasource binding, the
    # translated native filters, and template params as url_params.
    sent_context = json.loads(call.kwargs["data"])
    expected_filters = [{"col": "country", "op": "IN", "val": ["DE"]}]
    assert sent_context["datasource"] == {"id": 42, "type": "table"}
    assert sent_context["queries"][0]["filters"] == expected_filters
    assert sent_context["queries"][0]["url_params"] == {"country": "DE"}
    assert compiled["query_context"]["datasource"] == {"id": 42, "type": "table"}
    assert compiled["query_context"]["queries"][0]["filters"] == expected_filters
# [/DEF:test_compile_dataset_preview_uses_chart_data_and_result_query_sql:Function]
# [DEF:test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
# @PURPOSE: Sync network client should reserve dashboard-not-found translation for dashboard endpoints only.
def test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic():
    net = APIClient(
        config={
            "base_url": "http://superset.local",
            "auth": {"username": "demo", "password": "secret"},
        }
    )
    http_error = _make_requests_http_error(404, "http://superset.local/api/v1/chart/data")
    with pytest.raises(SupersetAPIError) as err:
        net._handle_http_error(http_error, "/chart/data")
    # /chart/data is not a dashboard endpoint, so no dashboard-not-found translation.
    assert not isinstance(err.value, DashboardNotFoundError)
    assert "API resource not found at endpoint '/chart/data'" in str(err.value)
# [/DEF:test_sync_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
# [DEF:test_sync_network_404_mapping_translates_dashboard_endpoints:Function]
# @PURPOSE: Sync network client should still translate dashboard endpoint 404 responses into dashboard-not-found errors.
def test_sync_network_404_mapping_translates_dashboard_endpoints():
    net = APIClient(
        config={
            "base_url": "http://superset.local",
            "auth": {"username": "demo", "password": "secret"},
        }
    )
    http_error = _make_requests_http_error(404, "http://superset.local/api/v1/dashboard/10")
    with pytest.raises(DashboardNotFoundError) as err:
        net._handle_http_error(http_error, "/dashboard/10")
    assert "Dashboard '/dashboard/10' Dashboard not found" in str(err.value)
# [/DEF:test_sync_network_404_mapping_translates_dashboard_endpoints:Function]
# [DEF:test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
# @PURPOSE: Async network client should reserve dashboard-not-found translation for dashboard endpoints only.
@pytest.mark.asyncio
async def test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic():
    net = AsyncAPIClient(
        config={
            "base_url": "http://superset.local",
            "auth": {"username": "demo", "password": "secret"},
        }
    )
    try:
        http_error = _make_httpx_status_error(404, "http://superset.local/api/v1/chart/data")
        with pytest.raises(SupersetAPIError) as err:
            net._handle_http_error(http_error, "/chart/data")
        assert not isinstance(err.value, DashboardNotFoundError)
        assert "API resource not found at endpoint '/chart/data'" in str(err.value)
    finally:
        # Always release the async client's resources, even on assertion failure.
        await net.aclose()
# [/DEF:test_async_network_404_mapping_keeps_non_dashboard_endpoints_generic:Function]
# [DEF:test_async_network_404_mapping_translates_dashboard_endpoints:Function]
# @PURPOSE: Async network client should still translate dashboard endpoint 404 responses into dashboard-not-found errors.
@pytest.mark.asyncio
async def test_async_network_404_mapping_translates_dashboard_endpoints():
    net = AsyncAPIClient(
        config={
            "base_url": "http://superset.local",
            "auth": {"username": "demo", "password": "secret"},
        }
    )
    try:
        http_error = _make_httpx_status_error(404, "http://superset.local/api/v1/dashboard/10")
        with pytest.raises(DashboardNotFoundError) as err:
            net._handle_http_error(http_error, "/dashboard/10")
        assert "Dashboard '/dashboard/10' Dashboard not found" in str(err.value)
    finally:
        # Always release the async client's resources, even on assertion failure.
        await net.aclose()
# [/DEF:test_async_network_404_mapping_translates_dashboard_endpoints:Function]
# [/DEF:SupersetPreviewPipelineTests:Module]

View File

@@ -25,6 +25,7 @@ from sqlalchemy.orm import Session
from .config_models import AppConfig, Environment, GlobalSettings
from .database import SessionLocal
from ..models.config import AppConfigRecord
from ..models.mapping import Environment as EnvironmentRecord
from .logger import logger, configure_logger, belief_scope
@@ -146,6 +147,8 @@ class ConfigManager:
"settings": self.raw_payload.get("settings", {}),
}
)
self._sync_environment_records(session, config)
session.commit()
logger.reason(
"Database configuration validated successfully",
extra={
@@ -202,6 +205,60 @@ class ConfigManager:
session.close()
# [/DEF:_load_config:Function]
# [DEF:_sync_environment_records:Function]
# @PURPOSE: Mirror configured environments into the relational environments table used by FK-backed domain models.
def _sync_environment_records(self, session: Session, config: AppConfig) -> None:
    with belief_scope("ConfigManager._sync_environment_records"):
        typed_envs = list(config.environments or [])
        # Ids present in typed config, after normalization; blank ids are skipped.
        wanted_ids = set()
        for env in typed_envs:
            env_id = str(env.id or "").strip()
            if env_id:
                wanted_ids.add(env_id)
        existing_rows = session.query(EnvironmentRecord).all()
        rows_by_id = {str(row.id or "").strip(): row for row in existing_rows}
        # Upsert pass: update rows that exist, create the ones that don't.
        for env in typed_envs:
            env_id = str(env.id or "").strip()
            if not env_id:
                continue
            label = str(env.name or env_id).strip() or env_id
            env_url = str(env.url or "").strip()
            cred_id = str(env.username or "").strip() or env_id
            row = rows_by_id.get(env_id)
            if row is not None:
                row.name = label
                row.url = env_url
                row.credentials_id = cred_id
                continue
            logger.reason(
                "Creating relational environment record from typed config",
                extra={"environment_id": env_id, "environment_name": label},
            )
            session.add(
                EnvironmentRecord(
                    id=env_id,
                    name=label,
                    url=env_url,
                    credentials_id=cred_id,
                )
            )
        # Cleanup pass: drop persisted rows no longer present in typed config.
        for row in existing_rows:
            stale_id = str(row.id or "").strip()
            if stale_id and stale_id not in wanted_ids:
                logger.reason(
                    "Removing stale relational environment record absent from typed config",
                    extra={"environment_id": stale_id},
                )
                session.delete(row)
# [/DEF:_sync_environment_records:Function]
# [DEF:_save_config_to_db:Function]
# @PURPOSE: Persist provided AppConfig into the global DB configuration record.
def _save_config_to_db(self, config: AppConfig, session: Optional[Session] = None) -> None:
@@ -220,6 +277,8 @@ class ConfigManager:
logger.reason("Updating existing global app config record", extra={"record_id": record.id})
record.payload = payload
self._sync_environment_records(db, config)
db.commit()
logger.reason(
"Configuration persisted to database",

View File

@@ -24,6 +24,7 @@ from ..models import assistant as _assistant_models # noqa: F401
from ..models import profile as _profile_models # noqa: F401
from ..models import clean_release as _clean_release_models # noqa: F401
from ..models import connection as _connection_models # noqa: F401
from ..models import dataset_review as _dataset_review_models # noqa: F401
from .logger import belief_scope, logger
from .auth.config import auth_config
import os

View File

@@ -1,11 +1,10 @@
# [DEF:backend.src.core.superset_client:Module]
# [DEF:SupersetClientModule:Module]
#
# @COMPLEXITY: 3
# @SEMANTICS: superset, api, client, rest, http, dashboard, dataset, import, export
# @PURPOSE: Предоставляет высокоуровневый клиент для взаимодействия с Superset REST API, инкапсулируя логику запросов, обработку ошибок и пагинацию.
# @LAYER: Core
# @RELATION: USES -> backend.src.core.utils.network.APIClient
# @RELATION: USES -> backend.src.core.config_models.Environment
# @RELATION: [DEPENDS_ON] ->[APIClient]
#
# @INVARIANT: All network operations must use the internal APIClient instance.
# @PUBLIC_API: SupersetClient
@@ -14,6 +13,7 @@
import json
import re
import zipfile
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from requests import Response
@@ -24,18 +24,18 @@ from .utils.fileio import get_filename_from_headers
from .config_models import Environment
# [/SECTION]
# [DEF:backend.src.core.superset_client.SupersetClient:Class]
# [DEF:SupersetClient:Class]
# @COMPLEXITY: 3
# @PURPOSE: Класс-обёртка над Superset REST API, предоставляющий методы для работы с дашбордами и датасетами.
# @RELATION: [DEPENDS_ON] ->[backend.src.core.utils.network.APIClient]
# @RELATION: [DEPENDS_ON] ->[backend.src.core.config_models.Environment]
# @RELATION: [DEPENDS_ON] ->[APIClient]
class SupersetClient:
# [DEF:backend.src.core.superset_client.SupersetClient.__init__:Function]
# [DEF:SupersetClient.__init__:Function]
# @COMPLEXITY: 3
# @PURPOSE: Инициализирует клиент, проверяет конфигурацию и создает сетевой клиент.
# @PRE: `env` должен быть валидным объектом Environment.
# @POST: Атрибуты `env` и `network` созданы и готовы к работе.
# @DATA_CONTRACT: Input[Environment] -> self.network[APIClient]
# @RELATION: [DEPENDS_ON] ->[APIClient]
def __init__(self, env: Environment):
with belief_scope("__init__"):
app_logger.info("[SupersetClient.__init__][Enter] Initializing SupersetClient for env %s.", env.name)
@@ -57,22 +57,22 @@ class SupersetClient:
)
self.delete_before_reimport: bool = False
app_logger.info("[SupersetClient.__init__][Exit] SupersetClient initialized.")
# [/DEF:backend.src.core.superset_client.SupersetClient.__init__:Function]
# [/DEF:SupersetClient.__init__:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.authenticate:Function]
# [DEF:SupersetClient.authenticate:Function]
# @COMPLEXITY: 3
# @PURPOSE: Authenticates the client using the configured credentials.
# @PRE: self.network must be initialized with valid auth configuration.
# @POST: Client is authenticated and tokens are stored.
# @DATA_CONTRACT: None -> Output[Dict[str, str]]
# @RELATION: [CALLS] ->[self.network.authenticate]
# @RELATION: [CALLS] ->[APIClient.authenticate]
def authenticate(self) -> Dict[str, str]:
    """Authenticate by delegating to the underlying network client.

    Returns whatever token mapping ``self.network.authenticate()`` produces;
    the call is wrapped in a belief scope for structured logging.
    """
    with belief_scope("SupersetClient.authenticate"):
        return self.network.authenticate()
# [/DEF:backend.src.core.superset_client.SupersetClient.authenticate:Function]
# [/DEF:SupersetClient.authenticate:Function]
@property
# [DEF:backend.src.core.superset_client.SupersetClient.headers:Function]
# [DEF:SupersetClient.headers:Function]
# @COMPLEXITY: 1
# @PURPOSE: Возвращает базовые HTTP-заголовки, используемые сетевым клиентом.
# @PRE: APIClient is initialized and authenticated.
@@ -80,17 +80,17 @@ class SupersetClient:
def headers(self) -> dict:
    """Return the base HTTP headers held by the underlying network client."""
    with belief_scope("headers"):
        return self.network.headers
# [/DEF:backend.src.core.superset_client.SupersetClient.headers:Function]
# [/DEF:SupersetClient.headers:Function]
# [SECTION: DASHBOARD OPERATIONS]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboards:Function]
# [DEF:SupersetClient.get_dashboards:Function]
# @COMPLEXITY: 3
# @PURPOSE: Получает полный список дашбордов, автоматически обрабатывая пагинацию.
# @PRE: Client is authenticated.
# @POST: Returns a tuple with total count and list of dashboards.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self._fetch_all_pages]
# @RELATION: [CALLS] ->[SupersetClient._fetch_all_pages]
def get_dashboards(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_dashboards"):
app_logger.info("[get_dashboards][Enter] Fetching dashboards.")
@@ -116,15 +116,15 @@ class SupersetClient:
total_count = len(paginated_data)
app_logger.info("[get_dashboards][Exit] Found %d dashboards.", total_count)
return total_count, paginated_data
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboards:Function]
# [/DEF:SupersetClient.get_dashboards:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_page:Function]
# [DEF:SupersetClient.get_dashboards_page:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches a single dashboards page from Superset without iterating all pages.
# @PRE: Client is authenticated.
# @POST: Returns total count and one page of dashboards.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[APIClient.request]
def get_dashboards_page(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_dashboards_page"):
validated_query = self._validate_query_params(query or {})
@@ -153,15 +153,15 @@ class SupersetClient:
result = response_json.get("result", [])
total_count = response_json.get("count", len(result))
return total_count, result
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_page:Function]
# [/DEF:SupersetClient.get_dashboards_page:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_summary:Function]
# [DEF:SupersetClient.get_dashboards_summary:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches dashboard metadata optimized for the grid.
# @PRE: Client is authenticated.
# @POST: Returns a list of dashboard metadata summaries.
# @DATA_CONTRACT: None -> Output[List[Dict]]
# @RELATION: [CALLS] ->[self.get_dashboards]
# @RELATION: [CALLS] ->[SupersetClient.get_dashboards]
def get_dashboards_summary(self, require_slug: bool = False) -> List[Dict]:
with belief_scope("SupersetClient.get_dashboards_summary"):
# Rely on list endpoint default projection to stay compatible
@@ -238,15 +238,15 @@ class SupersetClient:
f"sampled={min(len(result), max_debug_samples)})"
)
return result
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_summary:Function]
# [/DEF:SupersetClient.get_dashboards_summary:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_summary_page:Function]
# [DEF:SupersetClient.get_dashboards_summary_page:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches one page of dashboard metadata optimized for the grid.
# @PRE: page >= 1 and page_size > 0.
# @POST: Returns mapped summaries and total dashboard count.
# @DATA_CONTRACT: Input[page: int, page_size: int] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self.get_dashboards_page]
# @RELATION: [CALLS] ->[SupersetClient.get_dashboards_page]
def get_dashboards_summary_page(
self,
page: int,
@@ -313,7 +313,7 @@ class SupersetClient:
return total_count, result
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboards_summary_page:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._extract_owner_labels:Function]
# [DEF:SupersetClient._extract_owner_labels:Function]
# @COMPLEXITY: 1
# @PURPOSE: Normalize dashboard owners payload to stable display labels.
# @PRE: owners payload can be scalar, object or list.
@@ -339,9 +339,9 @@ class SupersetClient:
if label and label not in normalized:
normalized.append(label)
return normalized
# [/DEF:backend.src.core.superset_client.SupersetClient._extract_owner_labels:Function]
# [/DEF:SupersetClient._extract_owner_labels:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._extract_user_display:Function]
# [DEF:SupersetClient._extract_user_display:Function]
# @COMPLEXITY: 1
# @PURPOSE: Normalize user payload to a stable display name.
# @PRE: user payload can be string, dict or None.
@@ -384,43 +384,59 @@ class SupersetClient:
return normalized
# [/DEF:backend.src.core.superset_client.SupersetClient._sanitize_user_text:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboard:Function]
# [DEF:SupersetClient.get_dashboard:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches a single dashboard by ID.
# @PRE: Client is authenticated and dashboard_id exists.
# @PURPOSE: Fetches a single dashboard by ID or slug.
# @PRE: Client is authenticated and dashboard_ref exists.
# @POST: Returns dashboard payload from Superset API.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.network.request]
def get_dashboard(self, dashboard_id: int) -> Dict:
with belief_scope("SupersetClient.get_dashboard", f"id={dashboard_id}"):
response = self.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}")
# @DATA_CONTRACT: Input[dashboard_ref: Union[int, str]] -> Output[Dict]
# @RELATION: [CALLS] ->[APIClient.request]
def get_dashboard(self, dashboard_ref: Union[int, str]) -> Dict:
with belief_scope("SupersetClient.get_dashboard", f"ref={dashboard_ref}"):
response = self.network.request(method="GET", endpoint=f"/dashboard/{dashboard_ref}")
return cast(Dict, response)
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboard:Function]
# [/DEF:SupersetClient.get_dashboard:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_chart:Function]
# [DEF:SupersetClient.get_dashboard_permalink_state:Function]
# @COMPLEXITY: 2
# @PURPOSE: Resolve a dashboard permalink key to its stored dashboard state payload.
# @PRE: Client is authenticated and the permalink key exists upstream.
# @POST: Returns the permalink state payload exactly as Superset serves it.
# @DATA_CONTRACT: Input[permalink_key: str] -> Output[Dict]
# @RELATION: [CALLS] ->[APIClient.request]
def get_dashboard_permalink_state(self, permalink_key: str) -> Dict:
    """Fetch the stored dashboard permalink state for *permalink_key*."""
    scope_detail = f"key={permalink_key}"
    with belief_scope("SupersetClient.get_dashboard_permalink_state", scope_detail):
        # Permalink keys are opaque tokens; the endpoint path embeds them verbatim.
        endpoint = f"/dashboard/permalink/{permalink_key}"
        payload = self.network.request(method="GET", endpoint=endpoint)
        return cast(Dict, payload)
# [/DEF:SupersetClient.get_dashboard_permalink_state:Function]
# [DEF:SupersetClient.get_chart:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches a single chart by ID.
# @PRE: Client is authenticated and chart_id exists.
# @POST: Returns chart payload from Superset API.
# @DATA_CONTRACT: Input[chart_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[APIClient.request]
def get_chart(self, chart_id: int) -> Dict:
with belief_scope("SupersetClient.get_chart", f"id={chart_id}"):
response = self.network.request(method="GET", endpoint=f"/chart/{chart_id}")
return cast(Dict, response)
# [/DEF:backend.src.core.superset_client.SupersetClient.get_chart:Function]
# [/DEF:SupersetClient.get_chart:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_dashboard_detail:Function]
# [DEF:SupersetClient.get_dashboard_detail:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches detailed dashboard information including related charts and datasets.
# @PRE: Client is authenticated and dashboard_id exists.
# @PRE: Client is authenticated and dashboard reference exists.
# @POST: Returns dashboard metadata with charts and datasets lists.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.get_dashboard]
# @RELATION: [CALLS] ->[self.get_chart]
def get_dashboard_detail(self, dashboard_id: int) -> Dict:
with belief_scope("SupersetClient.get_dashboard_detail", f"id={dashboard_id}"):
dashboard_response = self.get_dashboard(dashboard_id)
# @DATA_CONTRACT: Input[dashboard_ref: Union[int, str]] -> Output[Dict]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_dashboard]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_chart]
def get_dashboard_detail(self, dashboard_ref: Union[int, str]) -> Dict:
with belief_scope("SupersetClient.get_dashboard_detail", f"ref={dashboard_ref}"):
dashboard_response = self.get_dashboard(dashboard_ref)
dashboard_data = dashboard_response.get("result", dashboard_response)
charts: List[Dict] = []
@@ -456,7 +472,7 @@ class SupersetClient:
try:
charts_response = self.network.request(
method="GET",
endpoint=f"/dashboard/{dashboard_id}/charts"
endpoint=f"/dashboard/{dashboard_ref}/charts"
)
charts_payload = charts_response.get("result", []) if isinstance(charts_response, dict) else []
for chart_obj in charts_payload:
@@ -486,7 +502,7 @@ class SupersetClient:
try:
datasets_response = self.network.request(
method="GET",
endpoint=f"/dashboard/{dashboard_id}/datasets"
endpoint=f"/dashboard/{dashboard_ref}/datasets"
)
datasets_payload = datasets_response.get("result", []) if isinstance(datasets_response, dict) else []
for dataset_obj in datasets_payload:
@@ -592,9 +608,10 @@ class SupersetClient:
for dataset in datasets:
unique_datasets[dataset["id"]] = dataset
resolved_dashboard_id = dashboard_data.get("id", dashboard_ref)
return {
"id": dashboard_data.get("id", dashboard_id),
"title": dashboard_data.get("dashboard_title") or dashboard_data.get("title") or f"Dashboard {dashboard_id}",
"id": resolved_dashboard_id,
"title": dashboard_data.get("dashboard_title") or dashboard_data.get("title") or f"Dashboard {resolved_dashboard_id}",
"slug": dashboard_data.get("slug"),
"url": dashboard_data.get("url"),
"description": dashboard_data.get("description") or "",
@@ -607,13 +624,13 @@ class SupersetClient:
}
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dashboard_detail:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_charts:Function]
# [DEF:SupersetClient.get_charts:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches all charts with pagination support.
# @PRE: Client is authenticated.
# @POST: Returns total count and charts list.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self._fetch_all_pages]
# @RELATION: [CALLS] ->[SupersetClient._fetch_all_pages]
def get_charts(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_charts"):
validated_query = self._validate_query_params(query or {})
@@ -625,9 +642,9 @@ class SupersetClient:
pagination_options={"base_query": validated_query, "results_field": "result"},
)
return len(paginated_data), paginated_data
# [/DEF:backend.src.core.superset_client.SupersetClient.get_charts:Function]
# [/DEF:SupersetClient.get_charts:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._extract_chart_ids_from_layout:Function]
# [DEF:SupersetClient._extract_chart_ids_from_layout:Function]
# @COMPLEXITY: 1
# @PURPOSE: Traverses dashboard layout metadata and extracts chart IDs from common keys.
# @PRE: payload can be dict/list/scalar.
@@ -667,7 +684,7 @@ class SupersetClient:
# @POST: Returns ZIP content and filename.
# @DATA_CONTRACT: Input[dashboard_id: int] -> Output[Tuple[bytes, str]]
# @SIDE_EFFECT: Performs network I/O to download archive.
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.APIClient.request]
def export_dashboard(self, dashboard_id: int) -> Tuple[bytes, str]:
with belief_scope("export_dashboard"):
app_logger.info("[export_dashboard][Enter] Exporting dashboard %s.", dashboard_id)
@@ -692,8 +709,8 @@ class SupersetClient:
# @POST: Dashboard is imported or re-imported after deletion.
# @DATA_CONTRACT: Input[file_name: Union[str, Path]] -> Output[Dict]
# @SIDE_EFFECT: Performs network I/O to upload archive.
# @RELATION: [CALLS] ->[self._do_import]
# @RELATION: [CALLS] ->[self.delete_dashboard]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient._do_import]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.delete_dashboard]
def import_dashboard(self, file_name: Union[str, Path], dash_id: Optional[int] = None, dash_slug: Optional[str] = None) -> Dict:
with belief_scope("import_dashboard"):
if file_name is None:
@@ -723,7 +740,7 @@ class SupersetClient:
# @PRE: dashboard_id must exist.
# @POST: Dashboard is removed from Superset.
# @SIDE_EFFECT: Deletes resource from upstream Superset environment.
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[APIClient.request]
def delete_dashboard(self, dashboard_id: Union[int, str]) -> None:
with belief_scope("delete_dashboard"):
app_logger.info("[delete_dashboard][Enter] Deleting dashboard %s.", dashboard_id)
@@ -735,13 +752,13 @@ class SupersetClient:
app_logger.warning("[delete_dashboard][Warning] Unexpected response while deleting %s: %s", dashboard_id, response)
# [/DEF:backend.src.core.superset_client.SupersetClient.delete_dashboard:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_datasets:Function]
# [DEF:SupersetClient.get_datasets:Function]
# @COMPLEXITY: 3
# @PURPOSE: Получает полный список датасетов, автоматически обрабатывая пагинацию.
# @PRE: Client is authenticated.
# @POST: Returns total count and list of datasets.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self._fetch_all_pages]
# @RELATION: [CALLS] ->[SupersetClient._fetch_all_pages]
def get_datasets(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_datasets"):
app_logger.info("[get_datasets][Enter] Fetching datasets.")
@@ -754,9 +771,9 @@ class SupersetClient:
total_count = len(paginated_data)
app_logger.info("[get_datasets][Exit] Found %d datasets.", total_count)
return total_count, paginated_data
# [/DEF:backend.src.core.superset_client.SupersetClient.get_datasets:Function]
# [/DEF:SupersetClient.get_datasets:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_datasets_summary:Function]
# [DEF:SupersetClient.get_datasets_summary:Function]
# @COMPLEXITY: 3
# @PURPOSE: Fetches dataset metadata optimized for the Dataset Hub grid.
# @PRE: Client is authenticated.
@@ -788,8 +805,8 @@ class SupersetClient:
# @POST: Returns detailed dataset info with columns and linked dashboards.
# @PARAM: dataset_id (int) - The dataset ID to fetch details for.
# @RETURN: Dict - Dataset details with columns and linked_dashboards.
# @RELATION: CALLS -> self.get_dataset
# @RELATION: CALLS -> self.network.request (for related_objects)
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_dataset]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.APIClient.request]
def get_dataset_detail(self, dataset_id: int) -> Dict:
with belief_scope("SupersetClient.get_dataset_detail", f"id={dataset_id}"):
def as_bool(value, default=False):
@@ -900,7 +917,7 @@ class SupersetClient:
# @PRE: dataset_id must exist.
# @POST: Returns dataset details.
# @DATA_CONTRACT: Input[dataset_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.APIClient.request]
def get_dataset(self, dataset_id: int) -> Dict:
with belief_scope("SupersetClient.get_dataset", f"id={dataset_id}"):
app_logger.info("[get_dataset][Enter] Fetching dataset %s.", dataset_id)
@@ -910,14 +927,196 @@ class SupersetClient:
return response
# [/DEF:backend.src.core.superset_client.SupersetClient.get_dataset:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.update_dataset:Function]
# [DEF:SupersetClient.compile_dataset_preview:Function]
# @COMPLEXITY: 4
# @PURPOSE: Compile dataset preview SQL through the real Superset chart-data endpoint and return normalized SQL output.
# @PRE: dataset_id must be valid and template_params/effective_filters must represent the current preview session inputs.
# @POST: Returns normalized compiled SQL plus raw upstream response without guessing unsupported endpoints.
# @DATA_CONTRACT: Input[dataset_id:int, template_params:Dict, effective_filters:List[Dict]] -> Output[Dict[str, Any]]
# @RELATION: [CALLS] ->[SupersetClient.get_dataset]
# @RELATION: [CALLS] ->[SupersetClient.build_dataset_preview_query_context]
# @RELATION: [CALLS] ->[APIClient.request]
# @RELATION: [CALLS] ->[SupersetClient._extract_compiled_sql_from_chart_data_response]
# @SIDE_EFFECT: Performs upstream dataset lookup and chart-data network I/O against Superset.
def compile_dataset_preview(
    self,
    dataset_id: int,
    template_params: Optional[Dict[str, Any]] = None,
    effective_filters: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Compile preview SQL for *dataset_id* via Superset's /chart/data endpoint."""
    with belief_scope("SupersetClient.compile_dataset_preview", f"id={dataset_id}"):
        # Normalize optional inputs once so counts and payload construction agree.
        params = template_params or {}
        filters = effective_filters or []
        app_logger.reason(
            "Compiling dataset preview via Superset chart-data endpoint",
            extra={
                "dataset_id": dataset_id,
                "template_param_count": len(params),
                "filter_count": len(filters),
            },
        )
        # Dataset metadata feeds datasource type/schema into the query context.
        dataset_response = self.get_dataset(dataset_id)
        if isinstance(dataset_response, dict):
            dataset_record = dataset_response.get("result", dataset_response)
        else:
            dataset_record = {}
        query_context = self.build_dataset_preview_query_context(
            dataset_id=dataset_id,
            dataset_record=dataset_record,
            template_params=params,
            effective_filters=filters,
        )
        raw_reply = self.network.request(
            method="POST",
            endpoint="/chart/data",
            data=json.dumps(query_context),
            headers={"Content-Type": "application/json"},
        )
        compiled = self._extract_compiled_sql_from_chart_data_response(raw_reply)
        # Expose the exact payload we sent so callers can audit the compilation.
        compiled["query_context"] = query_context
        app_logger.reflect(
            "Dataset preview compilation returned normalized SQL payload",
            extra={
                "dataset_id": dataset_id,
                "compiled_sql_length": len(str(compiled.get("compiled_sql") or "")),
            },
        )
        return compiled
# [/DEF:backend.src.core.superset_client.SupersetClient.compile_dataset_preview:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.build_dataset_preview_query_context:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build a reduced-scope chart-data query context for deterministic dataset preview compilation.
# @PRE: dataset_record should come from Superset dataset detail when possible.
# @POST: Returns an explicit chart-data payload based on current session inputs and dataset metadata.
# @DATA_CONTRACT: Input[dataset_id:int,dataset_record:Dict,template_params:Dict,effective_filters:List[Dict]] -> Output[Dict[str, Any]]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient._normalize_effective_filters_for_query_context]
# @SIDE_EFFECT: Emits reasoning and reflection logs for deterministic preview payload construction.
def build_dataset_preview_query_context(
    self,
    dataset_id: int,
    dataset_record: Dict[str, Any],
    template_params: Dict[str, Any],
    effective_filters: List[Dict[str, Any]],
) -> Dict[str, Any]:
    """Construct the /chart/data query context for a dataset preview.

    Fix: the previous implementation aliased ONE deep-copied template-params
    dict into three payload slots (query url_params, custom_params, and
    form_data url_params), so mutating one slot downstream silently mutated
    the others. Each slot now receives its own independent copy; the produced
    values are identical.
    """
    with belief_scope("SupersetClient.build_dataset_preview_query_context", f"id={dataset_id}"):
        base_template_params = deepcopy(template_params or {})
        normalized_filters = self._normalize_effective_filters_for_query_context(effective_filters or [])
        # Default datasource assumes a table-backed dataset; overridden below
        # when the dataset record carries explicit datasource metadata.
        datasource_payload: Dict[str, Any] = {
            "id": dataset_id,
            "type": "table",
        }
        datasource = dataset_record.get("datasource")
        if isinstance(datasource, dict):
            datasource_id = datasource.get("id")
            datasource_type = datasource.get("type")
            if datasource_id is not None:
                datasource_payload["id"] = datasource_id
            if datasource_type:
                datasource_payload["type"] = datasource_type
        query_object: Dict[str, Any] = {
            "filters": normalized_filters,
            "extras": {"where": ""},
            "columns": [],
            "metrics": ["count"],
            "orderby": [],
            "annotation_layers": [],
            "row_limit": 1000,
            "series_limit": 0,
            # Independent copies: no slot shares a mutable dict with another.
            "url_params": deepcopy(base_template_params),
            "custom_params": deepcopy(base_template_params),
        }
        schema = dataset_record.get("schema")
        if schema:
            query_object["schema"] = schema
        time_range = dataset_record.get("default_time_range")
        if time_range:
            query_object["time_range"] = time_range
        result_format = dataset_record.get("result_format") or "json"
        result_type = dataset_record.get("result_type") or "full"
        return {
            "datasource": datasource_payload,
            "queries": [query_object],
            "form_data": {
                "datasource": f"{datasource_payload['id']}__{datasource_payload['type']}",
                "viz_type": "table",
                "slice_id": None,
                "query_mode": "raw",
                "url_params": deepcopy(base_template_params),
            },
            "result_format": result_format,
            "result_type": result_type,
        }
# [/DEF:backend.src.core.superset_client.SupersetClient.build_dataset_preview_query_context:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._normalize_effective_filters_for_query_context:Function]
# @COMPLEXITY: 3
# @PURPOSE: Convert execution mappings into Superset chart-data filter objects.
# @PRE: effective_filters may contain mapping metadata and arbitrary scalar/list values.
# @POST: Returns only valid filter dictionaries suitable for the chart-data query payload.
def _normalize_effective_filters_for_query_context(
    self,
    effective_filters: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Translate effective-filter mappings into chart-data filter dicts.

    Entries that are not dicts, lack a usable column name, or carry a None
    value are dropped; list values use the IN operator, scalars use ==.
    """
    with belief_scope("SupersetClient._normalize_effective_filters_for_query_context"):
        chart_filters: List[Dict[str, Any]] = []
        for entry in effective_filters:
            if not isinstance(entry, dict):
                continue
            # Prefer the variable name; fall back to the display filter name.
            raw_name = entry.get("variable_name") or entry.get("filter_name") or ""
            column_name = str(raw_name).strip()
            if not column_name:
                continue
            filter_value = entry.get("effective_value")
            if filter_value is None:
                continue
            chosen_op = "IN" if isinstance(filter_value, list) else "=="
            chart_filters.append({"col": column_name, "op": chosen_op, "val": filter_value})
        return chart_filters
# [/DEF:backend.src.core.superset_client.SupersetClient._normalize_effective_filters_for_query_context:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._extract_compiled_sql_from_chart_data_response:Function]
# @COMPLEXITY: 3
# @PURPOSE: Normalize compiled SQL from a chart-data response by reading result[].query fields first.
# @PRE: response must be the decoded response body from /api/v1/chart/data.
# @POST: Returns compiled SQL and raw response or raises SupersetAPIError when the endpoint does not expose query text.
def _extract_compiled_sql_from_chart_data_response(self, response: Any) -> Dict[str, Any]:
    """Pull the first non-empty result[].query string out of a chart-data reply."""
    with belief_scope("SupersetClient._extract_compiled_sql_from_chart_data_response"):
        if not isinstance(response, dict):
            raise SupersetAPIError("Superset chart/data response was not a JSON object")
        results = response.get("result")
        if not isinstance(results, list):
            raise SupersetAPIError("Superset chart/data response did not include a result list")
        # First result entry exposing a non-empty query wins.
        for entry in results:
            if isinstance(entry, dict):
                sql_text = str(entry.get("query") or "").strip()
                if sql_text:
                    return {"compiled_sql": sql_text, "raw_response": response}
        raise SupersetAPIError("Superset chart/data response did not expose compiled SQL in result[].query")
# [/DEF:backend.src.core.superset_client.SupersetClient._extract_compiled_sql_from_chart_data_response:Function]
# [DEF:SupersetClient.update_dataset:Function]
# @COMPLEXITY: 3
# @PURPOSE: Обновляет данные датасета по его ID.
# @PRE: dataset_id must exist.
# @POST: Dataset is updated in Superset.
# @DATA_CONTRACT: Input[dataset_id: int, data: Dict] -> Output[Dict]
# @SIDE_EFFECT: Modifies resource in upstream Superset environment.
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[APIClient.request]
def update_dataset(self, dataset_id: int, data: Dict) -> Dict:
with belief_scope("SupersetClient.update_dataset", f"id={dataset_id}"):
app_logger.info("[update_dataset][Enter] Updating dataset %s.", dataset_id)
@@ -930,15 +1129,15 @@ class SupersetClient:
response = cast(Dict, response)
app_logger.info("[update_dataset][Exit] Updated dataset %s.", dataset_id)
return response
# [/DEF:backend.src.core.superset_client.SupersetClient.update_dataset:Function]
# [/DEF:SupersetClient.update_dataset:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_databases:Function]
# [DEF:SupersetClient.get_databases:Function]
# @COMPLEXITY: 3
# @PURPOSE: Получает полный список баз данных.
# @PRE: Client is authenticated.
# @POST: Returns total count and list of databases.
# @DATA_CONTRACT: Input[query: Optional[Dict]] -> Output[Tuple[int, List[Dict]]]
# @RELATION: [CALLS] ->[self._fetch_all_pages]
# @RELATION: [CALLS] ->[SupersetClient._fetch_all_pages]
def get_databases(self, query: Optional[Dict] = None) -> Tuple[int, List[Dict]]:
with belief_scope("get_databases"):
app_logger.info("[get_databases][Enter] Fetching databases.")
@@ -953,7 +1152,7 @@ class SupersetClient:
total_count = len(paginated_data)
app_logger.info("[get_databases][Exit] Found %d databases.", total_count)
return total_count, paginated_data
# [/DEF:backend.src.core.superset_client.SupersetClient.get_databases:Function]
# [/DEF:SupersetClient.get_databases:Function]
# [DEF:backend.src.core.superset_client.SupersetClient.get_database:Function]
# @COMPLEXITY: 3
@@ -961,7 +1160,7 @@ class SupersetClient:
# @PRE: database_id must exist.
# @POST: Returns database details.
# @DATA_CONTRACT: Input[database_id: int] -> Output[Dict]
# @RELATION: [CALLS] ->[self.network.request]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.APIClient.request]
def get_database(self, database_id: int) -> Dict:
with belief_scope("get_database"):
app_logger.info("[get_database][Enter] Fetching database %s.", database_id)
@@ -977,7 +1176,7 @@ class SupersetClient:
# @PRE: Client is authenticated.
# @POST: Returns list of database summaries.
# @DATA_CONTRACT: None -> Output[List[Dict]]
# @RELATION: [CALLS] ->[self.get_databases]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_databases]
def get_databases_summary(self) -> List[Dict]:
with belief_scope("SupersetClient.get_databases_summary"):
query = {
@@ -998,7 +1197,7 @@ class SupersetClient:
# @PRE: db_uuid must be a valid UUID string.
# @POST: Returns database info or None.
# @DATA_CONTRACT: Input[db_uuid: str] -> Output[Optional[Dict]]
# @RELATION: [CALLS] ->[self.get_databases]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_databases]
def get_database_by_uuid(self, db_uuid: str) -> Optional[Dict]:
with belief_scope("SupersetClient.get_database_by_uuid", f"uuid={db_uuid}"):
query = {
@@ -1008,12 +1207,12 @@ class SupersetClient:
return databases[0] if databases else None
# [/DEF:backend.src.core.superset_client.SupersetClient.get_database_by_uuid:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._resolve_target_id_for_delete:Function]
# [DEF:SupersetClient._resolve_target_id_for_delete:Function]
# @COMPLEXITY: 1
# @PURPOSE: Resolves a dashboard ID from either an ID or a slug.
# @PRE: Either dash_id or dash_slug should be provided.
# @POST: Returns the resolved ID or None.
# @RELATION: [CALLS] ->[self.get_dashboards]
# @RELATION: [CALLS] ->[SupersetClient.get_dashboards]
def _resolve_target_id_for_delete(self, dash_id: Optional[int], dash_slug: Optional[str]) -> Optional[int]:
with belief_scope("_resolve_target_id_for_delete"):
if dash_id is not None:
@@ -1029,14 +1228,14 @@ class SupersetClient:
except Exception as e:
app_logger.warning("[_resolve_target_id_for_delete][Warning] Could not resolve slug '%s' to ID: %s", dash_slug, e)
return None
# [/DEF:backend.src.core.superset_client.SupersetClient._resolve_target_id_for_delete:Function]
# [/DEF:SupersetClient._resolve_target_id_for_delete:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._do_import:Function]
# [DEF:SupersetClient._do_import:Function]
# @COMPLEXITY: 1
# @PURPOSE: Performs the actual multipart upload for import.
# @PRE: file_name must be a path to an existing ZIP file.
# @POST: Returns the API response from the upload.
# @RELATION: [CALLS] ->[self.network.upload_file]
# @RELATION: [CALLS] ->[APIClient.upload_file]
def _do_import(self, file_name: Union[str, Path]) -> Dict:
with belief_scope("_do_import"):
app_logger.debug(f"[_do_import][State] Uploading file: {file_name}")
@@ -1051,7 +1250,7 @@ class SupersetClient:
extra_data={"overwrite": "true"},
timeout=self.env.timeout * 2,
)
# [/DEF:backend.src.core.superset_client.SupersetClient._do_import:Function]
# [/DEF:SupersetClient._do_import:Function]
# [DEF:backend.src.core.superset_client.SupersetClient._validate_export_response:Function]
# @COMPLEXITY: 1
@@ -1101,7 +1300,7 @@ class SupersetClient:
# @PURPOSE: Fetches the total number of items for a given endpoint.
# @PRE: endpoint must be a valid Superset API path.
# @POST: Returns the total count as an integer.
# @RELATION: [CALLS] ->[self.network.fetch_paginated_count]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.APIClient.fetch_paginated_count]
def _fetch_total_object_count(self, endpoint: str) -> int:
with belief_scope("_fetch_total_object_count"):
return self.network.fetch_paginated_count(

View File

@@ -1,4 +1,4 @@
# [DEF:backend.src.core.utils.async_network:Module]
# [DEF:AsyncNetworkModule:Module]
#
# @COMPLEXITY: 5
# @SEMANTICS: network, httpx, async, superset, authentication, cache
@@ -8,7 +8,7 @@
# @POST: Async network clients reuse cached auth tokens and expose stable async request/error translation flow.
# @SIDE_EFFECT: Performs upstream HTTP I/O and mutates process-local auth cache entries.
# @DATA_CONTRACT: Input[config: Dict[str, Any]] -> Output[authenticated async Superset HTTP interactions]
# @RELATION: DEPENDS_ON -> backend.src.core.utils.network.SupersetAuthCache
# @RELATION: [DEPENDS_ON] ->[SupersetAuthCache]
# @INVARIANT: Async client reuses cached auth tokens per environment credentials and invalidates on 401.
# [SECTION: IMPORTS]
@@ -29,22 +29,24 @@ from .network import (
# [/SECTION]
# [DEF:backend.src.core.utils.async_network.AsyncAPIClient:Class]
# [DEF:AsyncAPIClient:Class]
# @COMPLEXITY: 3
# @PURPOSE: Async Superset API client backed by httpx.AsyncClient with shared auth cache.
# @RELATION: [DEPENDS_ON] ->[backend.src.core.utils.network.SupersetAuthCache]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.SupersetAuthCache.get]
# @RELATION: [CALLS] ->[backend.src.core.utils.network.SupersetAuthCache.set]
# @RELATION: [DEPENDS_ON] ->[SupersetAuthCache]
# @RELATION: [CALLS] ->[SupersetAuthCache.get]
# @RELATION: [CALLS] ->[SupersetAuthCache.set]
class AsyncAPIClient:
DEFAULT_TIMEOUT = 30
_auth_locks: Dict[tuple[str, str, bool], asyncio.Lock] = {}
# [DEF:backend.src.core.utils.async_network.AsyncAPIClient.__init__:Function]
# [DEF:AsyncAPIClient.__init__:Function]
# @COMPLEXITY: 3
# @PURPOSE: Initialize async API client for one environment.
# @PRE: config contains base_url and auth payload.
# @POST: Client is ready for async request/authentication flow.
# @DATA_CONTRACT: Input[config: Dict[str, Any]] -> self._auth_cache_key[str]
# @RELATION: [CALLS] ->[AsyncAPIClient._normalize_base_url]
# @RELATION: [DEPENDS_ON] ->[SupersetAuthCache]
def __init__(self, config: Dict[str, Any], verify_ssl: bool = True, timeout: int = DEFAULT_TIMEOUT):
self.base_url: str = self._normalize_base_url(config.get("base_url", ""))
self.api_base_url: str = f"{self.base_url}/api/v1"
@@ -63,9 +65,9 @@ class AsyncAPIClient:
verify_ssl,
)
# [/DEF:__init__:Function]
# [/DEF:AsyncAPIClient.__init__:Function]
# [DEF:backend.src.core.utils.async_network.AsyncAPIClient._normalize_base_url:Function]
# [DEF:AsyncAPIClient._normalize_base_url:Function]
# @COMPLEXITY: 1
# @PURPOSE: Normalize base URL for Superset API root construction.
# @POST: Returns canonical base URL without trailing slash and duplicate /api/v1 suffix.
@@ -74,9 +76,9 @@ class AsyncAPIClient:
if normalized.lower().endswith("/api/v1"):
normalized = normalized[:-len("/api/v1")]
return normalized.rstrip("/")
# [/DEF:_normalize_base_url:Function]
# [/DEF:AsyncAPIClient._normalize_base_url:Function]
# [DEF:_build_api_url:Function]
# [DEF:AsyncAPIClient._build_api_url:Function]
# @COMPLEXITY: 1
# @PURPOSE: Build full API URL from relative Superset endpoint.
# @POST: Returns absolute URL for upstream request.
@@ -89,9 +91,9 @@ class AsyncAPIClient:
if normalized_endpoint.startswith("/api/v1/") or normalized_endpoint == "/api/v1":
return f"{self.base_url}{normalized_endpoint}"
return f"{self.api_base_url}{normalized_endpoint}"
# [/DEF:_build_api_url:Function]
# [/DEF:AsyncAPIClient._build_api_url:Function]
# [DEF:_get_auth_lock:Function]
# [DEF:AsyncAPIClient._get_auth_lock:Function]
# @COMPLEXITY: 1
# @PURPOSE: Return per-cache-key async lock to serialize fresh login attempts.
# @POST: Returns stable asyncio.Lock instance.
@@ -103,14 +105,16 @@ class AsyncAPIClient:
created_lock = asyncio.Lock()
cls._auth_locks[cache_key] = created_lock
return created_lock
# [/DEF:_get_auth_lock:Function]
# [/DEF:AsyncAPIClient._get_auth_lock:Function]
# [DEF:authenticate:Function]
# [DEF:AsyncAPIClient.authenticate:Function]
# @COMPLEXITY: 3
# @PURPOSE: Authenticate against Superset and cache access/csrf tokens.
# @POST: Client tokens are populated and reusable across requests.
# @SIDE_EFFECT: Performs network requests to Superset authentication endpoints.
# @DATA_CONTRACT: None -> Output[Dict[str, str]]
# @RELATION: [CALLS] ->[SupersetAuthCache.get]
# @RELATION: [CALLS] ->[SupersetAuthCache.set]
async def authenticate(self) -> Dict[str, str]:
cached_tokens = SupersetAuthCache.get(self._auth_cache_key)
if cached_tokens and cached_tokens.get("access_token") and cached_tokens.get("csrf_token"):
@@ -163,13 +167,13 @@ class AsyncAPIClient:
except (httpx.HTTPError, KeyError) as exc:
SupersetAuthCache.invalidate(self._auth_cache_key)
raise NetworkError(f"Network or parsing error during authentication: {exc}") from exc
# [/DEF:authenticate:Function]
# [/DEF:AsyncAPIClient.authenticate:Function]
# [DEF:get_headers:Function]
# [DEF:AsyncAPIClient.get_headers:Function]
# @COMPLEXITY: 3
# @PURPOSE: Return authenticated Superset headers for async requests.
# @POST: Headers include Authorization and CSRF tokens.
# @RELATION: CALLS -> self.authenticate
# @RELATION: [CALLS] ->[AsyncAPIClient.authenticate]
async def get_headers(self) -> Dict[str, str]:
if not self._authenticated:
await self.authenticate()
@@ -179,16 +183,16 @@ class AsyncAPIClient:
"Referer": self.base_url,
"Content-Type": "application/json",
}
# [/DEF:get_headers:Function]
# [/DEF:AsyncAPIClient.get_headers:Function]
# [DEF:request:Function]
# [DEF:AsyncAPIClient.request:Function]
# @COMPLEXITY: 3
# @PURPOSE: Perform one authenticated async Superset API request.
# @POST: Returns JSON payload or raw httpx.Response when raw_response=true.
# @SIDE_EFFECT: Performs network I/O.
# @RELATION: [CALLS] ->[self.get_headers]
# @RELATION: [CALLS] ->[self._handle_http_error]
# @RELATION: [CALLS] ->[self._handle_network_error]
# @RELATION: [CALLS] ->[AsyncAPIClient.get_headers]
# @RELATION: [CALLS] ->[AsyncAPIClient._handle_http_error]
# @RELATION: [CALLS] ->[AsyncAPIClient._handle_network_error]
async def request(
self,
method: str,
@@ -216,9 +220,9 @@ class AsyncAPIClient:
self._handle_http_error(exc, endpoint)
except httpx.HTTPError as exc:
self._handle_network_error(exc, full_url)
# [/DEF:request:Function]
# [/DEF:AsyncAPIClient.request:Function]
# [DEF:_handle_http_error:Function]
# [DEF:AsyncAPIClient._handle_http_error:Function]
# @COMPLEXITY: 3
# @PURPOSE: Translate upstream HTTP errors into stable domain exceptions.
# @POST: Raises domain-specific exception for caller flow control.
@@ -229,15 +233,40 @@ class AsyncAPIClient:
if status_code in [502, 503, 504]:
raise NetworkError(f"Environment unavailable (Status {status_code})", status_code=status_code) from exc
if status_code == 404:
raise DashboardNotFoundError(endpoint) from exc
if self._is_dashboard_endpoint(endpoint):
raise DashboardNotFoundError(endpoint) from exc
raise SupersetAPIError(
f"API resource not found at endpoint '{endpoint}'",
status_code=status_code,
endpoint=endpoint,
subtype="not_found",
) from exc
if status_code == 403:
raise PermissionDeniedError() from exc
if status_code == 401:
raise AuthenticationError() from exc
raise SupersetAPIError(f"API Error {status_code}: {exc.response.text}") from exc
# [/DEF:_handle_http_error:Function]
# [/DEF:AsyncAPIClient._handle_http_error:Function]
# [DEF:_handle_network_error:Function]
# [DEF:AsyncAPIClient._is_dashboard_endpoint:Function]
# @COMPLEXITY: 2
# @PURPOSE: Determine whether an API endpoint represents a dashboard resource for 404 translation.
# @POST: Returns true only for dashboard-specific endpoints.
def _is_dashboard_endpoint(self, endpoint: str) -> bool:
normalized_endpoint = str(endpoint or "").strip().lower()
if not normalized_endpoint:
return False
if normalized_endpoint.startswith("http://") or normalized_endpoint.startswith("https://"):
try:
normalized_endpoint = "/" + normalized_endpoint.split("/api/v1", 1)[1].lstrip("/")
except IndexError:
return False
if normalized_endpoint.startswith("/api/v1/"):
normalized_endpoint = normalized_endpoint[len("/api/v1"):]
return normalized_endpoint.startswith("/dashboard/") or normalized_endpoint == "/dashboard"
# [/DEF:backend.src.core.utils.async_network.AsyncAPIClient._is_dashboard_endpoint:Function]
# [DEF:backend.src.core.utils.async_network.AsyncAPIClient._handle_network_error:Function]
# @COMPLEXITY: 3
# @PURPOSE: Translate generic httpx errors into NetworkError.
# @POST: Raises NetworkError with URL context.
@@ -251,16 +280,16 @@ class AsyncAPIClient:
else:
message = f"Unknown network error: {exc}"
raise NetworkError(message, url=url) from exc
# [/DEF:_handle_network_error:Function]
# [/DEF:backend.src.core.utils.async_network.AsyncAPIClient._handle_network_error:Function]
# [DEF:aclose:Function]
# [DEF:backend.src.core.utils.async_network.AsyncAPIClient.aclose:Function]
# @COMPLEXITY: 3
# @PURPOSE: Close underlying httpx client.
# @POST: Client resources are released.
# @SIDE_EFFECT: Closes network connections.
async def aclose(self) -> None:
await self._client.aclose()
# [/DEF:aclose:Function]
# [/DEF:AsyncAPIClient:Class]
# [/DEF:backend.src.core.utils.async_network.AsyncAPIClient.aclose:Function]
# [/DEF:backend.src.core.utils.async_network.AsyncAPIClient:Class]
# [/DEF:backend.src.core.utils.async_network:Module]

View File

@@ -1,11 +1,10 @@
# [DEF:network:Module]
# [DEF:NetworkModule:Module]
#
# @COMPLEXITY: 3
# @SEMANTICS: network, http, client, api, requests, session, authentication
# @PURPOSE: Инкапсулирует низкоуровневую HTTP-логику для взаимодействия с Superset API, включая аутентификацию, управление сессией, retry-логику и обработку ошибок.
# @LAYER: Infra
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @RELATION: DEPENDS_ON -> requests
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PUBLIC_API: APIClient
# [SECTION: IMPORTS]
@@ -82,7 +81,7 @@ class DashboardNotFoundError(SupersetAPIError):
# [DEF:NetworkError:Class]
# @PURPOSE: Exception raised when a network level error occurs.
class NetworkError(Exception):
# [DEF:network.APIClient.__init__:Function]
# [DEF:NetworkError.__init__:Function]
# @PURPOSE: Initializes the network error.
# @PRE: message is a string.
# @POST: NetworkError is initialized.
@@ -90,11 +89,11 @@ class NetworkError(Exception):
with belief_scope("NetworkError.__init__"):
self.context = context
super().__init__(f"[NETWORK_FAILURE] {message} | Context: {self.context}")
# [/DEF:__init__:Function]
# [/DEF:NetworkError.__init__:Function]
# [/DEF:NetworkError:Class]
# [DEF:network.SupersetAuthCache:Class]
# [DEF:SupersetAuthCache:Class]
# @PURPOSE: Process-local cache for Superset access/csrf tokens keyed by environment credentials.
# @PRE: base_url and username are stable strings.
# @POST: Cached entries expire automatically by TTL and can be reused across requests.
@@ -152,8 +151,8 @@ class SupersetAuthCache:
# [DEF:APIClient:Class]
# @COMPLEXITY: 3
# @PURPOSE: Synchronous Superset API client with process-local auth token caching.
# @RELATION: DEPENDS_ON -> network.SupersetAuthCache
# @RELATION: DEPENDS_ON -> logger
# @RELATION: [DEPENDS_ON] ->[SupersetAuthCache]
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
class APIClient:
DEFAULT_TIMEOUT = 30
@@ -256,7 +255,7 @@ class APIClient:
return f"{self.api_base_url}{normalized_endpoint}"
# [/DEF:_build_api_url:Function]
# [DEF:authenticate:Function]
# [DEF:APIClient.authenticate:Function]
# @PURPOSE: Выполняет аутентификацию в Superset API и получает access и CSRF токены.
# @PRE: self.auth and self.base_url must be valid.
# @POST: `self._tokens` заполнен, `self._authenticated` установлен в `True`.
@@ -364,7 +363,14 @@ class APIClient:
if status_code == 502 or status_code == 503 or status_code == 504:
raise NetworkError(f"Environment unavailable (Status {status_code})", status_code=status_code) from e
if status_code == 404:
raise DashboardNotFoundError(endpoint) from e
if self._is_dashboard_endpoint(endpoint):
raise DashboardNotFoundError(endpoint) from e
raise SupersetAPIError(
f"API resource not found at endpoint '{endpoint}'",
status_code=status_code,
endpoint=endpoint,
subtype="not_found",
) from e
if status_code == 403:
raise PermissionDeniedError() from e
if status_code == 401:
@@ -372,6 +378,24 @@ class APIClient:
raise SupersetAPIError(f"API Error {status_code}: {e.response.text}") from e
# [/DEF:_handle_http_error:Function]
# [DEF:_is_dashboard_endpoint:Function]
# @PURPOSE: Determine whether an API endpoint represents a dashboard resource for 404 translation.
# @PRE: endpoint may be relative or absolute.
# @POST: Returns true only for dashboard-specific endpoints.
def _is_dashboard_endpoint(self, endpoint: str) -> bool:
normalized_endpoint = str(endpoint or "").strip().lower()
if not normalized_endpoint:
return False
if normalized_endpoint.startswith("http://") or normalized_endpoint.startswith("https://"):
try:
normalized_endpoint = "/" + normalized_endpoint.split("/api/v1", 1)[1].lstrip("/")
except IndexError:
return False
if normalized_endpoint.startswith("/api/v1/"):
normalized_endpoint = normalized_endpoint[len("/api/v1"):]
return normalized_endpoint.startswith("/dashboard/") or normalized_endpoint == "/dashboard"
# [/DEF:_is_dashboard_endpoint:Function]
# [DEF:_handle_network_error:Function]
# @PURPOSE: (Helper) Преобразует сетевые ошибки в `NetworkError`.
# @PARAM: e (requests.exceptions.RequestException) - Ошибка.
@@ -505,4 +529,4 @@ class APIClient:
# [/DEF:APIClient:Class]
# [/DEF:backend.core.utils.network:Module]
# [/DEF:NetworkModule:Module]

View File

@@ -3,9 +3,8 @@
# @SEMANTICS: dataset_review, superset, compilation_preview, sql_lab_launch, execution_truth
# @PURPOSE: Interact with Superset preview compilation and SQL Lab execution endpoints using the current approved execution context.
# @LAYER: Infra
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[SupersetClient]
# @RELATION: [DEPENDS_ON] ->[CompiledPreview]
# @RELATION: [DEPENDS_ON] ->[DatasetRunContext]
# @PRE: effective template params and dataset execution reference are available.
# @POST: preview and launch calls return Superset-originated artifacts or explicit errors.
# @SIDE_EFFECT: performs upstream Superset preview and SQL Lab calls.
@@ -54,7 +53,7 @@ class SqlLabLaunchPayload:
# [DEF:SupersetCompilationAdapter:Class]
# @COMPLEXITY: 4
# @PURPOSE: Delegate preview compilation and SQL Lab launch to Superset without local SQL fabrication.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[SupersetClient]
# @PRE: environment is configured and Superset is reachable for the target session.
# @POST: adapter can return explicit ready/failed preview artifacts and canonical SQL Lab references.
# @SIDE_EFFECT: issues network requests to Superset API surfaces.
@@ -222,57 +221,49 @@ class SupersetCompilationAdapter:
# [DEF:SupersetCompilationAdapter._request_superset_preview:Function]
# @COMPLEXITY: 4
# @PURPOSE: Probe supported Superset preview surfaces and return the first explicit compilation response.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @PURPOSE: Request preview compilation through explicit client support backed by real Superset endpoints only.
# @RELATION: [CALLS] ->[SupersetClient.compile_dataset_preview]
# @PRE: payload contains a valid dataset identifier and deterministic execution inputs for one preview attempt.
# @POST: returns the first upstream response that exposes compiled SQL without fabricating local SQL.
# @SIDE_EFFECT: issues one or more Superset preview requests until a supported surface responds.
# @POST: returns one normalized upstream compilation response without endpoint guessing.
# @SIDE_EFFECT: issues one Superset chart-data request through the client.
# @DATA_CONTRACT: Input[PreviewCompilationPayload] -> Output[Dict[str,Any]]
def _request_superset_preview(self, payload: PreviewCompilationPayload) -> Dict[str, Any]:
request_payload = {
"dataset_id": payload.dataset_id,
"template_params": payload.template_params,
"effective_filters": payload.effective_filters,
"session_id": payload.session_id,
}
candidate_calls = self._build_preview_call_candidates(payload.dataset_id, request_payload)
errors: List[str] = []
try:
logger.reason(
"Attempting deterministic Superset preview compilation via chart/data",
extra={
"dataset_id": payload.dataset_id,
"session_id": payload.session_id,
"filter_count": len(payload.effective_filters),
"template_param_count": len(payload.template_params),
},
)
response = self.client.compile_dataset_preview(
dataset_id=payload.dataset_id,
template_params=payload.template_params,
effective_filters=payload.effective_filters,
)
except Exception as exc:
logger.explore(
"Superset preview compilation via chart/data failed",
extra={
"dataset_id": payload.dataset_id,
"session_id": payload.session_id,
"error": str(exc),
},
)
raise RuntimeError(str(exc)) from exc
for candidate in candidate_calls:
call_kind = candidate["kind"]
target = candidate["target"]
try:
logger.reason(
"Attempting Superset preview compilation candidate",
extra={"kind": call_kind, "target": target},
)
if call_kind == "client_method":
method = getattr(self.client, target)
response = method(request_payload)
else:
response = self.client.network.request(
method=candidate["http_method"],
endpoint=target,
data=candidate["data"],
headers={"Content-Type": "application/json"},
)
normalized = self._normalize_preview_response(response)
if normalized is not None:
return normalized
except Exception as exc:
errors.append(f"{call_kind}:{target}:{exc}")
logger.explore(
"Superset preview compilation candidate failed",
extra={"kind": call_kind, "target": target, "error": str(exc)},
)
raise RuntimeError("; ".join(errors) or "No Superset preview surface accepted the request")
normalized = self._normalize_preview_response(response)
if normalized is None:
raise RuntimeError("Superset chart/data compilation response could not be normalized")
return normalized
# [/DEF:SupersetCompilationAdapter._request_superset_preview:Function]
# [DEF:SupersetCompilationAdapter._request_sql_lab_session:Function]
# @COMPLEXITY: 4
# @PURPOSE: Probe supported SQL Lab execution surfaces and return the first successful response.
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[SupersetClient.get_dataset]
# @PRE: payload carries non-empty Superset-originated SQL and a preview identifier for the current launch.
# @POST: returns the first successful SQL Lab execution response from Superset.
# @SIDE_EFFECT: issues Superset dataset lookup and SQL Lab execution requests.
@@ -317,49 +308,6 @@ class SupersetCompilationAdapter:
raise RuntimeError("; ".join(errors) or "No Superset SQL Lab surface accepted the request")
# [/DEF:SupersetCompilationAdapter._request_sql_lab_session:Function]
# [DEF:SupersetCompilationAdapter._build_preview_call_candidates:Function]
# @COMPLEXITY: 2
# @PURPOSE: Assemble preview candidate call shapes in priority order.
def _build_preview_call_candidates(
self,
dataset_id: int,
request_payload: Dict[str, Any],
) -> List[Dict[str, Any]]:
candidates: List[Dict[str, Any]] = []
for method_name in (
"compile_sql_preview",
"compile_preview",
"get_compiled_sql_preview",
):
if hasattr(self.client, method_name):
candidates.append({"kind": "client_method", "target": method_name})
encoded_payload = self._dump_json(request_payload)
candidates.extend(
[
{
"kind": "network",
"target": f"/dataset/{dataset_id}/preview",
"http_method": "POST",
"data": encoded_payload,
},
{
"kind": "network",
"target": f"/dataset/{dataset_id}/sql",
"http_method": "POST",
"data": encoded_payload,
},
{
"kind": "network",
"target": "/sqllab/format_sql/",
"http_method": "POST",
"data": encoded_payload,
},
]
)
return candidates
# [/DEF:SupersetCompilationAdapter._build_preview_call_candidates:Function]
# [DEF:SupersetCompilationAdapter._normalize_preview_response:Function]
# @COMPLEXITY: 3
# @PURPOSE: Normalize candidate Superset preview responses into one compiled-sql structure.

View File

@@ -94,6 +94,8 @@ class SupersetContextExtractor:
dataset_id = self._extract_numeric_identifier(path_parts, "dataset")
dashboard_id = self._extract_numeric_identifier(path_parts, "dashboard")
dashboard_ref = self._extract_dashboard_reference(path_parts)
dashboard_permalink_key = self._extract_dashboard_permalink_key(path_parts)
chart_id = self._extract_numeric_identifier(path_parts, "chart")
resource_type = "unknown"
@@ -108,13 +110,82 @@ class SupersetContextExtractor:
"Resolved direct dataset link",
extra={"dataset_id": dataset_id},
)
elif dashboard_id is not None:
elif dashboard_permalink_key is not None:
resource_type = "dashboard"
partial_recovery = True
dataset_ref = f"dashboard_permalink:{dashboard_permalink_key}"
unresolved_references.append("dashboard_permalink_dataset_binding_unresolved")
logger.reason(
"Resolving dashboard permalink state from Superset",
extra={"permalink_key": dashboard_permalink_key},
)
permalink_payload = self.client.get_dashboard_permalink_state(dashboard_permalink_key)
permalink_state = (
permalink_payload.get("state", permalink_payload)
if isinstance(permalink_payload, dict)
else {}
)
if isinstance(permalink_state, dict):
for key, value in permalink_state.items():
query_state.setdefault(key, value)
resolved_dashboard_id = self._extract_dashboard_id_from_state(permalink_state)
resolved_chart_id = self._extract_chart_id_from_state(permalink_state)
if resolved_dashboard_id is not None:
dashboard_id = resolved_dashboard_id
unresolved_references = [
item
for item in unresolved_references
if item != "dashboard_permalink_dataset_binding_unresolved"
]
dataset_id, unresolved_references = self._recover_dataset_binding_from_dashboard(
dashboard_id=dashboard_id,
dataset_ref=dataset_ref,
unresolved_references=unresolved_references,
)
if dataset_id is not None:
dataset_ref = f"dataset:{dataset_id}"
elif resolved_chart_id is not None:
chart_id = resolved_chart_id
unresolved_references = [
item
for item in unresolved_references
if item != "dashboard_permalink_dataset_binding_unresolved"
]
try:
chart_payload = self.client.get_chart(chart_id)
chart_data = chart_payload.get("result", chart_payload) if isinstance(chart_payload, dict) else {}
datasource_id = chart_data.get("datasource_id")
if datasource_id is not None:
dataset_id = int(datasource_id)
dataset_ref = f"dataset:{dataset_id}"
logger.reason(
"Recovered dataset reference from permalink chart context",
extra={"chart_id": chart_id, "dataset_id": dataset_id},
)
else:
unresolved_references.append("chart_dataset_binding_unresolved")
except Exception as exc:
unresolved_references.append("chart_dataset_binding_unresolved")
logger.explore(
"Chart lookup failed during permalink recovery",
extra={"chart_id": chart_id, "error": str(exc)},
)
else:
logger.explore(
"Dashboard permalink state was not a structured object",
extra={"permalink_key": dashboard_permalink_key},
)
elif dashboard_id is not None or dashboard_ref is not None:
resource_type = "dashboard"
resolved_dashboard_ref = dashboard_id if dashboard_id is not None else dashboard_ref
logger.reason(
"Resolving dashboard-bound dataset from Superset",
extra={"dashboard_id": dashboard_id},
extra={"dashboard_ref": resolved_dashboard_ref},
)
dashboard_detail = self.client.get_dashboard_detail(dashboard_id)
dashboard_detail = self.client.get_dashboard_detail(resolved_dashboard_ref)
resolved_dashboard_id = dashboard_detail.get("id")
if resolved_dashboard_id is not None:
dashboard_id = int(resolved_dashboard_id)
datasets = dashboard_detail.get("datasets") or []
if datasets:
first_dataset = datasets[0]
@@ -460,6 +531,127 @@ class SupersetContextExtractor:
return int(candidate)
# [/DEF:SupersetContextExtractor._extract_numeric_identifier:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract a dashboard id-or-slug reference from a Superset URL path.
def _extract_dashboard_reference(self, path_parts: List[str]) -> Optional[str]:
if "dashboard" not in path_parts:
return None
try:
resource_index = path_parts.index("dashboard")
except ValueError:
return None
if resource_index + 1 >= len(path_parts):
return None
candidate = str(path_parts[resource_index + 1]).strip()
if not candidate or candidate == "p":
return None
return candidate
# [/DEF:SupersetContextExtractor._extract_dashboard_reference:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract a dashboard permalink key from a Superset URL path.
def _extract_dashboard_permalink_key(self, path_parts: List[str]) -> Optional[str]:
if "dashboard" not in path_parts:
return None
try:
resource_index = path_parts.index("dashboard")
except ValueError:
return None
if resource_index + 2 >= len(path_parts):
return None
permalink_marker = str(path_parts[resource_index + 1]).strip()
permalink_key = str(path_parts[resource_index + 2]).strip()
if permalink_marker != "p" or not permalink_key:
return None
return permalink_key
# [/DEF:SupersetContextExtractor._extract_dashboard_permalink_key:Function]
# [DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract a dashboard identifier from returned permalink state when present.
def _extract_dashboard_id_from_state(self, state: Dict[str, Any]) -> Optional[int]:
return self._search_nested_numeric_key(
payload=state,
candidate_keys={"dashboardId", "dashboard_id", "dashboard_id_value"},
)
# [/DEF:SupersetContextExtractor._extract_dashboard_id_from_state:Function]
# [DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
# @COMPLEXITY: 2
# @PURPOSE: Extract a chart identifier from returned permalink state when dashboard id is absent.
def _extract_chart_id_from_state(self, state: Dict[str, Any]) -> Optional[int]:
return self._search_nested_numeric_key(
payload=state,
candidate_keys={"slice_id", "sliceId", "chartId", "chart_id"},
)
# [/DEF:SupersetContextExtractor._extract_chart_id_from_state:Function]
# [DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# @COMPLEXITY: 3
# @PURPOSE: Recursively search nested dict/list payloads for the first numeric value under a candidate key set.
def _search_nested_numeric_key(self, payload: Any, candidate_keys: Set[str]) -> Optional[int]:
if isinstance(payload, dict):
for key, value in payload.items():
if key in candidate_keys:
try:
if value is not None:
return int(value)
except (TypeError, ValueError):
pass
found = self._search_nested_numeric_key(value, candidate_keys)
if found is not None:
return found
elif isinstance(payload, list):
for item in payload:
found = self._search_nested_numeric_key(item, candidate_keys)
if found is not None:
return found
return None
# [/DEF:SupersetContextExtractor._search_nested_numeric_key:Function]
# [DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# @COMPLEXITY: 3
# @PURPOSE: Recover a dataset binding from resolved dashboard context while preserving explicit unresolved markers.
def _recover_dataset_binding_from_dashboard(
self,
dashboard_id: int,
dataset_ref: Optional[str],
unresolved_references: List[str],
) -> tuple[Optional[int], List[str]]:
dashboard_detail = self.client.get_dashboard_detail(dashboard_id)
datasets = dashboard_detail.get("datasets") or []
if datasets:
first_dataset = datasets[0]
resolved_dataset_id = first_dataset.get("id")
if resolved_dataset_id is not None:
resolved_dataset = int(resolved_dataset_id)
logger.reason(
"Recovered dataset reference from dashboard permalink context",
extra={
"dashboard_id": dashboard_id,
"dataset_id": resolved_dataset,
"dataset_count": len(datasets),
"dataset_ref": dataset_ref,
},
)
if len(datasets) > 1 and "multiple_dashboard_datasets" not in unresolved_references:
unresolved_references.append("multiple_dashboard_datasets")
return resolved_dataset, unresolved_references
if "dashboard_dataset_id_missing" not in unresolved_references:
unresolved_references.append("dashboard_dataset_id_missing")
return None, unresolved_references
if "dashboard_dataset_binding_missing" not in unresolved_references:
unresolved_references.append("dashboard_dataset_binding_missing")
return None, unresolved_references
# [/DEF:SupersetContextExtractor._recover_dataset_binding_from_dashboard:Function]
# [DEF:SupersetContextExtractor._decode_query_state:Function]
# @COMPLEXITY: 2
# @PURPOSE: Decode query-string structures used by Superset URL state transport.
@@ -470,7 +662,7 @@ class SupersetContextExtractor:
continue
raw_value = values[-1]
decoded_value = unquote(raw_value)
if key in {"native_filters", "native_filters_key", "form_data", "q"}:
if key in {"native_filters", "form_data", "q"}:
try:
query_state[key] = json.loads(decoded_value)
continue
@@ -514,6 +706,36 @@ class SupersetContextExtractor:
}
)
dashboard_data_mask = query_state.get("dataMask")
if isinstance(dashboard_data_mask, dict):
for filter_key, item in dashboard_data_mask.items():
if not isinstance(item, dict):
continue
filter_state = item.get("filterState")
extra_form_data = item.get("extraFormData")
display_name = None
raw_value = None
if isinstance(filter_state, dict):
display_name = filter_state.get("label")
raw_value = filter_state.get("value")
if raw_value is None and isinstance(extra_form_data, dict):
extra_filters = extra_form_data.get("filters")
if isinstance(extra_filters, list) and extra_filters:
first_filter = extra_filters[0]
if isinstance(first_filter, dict):
raw_value = first_filter.get("val")
imported_filters.append(
{
"filter_name": str(item.get("id") or filter_key),
"raw_value": raw_value,
"display_name": display_name,
"source": "superset_permalink",
"recovery_status": "recovered" if raw_value is not None else "partial",
"requires_confirmation": raw_value is None,
"notes": "Recovered from Superset dashboard permalink state",
}
)
form_data_payload = query_state.get("form_data")
if isinstance(form_data_payload, dict):
extra_filters = form_data_payload.get("extra_filters") or []

View File

@@ -45,16 +45,25 @@ from src.models.dataset_review import (
DatasetProfile,
DatasetReviewSession,
DatasetRunContext,
ExecutionMapping,
FilterConfidenceState,
FilterRecoveryStatus,
FilterSource,
FindingArea,
FindingSeverity,
ImportedFilter,
LaunchStatus,
MappingMethod,
MappingStatus,
PreviewStatus,
RecommendedAction,
ReadinessState,
ResolutionState,
SessionPhase,
SessionStatus,
TemplateVariable,
ValidationFinding,
VariableKind,
)
from src.services.dataset_review.repositories.session_repository import (
DatasetReviewSessionRepository,
@@ -248,6 +257,17 @@ class DatasetReviewOrchestrator:
)
persisted_session = self.repository.create_session(session)
recovered_filters: List[ImportedFilter] = []
template_variables: List[TemplateVariable] = []
execution_mappings: List[ExecutionMapping] = []
if normalized_source_kind == "superset_link" and parsed_context is not None:
recovered_filters, template_variables, execution_mappings, findings = self._build_recovery_bootstrap(
environment=environment,
session=persisted_session,
parsed_context=parsed_context,
findings=findings,
)
profile = self._build_initial_profile(
session_id=persisted_session.session_id,
parsed_context=parsed_context,
@@ -276,6 +296,14 @@ class DatasetReviewOrchestrator:
profile,
findings,
)
if recovered_filters or template_variables or execution_mappings:
persisted_session = self.repository.save_recovery_state(
persisted_session.session_id,
command.user.id,
recovered_filters,
template_variables,
execution_mappings,
)
active_task_id = self._enqueue_recovery_task(
command=command,
@@ -644,6 +672,115 @@ class DatasetReviewOrchestrator:
return findings
# [/DEF:DatasetReviewOrchestrator._build_partial_recovery_findings:Function]
# [DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# @COMPLEXITY: 4
# @PURPOSE: Recover and materialize initial imported filters, template variables, and draft execution mappings after session creation.
def _build_recovery_bootstrap(
self,
environment,
session: DatasetReviewSession,
parsed_context: SupersetParsedContext,
findings: List[ValidationFinding],
) -> tuple[List[ImportedFilter], List[TemplateVariable], List[ExecutionMapping], List[ValidationFinding]]:
extractor = SupersetContextExtractor(environment)
imported_filters_payload = extractor.recover_imported_filters(parsed_context)
if imported_filters_payload is None:
imported_filters_payload = []
imported_filters = [
ImportedFilter(
session_id=session.session_id,
filter_name=str(item.get("filter_name") or f"imported_filter_{index}"),
display_name=item.get("display_name"),
raw_value=item.get("raw_value"),
normalized_value=item.get("normalized_value"),
source=FilterSource(str(item.get("source") or FilterSource.SUPERSET_URL.value)),
confidence_state=FilterConfidenceState(
str(item.get("confidence_state") or FilterConfidenceState.UNRESOLVED.value)
),
requires_confirmation=bool(item.get("requires_confirmation", False)),
recovery_status=FilterRecoveryStatus(
str(item.get("recovery_status") or FilterRecoveryStatus.PARTIAL.value)
),
notes=item.get("notes"),
)
for index, item in enumerate(imported_filters_payload)
]
template_variables: List[TemplateVariable] = []
execution_mappings: List[ExecutionMapping] = []
if session.dataset_id is not None:
try:
dataset_payload = extractor.client.get_dataset_detail(session.dataset_id)
discovered_variables = extractor.discover_template_variables(dataset_payload)
template_variables = [
TemplateVariable(
session_id=session.session_id,
variable_name=str(item.get("variable_name") or f"variable_{index}"),
expression_source=str(item.get("expression_source") or ""),
variable_kind=VariableKind(str(item.get("variable_kind") or VariableKind.UNKNOWN.value)),
is_required=bool(item.get("is_required", True)),
default_value=item.get("default_value"),
mapping_status=MappingStatus(str(item.get("mapping_status") or MappingStatus.UNMAPPED.value)),
)
for index, item in enumerate(discovered_variables)
]
except Exception as exc:
if "dataset_template_variable_discovery_failed" not in parsed_context.unresolved_references:
parsed_context.unresolved_references.append("dataset_template_variable_discovery_failed")
if not any(
finding.caused_by_ref == "dataset_template_variable_discovery_failed"
for finding in findings
):
findings.append(
ValidationFinding(
area=FindingArea.TEMPLATE_MAPPING,
severity=FindingSeverity.WARNING,
code="TEMPLATE_VARIABLE_DISCOVERY_FAILED",
title="Template variables could not be discovered",
message="Session remains usable, but dataset template variables still need review.",
resolution_state=ResolutionState.OPEN,
caused_by_ref="dataset_template_variable_discovery_failed",
)
)
logger.explore(
"Template variable discovery failed during session bootstrap",
extra={"session_id": session.session_id, "dataset_id": session.dataset_id, "error": str(exc)},
)
filter_lookup = {
str(imported_filter.filter_name or "").strip().lower(): imported_filter
for imported_filter in imported_filters
if str(imported_filter.filter_name or "").strip()
}
for template_variable in template_variables:
matched_filter = filter_lookup.get(str(template_variable.variable_name or "").strip().lower())
if matched_filter is None:
continue
requires_explicit_approval = bool(
matched_filter.requires_confirmation
or matched_filter.recovery_status != FilterRecoveryStatus.RECOVERED
)
execution_mappings.append(
ExecutionMapping(
session_id=session.session_id,
filter_id=matched_filter.filter_id,
variable_id=template_variable.variable_id,
mapping_method=MappingMethod.DIRECT_MATCH,
raw_input_value=matched_filter.raw_value,
effective_value=matched_filter.normalized_value if matched_filter.normalized_value is not None else matched_filter.raw_value,
transformation_note="Bootstrapped from Superset recovery context",
warning_level=None if not requires_explicit_approval else None,
requires_explicit_approval=requires_explicit_approval,
approval_state=ApprovalState.PENDING if requires_explicit_approval else ApprovalState.NOT_REQUIRED,
approved_by_user_id=None,
approved_at=None,
)
)
return imported_filters, template_variables, execution_mappings, findings
# [/DEF:DatasetReviewOrchestrator._build_recovery_bootstrap:Function]
# [DEF:DatasetReviewOrchestrator._build_execution_snapshot:Function]
# @COMPLEXITY: 4
# @PURPOSE: Build effective filters, template params, approvals, and fingerprint for preview and launch gating.

View File

@@ -23,9 +23,12 @@ from src.models.dataset_review import (
ValidationFinding,
CompiledPreview,
DatasetRunContext,
ExecutionMapping,
ImportedFilter,
SemanticFieldEntry,
SessionCollaborator,
SessionEvent,
TemplateVariable,
)
from src.core.logger import belief_scope, logger
from src.services.dataset_review.event_logger import SessionEventLogger
@@ -202,6 +205,71 @@ class DatasetReviewSessionRepository:
return self.load_session_detail(session_id, user_id)
# [/DEF:save_prof_find:Function]
# [DEF:save_recovery_state:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist imported filters, template variables, and initial execution mappings for one owned session.
# @RELATION: [DEPENDS_ON] -> [ImportedFilter]
# @RELATION: [DEPENDS_ON] -> [TemplateVariable]
# @RELATION: [DEPENDS_ON] -> [ExecutionMapping]
def save_recovery_state(
self,
session_id: str,
user_id: str,
imported_filters: List[ImportedFilter],
template_variables: List[TemplateVariable],
execution_mappings: List[ExecutionMapping],
) -> DatasetReviewSession:
with belief_scope("DatasetReviewSessionRepository.save_recovery_state"):
session = self._get_owned_session(session_id, user_id)
logger.reason(
"Persisting dataset review recovery bootstrap state",
extra={
"session_id": session_id,
"user_id": user_id,
"imported_filters_count": len(imported_filters),
"template_variables_count": len(template_variables),
"execution_mappings_count": len(execution_mappings),
},
)
self.db.query(ExecutionMapping).filter(
ExecutionMapping.session_id == session_id
).delete()
self.db.query(TemplateVariable).filter(
TemplateVariable.session_id == session_id
).delete()
self.db.query(ImportedFilter).filter(
ImportedFilter.session_id == session_id
).delete()
for imported_filter in imported_filters:
imported_filter.session_id = session_id
self.db.add(imported_filter)
for template_variable in template_variables:
template_variable.session_id = session_id
self.db.add(template_variable)
self.db.flush()
for execution_mapping in execution_mappings:
execution_mapping.session_id = session_id
self.db.add(execution_mapping)
self.db.commit()
logger.reflect(
"Dataset review recovery bootstrap state committed",
extra={
"session_id": session.session_id,
"user_id": user_id,
"imported_filters_count": len(imported_filters),
"template_variables_count": len(template_variables),
"execution_mappings_count": len(execution_mappings),
},
)
return self.load_session_detail(session_id, user_id)
# [/DEF:save_recovery_state:Function]
# [DEF:save_prev:Function]
# @COMPLEXITY: 4
# @PURPOSE: Persist a preview snapshot and mark prior session previews stale.