semantic

2026-03-18 08:45:15 +03:00
parent 3094a2b58b
commit 6d64124e88
17 changed files with 1563 additions and 31159 deletions
--- a/backend/src/api/routes/tests/test_dataset_review_api.py
+++ b/backend/src/api/routes/tests/test_dataset_review_api.py
@@ -331,6 +331,7 @@ def test_parse_superset_link_dashboard_partial_recovery():
    )
    fake_client = MagicMock()
    fake_client.get_dashboard_detail.return_value = {
+        "id": 10,
        "datasets": [{"id": 42}, {"id": 77}],
    }
    fake_client.get_dataset_detail.return_value = {
@@ -352,6 +353,123 @@ def test_parse_superset_link_dashboard_partial_recovery():
 # [/DEF:test_parse_superset_link_dashboard_partial_recovery:Function]


+# [DEF:test_parse_superset_link_dashboard_slug_recovery:Function]
+# @PURPOSE: Verify dashboard slug links resolve through dashboard detail endpoints and recover dataset context.
+def test_parse_superset_link_dashboard_slug_recovery():
+    env = Environment(
+        id="env-1",
+        name="DEV",
+        url="http://superset.local",
+        username="demo",
+        password="secret",
+    )
+    fake_client = MagicMock()  # stubbed client injected into the extractor below
+    fake_client.get_dashboard_detail.return_value = {
+        "id": 15,  # numeric id asserted on result.dashboard_id
+        "datasets": [{"id": 42}],
+    }
+    fake_client.get_dataset_detail.return_value = {
+        "table_name": "sales",
+        "schema": "public",
+    }
+
+    extractor = SupersetContextExtractor(environment=env, client=fake_client)
+    result = extractor.parse_superset_link(
+        "https://ss-dev.bebesh.ru/superset/dashboard/slack/?native_filters_key=8ZLV4M-UXOM"  # NOTE(review): host differs from env.url; presumably the host is not validated - confirm
+    )
+
+    assert result.dataset_id == 42  # the single dataset listed by the stubbed dashboard detail
+    assert result.dashboard_id == 15
+    assert result.dataset_ref == "public.sales"  # matches schema + table_name of the stubbed dataset detail
+    assert result.partial_recovery is False
+    assert result.query_state["native_filters_key"] == "8ZLV4M-UXOM"  # query parameter from the link is kept
+    fake_client.get_dashboard_detail.assert_called_once_with("slack")  # the slug path segment is passed to the client as-is
+# [/DEF:test_parse_superset_link_dashboard_slug_recovery:Function]
+
+
+# [DEF:test_parse_superset_link_dashboard_permalink_partial_recovery:Function]
+# @PURPOSE: Verify dashboard permalink links no longer fail parsing and preserve permalink filter state for partial recovery.
+def test_parse_superset_link_dashboard_permalink_partial_recovery():
+    env = Environment(
+        id="env-1",
+        name="DEV",
+        url="http://superset.local",
+        username="demo",
+        password="secret",
+    )
+    fake_client = MagicMock()
+    fake_client.get_dashboard_permalink_state.return_value = {
+        "state": {
+            "dataMask": {  # the stubbed state holds only filter data, no dashboard id
+                "NATIVE_FILTER-1": {
+                    "id": "country",
+                    "filterState": {
+                        "label": "Country",
+                        "value": ["DE"],
+                    },
+                    "extraFormData": {
+                        "filters": [{"col": "country", "op": "IN", "val": ["DE"]}],
+                    },
+                }
+            }
+        }
+    }
+
+    extractor = SupersetContextExtractor(environment=env, client=fake_client)
+    result = extractor.parse_superset_link(
+        "http://ss-dev.bebesh.ru/superset/dashboard/p/QabXy6wG30Z/"
+    )
+
+    assert result.resource_type == "dashboard"
+    assert result.dataset_id is None  # nothing in the stubbed state identifies a dataset
+    assert result.dashboard_id is None
+    assert result.dataset_ref == "dashboard_permalink:QabXy6wG30Z"  # placeholder ref embeds the permalink key from the URL
+    assert result.partial_recovery is True
+    assert "dashboard_permalink_dataset_binding_unresolved" in result.unresolved_references
+    assert result.imported_filters[0]["filter_name"] == "country"  # equals the dataMask entry's "id"
+    assert result.imported_filters[0]["raw_value"] == ["DE"]  # equals filterState["value"]
+    fake_client.get_dashboard_permalink_state.assert_called_once_with("QabXy6wG30Z")
+# [/DEF:test_parse_superset_link_dashboard_permalink_partial_recovery:Function]
+
+
+# [DEF:test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state:Function]
+# @PURPOSE: Verify permalink state with nested dashboard id recovers dataset binding and keeps imported filters.
+def test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state():
+    env = Environment(
+        id="env-1",
+        name="DEV",
+        url="http://superset.local",
+        username="demo",
+        password="secret",
+    )
+    fake_client = MagicMock()
+    fake_client.get_dashboard_permalink_state.return_value = {
+        "state": {
+            "form_data": {"dashboardId": 22},  # nested dashboard id the extractor is expected to pick up
+            "dataMask": {
+                "NATIVE_FILTER-1": {
+                    "id": "country",
+                    "filterState": {"label": "Country", "value": ["DE"]},
+                }
+            },
+        }
+    }
+    fake_client.get_dashboard_detail.return_value = {"id": 22, "datasets": [{"id": 42}]}
+    fake_client.get_dataset_detail.return_value = {"table_name": "sales", "schema": "public"}
+
+    extractor = SupersetContextExtractor(environment=env, client=fake_client)
+    result = extractor.parse_superset_link(
+        "http://ss-dev.bebesh.ru/superset/dashboard/p/QabXy6wG30Z/"
+    )
+
+    assert result.dashboard_id == 22
+    assert result.dataset_id == 42
+    assert result.dataset_ref == "public.sales"  # matches schema + table_name of the stubbed dataset detail
+    assert "dashboard_permalink_dataset_binding_unresolved" not in result.unresolved_references
+    assert result.imported_filters[0]["filter_name"] == "country"  # filters are still imported when the binding resolves
+# [/DEF:test_parse_superset_link_dashboard_permalink_recovers_dataset_from_nested_dashboard_state:Function]
+
+
+
+
 # [DEF:test_resolve_from_dictionary_prefers_exact_match:Function]
 # @PURPOSE: Verify trusted dictionary exact matches outrank fuzzy candidates and unresolved fields stay explicit.
 def test_resolve_from_dictionary_prefers_exact_match():
@@ -400,6 +518,7 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap

    repository.create_session.return_value = created_session
    repository.save_profile_and_findings.return_value = created_session
+    repository.save_recovery_state.return_value = created_session
    repository.db = MagicMock()

    orchestrator = DatasetReviewOrchestrator(
@@ -415,11 +534,23 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap
        chart_id=None,
        partial_recovery=True,
        unresolved_references=["dashboard_dataset_binding_missing"],
+        imported_filters=[],
    )

+    fake_extractor = MagicMock()
+    fake_extractor.parse_superset_link.return_value = parsed_context
+    fake_extractor.recover_imported_filters.return_value = []
+    fake_extractor.client.get_dataset_detail.return_value = {
+        "id": 42,
+        "sql": "",
+        "columns": [],
+        "metrics": [],
+    }
+    fake_extractor.discover_template_variables.return_value = []
+
    with patch(
-        "src.services.dataset_review.orchestrator.SupersetContextExtractor.parse_superset_link",
-        return_value=parsed_context,
+        "src.services.dataset_review.orchestrator.SupersetContextExtractor",
+        side_effect=[fake_extractor, fake_extractor],
    ):
        result = orchestrator.start_session(
            StartSessionCommand(
@@ -438,6 +569,94 @@ def test_orchestrator_start_session_preserves_partial_recovery(dataset_review_ap
 # [/DEF:test_orchestrator_start_session_preserves_partial_recovery:Function]


+# [DEF:test_orchestrator_start_session_bootstraps_recovery_state:Function]
+# @PURPOSE: Verify session start persists recovered filters, template variables, and initial execution mappings for review workspace bootstrap.
+def test_orchestrator_start_session_bootstraps_recovery_state(dataset_review_api_dependencies):
+    repository = MagicMock()
+    created_session = _make_session()
+    created_session.readiness_state = ReadinessState.RECOVERY_REQUIRED
+    created_session.current_phase = SessionPhase.RECOVERY
+
+    repository.create_session.return_value = created_session  # every repository write hands back the same session object
+    repository.save_profile_and_findings.return_value = created_session
+    repository.save_recovery_state.return_value = created_session
+    repository.db = MagicMock()
+
+    orchestrator = DatasetReviewOrchestrator(
+        repository=repository,
+        config_manager=dataset_review_api_dependencies["config_manager"],
+        task_manager=None,
+    )
+
+    parsed_context = SimpleNamespace(  # lightweight stand-in for the extractor's parse result
+        dataset_ref="public.sales",
+        dataset_id=42,
+        dashboard_id=10,
+        chart_id=None,
+        partial_recovery=True,
+        unresolved_references=["dashboard_dataset_binding_missing"],
+        imported_filters=[{"filter_name": "country", "raw_value": ["DE"]}],
+    )
+
+    fake_extractor = MagicMock()
+    fake_extractor.parse_superset_link.return_value = parsed_context
+    fake_extractor.recover_imported_filters.return_value = [
+        {
+            "filter_name": "country",
+            "display_name": "Country",
+            "raw_value": ["DE"],
+            "normalized_value": ["DE"],
+            "source": "superset_url",
+            "confidence_state": "imported",
+            "requires_confirmation": False,
+            "recovery_status": "recovered",
+            "notes": "Recovered from permalink state",
+        }
+    ]
+    fake_extractor.client.get_dataset_detail.return_value = {
+        "id": 42,
+        "sql": "select * from sales where country in {{ filter_values('country') }}",
+        "columns": [],
+        "metrics": [],
+    }
+    fake_extractor.discover_template_variables.return_value = [
+        {
+            "variable_name": "country",  # same name as the recovered filter above
+            "expression_source": "{{ filter_values('country') }}",
+            "variable_kind": "native_filter",
+            "is_required": True,
+            "default_value": None,
+            "mapping_status": "unmapped",
+        }
+    ]
+
+    with patch(
+        "src.services.dataset_review.orchestrator.SupersetContextExtractor",  # patches the class itself, so each instantiation is intercepted
+        side_effect=[fake_extractor, fake_extractor],  # the first two instantiations both yield the same fake; a third would raise StopIteration
+    ):
+        result = orchestrator.start_session(
+            StartSessionCommand(
+                user=dataset_review_api_dependencies["user"],
+                environment_id="env-1",
+                source_kind="superset_link",
+                source_input="http://superset.local/dashboard/10",
+            )
+        )
+
+    assert result.session.readiness_state == ReadinessState.RECOVERY_REQUIRED
+    repository.save_recovery_state.assert_called_once()
+    saved_filters = repository.save_recovery_state.call_args.args[2]  # NOTE(review): indices 2-4 presumably mirror the repository method's positional order - confirm if the signature changes
+    saved_variables = repository.save_recovery_state.call_args.args[3]
+    saved_mappings = repository.save_recovery_state.call_args.args[4]
+    assert len(saved_filters) == 1
+    assert saved_filters[0].filter_name == "country"  # attribute access: saved items are objects, not the stub dicts
+    assert len(saved_variables) == 1
+    assert saved_variables[0].variable_name == "country"
+    assert len(saved_mappings) == 1
+    assert saved_mappings[0].raw_input_value == ["DE"]  # equals the recovered filter's raw value
+# [/DEF:test_orchestrator_start_session_bootstraps_recovery_state:Function]
+
+
 # [DEF:test_start_session_endpoint_returns_created_summary:Function]
 # @PURPOSE: Verify POST session lifecycle endpoint returns a persisted ownership-scoped summary.
 def test_start_session_endpoint_returns_created_summary(dataset_review_api_dependencies):
@@ -828,6 +1047,54 @@ def test_us3_mapping_patch_approval_preview_and_launch_endpoints(dataset_review_
 # [/DEF:test_us3_mapping_patch_approval_preview_and_launch_endpoints:Function]


+# [DEF:test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift:Function]
+# @PURPOSE: Preview endpoint should preserve API contract and surface generic upstream preview failures without fabricating dashboard-not-found semantics for non-dashboard 404s.
+def test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift(
+    dataset_review_api_dependencies,
+):
+    session = _make_us3_session()
+    repository = MagicMock()
+    repository.load_session_detail.return_value = session
+    repository.db = MagicMock()
+    repository.event_logger = MagicMock(spec=SessionEventLogger)  # spec restricts the mock to SessionEventLogger's attributes
+
+    failed_preview = SimpleNamespace(  # stand-in preview record describing a failed compilation
+        preview_id="preview-failed",
+        session_id="sess-1",
+        preview_status=PreviewStatus.FAILED,
+        compiled_sql=None,
+        preview_fingerprint="fingerprint-failed",
+        compiled_by="superset",
+        error_code="superset_preview_failed",
+        error_details="RuntimeError: [API_FAILURE] API resource not found at endpoint '/chart/data' | Context: {'status_code': 404, 'endpoint': '/chart/data', 'subtype': 'not_found'}",
+        compiled_at=None,
+        created_at=datetime.now(timezone.utc),
+    )
+    orchestrator = MagicMock()  # orchestrator is fully mocked; only the route's response mapping is exercised
+    orchestrator.prepare_launch_preview.return_value = PreparePreviewResult(
+        session=session,
+        preview=failed_preview,
+        blocked_reasons=[],
+    )
+
+    app.dependency_overrides[_get_repository] = lambda: repository  # NOTE(review): overrides are not reset in this test; presumably the fixture clears them - confirm
+    app.dependency_overrides[_get_orchestrator] = lambda: orchestrator
+
+    response = client.post("/api/dataset-orchestration/sessions/sess-1/preview")
+
+    assert response.status_code == 200  # the failure is reported in the payload, not through the HTTP status
+    payload = response.json()
+    assert payload["preview_id"] == "preview-failed"
+    assert payload["preview_status"] == "failed"
+    assert payload["compiled_sql"] is None
+    assert payload["compiled_by"] == "superset"
+    assert payload["error_code"] == "superset_preview_failed"
+    assert "/chart/data" in payload["error_details"]  # upstream endpoint detail is passed through
+    assert "API resource not found" in payload["error_details"]
+    assert "Dashboard not found" not in payload["error_details"]  # the 404 on /chart/data must not be relabelled as a missing dashboard
+# [/DEF:test_us3_preview_endpoint_returns_failed_preview_without_false_dashboard_not_found_contract_drift:Function]
+
+
 # [DEF:test_us3_launch_endpoint_requires_launch_permission:Function]
 # @PURPOSE: Launch endpoint should enforce the contract RBAC permission instead of the generic session-manage permission.
 def test_us3_launch_endpoint_requires_launch_permission(dataset_review_api_dependencies):