fix dashboard validation fallback and semantic relation parsing

2026-03-15 16:32:39 +03:00
parent 6b66f2fb49
commit eba0fab091
4 changed files with 145 additions and 26 deletions
--- a/backend/src/services/tests/test_resource_service.py
+++ b/backend/src/services/tests/test_resource_service.py
@@ -308,6 +308,91 @@ async def test_get_dashboards_with_status_handles_mixed_naive_and_aware_task_dat
 # [/DEF:test_get_dashboards_with_status_handles_mixed_naive_and_aware_task_datetimes:Function]


+# [DEF:test_get_dashboards_with_status_prefers_latest_decisive_validation_status_over_newer_unknown:Function]
+# @TEST: get_dashboards_with_status keeps latest task identity while falling back to older decisive validation status.
+# @PRE: Same dashboard has older WARN and newer UNKNOWN validation tasks.
+# @POST: Returned last_task points to newest task but preserves WARN as last meaningful validation state.
+@pytest.mark.anyio
+async def test_get_dashboards_with_status_prefers_latest_decisive_validation_status_over_newer_unknown():
+    with patch("src.services.resource_service.SupersetClient") as mock_client, \
+         patch("src.services.resource_service.GitService"):
+
+        from src.services.resource_service import ResourceService
+
+        service = ResourceService()
+        mock_client.return_value.get_dashboards_summary.return_value = [
+            {"id": 4, "title": "Dashboard 4", "slug": "deck"}
+        ]
+
+        task_warn = MagicMock()
+        task_warn.id = "task-warn"
+        task_warn.plugin_id = "llm_dashboard_validation"
+        task_warn.status = "SUCCESS"
+        task_warn.params = {"dashboard_id": "4", "environment_id": "prod"}
+        task_warn.result = {"status": "WARN"}
+        task_warn.started_at = datetime(2024, 1, 1, 11, 0, 0)
+
+        task_unknown = MagicMock()
+        task_unknown.id = "task-unknown"
+        task_unknown.plugin_id = "llm_dashboard_validation"
+        task_unknown.status = "RUNNING"
+        task_unknown.params = {"dashboard_id": "4", "environment_id": "prod"}
+        task_unknown.result = {"status": "UNKNOWN"}
+        task_unknown.started_at = datetime(2024, 1, 1, 12, 0, 0)
+
+        env = MagicMock()
+        env.id = "prod"
+
+        result = await service.get_dashboards_with_status(env, [task_warn, task_unknown])
+
+        assert result[0]["last_task"]["task_id"] == "task-unknown"
+        assert result[0]["last_task"]["status"] == "RUNNING"
+        assert result[0]["last_task"]["validation_status"] == "WARN"
+# [/DEF:test_get_dashboards_with_status_prefers_latest_decisive_validation_status_over_newer_unknown:Function]
+
+
+# [DEF:test_get_dashboards_with_status_falls_back_to_latest_unknown_without_decisive_history:Function]
+# @TEST: get_dashboards_with_status still returns newest UNKNOWN when no decisive validation exists.
+# @PRE: Same dashboard has only UNKNOWN validation tasks.
+# @POST: Returned last_task keeps newest UNKNOWN task.
+@pytest.mark.anyio
+async def test_get_dashboards_with_status_falls_back_to_latest_unknown_without_decisive_history():
+    with patch("src.services.resource_service.SupersetClient") as mock_client, \
+         patch("src.services.resource_service.GitService"):
+
+        from src.services.resource_service import ResourceService
+
+        service = ResourceService()
+        mock_client.return_value.get_dashboards_summary.return_value = [
+            {"id": 5, "title": "Dashboard 5", "slug": "ops"}
+        ]
+
+        task_unknown_old = MagicMock()
+        task_unknown_old.id = "task-unknown-old"
+        task_unknown_old.plugin_id = "llm_dashboard_validation"
+        task_unknown_old.status = "SUCCESS"
+        task_unknown_old.params = {"dashboard_id": "5", "environment_id": "prod"}
+        task_unknown_old.result = {"status": "UNKNOWN"}
+        task_unknown_old.started_at = datetime(2024, 1, 1, 11, 0, 0)
+
+        task_unknown_new = MagicMock()
+        task_unknown_new.id = "task-unknown-new"
+        task_unknown_new.plugin_id = "llm_dashboard_validation"
+        task_unknown_new.status = "SUCCESS"
+        task_unknown_new.params = {"dashboard_id": "5", "environment_id": "prod"}
+        task_unknown_new.result = {"status": "UNKNOWN"}
+        task_unknown_new.started_at = datetime(2024, 1, 1, 12, 0, 0)
+
+        env = MagicMock()
+        env.id = "prod"
+
+        result = await service.get_dashboards_with_status(env, [task_unknown_old, task_unknown_new])
+
+        assert result[0]["last_task"]["task_id"] == "task-unknown-new"
+        assert result[0]["last_task"]["validation_status"] == "UNKNOWN"
+# [/DEF:test_get_dashboards_with_status_falls_back_to_latest_unknown_without_decisive_history:Function]
+
+
 # [DEF:test_get_last_task_for_resource_handles_mixed_naive_and_aware_created_at:Function]
 # @TEST: _get_last_task_for_resource handles mixed naive/aware created_at values.
 # @PRE: Matching tasks include naive and aware created_at timestamps.
--- a/backend/src/services/resource_service.py
+++ b/backend/src/services/resource_service.py
@@ -189,15 +189,36 @@ class ResourceService:
            )
            return self._normalize_datetime_for_compare(raw_time)

-        last_task = max(matched_tasks, key=_task_time)
-        raw_result = getattr(last_task, "result", None)
-        validation_status = None
-        if isinstance(raw_result, dict):
-            validation_status = self._normalize_validation_status(raw_result.get("status"))
+        projected_tasks = []
+        for task in matched_tasks:
+            raw_result = getattr(task, "result", None)
+            validation_status = None
+            if isinstance(raw_result, dict):
+                validation_status = self._normalize_validation_status(raw_result.get("status"))
+            projected_tasks.append(
+                (
+                    task,
+                    validation_status,
+                    _task_time(task),
+                )
+            )
+
+        projected_tasks.sort(key=lambda item: item[2], reverse=True)
+        latest_task, latest_validation_status, _ = projected_tasks[0]
+        decisive_task = next(
+            (
+                item for item in projected_tasks
+                if item[1] in {"PASS", "WARN", "FAIL"}
+            ),
+            None,
+        )
+        validation_status = latest_validation_status
+        if validation_status == "UNKNOWN" and decisive_task is not None:
+            validation_status = decisive_task[1]

        return {
-            "task_id": str(getattr(last_task, "id", "")),
-            "status": self._normalize_task_status(getattr(last_task, "status", "")),
+            "task_id": str(getattr(latest_task, "id", "")),
+            "status": self._normalize_task_status(getattr(latest_task, "status", "")),
            "validation_status": validation_status,
        }
    # [/DEF:_get_last_llm_task_for_dashboard:Function]
--- a/generate_semantic_map.py
+++ b/generate_semantic_map.py
@@ -200,30 +200,41 @@ class SemanticEntity:
            except ValueError:
                base_tier = Tier.STANDARD

-            # Dynamic Tier Adjustments based on User Feedback
+            file_path_lower = self.file_path.lower()
+            is_test_entity = (
+                "test" in file_path_lower
+                or "/__tests__/" in self.file_path
+                or self.name.startswith("test_")
+            )

            # 1. Tests should never be higher than STANDARD
-            if "test" in self.file_path.lower() or "/__tests__/" in self.file_path or self.name.startswith("test_"):
-                if base_tier == Tier.CRITICAL:
-                    return Tier.STANDARD
+            if is_test_entity and base_tier == Tier.CRITICAL:
+                return Tier.STANDARD

-            # 2. Svelte components -> TRIVIAL/STANDARD (unless layout/page)
+            # 2. Non-route Svelte entities should not be escalated beyond STANDARD by path heuristics.
            if self.file_path.endswith(".svelte"):
-                if "+page" not in self.name and "+layout" not in self.name and "Page" not in self.name and "Layout" not in self.name:
-                    if base_tier == Tier.CRITICAL:
-                        return Tier.STANDARD
-
-            # 3. Tooling scripts
-            if "scripts/" in self.file_path or "_tui.py" in self.file_path:
-                if base_tier == Tier.CRITICAL:
+                is_route_level_svelte = any(
+                    marker in self.name for marker in ["+page", "+layout", "Page", "Layout"]
+                )
+                if not is_route_level_svelte and base_tier == Tier.CRITICAL:
                    return Tier.STANDARD

-            # 4. Promote critical security/data paths
+            # 3. Tooling scripts should not be escalated beyond STANDARD.
+            if ("scripts/" in self.file_path or "_tui.py" in self.file_path) and base_tier == Tier.CRITICAL:
+                return Tier.STANDARD
+
+            # 4. Promote only module-like entities in critical domains by path heuristic.
+            # This prevents path segments like "migration" from forcing every nested
+            # Block/Function/Component in a route file into CRITICAL validation.
            critical_keywords = ["auth", "security", "jwt", "database", "migration", "config", "session"]
-            if any(keyword in self.file_path.lower() for keyword in critical_keywords) and "test" not in self.file_path.lower():
-                # Allow explicit overrides to lower tiers if explicitly tagged TRIVIAL, otherwise promote logic mapping
-                if base_tier != Tier.TRIVIAL: 
-                    return Tier.CRITICAL
+            module_like_types = {"Module", "Class", "Store"}
+            if (
+                self.type in module_like_types
+                and any(keyword in file_path_lower for keyword in critical_keywords)
+                and not is_test_entity
+                and base_tier != Tier.TRIVIAL
+            ):
+                return Tier.CRITICAL

            return base_tier
    # [/DEF:get_tier:Function]
@@ -440,7 +451,7 @@ def get_patterns(lang: str) -> Dict[str, Pattern]:
                "js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[-\w\.]+):(?P<type>\w+)\]"),
                "html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
                "jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[A-Za-z_]+)\s*:?\s*(?P<value>.*)"),
-                "relation": re.compile(r"//\s*@RELATION:\s*\[?(?P<type>\w+)\]?\s*->\s*\[?(?P<target>[^\]]+)\]?"),
+                "relation": re.compile(r"(?:<!--\s*|//\s*|\*\s*|\s*)@RELATION:\s*\[?(?P<type>\w+)\]?\s*->\s*\[?(?P<target>[^\]\n]+)\]?"),
                "func_def": re.compile(r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)"),
                "console_log": re.compile(r"console\.(info|warn|debug)\s*\(\s*['\"`]\[[\w\.-]+\]\[(EXPLORE|REASON|REFLECT|[A-Za-z0-9_:]+)\]"),
                # Svelte-specific patterns
--- a/specs/020-task-reports-design/tasks.md
+++ b/specs/020-task-reports-design/tasks.md
@@ -128,6 +128,8 @@
 - [x] T041 Fix reports list sorting/filtering for mixed offset-naive and offset-aware datetimes to prevent `GET /api/reports` 500 during active migration (`backend/src/services/reports/report_service.py`, `backend/src/api/routes/__tests__/test_reports_api.py`)
 - [x] T042 Add frontend submit-guard for dashboard migration/backup modal actions to prevent duplicate task creation on repeated clicks (`frontend/src/routes/dashboards/+page.svelte`)
 - [x] T043 Update semantic map generator to ignore few-shot examples under `.ai/shots/` (`generate_semantic_map.py`)
+- [x] T044 Fix semantic map generator Svelte relation parsing and prevent path-based CRITICAL over-promotion for nested route entities (`generate_semantic_map.py`)
+- [x] T045 Preserve latest dashboard validation task identity while falling back to the most recent decisive result only for the displayed validation status, and broaden Svelte multiline `@RELATION` parsing (`backend/src/services/resource_service.py`, `backend/src/services/__tests__/test_resource_service.py`, `generate_semantic_map.py`)

 ---