fix: finalize semantic repair and test updates

2026-03-21 15:07:06 +03:00
parent 005797334b
commit 9b47b9b667
99 changed files with 2484 additions and 985 deletions
--- a/backend/src/services/health_service.py
+++ b/backend/src/services/health_service.py
@@ -19,30 +19,44 @@ from ..core.superset_client import SupersetClient
 from ..core.task_manager.cleanup import TaskCleanupService
 from ..core.task_manager import TaskManager

+
 # [DEF:HealthService:Class]
 # @COMPLEXITY: 4
 # @PURPOSE: Aggregate latest dashboard validation state and manage persisted health report lifecycle.
+# @PRE: Service is constructed with a live SQLAlchemy session and optional config manager.
+# @POST: Exposes health summary aggregation and validation report deletion operations.
+# @SIDE_EFFECT: Maintains in-memory dashboard metadata caches and may coordinate cleanup through collaborators.
+# @DATA_CONTRACT: Input[Session, Optional[Any]] -> Output[HealthSummaryResponse|bool]
+# @RELATION: [DEPENDS_ON] ->[sqlalchemy.orm.Session]
 # @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
 # @RELATION: [DEPENDS_ON] ->[backend.src.schemas.health.DashboardHealthItem]
 # @RELATION: [DEPENDS_ON] ->[backend.src.schemas.health.HealthSummaryResponse]
 # @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
 # @RELATION: [CALLS] ->[backend.src.core.task_manager.cleanup.TaskCleanupService]
 class HealthService:
-    _dashboard_summary_cache: Dict[str, Tuple[float, Dict[str, Dict[str, Optional[str]]]]] = {}
+    _dashboard_summary_cache: Dict[
+        str, Tuple[float, Dict[str, Dict[str, Optional[str]]]]
+    ] = {}
    _dashboard_summary_cache_ttl_seconds = 60.0

    """
    @PURPOSE: Service for managing and querying dashboard health data.
    """
+
    # [DEF:HealthService_init:Function]
    # @COMPLEXITY: 3
    # @PURPOSE: Initialize health service with DB session and optional config access for dashboard metadata resolution.
    # @PRE: db is a valid SQLAlchemy session.
    # @POST: Service is ready to aggregate summaries and delete health reports.
-    def __init__(self, db: Session, config_manager = None):
+    # @SIDE_EFFECT: Initializes per-instance dashboard metadata cache.
+    # @DATA_CONTRACT: Input[db: Session, config_manager: Optional[Any]] -> Output[HealthService]
+    # @RELATION: [BINDS_TO] ->[backend.src.services.health_service.HealthService]
+    # @RELATION: [DEPENDS_ON] ->[sqlalchemy.orm.Session]
+    def __init__(self, db: Session, config_manager=None):
        self.db = db
        self.config_manager = config_manager
        self._dashboard_meta_cache: Dict[Tuple[str, str], Dict[str, Optional[str]]] = {}
+
    # [/DEF:HealthService_init:Function]

    # [DEF:_prime_dashboard_meta_cache:Function]
@@ -51,6 +65,10 @@ class HealthService:
    # @PRE: records may contain mixed numeric and slug dashboard identifiers.
    # @POST: Numeric dashboard ids for known environments are cached when discoverable.
    # @SIDE_EFFECT: May call Superset dashboard list API once per referenced environment.
+    # @DATA_CONTRACT: Input[records: List[ValidationRecord]] -> Output[None]
+    # @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
+    # @RELATION: [DEPENDS_ON] ->[backend.src.core.superset_client.SupersetClient]
+    # @RELATION: [CALLS] ->[config_manager.get_environments]
    # @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_dashboards_summary]
    def _prime_dashboard_meta_cache(self, records: List[ValidationRecord]) -> None:
        if not self.config_manager or not records:
@@ -87,10 +105,13 @@ class HealthService:
                continue

            try:
-                cached_meta = self.__class__._dashboard_summary_cache.get(environment_id)
+                cached_meta = self.__class__._dashboard_summary_cache.get(
+                    environment_id
+                )
                cache_is_fresh = (
                    cached_meta is not None
-                    and (time.monotonic() - cached_meta[0]) < self.__class__._dashboard_summary_cache_ttl_seconds
+                    and (time.monotonic() - cached_meta[0])
+                    < self.__class__._dashboard_summary_cache_ttl_seconds
                )
                if cache_is_fresh:
                    dashboard_meta_map = cached_meta[1]
@@ -109,9 +130,11 @@ class HealthService:
                        dashboard_meta_map,
                    )
                for dashboard_id in dashboard_ids:
-                    self._dashboard_meta_cache[(environment_id, dashboard_id)] = dashboard_meta_map.get(
-                        dashboard_id,
-                        {"slug": None, "title": None},
+                    self._dashboard_meta_cache[(environment_id, dashboard_id)] = (
+                        dashboard_meta_map.get(
+                            dashboard_id,
+                            {"slug": None, "title": None},
+                        )
                    )
            except Exception as exc:
                logger.warning(
@@ -124,6 +147,7 @@ class HealthService:
                        "slug": None,
                        "title": None,
                    }
+
    # [/DEF:_prime_dashboard_meta_cache:Function]

    # [DEF:_resolve_dashboard_meta:Function]
@@ -131,7 +155,10 @@ class HealthService:
    # @PURPOSE: Resolve slug/title for a dashboard referenced by persisted validation record.
    # @PRE: dashboard_id may be numeric or slug-like; environment_id may be empty.
    # @POST: Returns dict with `slug` and `title` keys, using cache when possible.
-    def _resolve_dashboard_meta(self, dashboard_id: str, environment_id: Optional[str]) -> Dict[str, Optional[str]]:
+    # @SIDE_EFFECT: Writes default cache entries for unresolved numeric dashboard ids.
+    def _resolve_dashboard_meta(
+        self, dashboard_id: str, environment_id: Optional[str]
+    ) -> Dict[str, Optional[str]]:
        normalized_dashboard_id = str(dashboard_id or "").strip()
        normalized_environment_id = str(environment_id or "").strip()
        if not normalized_dashboard_id:
@@ -151,6 +178,7 @@ class HealthService:
        meta = {"slug": None, "title": None}
        self._dashboard_meta_cache[cache_key] = meta
        return meta
+
    # [/DEF:_resolve_dashboard_meta:Function]

    # [DEF:get_health_summary:Function]
@@ -162,7 +190,9 @@ class HealthService:
    # @DATA_CONTRACT: Input[environment_id: Optional[str]] -> Output[HealthSummaryResponse]
    # @RELATION: [CALLS] ->[self._prime_dashboard_meta_cache]
    # @RELATION: [CALLS] ->[self._resolve_dashboard_meta]
-    async def get_health_summary(self, environment_id: str = None) -> HealthSummaryResponse:
+    async def get_health_summary(
+        self, environment_id: str = None
+    ) -> HealthSummaryResponse:
        """
        @PURPOSE: Aggregates the latest validation status for all dashboards.
        @PRE: environment_id (optional) to filter by environment.
@@ -170,23 +200,25 @@ class HealthService:
        """
        # [REASON] We need the latest ValidationRecord for each unique dashboard_id.
        # We use a subquery to find the max timestamp per dashboard_id.
-        
+
        subquery = self.db.query(
            ValidationRecord.dashboard_id,
-            func.max(ValidationRecord.timestamp).label("max_ts")
+            func.max(ValidationRecord.timestamp).label("max_ts"),
        )
        if environment_id:
-            subquery = subquery.filter(ValidationRecord.environment_id == environment_id)
+            subquery = subquery.filter(
+                ValidationRecord.environment_id == environment_id
+            )
        subquery = subquery.group_by(ValidationRecord.dashboard_id).subquery()

        query = self.db.query(ValidationRecord).join(
            subquery,
-            (ValidationRecord.dashboard_id == subquery.c.dashboard_id) &
-            (ValidationRecord.timestamp == subquery.c.max_ts)
+            (ValidationRecord.dashboard_id == subquery.c.dashboard_id)
+            & (ValidationRecord.timestamp == subquery.c.max_ts),
        )
-        
+
        records = query.all()
-        
+
        self._prime_dashboard_meta_cache(records)

        items = []
@@ -208,27 +240,32 @@ class HealthService:
                status = "UNKNOWN"

            meta = self._resolve_dashboard_meta(rec.dashboard_id, rec.environment_id)
-            items.append(DashboardHealthItem(
-                record_id=rec.id,
-                dashboard_id=rec.dashboard_id,
-                dashboard_slug=meta.get("slug"),
-                dashboard_title=meta.get("title"),
-                environment_id=rec.environment_id or "unknown",
-                status=status,
-                last_check=rec.timestamp,
-                task_id=rec.task_id,
-                summary=rec.summary
-            ))
+            items.append(
+                DashboardHealthItem(
+                    record_id=rec.id,
+                    dashboard_id=rec.dashboard_id,
+                    dashboard_slug=meta.get("slug"),
+                    dashboard_title=meta.get("title"),
+                    environment_id=rec.environment_id or "unknown",
+                    status=status,
+                    last_check=rec.timestamp,
+                    task_id=rec.task_id,
+                    summary=rec.summary,
+                )
+            )
+
+        logger.info(
+            f"[HealthService][get_health_summary] Aggregated {len(items)} dashboard health records."
+        )

-        logger.info(f"[HealthService][get_health_summary] Aggregated {len(items)} dashboard health records.")
-        
        return HealthSummaryResponse(
            items=items,
            pass_count=pass_count,
            warn_count=warn_count,
            fail_count=fail_count,
-            unknown_count=unknown_count
+            unknown_count=unknown_count,
        )
+
    # [/DEF:get_health_summary:Function]

    # [DEF:delete_validation_report:Function]
@@ -238,9 +275,19 @@ class HealthService:
    # @POST: Returns True only when a matching record was deleted.
    # @SIDE_EFFECT: Deletes DB rows, optional screenshot file, and optional task/log persistence.
    # @DATA_CONTRACT: Input[record_id: str, task_manager: Optional[TaskManager]] -> Output[bool]
+    # @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
+    # @RELATION: [DEPENDS_ON] ->[backend.src.core.task_manager.TaskManager]
+    # @RELATION: [CALLS] ->[os.path.exists]
+    # @RELATION: [CALLS] ->[os.remove]
    # @RELATION: [CALLS] ->[backend.src.core.task_manager.cleanup.TaskCleanupService.delete_task_with_logs]
-    def delete_validation_report(self, record_id: str, task_manager: Optional[TaskManager] = None) -> bool:
-        record = self.db.query(ValidationRecord).filter(ValidationRecord.id == record_id).first()
+    def delete_validation_report(
+        self, record_id: str, task_manager: Optional[TaskManager] = None
+    ) -> bool:
+        record = (
+            self.db.query(ValidationRecord)
+            .filter(ValidationRecord.id == record_id)
+            .first()
+        )
        if not record:
            return False

@@ -250,7 +297,9 @@ class HealthService:
        if record.environment_id is None:
            peer_query = peer_query.filter(ValidationRecord.environment_id.is_(None))
        else:
-            peer_query = peer_query.filter(ValidationRecord.environment_id == record.environment_id)
+            peer_query = peer_query.filter(
+                ValidationRecord.environment_id == record.environment_id
+            )

        records_to_delete = peer_query.all()
        screenshot_paths = [
@@ -306,8 +355,10 @@ class HealthService:
                )

        return True
+
    # [/DEF:delete_validation_report:Function]
-    
+
+
 # [/DEF:HealthService:Class]

 # [/DEF:health_service:Module]