fix: finalize semantic repair and test updates

This commit is contained in:
2026-03-21 15:07:06 +03:00
parent 005797334b
commit 9b47b9b667
99 changed files with 2484 additions and 985 deletions

View File

@@ -19,30 +19,44 @@ from ..core.superset_client import SupersetClient
from ..core.task_manager.cleanup import TaskCleanupService
from ..core.task_manager import TaskManager
# [DEF:HealthService:Class]
# @COMPLEXITY: 4
# @PURPOSE: Aggregate latest dashboard validation state and manage persisted health report lifecycle.
# @PRE: Service is constructed with a live SQLAlchemy session and optional config manager.
# @POST: Exposes health summary aggregation and validation report deletion operations.
# @SIDE_EFFECT: Maintains in-memory dashboard metadata caches and may coordinate cleanup through collaborators.
# @DATA_CONTRACT: Input[Session, Optional[Any]] -> Output[HealthSummaryResponse|bool]
# @RELATION: [DEPENDS_ON] ->[sqlalchemy.orm.Session]
# @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
# @RELATION: [DEPENDS_ON] ->[backend.src.schemas.health.DashboardHealthItem]
# @RELATION: [DEPENDS_ON] ->[backend.src.schemas.health.HealthSummaryResponse]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[backend.src.core.task_manager.cleanup.TaskCleanupService]
class HealthService:
_dashboard_summary_cache: Dict[str, Tuple[float, Dict[str, Dict[str, Optional[str]]]]] = {}
_dashboard_summary_cache: Dict[
str, Tuple[float, Dict[str, Dict[str, Optional[str]]]]
] = {}
_dashboard_summary_cache_ttl_seconds = 60.0
"""
@PURPOSE: Service for managing and querying dashboard health data.
"""
# [DEF:HealthService_init:Function]
# @COMPLEXITY: 3
# @PURPOSE: Initialize health service with DB session and optional config access for dashboard metadata resolution.
# @PRE: db is a valid SQLAlchemy session.
# @POST: Service is ready to aggregate summaries and delete health reports.
def __init__(self, db: Session, config_manager = None):
# @SIDE_EFFECT: Initializes per-instance dashboard metadata cache.
# @DATA_CONTRACT: Input[db: Session, config_manager: Optional[Any]] -> Output[HealthService]
# @RELATION: [BINDS_TO] ->[backend.src.services.health_service.HealthService]
# @RELATION: [DEPENDS_ON] ->[sqlalchemy.orm.Session]
def __init__(self, db: Session, config_manager=None):
self.db = db
self.config_manager = config_manager
self._dashboard_meta_cache: Dict[Tuple[str, str], Dict[str, Optional[str]]] = {}
# [/DEF:HealthService_init:Function]
# [DEF:_prime_dashboard_meta_cache:Function]
@@ -51,6 +65,10 @@ class HealthService:
# @PRE: records may contain mixed numeric and slug dashboard identifiers.
# @POST: Numeric dashboard ids for known environments are cached when discoverable.
# @SIDE_EFFECT: May call Superset dashboard list API once per referenced environment.
# @DATA_CONTRACT: Input[records: List[ValidationRecord]] -> Output[None]
# @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
# @RELATION: [DEPENDS_ON] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [CALLS] ->[config_manager.get_environments]
# @RELATION: [CALLS] ->[backend.src.core.superset_client.SupersetClient.get_dashboards_summary]
def _prime_dashboard_meta_cache(self, records: List[ValidationRecord]) -> None:
if not self.config_manager or not records:
@@ -87,10 +105,13 @@ class HealthService:
continue
try:
cached_meta = self.__class__._dashboard_summary_cache.get(environment_id)
cached_meta = self.__class__._dashboard_summary_cache.get(
environment_id
)
cache_is_fresh = (
cached_meta is not None
and (time.monotonic() - cached_meta[0]) < self.__class__._dashboard_summary_cache_ttl_seconds
and (time.monotonic() - cached_meta[0])
< self.__class__._dashboard_summary_cache_ttl_seconds
)
if cache_is_fresh:
dashboard_meta_map = cached_meta[1]
@@ -109,9 +130,11 @@ class HealthService:
dashboard_meta_map,
)
for dashboard_id in dashboard_ids:
self._dashboard_meta_cache[(environment_id, dashboard_id)] = dashboard_meta_map.get(
dashboard_id,
{"slug": None, "title": None},
self._dashboard_meta_cache[(environment_id, dashboard_id)] = (
dashboard_meta_map.get(
dashboard_id,
{"slug": None, "title": None},
)
)
except Exception as exc:
logger.warning(
@@ -124,6 +147,7 @@ class HealthService:
"slug": None,
"title": None,
}
# [/DEF:_prime_dashboard_meta_cache:Function]
# [DEF:_resolve_dashboard_meta:Function]
@@ -131,7 +155,10 @@ class HealthService:
# @PURPOSE: Resolve slug/title for a dashboard referenced by persisted validation record.
# @PRE: dashboard_id may be numeric or slug-like; environment_id may be empty.
# @POST: Returns dict with `slug` and `title` keys, using cache when possible.
def _resolve_dashboard_meta(self, dashboard_id: str, environment_id: Optional[str]) -> Dict[str, Optional[str]]:
# @SIDE_EFFECT: Writes default cache entries for unresolved numeric dashboard ids.
def _resolve_dashboard_meta(
self, dashboard_id: str, environment_id: Optional[str]
) -> Dict[str, Optional[str]]:
normalized_dashboard_id = str(dashboard_id or "").strip()
normalized_environment_id = str(environment_id or "").strip()
if not normalized_dashboard_id:
@@ -151,6 +178,7 @@ class HealthService:
meta = {"slug": None, "title": None}
self._dashboard_meta_cache[cache_key] = meta
return meta
# [/DEF:_resolve_dashboard_meta:Function]
# [DEF:get_health_summary:Function]
@@ -162,7 +190,9 @@ class HealthService:
# @DATA_CONTRACT: Input[environment_id: Optional[str]] -> Output[HealthSummaryResponse]
# @RELATION: [CALLS] ->[self._prime_dashboard_meta_cache]
# @RELATION: [CALLS] ->[self._resolve_dashboard_meta]
async def get_health_summary(self, environment_id: str = None) -> HealthSummaryResponse:
async def get_health_summary(
self, environment_id: str = None
) -> HealthSummaryResponse:
"""
@PURPOSE: Aggregates the latest validation status for all dashboards.
@PRE: environment_id (optional) to filter by environment.
@@ -170,23 +200,25 @@ class HealthService:
"""
# [REASON] We need the latest ValidationRecord for each unique dashboard_id.
# We use a subquery to find the max timestamp per dashboard_id.
subquery = self.db.query(
ValidationRecord.dashboard_id,
func.max(ValidationRecord.timestamp).label("max_ts")
func.max(ValidationRecord.timestamp).label("max_ts"),
)
if environment_id:
subquery = subquery.filter(ValidationRecord.environment_id == environment_id)
subquery = subquery.filter(
ValidationRecord.environment_id == environment_id
)
subquery = subquery.group_by(ValidationRecord.dashboard_id).subquery()
query = self.db.query(ValidationRecord).join(
subquery,
(ValidationRecord.dashboard_id == subquery.c.dashboard_id) &
(ValidationRecord.timestamp == subquery.c.max_ts)
(ValidationRecord.dashboard_id == subquery.c.dashboard_id)
& (ValidationRecord.timestamp == subquery.c.max_ts),
)
records = query.all()
self._prime_dashboard_meta_cache(records)
items = []
@@ -208,27 +240,32 @@ class HealthService:
status = "UNKNOWN"
meta = self._resolve_dashboard_meta(rec.dashboard_id, rec.environment_id)
items.append(DashboardHealthItem(
record_id=rec.id,
dashboard_id=rec.dashboard_id,
dashboard_slug=meta.get("slug"),
dashboard_title=meta.get("title"),
environment_id=rec.environment_id or "unknown",
status=status,
last_check=rec.timestamp,
task_id=rec.task_id,
summary=rec.summary
))
items.append(
DashboardHealthItem(
record_id=rec.id,
dashboard_id=rec.dashboard_id,
dashboard_slug=meta.get("slug"),
dashboard_title=meta.get("title"),
environment_id=rec.environment_id or "unknown",
status=status,
last_check=rec.timestamp,
task_id=rec.task_id,
summary=rec.summary,
)
)
logger.info(
f"[HealthService][get_health_summary] Aggregated {len(items)} dashboard health records."
)
logger.info(f"[HealthService][get_health_summary] Aggregated {len(items)} dashboard health records.")
return HealthSummaryResponse(
items=items,
pass_count=pass_count,
warn_count=warn_count,
fail_count=fail_count,
unknown_count=unknown_count
unknown_count=unknown_count,
)
# [/DEF:get_health_summary:Function]
# [DEF:delete_validation_report:Function]
@@ -238,9 +275,19 @@ class HealthService:
# @POST: Returns True only when a matching record was deleted.
# @SIDE_EFFECT: Deletes DB rows, optional screenshot file, and optional task/log persistence.
# @DATA_CONTRACT: Input[record_id: str, task_manager: Optional[TaskManager]] -> Output[bool]
# @RELATION: [DEPENDS_ON] ->[backend.src.models.llm.ValidationRecord]
# @RELATION: [DEPENDS_ON] ->[backend.src.core.task_manager.TaskManager]
# @RELATION: [CALLS] ->[os.path.exists]
# @RELATION: [CALLS] ->[os.remove]
# @RELATION: [CALLS] ->[backend.src.core.task_manager.cleanup.TaskCleanupService.delete_task_with_logs]
def delete_validation_report(self, record_id: str, task_manager: Optional[TaskManager] = None) -> bool:
record = self.db.query(ValidationRecord).filter(ValidationRecord.id == record_id).first()
def delete_validation_report(
self, record_id: str, task_manager: Optional[TaskManager] = None
) -> bool:
record = (
self.db.query(ValidationRecord)
.filter(ValidationRecord.id == record_id)
.first()
)
if not record:
return False
@@ -250,7 +297,9 @@ class HealthService:
if record.environment_id is None:
peer_query = peer_query.filter(ValidationRecord.environment_id.is_(None))
else:
peer_query = peer_query.filter(ValidationRecord.environment_id == record.environment_id)
peer_query = peer_query.filter(
ValidationRecord.environment_id == record.environment_id
)
records_to_delete = peer_query.all()
screenshot_paths = [
@@ -306,8 +355,10 @@ class HealthService:
)
return True
# [/DEF:delete_validation_report:Function]
# [/DEF:HealthService:Class]
# [/DEF:health_service:Module]