semantic clean up

2026-03-10 19:38:10 +03:00
parent 31717870e3
commit 542835e0ff
31 changed files with 5392 additions and 6647 deletions
--- a/backend/src/core/migration/risk_assessor.py
+++ b/backend/src/core/migration/risk_assessor.py
@@ -1,118 +1,170 @@
 # [DEF:backend.src.core.migration.risk_assessor:Module]
-# @TIER: STANDARD
-# @SEMANTICS: migration, dry_run, risk, scoring
-# @PURPOSE: Risk evaluation helpers for migration pre-flight reporting.
-# @LAYER: Core
-# @RELATION: USED_BY -> backend.src.core.migration.dry_run_orchestrator
+# @TIER: CRITICAL
+# @SEMANTICS: migration, dry_run, risk, scoring, preflight
+# @PURPOSE: Compute deterministic migration risk items and aggregate score for dry-run reporting.
+# @LAYER: Domain
+# @RELATION: [DEPENDS_ON] ->[backend.src.core.superset_client.SupersetClient]
+# @RELATION: [DISPATCHES] ->[backend.src.core.migration.dry_run_orchestrator.MigrationDryRunService.run]
+# @INVARIANT: Risk scoring must remain bounded to [0,100] and preserve severity-to-weight mapping.
+# @TEST_CONTRACT: [source_objects,target_objects,diff,target_client] -> [List[RiskItem]]
+# @TEST_SCENARIO: [overwrite_update_objects] -> [medium overwrite_existing risk is emitted for each update diff item]
+# @TEST_SCENARIO: [missing_datasource_dataset] -> [high missing_datasource risk is emitted]
+# @TEST_SCENARIO: [owner_mismatch_dashboard] -> [low owner_mismatch risk is emitted]
+# @TEST_EDGE: [missing_field] -> [object without uuid is ignored by indexer]
+# @TEST_EDGE: [invalid_type] -> [non-list owners input normalizes to empty identifiers]
+# @TEST_EDGE: [external_fail] -> [target_client get_databases exception propagates to caller]
+# @TEST_INVARIANT: [score_upper_bound_100] -> VERIFIED_BY: [severity_weight_aggregation]
+# @UX_STATE: [Idle] -> [N/A backend domain module]
+# @UX_FEEDBACK: [N/A] -> [No direct UI side effects in this module]
+# @UX_RECOVERY: [N/A] -> [Caller-level retry/recovery]
+# @UX_REACTIVITY: [N/A] -> [Backend synchronous function contracts]

 from typing import Any, Dict, List

+from ..logger import logger, belief_scope
 from ..superset_client import SupersetClient


 # [DEF:index_by_uuid:Function]
 # @PURPOSE: Build UUID-index from normalized objects.
+# @PRE: Input list items are dict-like payloads potentially containing "uuid".
+# @POST: Returns mapping keyed by string uuid; only truthy uuid values are included.
+# @SIDE_EFFECT: Emits reasoning/reflective logs only.
+# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Dict[str, Any]]
 def index_by_uuid(objects: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
-    indexed: Dict[str, Dict[str, Any]] = {}
-    for obj in objects:
-        uuid = obj.get("uuid")
-        if uuid:
-            indexed[str(uuid)] = obj
-    return indexed
+    with belief_scope("risk_assessor.index_by_uuid"):
+        logger.reason("Building UUID index", extra={"objects_count": len(objects)})
+        indexed: Dict[str, Dict[str, Any]] = {}
+        for obj in objects:
+            uuid = obj.get("uuid")
+            if uuid:
+                indexed[str(uuid)] = obj
+        logger.reflect("UUID index built", extra={"indexed_count": len(indexed)})
+        return indexed
 # [/DEF:index_by_uuid:Function]


 # [DEF:extract_owner_identifiers:Function]
 # @PURPOSE: Normalize owner payloads for stable comparison.
+# @PRE: Owners may be list payload, scalar values, or None.
+# @POST: Returns sorted unique owner identifiers as strings; any non-list input yields an empty list.
+# @SIDE_EFFECT: Emits reasoning/reflective logs only.
+# @DATA_CONTRACT: Any -> List[str]
 def extract_owner_identifiers(owners: Any) -> List[str]:
-    if not isinstance(owners, list):
-        return []
-    ids: List[str] = []
-    for owner in owners:
-        if isinstance(owner, dict):
-            if owner.get("username"):
-                ids.append(str(owner["username"]))
-            elif owner.get("id") is not None:
-                ids.append(str(owner["id"]))
-        elif owner is not None:
-            ids.append(str(owner))
-    return sorted(set(ids))
+    with belief_scope("risk_assessor.extract_owner_identifiers"):
+        logger.reason("Normalizing owner identifiers")
+        if not isinstance(owners, list):
+            logger.reflect("Owners payload is not list; returning empty identifiers")
+            return []
+        ids: List[str] = []
+        for owner in owners:
+            if isinstance(owner, dict):
+                if owner.get("username"):
+                    ids.append(str(owner["username"]))
+                elif owner.get("id") is not None:
+                    ids.append(str(owner["id"]))
+            elif owner is not None:
+                ids.append(str(owner))
+        normalized_ids = sorted(set(ids))
+        logger.reflect("Owner identifiers normalized", extra={"owner_count": len(normalized_ids)})
+        return normalized_ids
 # [/DEF:extract_owner_identifiers:Function]


 # [DEF:build_risks:Function]
 # @PURPOSE: Build risk list from computed diffs and target catalog state.
+# @PRE: source_objects/target_objects map dashboards/charts/datasets to lists of dicts; diff maps each of those keys to a dict with an "update" list whose items carry "uuid".
+# @PRE: target_client is authenticated/usable for database list retrieval.
+# @POST: Returns list of deterministic risk items derived from overwrite, missing datasource, breaking reference, and owner mismatch checks.
+# @SIDE_EFFECT: Calls target Superset API for databases metadata and emits logs.
+# @DATA_CONTRACT: (
+# @DATA_CONTRACT:   Dict[str, List[Dict[str, Any]]],
+# @DATA_CONTRACT:   Dict[str, List[Dict[str, Any]]],
+# @DATA_CONTRACT:   Dict[str, Dict[str, List[Dict[str, Any]]]],
+# @DATA_CONTRACT:   SupersetClient
+# @DATA_CONTRACT: ) -> List[Dict[str, Any]]
 def build_risks(
    source_objects: Dict[str, List[Dict[str, Any]]],
    target_objects: Dict[str, List[Dict[str, Any]]],
    diff: Dict[str, Dict[str, List[Dict[str, Any]]]],
    target_client: SupersetClient,
 ) -> List[Dict[str, Any]]:
-    risks: List[Dict[str, Any]] = []
-    for object_type in ("dashboards", "charts", "datasets"):
-        for item in diff[object_type]["update"]:
-            risks.append({
-                "code": "overwrite_existing",
-                "severity": "medium",
-                "object_type": object_type[:-1],
-                "object_uuid": item["uuid"],
-                "message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
-            })
+    with belief_scope("risk_assessor.build_risks"):
+        logger.reason("Building migration risks from diff payload")
+        risks: List[Dict[str, Any]] = []
+        for object_type in ("dashboards", "charts", "datasets"):
+            for item in diff[object_type]["update"]:
+                risks.append({
+                    "code": "overwrite_existing",
+                    "severity": "medium",
+                    "object_type": object_type[:-1],
+                    "object_uuid": item["uuid"],
+                    "message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
+                })

-    target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
-    _, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
-    target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}
+        target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
+        _, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
+        target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}

-    for dataset in source_objects["datasets"]:
-        db_uuid = dataset.get("database_uuid")
-        if db_uuid and str(db_uuid) not in target_database_uuids:
-            risks.append({
-                "code": "missing_datasource",
-                "severity": "high",
-                "object_type": "dataset",
-                "object_uuid": dataset.get("uuid"),
-                "message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
-            })
+        for dataset in source_objects["datasets"]:
+            db_uuid = dataset.get("database_uuid")
+            if db_uuid and str(db_uuid) not in target_database_uuids:
+                risks.append({
+                    "code": "missing_datasource",
+                    "severity": "high",
+                    "object_type": "dataset",
+                    "object_uuid": dataset.get("uuid"),
+                    "message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
+                })

-    for chart in source_objects["charts"]:
-        ds_uuid = chart.get("dataset_uuid")
-        if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
-            risks.append({
-                "code": "breaking_reference",
-                "severity": "high",
-                "object_type": "chart",
-                "object_uuid": chart.get("uuid"),
-                "message": f"Chart references dataset not found on target: {ds_uuid}",
-            })
+        for chart in source_objects["charts"]:
+            ds_uuid = chart.get("dataset_uuid")
+            if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
+                risks.append({
+                    "code": "breaking_reference",
+                    "severity": "high",
+                    "object_type": "chart",
+                    "object_uuid": chart.get("uuid"),
+                    "message": f"Chart references dataset not found on target: {ds_uuid}",
+                })

-    source_dash = index_by_uuid(source_objects["dashboards"])
-    target_dash = index_by_uuid(target_objects["dashboards"])
-    for item in diff["dashboards"]["update"]:
-        source_obj = source_dash.get(item["uuid"])
-        target_obj = target_dash.get(item["uuid"])
-        if not source_obj or not target_obj:
-            continue
-        source_owners = extract_owner_identifiers(source_obj.get("owners"))
-        target_owners = extract_owner_identifiers(target_obj.get("owners"))
-        if source_owners and target_owners and source_owners != target_owners:
-            risks.append({
-                "code": "owner_mismatch",
-                "severity": "low",
-                "object_type": "dashboard",
-                "object_uuid": item["uuid"],
-                "message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
-            })
-    return risks
+        source_dash = index_by_uuid(source_objects["dashboards"])
+        target_dash = index_by_uuid(target_objects["dashboards"])
+        for item in diff["dashboards"]["update"]:
+            source_obj = source_dash.get(item["uuid"])
+            target_obj = target_dash.get(item["uuid"])
+            if not source_obj or not target_obj:
+                continue
+            source_owners = extract_owner_identifiers(source_obj.get("owners"))
+            target_owners = extract_owner_identifiers(target_obj.get("owners"))
+            if source_owners and target_owners and source_owners != target_owners:
+                risks.append({
+                    "code": "owner_mismatch",
+                    "severity": "low",
+                    "object_type": "dashboard",
+                    "object_uuid": item["uuid"],
+                    "message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
+                })
+        logger.reflect("Risk list assembled", extra={"risk_count": len(risks)})
+        return risks
 # [/DEF:build_risks:Function]


 # [DEF:score_risks:Function]
 # @PURPOSE: Aggregate risk list into score and level.
+# @PRE: risk_items entries may carry a severity in {high,medium,low}; a missing or unrecognized severity is weighted as low.
+# @POST: Returns dict with score in [0,100], derived level, and original items.
+# @SIDE_EFFECT: Emits reasoning/reflective logs only.
+# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Any]
 def score_risks(risk_items: List[Dict[str, Any]]) -> Dict[str, Any]:
-    weights = {"high": 25, "medium": 10, "low": 5}
-    score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
-    level = "low" if score < 25 else "medium" if score < 60 else "high"
-    return {"score": score, "level": level, "items": risk_items}
+    with belief_scope("risk_assessor.score_risks"):
+        logger.reason("Scoring risk items", extra={"risk_items_count": len(risk_items)})
+        weights = {"high": 25, "medium": 10, "low": 5}
+        score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
+        level = "low" if score < 25 else "medium" if score < 60 else "high"
+        result = {"score": score, "level": level, "items": risk_items}
+        logger.reflect("Risk score computed", extra={"score": score, "level": level})
+        return result
 # [/DEF:score_risks:Function]