semantic clean up

This commit is contained in:
2026-03-10 19:38:10 +03:00
parent 31717870e3
commit 542835e0ff
31 changed files with 5392 additions and 6647 deletions

View File

@@ -1,118 +1,170 @@
# [DEF:backend.src.core.migration.risk_assessor:Module]
# @TIER: STANDARD
# @SEMANTICS: migration, dry_run, risk, scoring
# @PURPOSE: Risk evaluation helpers for migration pre-flight reporting.
# @LAYER: Core
# @RELATION: USED_BY -> backend.src.core.migration.dry_run_orchestrator
# @TIER: CRITICAL
# @SEMANTICS: migration, dry_run, risk, scoring, preflight
# @PURPOSE: Compute deterministic migration risk items and aggregate score for dry-run reporting.
# @LAYER: Domain
# @RELATION: [DEPENDS_ON] ->[backend.src.core.superset_client.SupersetClient]
# @RELATION: [DISPATCHES] ->[backend.src.core.migration.dry_run_orchestrator.MigrationDryRunService.run]
# @INVARIANT: Risk scoring must remain bounded to [0,100] and preserve severity-to-weight mapping.
# @TEST_CONTRACT: [source_objects,target_objects,diff,target_client] -> [List[RiskItem]]
# @TEST_SCENARIO: [overwrite_update_objects] -> [medium overwrite_existing risk is emitted for each update diff item]
# @TEST_SCENARIO: [missing_datasource_dataset] -> [high missing_datasource risk is emitted]
# @TEST_SCENARIO: [owner_mismatch_dashboard] -> [low owner_mismatch risk is emitted]
# @TEST_EDGE: [missing_field] -> [object without uuid is ignored by indexer]
# @TEST_EDGE: [invalid_type] -> [non-list owners input normalizes to empty identifiers]
# @TEST_EDGE: [external_fail] -> [target_client get_databases exception propagates to caller]
# @TEST_INVARIANT: [score_upper_bound_100] -> VERIFIED_BY: [severity_weight_aggregation]
# @UX_STATE: [Idle] -> [N/A backend domain module]
# @UX_FEEDBACK: [N/A] -> [No direct UI side effects in this module]
# @UX_RECOVERY: [N/A] -> [Caller-level retry/recovery]
# @UX_REACTIVITY: [N/A] -> [Backend synchronous function contracts]
from typing import Any, Dict, List
from ..logger import logger, belief_scope
from ..superset_client import SupersetClient
# [DEF:index_by_uuid:Function]
# @PURPOSE: Build UUID-index from normalized objects.
# @PRE: Input list items are dict-like payloads potentially containing "uuid".
# @POST: Returns mapping keyed by string uuid; only truthy uuid values are included.
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Dict[str, Any]]
def index_by_uuid(objects: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
indexed: Dict[str, Dict[str, Any]] = {}
for obj in objects:
uuid = obj.get("uuid")
if uuid:
indexed[str(uuid)] = obj
return indexed
with belief_scope("risk_assessor.index_by_uuid"):
logger.reason("Building UUID index", extra={"objects_count": len(objects)})
indexed: Dict[str, Dict[str, Any]] = {}
for obj in objects:
uuid = obj.get("uuid")
if uuid:
indexed[str(uuid)] = obj
logger.reflect("UUID index built", extra={"indexed_count": len(indexed)})
return indexed
# [/DEF:index_by_uuid:Function]
# [DEF:extract_owner_identifiers:Function]
# @PURPOSE: Normalize owner payloads for stable comparison.
# @PRE: Owners may be list payload, scalar values, or None.
# @POST: Returns sorted unique owner identifiers as strings.
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
# @DATA_CONTRACT: Any -> List[str]
def extract_owner_identifiers(owners: Any) -> List[str]:
if not isinstance(owners, list):
return []
ids: List[str] = []
for owner in owners:
if isinstance(owner, dict):
if owner.get("username"):
ids.append(str(owner["username"]))
elif owner.get("id") is not None:
ids.append(str(owner["id"]))
elif owner is not None:
ids.append(str(owner))
return sorted(set(ids))
with belief_scope("risk_assessor.extract_owner_identifiers"):
logger.reason("Normalizing owner identifiers")
if not isinstance(owners, list):
logger.reflect("Owners payload is not list; returning empty identifiers")
return []
ids: List[str] = []
for owner in owners:
if isinstance(owner, dict):
if owner.get("username"):
ids.append(str(owner["username"]))
elif owner.get("id") is not None:
ids.append(str(owner["id"]))
elif owner is not None:
ids.append(str(owner))
normalized_ids = sorted(set(ids))
logger.reflect("Owner identifiers normalized", extra={"owner_count": len(normalized_ids)})
return normalized_ids
# [/DEF:extract_owner_identifiers:Function]
# [DEF:build_risks:Function]
# @PURPOSE: Build risk list from computed diffs and target catalog state.
# @PRE: source_objects/target_objects/diff contain dashboards/charts/datasets keys with expected list structures.
# @PRE: target_client is authenticated/usable for database list retrieval.
# @POST: Returns list of deterministic risk items derived from overwrite, missing datasource, reference, and owner mismatch checks.
# @SIDE_EFFECT: Calls target Superset API for databases metadata and emits logs.
# @DATA_CONTRACT: (
# @DATA_CONTRACT: Dict[str, List[Dict[str, Any]]],
# @DATA_CONTRACT: Dict[str, List[Dict[str, Any]]],
# @DATA_CONTRACT: Dict[str, Dict[str, List[Dict[str, Any]]]],
# @DATA_CONTRACT: SupersetClient
# @DATA_CONTRACT: ) -> List[Dict[str, Any]]
def build_risks(
source_objects: Dict[str, List[Dict[str, Any]]],
target_objects: Dict[str, List[Dict[str, Any]]],
diff: Dict[str, Dict[str, List[Dict[str, Any]]]],
target_client: SupersetClient,
) -> List[Dict[str, Any]]:
risks: List[Dict[str, Any]] = []
for object_type in ("dashboards", "charts", "datasets"):
for item in diff[object_type]["update"]:
risks.append({
"code": "overwrite_existing",
"severity": "medium",
"object_type": object_type[:-1],
"object_uuid": item["uuid"],
"message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
})
with belief_scope("risk_assessor.build_risks"):
logger.reason("Building migration risks from diff payload")
risks: List[Dict[str, Any]] = []
for object_type in ("dashboards", "charts", "datasets"):
for item in diff[object_type]["update"]:
risks.append({
"code": "overwrite_existing",
"severity": "medium",
"object_type": object_type[:-1],
"object_uuid": item["uuid"],
"message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
})
target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
_, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}
target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
_, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}
for dataset in source_objects["datasets"]:
db_uuid = dataset.get("database_uuid")
if db_uuid and str(db_uuid) not in target_database_uuids:
risks.append({
"code": "missing_datasource",
"severity": "high",
"object_type": "dataset",
"object_uuid": dataset.get("uuid"),
"message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
})
for dataset in source_objects["datasets"]:
db_uuid = dataset.get("database_uuid")
if db_uuid and str(db_uuid) not in target_database_uuids:
risks.append({
"code": "missing_datasource",
"severity": "high",
"object_type": "dataset",
"object_uuid": dataset.get("uuid"),
"message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
})
for chart in source_objects["charts"]:
ds_uuid = chart.get("dataset_uuid")
if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
risks.append({
"code": "breaking_reference",
"severity": "high",
"object_type": "chart",
"object_uuid": chart.get("uuid"),
"message": f"Chart references dataset not found on target: {ds_uuid}",
})
for chart in source_objects["charts"]:
ds_uuid = chart.get("dataset_uuid")
if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
risks.append({
"code": "breaking_reference",
"severity": "high",
"object_type": "chart",
"object_uuid": chart.get("uuid"),
"message": f"Chart references dataset not found on target: {ds_uuid}",
})
source_dash = index_by_uuid(source_objects["dashboards"])
target_dash = index_by_uuid(target_objects["dashboards"])
for item in diff["dashboards"]["update"]:
source_obj = source_dash.get(item["uuid"])
target_obj = target_dash.get(item["uuid"])
if not source_obj or not target_obj:
continue
source_owners = extract_owner_identifiers(source_obj.get("owners"))
target_owners = extract_owner_identifiers(target_obj.get("owners"))
if source_owners and target_owners and source_owners != target_owners:
risks.append({
"code": "owner_mismatch",
"severity": "low",
"object_type": "dashboard",
"object_uuid": item["uuid"],
"message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
})
return risks
source_dash = index_by_uuid(source_objects["dashboards"])
target_dash = index_by_uuid(target_objects["dashboards"])
for item in diff["dashboards"]["update"]:
source_obj = source_dash.get(item["uuid"])
target_obj = target_dash.get(item["uuid"])
if not source_obj or not target_obj:
continue
source_owners = extract_owner_identifiers(source_obj.get("owners"))
target_owners = extract_owner_identifiers(target_obj.get("owners"))
if source_owners and target_owners and source_owners != target_owners:
risks.append({
"code": "owner_mismatch",
"severity": "low",
"object_type": "dashboard",
"object_uuid": item["uuid"],
"message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
})
logger.reflect("Risk list assembled", extra={"risk_count": len(risks)})
return risks
# [/DEF:build_risks:Function]
# [DEF:score_risks:Function]
# @PURPOSE: Aggregate risk list into score and level.
# @PRE: risk_items contains optional severity fields expected in {high,medium,low} or defaults to low weight.
# @POST: Returns dict with score in [0,100], derived level, and original items.
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Any]
def score_risks(risk_items: List[Dict[str, Any]]) -> Dict[str, Any]:
weights = {"high": 25, "medium": 10, "low": 5}
score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
level = "low" if score < 25 else "medium" if score < 60 else "high"
return {"score": score, "level": level, "items": risk_items}
with belief_scope("risk_assessor.score_risks"):
logger.reason("Scoring risk items", extra={"risk_items_count": len(risk_items)})
weights = {"high": 25, "medium": 10, "low": 5}
score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
level = "low" if score < 25 else "medium" if score < 60 else "high"
result = {"score": score, "level": level, "items": risk_items}
logger.reflect("Risk score computed", extra={"score": score, "level": level})
return result
# [/DEF:score_risks:Function]