semantic clean up
This commit is contained in:
@@ -1,118 +1,170 @@
|
||||
# [DEF:backend.src.core.migration.risk_assessor:Module]
|
||||
# @TIER: STANDARD
|
||||
# @SEMANTICS: migration, dry_run, risk, scoring
|
||||
# @PURPOSE: Risk evaluation helpers for migration pre-flight reporting.
|
||||
# @LAYER: Core
|
||||
# @RELATION: USED_BY -> backend.src.core.migration.dry_run_orchestrator
|
||||
# @TIER: CRITICAL
|
||||
# @SEMANTICS: migration, dry_run, risk, scoring, preflight
|
||||
# @PURPOSE: Compute deterministic migration risk items and aggregate score for dry-run reporting.
|
||||
# @LAYER: Domain
|
||||
# @RELATION: [DEPENDS_ON] ->[backend.src.core.superset_client.SupersetClient]
|
||||
# @RELATION: [DISPATCHES] ->[backend.src.core.migration.dry_run_orchestrator.MigrationDryRunService.run]
|
||||
# @INVARIANT: Risk scoring must remain bounded to [0,100] and preserve severity-to-weight mapping.
|
||||
# @TEST_CONTRACT: [source_objects,target_objects,diff,target_client] -> [List[RiskItem]]
|
||||
# @TEST_SCENARIO: [overwrite_update_objects] -> [medium overwrite_existing risk is emitted for each update diff item]
|
||||
# @TEST_SCENARIO: [missing_datasource_dataset] -> [high missing_datasource risk is emitted]
|
||||
# @TEST_SCENARIO: [owner_mismatch_dashboard] -> [low owner_mismatch risk is emitted]
|
||||
# @TEST_EDGE: [missing_field] -> [object without uuid is ignored by indexer]
|
||||
# @TEST_EDGE: [invalid_type] -> [non-list owners input normalizes to empty identifiers]
|
||||
# @TEST_EDGE: [external_fail] -> [target_client get_databases exception propagates to caller]
|
||||
# @TEST_INVARIANT: [score_upper_bound_100] -> VERIFIED_BY: [severity_weight_aggregation]
|
||||
# @UX_STATE: [Idle] -> [N/A backend domain module]
|
||||
# @UX_FEEDBACK: [N/A] -> [No direct UI side effects in this module]
|
||||
# @UX_RECOVERY: [N/A] -> [Caller-level retry/recovery]
|
||||
# @UX_REACTIVITY: [N/A] -> [Backend synchronous function contracts]
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from ..logger import logger, belief_scope
|
||||
from ..superset_client import SupersetClient
|
||||
|
||||
|
||||
# [DEF:index_by_uuid:Function]
|
||||
# @PURPOSE: Build UUID-index from normalized objects.
|
||||
# @PRE: Input list items are dict-like payloads potentially containing "uuid".
|
||||
# @POST: Returns mapping keyed by string uuid; only truthy uuid values are included.
|
||||
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
|
||||
# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Dict[str, Any]]
|
||||
def index_by_uuid(objects: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
|
||||
indexed: Dict[str, Dict[str, Any]] = {}
|
||||
for obj in objects:
|
||||
uuid = obj.get("uuid")
|
||||
if uuid:
|
||||
indexed[str(uuid)] = obj
|
||||
return indexed
|
||||
with belief_scope("risk_assessor.index_by_uuid"):
|
||||
logger.reason("Building UUID index", extra={"objects_count": len(objects)})
|
||||
indexed: Dict[str, Dict[str, Any]] = {}
|
||||
for obj in objects:
|
||||
uuid = obj.get("uuid")
|
||||
if uuid:
|
||||
indexed[str(uuid)] = obj
|
||||
logger.reflect("UUID index built", extra={"indexed_count": len(indexed)})
|
||||
return indexed
|
||||
# [/DEF:index_by_uuid:Function]
|
||||
|
||||
|
||||
# [DEF:extract_owner_identifiers:Function]
|
||||
# @PURPOSE: Normalize owner payloads for stable comparison.
|
||||
# @PRE: Owners may be list payload, scalar values, or None.
|
||||
# @POST: Returns sorted unique owner identifiers as strings.
|
||||
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
|
||||
# @DATA_CONTRACT: Any -> List[str]
|
||||
def extract_owner_identifiers(owners: Any) -> List[str]:
|
||||
if not isinstance(owners, list):
|
||||
return []
|
||||
ids: List[str] = []
|
||||
for owner in owners:
|
||||
if isinstance(owner, dict):
|
||||
if owner.get("username"):
|
||||
ids.append(str(owner["username"]))
|
||||
elif owner.get("id") is not None:
|
||||
ids.append(str(owner["id"]))
|
||||
elif owner is not None:
|
||||
ids.append(str(owner))
|
||||
return sorted(set(ids))
|
||||
with belief_scope("risk_assessor.extract_owner_identifiers"):
|
||||
logger.reason("Normalizing owner identifiers")
|
||||
if not isinstance(owners, list):
|
||||
logger.reflect("Owners payload is not list; returning empty identifiers")
|
||||
return []
|
||||
ids: List[str] = []
|
||||
for owner in owners:
|
||||
if isinstance(owner, dict):
|
||||
if owner.get("username"):
|
||||
ids.append(str(owner["username"]))
|
||||
elif owner.get("id") is not None:
|
||||
ids.append(str(owner["id"]))
|
||||
elif owner is not None:
|
||||
ids.append(str(owner))
|
||||
normalized_ids = sorted(set(ids))
|
||||
logger.reflect("Owner identifiers normalized", extra={"owner_count": len(normalized_ids)})
|
||||
return normalized_ids
|
||||
# [/DEF:extract_owner_identifiers:Function]
|
||||
|
||||
|
||||
# [DEF:build_risks:Function]
|
||||
# @PURPOSE: Build risk list from computed diffs and target catalog state.
|
||||
# @PRE: source_objects/target_objects/diff contain dashboards/charts/datasets keys with expected list structures.
|
||||
# @PRE: target_client is authenticated/usable for database list retrieval.
|
||||
# @POST: Returns list of deterministic risk items derived from overwrite, missing datasource, reference, and owner mismatch checks.
|
||||
# @SIDE_EFFECT: Calls target Superset API for databases metadata and emits logs.
|
||||
# @DATA_CONTRACT: (
|
||||
# @DATA_CONTRACT: Dict[str, List[Dict[str, Any]]],
|
||||
# @DATA_CONTRACT: Dict[str, List[Dict[str, Any]]],
|
||||
# @DATA_CONTRACT: Dict[str, Dict[str, List[Dict[str, Any]]]],
|
||||
# @DATA_CONTRACT: SupersetClient
|
||||
# @DATA_CONTRACT: ) -> List[Dict[str, Any]]
|
||||
def build_risks(
|
||||
source_objects: Dict[str, List[Dict[str, Any]]],
|
||||
target_objects: Dict[str, List[Dict[str, Any]]],
|
||||
diff: Dict[str, Dict[str, List[Dict[str, Any]]]],
|
||||
target_client: SupersetClient,
|
||||
) -> List[Dict[str, Any]]:
|
||||
risks: List[Dict[str, Any]] = []
|
||||
for object_type in ("dashboards", "charts", "datasets"):
|
||||
for item in diff[object_type]["update"]:
|
||||
risks.append({
|
||||
"code": "overwrite_existing",
|
||||
"severity": "medium",
|
||||
"object_type": object_type[:-1],
|
||||
"object_uuid": item["uuid"],
|
||||
"message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
|
||||
})
|
||||
with belief_scope("risk_assessor.build_risks"):
|
||||
logger.reason("Building migration risks from diff payload")
|
||||
risks: List[Dict[str, Any]] = []
|
||||
for object_type in ("dashboards", "charts", "datasets"):
|
||||
for item in diff[object_type]["update"]:
|
||||
risks.append({
|
||||
"code": "overwrite_existing",
|
||||
"severity": "medium",
|
||||
"object_type": object_type[:-1],
|
||||
"object_uuid": item["uuid"],
|
||||
"message": f"Object will be updated in target: {item.get('title') or item['uuid']}",
|
||||
})
|
||||
|
||||
target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
|
||||
_, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
|
||||
target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}
|
||||
target_dataset_uuids = set(index_by_uuid(target_objects["datasets"]).keys())
|
||||
_, target_databases = target_client.get_databases(query={"columns": ["uuid"]})
|
||||
target_database_uuids = {str(item.get("uuid")) for item in target_databases if item.get("uuid")}
|
||||
|
||||
for dataset in source_objects["datasets"]:
|
||||
db_uuid = dataset.get("database_uuid")
|
||||
if db_uuid and str(db_uuid) not in target_database_uuids:
|
||||
risks.append({
|
||||
"code": "missing_datasource",
|
||||
"severity": "high",
|
||||
"object_type": "dataset",
|
||||
"object_uuid": dataset.get("uuid"),
|
||||
"message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
|
||||
})
|
||||
for dataset in source_objects["datasets"]:
|
||||
db_uuid = dataset.get("database_uuid")
|
||||
if db_uuid and str(db_uuid) not in target_database_uuids:
|
||||
risks.append({
|
||||
"code": "missing_datasource",
|
||||
"severity": "high",
|
||||
"object_type": "dataset",
|
||||
"object_uuid": dataset.get("uuid"),
|
||||
"message": f"Target datasource is missing for dataset {dataset.get('title') or dataset.get('uuid')}",
|
||||
})
|
||||
|
||||
for chart in source_objects["charts"]:
|
||||
ds_uuid = chart.get("dataset_uuid")
|
||||
if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
|
||||
risks.append({
|
||||
"code": "breaking_reference",
|
||||
"severity": "high",
|
||||
"object_type": "chart",
|
||||
"object_uuid": chart.get("uuid"),
|
||||
"message": f"Chart references dataset not found on target: {ds_uuid}",
|
||||
})
|
||||
for chart in source_objects["charts"]:
|
||||
ds_uuid = chart.get("dataset_uuid")
|
||||
if ds_uuid and str(ds_uuid) not in target_dataset_uuids:
|
||||
risks.append({
|
||||
"code": "breaking_reference",
|
||||
"severity": "high",
|
||||
"object_type": "chart",
|
||||
"object_uuid": chart.get("uuid"),
|
||||
"message": f"Chart references dataset not found on target: {ds_uuid}",
|
||||
})
|
||||
|
||||
source_dash = index_by_uuid(source_objects["dashboards"])
|
||||
target_dash = index_by_uuid(target_objects["dashboards"])
|
||||
for item in diff["dashboards"]["update"]:
|
||||
source_obj = source_dash.get(item["uuid"])
|
||||
target_obj = target_dash.get(item["uuid"])
|
||||
if not source_obj or not target_obj:
|
||||
continue
|
||||
source_owners = extract_owner_identifiers(source_obj.get("owners"))
|
||||
target_owners = extract_owner_identifiers(target_obj.get("owners"))
|
||||
if source_owners and target_owners and source_owners != target_owners:
|
||||
risks.append({
|
||||
"code": "owner_mismatch",
|
||||
"severity": "low",
|
||||
"object_type": "dashboard",
|
||||
"object_uuid": item["uuid"],
|
||||
"message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
|
||||
})
|
||||
return risks
|
||||
source_dash = index_by_uuid(source_objects["dashboards"])
|
||||
target_dash = index_by_uuid(target_objects["dashboards"])
|
||||
for item in diff["dashboards"]["update"]:
|
||||
source_obj = source_dash.get(item["uuid"])
|
||||
target_obj = target_dash.get(item["uuid"])
|
||||
if not source_obj or not target_obj:
|
||||
continue
|
||||
source_owners = extract_owner_identifiers(source_obj.get("owners"))
|
||||
target_owners = extract_owner_identifiers(target_obj.get("owners"))
|
||||
if source_owners and target_owners and source_owners != target_owners:
|
||||
risks.append({
|
||||
"code": "owner_mismatch",
|
||||
"severity": "low",
|
||||
"object_type": "dashboard",
|
||||
"object_uuid": item["uuid"],
|
||||
"message": f"Owner mismatch for dashboard {item.get('title') or item['uuid']}",
|
||||
})
|
||||
logger.reflect("Risk list assembled", extra={"risk_count": len(risks)})
|
||||
return risks
|
||||
# [/DEF:build_risks:Function]
|
||||
|
||||
|
||||
# [DEF:score_risks:Function]
|
||||
# @PURPOSE: Aggregate risk list into score and level.
|
||||
# @PRE: risk_items contains optional severity fields expected in {high,medium,low} or defaults to low weight.
|
||||
# @POST: Returns dict with score in [0,100], derived level, and original items.
|
||||
# @SIDE_EFFECT: Emits reasoning/reflective logs only.
|
||||
# @DATA_CONTRACT: List[Dict[str, Any]] -> Dict[str, Any]
|
||||
def score_risks(risk_items: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
weights = {"high": 25, "medium": 10, "low": 5}
|
||||
score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
|
||||
level = "low" if score < 25 else "medium" if score < 60 else "high"
|
||||
return {"score": score, "level": level, "items": risk_items}
|
||||
with belief_scope("risk_assessor.score_risks"):
|
||||
logger.reason("Scoring risk items", extra={"risk_items_count": len(risk_items)})
|
||||
weights = {"high": 25, "medium": 10, "low": 5}
|
||||
score = min(100, sum(weights.get(item.get("severity", "low"), 5) for item in risk_items))
|
||||
level = "low" if score < 25 else "medium" if score < 60 else "high"
|
||||
result = {"score": score, "level": level, "items": risk_items}
|
||||
logger.reflect("Risk score computed", extra={"score": score, "level": level})
|
||||
return result
|
||||
# [/DEF:score_risks:Function]
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user