semantic clean up
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
# [DEF:backend.src.core.migration_engine:Module]
|
||||
#
|
||||
# @SEMANTICS: migration, engine, zip, yaml, transformation
|
||||
# @PURPOSE: Handles the interception and transformation of Superset asset ZIP archives.
|
||||
# @LAYER: Core
|
||||
# @RELATION: DEPENDS_ON -> PyYAML
|
||||
# @TIER: CRITICAL
|
||||
# @SEMANTICS: migration, engine, zip, yaml, transformation, cross-filter, id-mapping
|
||||
# @PURPOSE: Transforms Superset export ZIP archives while preserving archive integrity and patching mapped identifiers.
|
||||
# @LAYER: Domain
|
||||
# @RELATION: [DEPENDS_ON] ->[src.core.logger]
|
||||
# @RELATION: [DEPENDS_ON] ->[src.core.mapping_service.IdMappingService]
|
||||
# @RELATION: [DEPENDS_ON] ->[src.models.mapping.ResourceType]
|
||||
# @RELATION: [DEPENDS_ON] ->[yaml]
|
||||
#
|
||||
# @INVARIANT: ZIP structure must be preserved after transformation.
|
||||
# @INVARIANT: ZIP structure and non-targeted metadata must remain valid after transformation.
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
import zipfile
|
||||
@@ -26,10 +30,15 @@ from src.models.mapping import ResourceType
|
||||
class MigrationEngine:
|
||||
|
||||
# [DEF:__init__:Function]
|
||||
# @PURPOSE: Initializes the migration engine with optional ID mapping service.
|
||||
# @PURPOSE: Initializes migration orchestration dependencies for ZIP/YAML metadata transformations.
|
||||
# @PRE: mapping_service is None or implements batch remote ID lookup for ResourceType.CHART.
|
||||
# @POST: self.mapping_service is assigned and available for optional cross-filter patching flows.
|
||||
# @SIDE_EFFECT: Mutates in-memory engine state by storing dependency reference.
|
||||
# @DATA_CONTRACT: Input[Optional[IdMappingService]] -> Output[MigrationEngine]
|
||||
# @PARAM: mapping_service (Optional[IdMappingService]) - Used for resolving target environment integer IDs.
|
||||
def __init__(self, mapping_service: Optional[IdMappingService] = None):
|
||||
self.mapping_service = mapping_service
|
||||
with belief_scope("MigrationEngine.__init__"):
|
||||
self.mapping_service = mapping_service
|
||||
# [/DEF:__init__:Function]
|
||||
|
||||
# [DEF:transform_zip:Function]
|
||||
@@ -40,9 +49,11 @@ class MigrationEngine:
|
||||
# @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
|
||||
# @PARAM: target_env_id (Optional[str]) - Used if fix_cross_filters is True to know which environment map to use.
|
||||
# @PARAM: fix_cross_filters (bool) - Whether to patch dashboard json_metadata.
|
||||
# @PRE: zip_path must point to a valid Superset export archive.
|
||||
# @POST: Transformed archive is saved to output_path.
|
||||
# @RETURN: bool - True if successful.
|
||||
# @PRE: zip_path points to a readable ZIP; output_path parent is writable; db_mapping keys/values are UUID strings.
|
||||
# @POST: Returns True only when extraction, transformation, and packaging complete without exception.
|
||||
# @SIDE_EFFECT: Reads/writes filesystem archives, creates temporary directory, emits structured logs.
|
||||
# @DATA_CONTRACT: Input[(str zip_path, str output_path, Dict[str,str] db_mapping, bool strip_databases, Optional[str] target_env_id, bool fix_cross_filters)] -> Output[bool]
|
||||
# @RETURN: bool - True if successful.
|
||||
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True, target_env_id: Optional[str] = None, fix_cross_filters: bool = False) -> bool:
|
||||
"""
|
||||
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
|
||||
@@ -105,48 +116,60 @@ class MigrationEngine:
|
||||
# @PURPOSE: Replaces database_uuid in a single YAML file.
|
||||
# @PARAM: file_path (Path) - Path to the YAML file.
|
||||
# @PARAM: db_mapping (Dict[str, str]) - UUID mapping dictionary.
|
||||
# @PRE: file_path must exist and be readable.
|
||||
# @POST: File is modified in-place if source UUID matches mapping.
|
||||
# @PRE: file_path exists, is readable YAML, and db_mapping contains source->target UUID pairs.
|
||||
# @POST: database_uuid is replaced in-place only when source UUID is present in db_mapping.
|
||||
# @SIDE_EFFECT: Reads and conditionally rewrites YAML file on disk.
|
||||
# @DATA_CONTRACT: Input[(Path file_path, Dict[str,str] db_mapping)] -> Output[None]
|
||||
def _transform_yaml(self, file_path: Path, db_mapping: Dict[str, str]):
|
||||
with open(file_path, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
with belief_scope("MigrationEngine._transform_yaml"):
|
||||
with open(file_path, 'r') as f:
|
||||
data = yaml.safe_load(f)
|
||||
|
||||
if not data:
|
||||
return
|
||||
if not data:
|
||||
return
|
||||
|
||||
# Superset dataset YAML structure:
|
||||
# database_uuid: ...
|
||||
source_uuid = data.get('database_uuid')
|
||||
if source_uuid in db_mapping:
|
||||
data['database_uuid'] = db_mapping[source_uuid]
|
||||
with open(file_path, 'w') as f:
|
||||
yaml.dump(data, f)
|
||||
# Superset dataset YAML structure:
|
||||
# database_uuid: ...
|
||||
source_uuid = data.get('database_uuid')
|
||||
if source_uuid in db_mapping:
|
||||
data['database_uuid'] = db_mapping[source_uuid]
|
||||
with open(file_path, 'w') as f:
|
||||
yaml.dump(data, f)
|
||||
# [/DEF:_transform_yaml:Function]
|
||||
|
||||
# [DEF:_extract_chart_uuids_from_archive:Function]
|
||||
# @PURPOSE: Scans the unpacked ZIP to map local exported integer IDs back to their UUIDs.
|
||||
# @PARAM: temp_dir (Path) - Root dir of unpacked archive
|
||||
# @PURPOSE: Scans extracted chart YAML files and builds a source chart ID to UUID lookup map.
|
||||
# @PRE: temp_dir exists and points to extracted archive root with optional chart YAML resources.
|
||||
# @POST: Returns a best-effort Dict[int, str] containing only parseable chart id/uuid pairs.
|
||||
# @SIDE_EFFECT: Reads chart YAML files from filesystem; suppresses per-file parsing failures.
|
||||
# @DATA_CONTRACT: Input[Path] -> Output[Dict[int,str]]
|
||||
# @PARAM: temp_dir (Path) - Root dir of unpacked archive.
|
||||
# @RETURN: Dict[int, str] - Mapping of source Integer ID to UUID.
|
||||
def _extract_chart_uuids_from_archive(self, temp_dir: Path) -> Dict[int, str]:
|
||||
# Implementation Note: This is a placeholder for the logic that extracts
|
||||
# actual Source IDs. In a real scenario, this involves parsing chart YAMLs
|
||||
# or manifesting the export metadata structure where source IDs are stored.
|
||||
# For simplicity in US1 MVP, we assume it's read from chart files if present.
|
||||
mapping = {}
|
||||
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
|
||||
for cf in set(chart_files):
|
||||
try:
|
||||
with open(cf, 'r') as f:
|
||||
cdata = yaml.safe_load(f)
|
||||
if cdata and 'id' in cdata and 'uuid' in cdata:
|
||||
mapping[cdata['id']] = cdata['uuid']
|
||||
except Exception:
|
||||
pass
|
||||
return mapping
|
||||
with belief_scope("MigrationEngine._extract_chart_uuids_from_archive"):
|
||||
# Implementation Note: This is a placeholder for the logic that extracts
|
||||
# actual Source IDs. In a real scenario, this involves parsing chart YAMLs
|
||||
# or manifesting the export metadata structure where source IDs are stored.
|
||||
# For simplicity in US1 MVP, we assume it's read from chart files if present.
|
||||
mapping = {}
|
||||
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
|
||||
for cf in set(chart_files):
|
||||
try:
|
||||
with open(cf, 'r') as f:
|
||||
cdata = yaml.safe_load(f)
|
||||
if cdata and 'id' in cdata and 'uuid' in cdata:
|
||||
mapping[cdata['id']] = cdata['uuid']
|
||||
except Exception:
|
||||
pass
|
||||
return mapping
|
||||
# [/DEF:_extract_chart_uuids_from_archive:Function]
|
||||
|
||||
# [DEF:_patch_dashboard_metadata:Function]
|
||||
# @PURPOSE: Replaces integer IDs in json_metadata.
|
||||
# @PURPOSE: Rewrites dashboard json_metadata chart/dataset integer identifiers using target environment mappings.
|
||||
# @PRE: file_path points to dashboard YAML with json_metadata; target_env_id is non-empty; source_map contains source id->uuid.
|
||||
# @POST: json_metadata is re-serialized with mapped integer IDs when remote mappings are available; otherwise file remains unchanged.
|
||||
# @SIDE_EFFECT: Reads/writes YAML file, performs mapping lookup via mapping_service, emits logs for recoverable/terminal failures.
|
||||
# @DATA_CONTRACT: Input[(Path file_path, str target_env_id, Dict[int,str] source_map)] -> Output[None]
|
||||
# @PARAM: file_path (Path)
|
||||
# @PARAM: target_env_id (str)
|
||||
# @PARAM: source_map (Dict[int, str])
|
||||
|
||||
Reference in New Issue
Block a user