fix: commit semantic repair changes

2026-03-21 11:22:25 +03:00
parent 0900208c1a
commit abee05558f
272 changed files with 4603 additions and 1668 deletions


@@ -25,10 +25,11 @@ from src.core.mapping_service import IdMappingService
from src.models.mapping import ResourceType
# [/SECTION]
# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
# @RELATION: CONTAINS -> [__init__, transform_zip, _transform_yaml, _extract_chart_uuids_from_archive, _patch_dashboard_metadata]
class MigrationEngine:
# [DEF:__init__:Function]
# @PURPOSE: Initializes migration orchestration dependencies for ZIP/YAML metadata transformations.
# @PRE: mapping_service is None or implements batch remote ID lookup for ResourceType.CHART.
@@ -41,10 +42,12 @@ class MigrationEngine:
logger.reason("Initializing MigrationEngine")
self.mapping_service = mapping_service
logger.reflect("MigrationEngine initialized")
# [/DEF:__init__:Function]
# [DEF:transform_zip:Function]
# @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, patches cross-filters, and re-packages.
# @RELATION: DEPENDS_ON -> [_transform_yaml, _extract_chart_uuids_from_archive, _patch_dashboard_metadata]
# @PARAM: zip_path (str) - Path to the source ZIP file.
# @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
# @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
@@ -56,52 +59,76 @@ class MigrationEngine:
# @SIDE_EFFECT: Reads/writes filesystem archives, creates temporary directory, emits structured logs.
# @DATA_CONTRACT: Input[(str zip_path, str output_path, Dict[str,str] db_mapping, bool strip_databases, Optional[str] target_env_id, bool fix_cross_filters)] -> Output[bool]
# @RETURN: bool - True if successful.
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True, target_env_id: Optional[str] = None, fix_cross_filters: bool = False) -> bool:
def transform_zip(
self,
zip_path: str,
output_path: str,
db_mapping: Dict[str, str],
strip_databases: bool = True,
target_env_id: Optional[str] = None,
fix_cross_filters: bool = False,
) -> bool:
"""
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
"""
with belief_scope("MigrationEngine.transform_zip"):
logger.reason(f"Starting ZIP transformation: {zip_path} -> {output_path}")
with tempfile.TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
try:
# 1. Extract
logger.reason(f"Extracting source archive to {temp_dir}")
with zipfile.ZipFile(zip_path, 'r') as zf:
with zipfile.ZipFile(zip_path, "r") as zf:
zf.extractall(temp_dir)
# 2. Transform YAMLs (Databases)
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(
temp_dir.glob("**/datasets/*.yaml")
)
dataset_files = list(set(dataset_files))
logger.reason(f"Transforming {len(dataset_files)} dataset YAML files")
logger.reason(
f"Transforming {len(dataset_files)} dataset YAML files"
)
for ds_file in dataset_files:
self._transform_yaml(ds_file, db_mapping)
# 2.5 Patch Cross-Filters (Dashboards)
if fix_cross_filters:
if self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(
temp_dir.glob("**/dashboards/**/*.yaml")
) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.reason(f"Patching cross-filters for {len(dash_files)} dashboards")
logger.reason(
f"Patching cross-filters for {len(dash_files)} dashboards"
)
# Gather all source ID-to-UUID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
source_id_to_uuid_map = (
self._extract_chart_uuids_from_archive(temp_dir)
)
for dash_file in dash_files:
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
self._patch_dashboard_metadata(
dash_file, target_env_id, source_id_to_uuid_map
)
else:
logger.explore("Cross-filter patching requested but mapping service or target_env_id is missing")
logger.explore(
"Cross-filter patching requested but mapping service or target_env_id is missing"
)
# 3. Re-package
logger.reason(f"Re-packaging transformed archive (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
logger.reason(
f"Re-packaging transformed archive (strip_databases={strip_databases})"
)
with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(temp_dir):
rel_root = Path(root).relative_to(temp_dir)
if strip_databases and "databases" in rel_root.parts:
continue
@@ -109,12 +136,13 @@ class MigrationEngine:
file_path = Path(root) / file
arcname = file_path.relative_to(temp_dir)
zf.write(file_path, arcname)
logger.reflect("ZIP transformation completed successfully")
return True
except Exception as e:
logger.explore(f"Error transforming ZIP: {e}")
return False
# [/DEF:transform_zip:Function]
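# --- Illustrative usage sketch (not part of this commit): how transform_zip is
# --- expected to be called. The constructor argument name is inferred from
# --- __init__ above; the paths and UUIDs below are made-up placeholders.
# engine = MigrationEngine(mapping_service=None)
# ok = engine.transform_zip(
#     zip_path="source_export.zip",              # hypothetical input archive
#     output_path="patched_export.zip",          # hypothetical output archive
#     db_mapping={"src-db-uuid": "tgt-db-uuid"}, # source UUID -> target UUID
#     strip_databases=True,                      # omit databases/ from the repacked ZIP
# )
# print("transform ok:", ok)                     # True on success, False if any step raised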
# [DEF:_transform_yaml:Function]
@@ -131,19 +159,20 @@ class MigrationEngine:
logger.explore(f"YAML file not found: {file_path}")
raise FileNotFoundError(str(file_path))
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
data = yaml.safe_load(f)
if not data:
return
source_uuid = data.get('database_uuid')
source_uuid = data.get("database_uuid")
if source_uuid in db_mapping:
logger.reason(f"Replacing database UUID in {file_path.name}")
data['database_uuid'] = db_mapping[source_uuid]
with open(file_path, 'w') as f:
data["database_uuid"] = db_mapping[source_uuid]
with open(file_path, "w") as f:
yaml.dump(data, f)
logger.reflect(f"Database UUID patched in {file_path.name}")
# [/DEF:_transform_yaml:Function]
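# --- Standalone sketch of the database_uuid swap performed by _transform_yaml,
# --- applied to an in-memory string instead of a file. The dataset YAML shape
# --- below is an assumption for illustration, not taken from this commit.
# import yaml
# dataset_yaml = "table_name: sales\ndatabase_uuid: src-uuid-1\n"
# db_mapping = {"src-uuid-1": "tgt-uuid-9"}
# data = yaml.safe_load(dataset_yaml)
# if data.get("database_uuid") in db_mapping:
#     data["database_uuid"] = db_mapping[data["database_uuid"]]
# print(yaml.dump(data))  # database_uuid now points at the target environment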
# [DEF:_extract_chart_uuids_from_archive:Function]
@@ -161,16 +190,19 @@ class MigrationEngine:
# or manifesting the export metadata structure where source IDs are stored.
# For simplicity in US1 MVP, we assume it's read from chart files if present.
mapping = {}
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(
temp_dir.glob("**/charts/*.yaml")
)
for cf in set(chart_files):
try:
with open(cf, 'r') as f:
with open(cf, "r") as f:
cdata = yaml.safe_load(f)
if cdata and 'id' in cdata and 'uuid' in cdata:
mapping[cdata['id']] = cdata['uuid']
if cdata and "id" in cdata and "uuid" in cdata:
mapping[cdata["id"]] = cdata["uuid"]
except Exception:
pass
return mapping
# [/DEF:_extract_chart_uuids_from_archive:Function]
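# --- Sketch of the id -> uuid harvest done by _extract_chart_uuids_from_archive,
# --- run against one in-memory chart YAML rather than an extracted archive.
# --- The chart fields shown are assumptions about the export format.
# import yaml
# chart_yaml = "slice_name: Revenue\nid: 42\nuuid: chart-uuid-42\n"
# cdata = yaml.safe_load(chart_yaml)
# mapping = {}
# if cdata and "id" in cdata and "uuid" in cdata:
#     mapping[cdata["id"]] = cdata["uuid"]
# print(mapping)  # {42: 'chart-uuid-42'} -> source numeric ID keyed to stable UUID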
# [DEF:_patch_dashboard_metadata:Function]
@@ -182,29 +214,37 @@ class MigrationEngine:
# @PARAM: file_path (Path)
# @PARAM: target_env_id (str)
# @PARAM: source_map (Dict[int, str])
def _patch_dashboard_metadata(self, file_path: Path, target_env_id: str, source_map: Dict[int, str]):
def _patch_dashboard_metadata(
self, file_path: Path, target_env_id: str, source_map: Dict[int, str]
):
with belief_scope("MigrationEngine._patch_dashboard_metadata"):
try:
if not file_path.exists():
return
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
data = yaml.safe_load(f)
if not data or 'json_metadata' not in data:
if not data or "json_metadata" not in data:
return
metadata_str = data['json_metadata']
metadata_str = data["json_metadata"]
if not metadata_str:
return
# Fetch target IDs for every UUID we know:
uuids_needed = list(source_map.values())
logger.reason(f"Resolving {len(uuids_needed)} remote IDs for dashboard metadata patching")
target_ids = self.mapping_service.get_remote_ids_batch(target_env_id, ResourceType.CHART, uuids_needed)
logger.reason(
f"Resolving {len(uuids_needed)} remote IDs for dashboard metadata patching"
)
target_ids = self.mapping_service.get_remote_ids_batch(
target_env_id, ResourceType.CHART, uuids_needed
)
if not target_ids:
logger.reflect("No remote target IDs found in mapping database for this dashboard.")
logger.reflect(
"No remote target IDs found in mapping database for this dashboard."
)
return
# Map Source Int -> Target Int
@@ -215,33 +255,48 @@ class MigrationEngine:
source_to_target[s_id] = target_ids[s_uuid]
else:
missing_targets.append(s_id)
if missing_targets:
logger.explore(f"Missing target IDs for source IDs: {missing_targets}. Cross-filters might break.")
logger.explore(
f"Missing target IDs for source IDs: {missing_targets}. Cross-filters might break."
)
if not source_to_target:
logger.reflect("No source IDs matched remotely. Skipping patch.")
return
logger.reason(f"Patching {len(source_to_target)} ID references in json_metadata")
logger.reason(
f"Patching {len(source_to_target)} ID references in json_metadata"
)
new_metadata_str = metadata_str
for s_id, t_id in source_to_target.items():
new_metadata_str = re.sub(r'("datasetId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
new_metadata_str = re.sub(r'("chartId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
new_metadata_str = re.sub(
r'("datasetId"\s*:\s*)' + str(s_id) + r"(\b)",
r"\g<1>" + str(t_id) + r"\g<2>",
new_metadata_str,
)
new_metadata_str = re.sub(
r'("chartId"\s*:\s*)' + str(s_id) + r"(\b)",
r"\g<1>" + str(t_id) + r"\g<2>",
new_metadata_str,
)
# Re-parse to validate that the patched metadata is still valid JSON
data['json_metadata'] = json.dumps(json.loads(new_metadata_str))
with open(file_path, 'w') as f:
data["json_metadata"] = json.dumps(json.loads(new_metadata_str))
with open(file_path, "w") as f:
yaml.dump(data, f)
logger.reflect(f"Dashboard metadata patched and saved: {file_path.name}")
logger.reflect(
f"Dashboard metadata patched and saved: {file_path.name}"
)
except Exception as e:
logger.explore(f"Metadata patch failed for {file_path.name}: {e}")
# [/DEF:_patch_dashboard_metadata:Function]
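# --- Sketch of the two-step ID bridge used above: source chart ID -> source UUID
# --- (read from the archive) -> target chart ID (from the mapping database).
# --- The dicts below are placeholder data standing in for source_map and the
# --- get_remote_ids_batch result; they are not taken from this commit.
# source_map = {7: "chart-uuid-7", 9: "chart-uuid-9"}  # source numeric ID -> UUID
# target_ids = {"chart-uuid-7": 107}                   # UUID -> target numeric ID
# source_to_target = {}
# missing_targets = []
# for s_id, s_uuid in source_map.items():
#     if s_uuid in target_ids:
#         source_to_target[s_id] = target_ids[s_uuid]
#     else:
#         missing_targets.append(s_id)
# print(source_to_target)  # {7: 107}
# print(missing_targets)   # [9] -> cross-filters referencing chart 9 may break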
# [/DEF:MigrationEngine:Class]
# [/DEF:backend.src.core.migration_engine:Module]
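# --- Sketch of the json_metadata rewrite done by _patch_dashboard_metadata: the
# --- same regex shape as in the diff, applied to a made-up cross-filter snippet.
# --- The metadata content and IDs are placeholders, not taken from this commit.
# import json, re
# metadata_str = '{"crossFilters": {"chartId": 7, "datasetId": 3}, "other": 7}'
# source_to_target = {7: 107, 3: 103}  # source numeric ID -> target numeric ID
# patched = metadata_str
# for s_id, t_id in source_to_target.items():
#     patched = re.sub(r'("datasetId"\s*:\s*)' + str(s_id) + r"(\b)", r"\g<1>" + str(t_id) + r"\g<2>", patched)
#     patched = re.sub(r'("chartId"\s*:\s*)' + str(s_id) + r"(\b)", r"\g<1>" + str(t_id) + r"\g<2>", patched)
# json.loads(patched)  # re-parse to confirm the substitutions kept the JSON valid
# print(patched)       # chartId -> 107, datasetId -> 103; the bare "other": 7 is untouched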