semantic update

2026-03-10 21:33:09 +03:00
parent 542835e0ff
commit b77fa45e4e
10 changed files with 2084 additions and 1499 deletions


@@ -38,7 +38,9 @@ class MigrationEngine:
# @PARAM: mapping_service (Optional[IdMappingService]) - Used for resolving target environment integer IDs.
def __init__(self, mapping_service: Optional[IdMappingService] = None):
with belief_scope("MigrationEngine.__init__"):
logger.reason("Initializing MigrationEngine")
self.mapping_service = mapping_service
logger.reflect("MigrationEngine initialized")
# [/DEF:__init__:Function]
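The belief_scope context manager and the reason/reflect/explore calls introduced by this commit belong to the project's semantic logging layer, which is defined outside this diff. Below is a minimal sketch of what such a shim could look like, assuming it wraps the stdlib logger; the verb semantics (reason = intent before an action, reflect = outcome of a completed step, explore = anomaly or recoverable problem) are inferred from how the calls are used in this file, not taken from the real implementation.

import logging
from contextlib import contextmanager
from contextvars import ContextVar

_scope: ContextVar[str] = ContextVar("belief_scope", default="")

@contextmanager
def belief_scope(name: str):
    # Remember the enclosing scope so every semantic line carries its context.
    token = _scope.set(name)
    try:
        yield
    finally:
        _scope.reset(token)

class SemanticLogger:
    def __init__(self, name: str):
        self._log = logging.getLogger(name)

    def _emit(self, level: int, verb: str, msg: str) -> None:
        self._log.log(level, "[%s][%s] %s", _scope.get(), verb, msg)

    def reason(self, msg: str) -> None:   # intent: why the next action happens
        self._emit(logging.INFO, "Reason", msg)

    def reflect(self, msg: str) -> None:  # outcome of a completed step
        self._emit(logging.INFO, "Reflect", msg)

    def explore(self, msg: str) -> None:  # anomaly or recoverable problem
        self._emit(logging.WARNING, "Explore", msg)

logger = SemanticLogger(__name__)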
# [DEF:transform_zip:Function]
@@ -59,12 +61,14 @@ class MigrationEngine:
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
"""
with belief_scope("MigrationEngine.transform_zip"):
logger.reason(f"Starting ZIP transformation: {zip_path} -> {output_path}")
with tempfile.TemporaryDirectory() as temp_dir_str:
temp_dir = Path(temp_dir_str)
try:
# 1. Extract
logger.info(f"[MigrationEngine.transform_zip][Action] Extracting ZIP: {zip_path}")
logger.reason(f"Extracting source archive to {temp_dir}")
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
@@ -72,33 +76,33 @@ class MigrationEngine:
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(set(dataset_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dataset_files)} dataset files.")
logger.reason(f"Transforming {len(dataset_files)} dataset YAML files")
for ds_file in dataset_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
self._transform_yaml(ds_file, db_mapping)
# 2.5 Patch Cross-Filters (Dashboards)
if fix_cross_filters and self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dash_files)} dashboard files for patching.")
# Gather all source chart ID-to-UUID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
for dash_file in dash_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Patching dashboard: {dash_file}")
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
if fix_cross_filters:
if self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.reason(f"Patching cross-filters for {len(dash_files)} dashboards")
# Gather all source chart ID-to-UUID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
for dash_file in dash_files:
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
else:
logger.explore("Cross-filter patching requested but mapping service or target_env_id is missing")
# 3. Re-package
logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
logger.reason(f"Re-packaging transformed archive (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
for root, dirs, files in os.walk(temp_dir):
rel_root = Path(root).relative_to(temp_dir)
if strip_databases and "databases" in rel_root.parts:
logger.info(f"[MigrationEngine.transform_zip][Action] Skipping file in databases directory: {rel_root}")
continue
for file in files:
@@ -106,9 +110,10 @@ class MigrationEngine:
arcname = file_path.relative_to(temp_dir)
zf.write(file_path, arcname)
logger.reflect("ZIP transformation completed successfully")
return True
except Exception as e:
logger.error(f"[MigrationEngine.transform_zip][Coherence:Failed] Error transforming ZIP: {e}")
logger.explore(f"Error transforming ZIP: {e}")
return False
# [/DEF:transform_zip:Function]
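A hypothetical invocation of the method after this change; the keyword names are inferred from the f-strings and branches in the diff above, and the paths, UUIDs, and environment ID are placeholders.

from pathlib import Path

# mapping_service would come from the application's own setup; without it,
# fix_cross_filters hits the logger.explore branch above and is skipped.
engine = MigrationEngine(mapping_service=mapping_service)
ok = engine.transform_zip(
    zip_path=Path("export_dev.zip"),
    output_path=Path("export_prod.zip"),
    db_mapping={
        "11111111-2222-3333-4444-555555555555": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",
    },
    strip_databases=True,    # drop the databases/ subtree during re-packaging
    fix_cross_filters=True,  # also patch chartId/datasetId references in dashboards
    target_env_id="prod",
)
if not ok:
    raise RuntimeError("ZIP transformation failed; see logs")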
@@ -122,19 +127,23 @@ class MigrationEngine:
# @DATA_CONTRACT: Input[(Path file_path, Dict[str,str] db_mapping)] -> Output[None]
def _transform_yaml(self, file_path: Path, db_mapping: Dict[str, str]):
with belief_scope("MigrationEngine._transform_yaml"):
if not file_path.exists():
logger.explore(f"YAML file not found: {file_path}")
return
with open(file_path, 'r') as f:
data = yaml.safe_load(f)
if not data:
return
# Superset dataset YAML structure:
# database_uuid: ...
source_uuid = data.get('database_uuid')
if source_uuid in db_mapping:
logger.reason(f"Replacing database UUID in {file_path.name}")
data['database_uuid'] = db_mapping[source_uuid]
with open(file_path, 'w') as f:
yaml.dump(data, f)
logger.reflect(f"Database UUID patched in {file_path.name}")
# [/DEF:_transform_yaml:Function]
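A standalone illustration of the swap _transform_yaml performs, reduced to the single database_uuid key the method touches; both UUIDs are made up.

import yaml

doc = yaml.safe_load("database_uuid: 11111111-2222-3333-4444-555555555555\ntable_name: sales\n")
db_mapping = {"11111111-2222-3333-4444-555555555555": "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"}

# Same guard and replacement as the method: only patch UUIDs we have a mapping for.
if doc.get("database_uuid") in db_mapping:
    doc["database_uuid"] = db_mapping[doc["database_uuid"]]

print(yaml.dump(doc))  # database_uuid now points at the target environment's database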
# [DEF:_extract_chart_uuids_from_archive:Function]
@@ -176,6 +185,9 @@ class MigrationEngine:
def _patch_dashboard_metadata(self, file_path: Path, target_env_id: str, source_map: Dict[int, str]):
with belief_scope("MigrationEngine._patch_dashboard_metadata"):
try:
if not file_path.exists():
return
with open(file_path, 'r') as f:
data = yaml.safe_load(f)
@@ -186,18 +198,13 @@ class MigrationEngine:
if not metadata_str:
return
metadata = json.loads(metadata_str)
modified = False
# We would need to traverse the nested structure and replace IDs in place. For the MVP,
# string replacement over the raw JSON is an option, though careful dict traversal is safer.
# Fetch target IDs for every UUID we know about:
uuids_needed = list(source_map.values())
logger.reason(f"Resolving {len(uuids_needed)} remote IDs for dashboard metadata patching")
target_ids = self.mapping_service.get_remote_ids_batch(target_env_id, ResourceType.CHART, uuids_needed)
if not target_ids:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No remote target IDs found in mapping database.")
logger.reflect("No remote target IDs found in mapping database for this dashboard.")
return
# Map Source Int -> Target Int
@@ -210,21 +217,16 @@ class MigrationEngine:
missing_targets.append(s_id)
if missing_targets:
logger.warning(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Recoverable] Missing target IDs for source IDs: {missing_targets}. Cross-filters for these IDs might break.")
logger.explore(f"Missing target IDs for source IDs: {missing_targets}. Cross-filters might break.")
if not source_to_target:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No source IDs matched remotely. Skipping patch.")
logger.reflect("No source IDs matched remotely. Skipping patch.")
return
# Full metadata traversal (e.g. for native_filter_configuration) would go here.
# We use regex replacement over the raw string instead, to avoid mis-handling unknown nested dicts.
logger.reason(f"Patching {len(source_to_target)} ID references in json_metadata")
new_metadata_str = metadata_str
# Replace chartId and datasetId assignments explicitly.
# Pattern: "datasetId": 42 or "chartId": 42
for s_id, t_id in source_to_target.items():
# Replace in native_filter_configuration targets
new_metadata_str = re.sub(r'("datasetId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
new_metadata_str = re.sub(r'("chartId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
@@ -233,10 +235,10 @@ class MigrationEngine:
with open(file_path, 'w') as f:
yaml.dump(data, f)
logger.info(f"[MigrationEngine._patch_dashboard_metadata][Reason] Re-serialized modified JSON metadata for dashboard.")
logger.reflect(f"Dashboard metadata patched and saved: {file_path.name}")
except Exception as e:
logger.error(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Failed] Metadata patch failed: {e}")
logger.explore(f"Metadata patch failed for {file_path.name}: {e}")
# [/DEF:_patch_dashboard_metadata:Function]
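The regex substitution above can be exercised in isolation. The metadata string and the source_to_target mapping below are fabricated for illustration, but the pattern mirrors the one in the diff: the captured '"chartId": ' / '"datasetId": ' prefix keeps the match anchored to those keys, so the unrelated "7" object key is left alone.

import re

metadata_str = '{"chart_configuration": {"7": {"chartId": 7, "targets": [{"datasetId": 42}]}}}'
source_to_target = {7: 107, 42: 142}

patched = metadata_str
for s_id, t_id in source_to_target.items():
    patched = re.sub(r'("datasetId"\s*:\s*)' + str(s_id) + r'\b', r'\g<1>' + str(t_id), patched)
    patched = re.sub(r'("chartId"\s*:\s*)' + str(s_id) + r'\b', r'\g<1>' + str(t_id), patched)

print(patched)
# {"chart_configuration": {"7": {"chartId": 107, "targets": [{"datasetId": 142}]}}}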