# [DEF:backend.src.scripts.migrate_sqlite_to_postgres:Module]
|
|
#
|
|
# @TIER: STANDARD
|
|
# @SEMANTICS: migration, sqlite, postgresql, config, task_logs, task_records
|
|
# @PURPOSE: Migrates legacy config and task history from SQLite/file storage to PostgreSQL.
|
|
# @LAYER: Scripts
|
|
# @RELATION: READS_FROM -> backend/tasks.db
|
|
# @RELATION: READS_FROM -> backend/config.json
|
|
# @RELATION: WRITES_TO -> postgresql.task_records
|
|
# @RELATION: WRITES_TO -> postgresql.task_logs
|
|
# @RELATION: WRITES_TO -> postgresql.app_configurations
|
|
#
|
|
# @INVARIANT: Script is idempotent for task_records and app_configurations.
|
|
|
|
# [SECTION: IMPORTS]
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterable, Optional
|
|
|
|
from sqlalchemy import create_engine, text
|
|
from sqlalchemy.exc import SQLAlchemyError
|
|
|
|
from src.core.logger import belief_scope, logger
|
|
# [/SECTION]
|
|
|
|
|
|
# [DEF:Constants:Section]
# Fallback used only when neither DATABASE_URL nor POSTGRES_URL is set.
_LOCAL_FALLBACK_URL = "postgresql+psycopg2://postgres:postgres@localhost:5432/ss_tools"
# Resolution order: DATABASE_URL, then POSTGRES_URL, then the local fallback.
DEFAULT_TARGET_URL = os.getenv("DATABASE_URL", os.getenv("POSTGRES_URL", _LOCAL_FALLBACK_URL))
# [/DEF:Constants:Section]
|
|
|
|
|
|
# [DEF:_json_load_if_needed:Function]
# @TIER: STANDARD
# @PURPOSE: Parses JSON-like values from SQLite TEXT/JSON columns to Python objects.
# @PRE: value is scalar JSON/text/list/dict or None.
# @POST: Returns normalized Python object or original scalar value.
def _json_load_if_needed(value: Any) -> Any:
    with belief_scope("_json_load_if_needed"):
        # NULLs and already-decoded containers need no work.
        if value is None:
            return None
        if isinstance(value, (dict, list)):
            return value
        # Non-string scalars (ints, floats, bytes) are passed through untouched.
        if not isinstance(value, str):
            return value
        stripped = value.strip()
        if not stripped:
            # Blank/whitespace-only text normalizes to None.
            return None
        if stripped.startswith(("{", "[")):
            # Looks like serialized JSON; fall back to the raw string on parse failure.
            try:
                return json.loads(stripped)
            except json.JSONDecodeError:
                pass
        return value
# [/DEF:_json_load_if_needed:Function]
|
|
|
|
|
|
# [DEF:_find_legacy_config_path:Function]
# @PURPOSE: Resolves the existing legacy config.json path from candidates.
def _find_legacy_config_path(explicit_path: Optional[str]) -> Optional[Path]:
    with belief_scope("_find_legacy_config_path"):
        # An explicit path wins outright: use it if present on disk, otherwise give up.
        if explicit_path:
            explicit = Path(explicit_path)
            return explicit if explicit.exists() else None

        # No explicit path: probe the conventional locations in priority order.
        for candidate in (Path("backend/config.json"), Path("config.json")):
            if candidate.exists():
                return candidate
        return None
# [/DEF:_find_legacy_config_path:Function]
|
|
|
|
|
|
# [DEF:_connect_sqlite:Function]
# @PURPOSE: Opens a SQLite connection with row factory.
def _connect_sqlite(path: Path) -> sqlite3.Connection:
    with belief_scope("_connect_sqlite"):
        connection = sqlite3.connect(str(path))
        # sqlite3.Row gives name-based column access (row["col"]) to the migration loops.
        connection.row_factory = sqlite3.Row
        return connection
# [/DEF:_connect_sqlite:Function]
|
|
|
|
|
|
# [DEF:_ensure_target_schema:Function]
# @PURPOSE: Ensures required PostgreSQL tables exist before migration.
# @PRE: engine is a SQLAlchemy Engine bound to the target PostgreSQL database.
# @POST: app_configurations, task_records and task_logs exist with their indexes
#        and the task_logs id sequence/default; all DDL runs in one transaction.
# NOTE(review): this does NOT create the `environments` table referenced later
# in the migration — presumably it is created elsewhere; confirm before running.
def _ensure_target_schema(engine) -> None:
    with belief_scope("_ensure_target_schema"):
        # Every statement is written to be idempotent (IF NOT EXISTS / guarded DO
        # block), so re-running the script against an initialized target is safe.
        stmts: Iterable[str] = (
            """
            CREATE TABLE IF NOT EXISTS app_configurations (
                id TEXT PRIMARY KEY,
                payload JSONB NOT NULL,
                updated_at TIMESTAMPTZ DEFAULT NOW()
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS task_records (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL,
                status TEXT NOT NULL,
                environment_id TEXT NULL,
                started_at TIMESTAMPTZ NULL,
                finished_at TIMESTAMPTZ NULL,
                logs JSONB NULL,
                error TEXT NULL,
                result JSONB NULL,
                created_at TIMESTAMPTZ DEFAULT NOW(),
                params JSONB NULL
            )
            """,
            """
            CREATE TABLE IF NOT EXISTS task_logs (
                id INTEGER PRIMARY KEY,
                task_id TEXT NOT NULL,
                timestamp TIMESTAMPTZ NOT NULL,
                level VARCHAR(16) NOT NULL,
                source VARCHAR(64) NOT NULL DEFAULT 'system',
                message TEXT NOT NULL,
                metadata_json TEXT NULL,
                CONSTRAINT fk_task_logs_task
                    FOREIGN KEY(task_id)
                    REFERENCES task_records(id)
                    ON DELETE CASCADE
            )
            """,
            "CREATE INDEX IF NOT EXISTS ix_task_logs_task_timestamp ON task_logs (task_id, timestamp)",
            "CREATE INDEX IF NOT EXISTS ix_task_logs_task_level ON task_logs (task_id, level)",
            "CREATE INDEX IF NOT EXISTS ix_task_logs_task_source ON task_logs (task_id, source)",
            # task_logs.id is declared as a plain INTEGER (no SERIAL), so the
            # backing sequence is created manually — guarded by a pg_class lookup
            # because CREATE SEQUENCE has no IF NOT EXISTS form usable with OWNED BY
            # semantics here — and attached as the column default just below.
            """
            DO $$
            BEGIN
                IF EXISTS (
                    SELECT 1 FROM pg_class WHERE relkind = 'S' AND relname = 'task_logs_id_seq'
                ) THEN
                    PERFORM 1;
                ELSE
                    CREATE SEQUENCE task_logs_id_seq OWNED BY task_logs.id;
                END IF;
            END $$;
            """,
            "ALTER TABLE task_logs ALTER COLUMN id SET DEFAULT nextval('task_logs_id_seq')",
        )
        # engine.begin() commits all DDL atomically (PostgreSQL supports transactional DDL).
        with engine.begin() as conn:
            for stmt in stmts:
                conn.execute(text(stmt))
# [/DEF:_ensure_target_schema:Function]
|
|
|
|
|
|
# [DEF:_migrate_config:Function]
# @PURPOSE: Migrates legacy config.json into app_configurations(global).
# @POST: Returns 1 when a config was upserted, 0 when there was nothing to migrate.
def _migrate_config(engine, legacy_config_path: Optional[Path]) -> int:
    with belief_scope("_migrate_config"):
        # Nothing to do when no legacy file was located.
        if legacy_config_path is None:
            logger.info("[_migrate_config][Action] No legacy config.json found, skipping")
            return 0

        config_payload = json.loads(legacy_config_path.read_text(encoding="utf-8"))
        # Upsert under the fixed 'global' id keeps the operation idempotent.
        upsert_stmt = text(
            """
            INSERT INTO app_configurations (id, payload, updated_at)
            VALUES ('global', CAST(:payload AS JSONB), NOW())
            ON CONFLICT (id)
            DO UPDATE SET payload = EXCLUDED.payload, updated_at = NOW()
            """
        )
        with engine.begin() as conn:
            conn.execute(upsert_stmt, {"payload": json.dumps(config_payload, ensure_ascii=True)})
        logger.info("[_migrate_config][Coherence:OK] Config migrated from %s", legacy_config_path)
        return 1
# [/DEF:_migrate_config:Function]
|
|
|
|
|
|
# [DEF:_migrate_tasks_and_logs:Function]
# @PURPOSE: Migrates task_records and task_logs from SQLite into PostgreSQL.
# @PRE: Target schema already ensured; sqlite_conn has row_factory=sqlite3.Row.
# @POST: Returns counters: task_records_total/inserted and task_logs_total/inserted.
# NOTE(review): assumes an `environments` table already exists in the target
# database — it is not created by this script's schema step; confirm before running.
def _migrate_tasks_and_logs(engine, sqlite_conn: sqlite3.Connection) -> Dict[str, int]:
    with belief_scope("_migrate_tasks_and_logs"):
        stats = {"task_records_total": 0, "task_records_inserted": 0, "task_logs_total": 0, "task_logs_inserted": 0}

        # Phase 1: copy task_records, oldest first so created_at order is preserved.
        rows = sqlite_conn.execute(
            """
            SELECT id, type, status, environment_id, started_at, finished_at, logs, error, result, created_at, params
            FROM task_records
            ORDER BY created_at ASC
            """
        ).fetchall()
        stats["task_records_total"] = len(rows)

        with engine.begin() as conn:
            # Snapshot valid environment ids once so each task row is a set lookup.
            existing_env_ids = {
                row[0]
                for row in conn.execute(text("SELECT id FROM environments")).fetchall()
            }
            for row in rows:
                # SQLite stores JSON columns as TEXT; normalize to Python objects
                # so they can be re-serialized consistently for JSONB casts.
                params_obj = _json_load_if_needed(row["params"])
                result_obj = _json_load_if_needed(row["result"])
                logs_obj = _json_load_if_needed(row["logs"])
                environment_id = row["environment_id"]
                if environment_id and environment_id not in existing_env_ids:
                    # Legacy task may reference environments that were not migrated; keep task row and drop FK value.
                    environment_id = None

                # ON CONFLICT (id) DO NOTHING makes re-runs idempotent for task_records.
                res = conn.execute(
                    text(
                        """
                        INSERT INTO task_records (
                            id, type, status, environment_id, started_at, finished_at,
                            logs, error, result, created_at, params
                        ) VALUES (
                            :id, :type, :status, :environment_id, :started_at, :finished_at,
                            CAST(:logs AS JSONB), :error, CAST(:result AS JSONB), :created_at, CAST(:params AS JSONB)
                        )
                        ON CONFLICT (id) DO NOTHING
                        """
                    ),
                    {
                        "id": row["id"],
                        "type": row["type"],
                        "status": row["status"],
                        "environment_id": environment_id,
                        "started_at": row["started_at"],
                        "finished_at": row["finished_at"],
                        "logs": json.dumps(logs_obj, ensure_ascii=True) if logs_obj is not None else None,
                        "error": row["error"],
                        "result": json.dumps(result_obj, ensure_ascii=True) if result_obj is not None else None,
                        "created_at": row["created_at"],
                        "params": json.dumps(params_obj, ensure_ascii=True) if params_obj is not None else None,
                    },
                )
                # rowcount is 0 when the conflict clause skipped an existing row.
                if res.rowcount and res.rowcount > 0:
                    stats["task_records_inserted"] += int(res.rowcount)

        # Phase 2: copy task_logs in original id order (after their parent task rows exist).
        log_rows = sqlite_conn.execute(
            """
            SELECT id, task_id, timestamp, level, source, message, metadata_json
            FROM task_logs
            ORDER BY id ASC
            """
        ).fetchall()
        stats["task_logs_total"] = len(log_rows)

        with engine.begin() as conn:
            for row in log_rows:
                # Preserve original IDs to keep migration idempotent.
                res = conn.execute(
                    text(
                        """
                        INSERT INTO task_logs (id, task_id, timestamp, level, source, message, metadata_json)
                        VALUES (:id, :task_id, :timestamp, :level, :source, :message, :metadata_json)
                        ON CONFLICT (id) DO NOTHING
                        """
                    ),
                    {
                        "id": row["id"],
                        "task_id": row["task_id"],
                        "timestamp": row["timestamp"],
                        "level": row["level"],
                        # Legacy rows may have NULL source; the target column is NOT NULL.
                        "source": row["source"] or "system",
                        "message": row["message"],
                        "metadata_json": row["metadata_json"],
                    },
                )
                if res.rowcount and res.rowcount > 0:
                    stats["task_logs_inserted"] += int(res.rowcount)

            # Ensure sequence is aligned after explicit id inserts.
            conn.execute(
                text(
                    """
                    SELECT setval(
                        'task_logs_id_seq',
                        COALESCE((SELECT MAX(id) FROM task_logs), 1),
                        TRUE
                    )
                    """
                )
            )

        logger.info(
            "[_migrate_tasks_and_logs][Coherence:OK] task_records=%s/%s task_logs=%s/%s",
            stats["task_records_inserted"],
            stats["task_records_total"],
            stats["task_logs_inserted"],
            stats["task_logs_total"],
        )
        return stats
# [/DEF:_migrate_tasks_and_logs:Function]
|
|
|
|
|
|
# [DEF:run_migration:Function]
# @PURPOSE: Orchestrates migration from SQLite/file to PostgreSQL.
# @PRE: sqlite_path points at an existing SQLite DB; target_url is a SQLAlchemy URL.
# @POST: Returns migration stats (task/log counters plus config_upserted).
# @RAISES: FileNotFoundError when the SQLite source is missing.
def run_migration(sqlite_path: Path, target_url: str, legacy_config_path: Optional[Path]) -> Dict[str, int]:
    with belief_scope("run_migration"):
        logger.info("[run_migration][Entry] sqlite=%s target=%s", sqlite_path, target_url)
        if not sqlite_path.exists():
            raise FileNotFoundError(f"SQLite source not found: {sqlite_path}")

        sqlite_conn = _connect_sqlite(sqlite_path)
        engine = create_engine(target_url, pool_pre_ping=True)
        try:
            _ensure_target_schema(engine)
            config_upserted = _migrate_config(engine, legacy_config_path)
            stats = _migrate_tasks_and_logs(engine, sqlite_conn)
            stats["config_upserted"] = config_upserted
            return stats
        finally:
            sqlite_conn.close()
            # Fix: release the engine's pooled PostgreSQL connections too —
            # previously only the SQLite side was closed, leaking the pool.
            engine.dispose()
# [/DEF:run_migration:Function]
|
|
|
|
|
|
# [DEF:main:Function]
# @PURPOSE: CLI entrypoint.
# @POST: Returns 0 on success, 1 on any handled migration failure.
def main() -> int:
    with belief_scope("main"):
        parser = argparse.ArgumentParser(
            description="Migrate legacy config.json and task logs from SQLite to PostgreSQL.",
        )
        parser.add_argument(
            "--sqlite-path",
            default="backend/tasks.db",
            help="Path to source SQLite DB with task_records/task_logs (default: backend/tasks.db).",
        )
        parser.add_argument(
            "--target-url",
            default=DEFAULT_TARGET_URL,
            help="Target PostgreSQL SQLAlchemy URL (default: DATABASE_URL/POSTGRES_URL env).",
        )
        parser.add_argument(
            "--config-path",
            default=None,
            help="Optional path to legacy config.json (auto-detected when omitted).",
        )

        cli_args = parser.parse_args()

        source_db = Path(cli_args.sqlite_path)
        legacy_config = _find_legacy_config_path(cli_args.config_path)
        try:
            stats = run_migration(
                sqlite_path=source_db,
                target_url=cli_args.target_url,
                legacy_config_path=legacy_config,
            )
        except (SQLAlchemyError, OSError, sqlite3.Error, ValueError) as e:
            # Expected failure modes: DB errors (either side), missing files, bad JSON.
            logger.error("[main][Coherence:Failed] Migration failed: %s", e)
            print(f"Migration failed: {e}")
            return 1

        print("Migration completed.")
        print(json.dumps(stats, indent=2))
        return 0


if __name__ == "__main__":
    raise SystemExit(main())
# [/DEF:main:Function]
|
|
|
|
# [/DEF:backend.src.scripts.migrate_sqlite_to_postgres:Module]
|