# [DEF:AppModule:Module]
# @COMPLEXITY: 5
# @SEMANTICS: app, main, entrypoint, fastapi
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
# @LAYER: UI (API)
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @INVARIANT: Only one FastAPI app instance exists per process.
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
# @PRE: Python environment and dependencies installed; configuration database available.
# @POST: FastAPI app instance is created, middleware configured, and routes registered.
# @SIDE_EFFECT: Starts background scheduler and binds network ports for HTTP/WS traffic.
# @DATA_CONTRACT: [HTTP Request | WS Message] -> [HTTP Response | JSON Log Stream]

import asyncio
import os
from pathlib import Path

# project_root is used for static files mounting
project_root = Path(__file__).resolve().parent.parent.parent

from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from starlette.middleware.sessions import SessionMiddleware

from .dependencies import get_task_manager, get_scheduler_service
from .core.encryption_key import ensure_encryption_key
from .core.utils.network import NetworkError
from .core.logger import logger, belief_scope
from .core.database import AuthSessionLocal
from .core.auth.security import get_password_hash
from .models.auth import User, Role
from .api.routes import (
    plugins,
    tasks,
    settings,
    environments,
    mappings,
    migration,
    connections,
    git,
    storage,
    admin,
    llm,
    dashboards,
    datasets,
    reports,
    assistant,
    clean_release,
    clean_release_v2,
    profile,
    health,
    dataset_review,
)
from .api import auth

# [DEF:FastAPI_App:Global]
# @COMPLEXITY: 3
# @SEMANTICS: app, fastapi, instance, route-registry
# @PURPOSE: Canonical FastAPI application instance for route, middleware, and websocket registration.
# @RELATION: DEPENDS_ON -> [ApiRoutesModule]
# @RELATION: BINDS_TO -> [API_Routes]
# The title, description and version below are the metadata passed to the
# FastAPI constructor; every decorator and include_router call in this module
# registers against this single instance.
app = FastAPI(
    title="Superset Tools API",
    description="API for managing Superset automation tools and plugins.",
    version="1.0.0",
)
# [/DEF:FastAPI_App:Global]


# [DEF:ensure_initial_admin_user:Function]
# @COMPLEXITY: 3
# @PURPOSE: Ensures initial admin user exists when bootstrap env flags are enabled.
def ensure_initial_admin_user() -> None:
    """Create the bootstrap admin account described by environment variables.

    The function is a no-op unless ``INITIAL_ADMIN_CREATE`` (stripped,
    case-insensitive) is one of ``1``, ``true``, ``yes`` or ``on``. When
    enabled, ``INITIAL_ADMIN_USERNAME`` and ``INITIAL_ADMIN_PASSWORD`` are read
    and stripped; if either is empty a warning is logged and nothing is created.

    Otherwise a role named ``"Admin"`` is looked up (and created and committed
    when absent), and a new active ``LOCAL`` user holding that role is added
    unless a user with the same username already exists.

    Raises:
        Exception: any error raised while talking to the database is logged
            and re-raised after the session has been rolled back. The session
            is closed in every case.
    """
    bootstrap_flag = os.getenv("INITIAL_ADMIN_CREATE", "false").strip().lower()
    if bootstrap_flag not in {"1", "true", "yes", "on"}:
        return

    username = os.getenv("INITIAL_ADMIN_USERNAME", "").strip()
    password = os.getenv("INITIAL_ADMIN_PASSWORD", "").strip()
    if not (username and password):
        logger.warning(
            "INITIAL_ADMIN_CREATE is enabled but INITIAL_ADMIN_USERNAME/INITIAL_ADMIN_PASSWORD is missing; skipping bootstrap."
        )
        return

    session = AuthSessionLocal()
    try:
        # The role is committed on its own, before the user lookup, so it
        # persists even when the user turns out to exist already.
        role = session.query(Role).filter(Role.name == "Admin").first()
        if not role:
            role = Role(name="Admin", description="System Administrator")
            session.add(role)
            session.commit()
            session.refresh(role)

        already_present = (
            session.query(User).filter(User.username == username).first()
        )
        if already_present:
            logger.info(
                "Initial admin bootstrap skipped: user '%s' already exists.", username
            )
            return

        bootstrap_user = User(
            username=username,
            email=None,
            password_hash=get_password_hash(password),
            auth_source="LOCAL",
            is_active=True,
        )
        bootstrap_user.roles.append(role)
        session.add(bootstrap_user)
        session.commit()
        logger.info(
            "Initial admin user '%s' created from environment bootstrap.", username
        )
    except Exception as exc:
        session.rollback()
        logger.error("Failed to bootstrap initial admin user: %s", exc)
        raise
    finally:
        session.close()


# [/DEF:ensure_initial_admin_user:Function]


# [DEF:startup_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application startup tasks, such as starting the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is started.
# Startup event
@app.on_event("startup")
async def startup_event():
    """Run the startup sequence inside the ``startup_event`` belief scope.

    Calls ``ensure_encryption_key()``, then ``ensure_initial_admin_user()``,
    and finally starts the scheduler returned by ``get_scheduler_service()``.
    An exception from any step propagates to the caller.
    """
    with belief_scope("startup_event"):
        ensure_encryption_key()
        ensure_initial_admin_user()
        get_scheduler_service().start()


# [/DEF:startup_event:Function]


# [DEF:shutdown_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application shutdown tasks, such as stopping the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is stopped.
# Shutdown event
@app.on_event("shutdown")
async def shutdown_event():
    """Stop the scheduler returned by ``get_scheduler_service()``.

    The call runs inside the ``shutdown_event`` belief scope.
    """
    with belief_scope("shutdown_event"):
        get_scheduler_service().stop()


# [/DEF:shutdown_event:Function]

# [DEF:app_middleware:Block]
# @PURPOSE: Configure application-wide middleware (Session, CORS).
# Configure Session Middleware (required by Authlib for OAuth2 flow)
from .core.auth.config import auth_config

app.add_middleware(SessionMiddleware, secret_key=auth_config.SECRET_KEY)

# Configure CORS
# Allowed origins are read from the comma-separated CORS_ALLOW_ORIGINS
# environment variable, e.g. "https://tools.example.com,https://admin.example.com".
# When the variable is unset or blank the previous behaviour (allow every
# origin) is kept so existing deployments are unaffected.
# NOTE(review): a wildcard origin together with allow_credentials=True lets any
# site issue credentialed requests; set CORS_ALLOW_ORIGINS in production.
_cors_allow_origins = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# [/DEF:app_middleware:Block]


# [DEF:network_error_handler:Function]
# @COMPLEXITY: 1
# @PURPOSE: Global exception handler for NetworkError.
# @PRE: request is a FastAPI Request object.
# @POST: Returns a 503 JSON response with a "detail" message.
# @PARAM: request (Request) - The incoming request object.
# @PARAM: exc (NetworkError) - The exception instance.
@app.exception_handler(NetworkError)
async def network_error_handler(request: Request, exc: NetworkError):
    """Translate an unhandled ``NetworkError`` into a 503 response.

    Args:
        request: The request being processed when the error was raised (unused).
        exc: The ``NetworkError`` instance; it is logged, not exposed to the client.

    Returns:
        A ``JSONResponse`` with status 503 whose body is ``{"detail": ...}``,
        the same body shape FastAPI produces for an ``HTTPException``.
    """
    with belief_scope("network_error_handler"):
        logger.error(f"Network error: {exc}")
        # An exception handler has to return a response object. Returning an
        # HTTPException instance (as this handler used to) is not a valid
        # response, so the client never received the intended 503.
        return JSONResponse(
            status_code=503,
            content={
                "detail": "Environment unavailable. Please check if the Superset instance is running."
            },
        )


# [/DEF:network_error_handler:Function]


# [DEF:log_requests:Function]
# @COMPLEXITY: 3
# @PURPOSE: Middleware to log incoming HTTP requests and their response status.
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: request is a FastAPI Request object.
# @POST: Logs request and response details.
# @PARAM: request (Request) - The incoming request object.
# @PARAM: call_next (Callable) - The next middleware or route handler.
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log each HTTP request and the status code of its response.

    ``GET`` requests whose path ends with ``/api/tasks`` are treated as polling
    traffic and are not logged.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request to the rest of the stack
            and returns its response.

    Returns:
        The downstream response, or a 503 ``JSONResponse`` when a
        ``NetworkError`` propagates out of ``call_next``.
    """
    with belief_scope("log_requests"):
        # Avoid spamming logs for polling endpoints
        is_polling = request.url.path.endswith("/api/tasks") and request.method == "GET"

        if not is_polling:
            logger.info(f"Incoming request: {request.method} {request.url.path}")

        try:
            response = await call_next(request)
        except NetworkError as e:
            logger.error(f"Network error caught in middleware: {e}")
            # Return the 503 directly: an HTTPException raised from an HTTP
            # middleware is not routed through FastAPI's exception handlers
            # and would reach the client as a generic 500.
            return JSONResponse(
                status_code=503,
                content={
                    "detail": "Environment unavailable. Please check if the Superset instance is running."
                },
            )

        if not is_polling:
            logger.info(
                f"Response status: {response.status_code} for {request.url.path}"
            )
        return response


# [/DEF:log_requests:Function]

# [DEF:API_Routes:Block]
# @COMPLEXITY: 3
# @PURPOSE: Register all FastAPI route groups exposed by the application entrypoint.
# @RELATION: DEPENDS_ON -> [FastAPI_App]
# @RELATION: DEPENDS_ON -> [Route_Group_Contracts]
# @RELATION: DEPENDS_ON -> [AuthApi]
# @RELATION: DEPENDS_ON -> [AdminApi]
# @RELATION: DEPENDS_ON -> [PluginsRouter]
# @RELATION: DEPENDS_ON -> [TasksRouter]
# @RELATION: DEPENDS_ON -> [SettingsRouter]
# @RELATION: DEPENDS_ON -> [ConnectionsRouter]
# @RELATION: DEPENDS_ON -> [ReportsRouter]
# @RELATION: DEPENDS_ON -> [LlmRoutes]
# @RELATION: DEPENDS_ON -> [CleanReleaseV2Api]
# Include API routes
# Registration order is kept exactly as listed. Routers included without a
# prefix here get no prefix or tags from this module.
app.include_router(auth.router)
app.include_router(admin.router)
app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(
    connections.router,
    prefix="/api/settings/connections",
    tags=["Connections"],
)
app.include_router(environments.router, tags=["Environments"])
app.include_router(mappings.router, prefix="/api/mappings", tags=["Mappings"])
app.include_router(migration.router)
app.include_router(git.router, prefix="/api/git", tags=["Git"])
app.include_router(llm.router, prefix="/api/llm", tags=["LLM"])
app.include_router(storage.router, prefix="/api/storage", tags=["Storage"])
app.include_router(dashboards.router)
app.include_router(datasets.router)
app.include_router(reports.router)
app.include_router(assistant.router, prefix="/api/assistant", tags=["Assistant"])
app.include_router(clean_release.router)
app.include_router(clean_release_v2.router)
app.include_router(profile.router)
app.include_router(dataset_review.router)
app.include_router(health.router)
# [/DEF:API_Routes:Block]


# [DEF:api.include_routers:Action]
# @COMPLEXITY: 1
# @PURPOSE: Registers all API routers with the FastAPI application.
# @LAYER: API
# @SEMANTICS: routes, registration, api
# [/DEF:api.include_routers:Action]


# [DEF:websocket_endpoint:Function]
# @COMPLEXITY: 5
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
# @RELATION: [CALLS] ->[TaskManagerPackage]
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: task_id must be a valid task ID.
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
# @SIDE_EFFECT: Subscribes to TaskManager log queue and broadcasts messages over network.
# @DATA_CONTRACT: [task_id: str, source: str, level: str] -> [JSON log entry objects]
# @INVARIANT: Every accepted WebSocket subscription is unsubscribed exactly once even when streaming fails or the client disconnects.
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
#
# @TEST_CONTRACT: WebSocketLogStreamApi ->
# {
#   required_fields: {websocket: WebSocket, task_id: str},
#   optional_fields: {source: str, level: str},
#   invariants: [
#     "Accepts the WebSocket connection",
#     "Applies source and level filters correctly to streamed logs",
#     "Cleans up subscriptions on disconnect"
#   ]
# }
# @TEST_FIXTURE: valid_ws_connection -> {"task_id": "test_1", "source": "plugin"}
# @TEST_EDGE: task_not_found_ws -> closes connection or sends error
# @TEST_EDGE: empty_task_logs -> waits for new logs
# @TEST_INVARIANT: consistent_streaming -> verifies: [valid_ws_connection]
@app.websocket("/ws/logs/{task_id}")
async def websocket_endpoint(
    websocket: WebSocket, task_id: str, source: str = None, level: str = None
):
    """
    Stream a task's log entries to a WebSocket client.

    The connection is accepted, the client is subscribed to the task's log
    queue, the logs already recorded for the task are replayed, and then every
    entry arriving on the queue is forwarded. Entries that do not pass the
    filters below are skipped in both phases.

    Query Parameters:
        source: Filter logs by source component (e.g., "plugin", "superset_api").
            Compared case-insensitively against the entry's ``source`` attribute.
        level: Filter logs by minimum level (DEBUG, INFO, WARNING, ERROR).
            Level names outside that list are treated as the lowest rank.
    """
    with belief_scope("websocket_endpoint", f"task_id={task_id}"):
        await websocket.accept()

        wanted_source = source.lower() if source else None
        wanted_level = level.upper() if level else None
        severity_rank = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
        threshold = severity_rank.get(wanted_level, 0) if wanted_level else 0

        logger.reason(
            "Accepted WebSocket log stream connection",
            extra={
                "task_id": task_id,
                "source_filter": wanted_source,
                "level_filter": wanted_level,
                "min_level": threshold,
            },
        )

        manager = get_task_manager()
        log_queue = await manager.subscribe_logs(task_id)
        logger.reason(
            "Subscribed WebSocket client to task log queue",
            extra={"task_id": task_id},
        )

        def matches_filters(log_entry) -> bool:
            """Return True when the entry passes the source and level filters."""
            entry_source = getattr(log_entry, "source", None)
            if wanted_source and str(entry_source or "").lower() != wanted_source:
                return False

            if wanted_level:
                entry_rank = severity_rank.get(str(log_entry.level).upper(), 0)
                if entry_rank < threshold:
                    return False

            return True

        def to_payload(log_entry) -> dict:
            """Convert an entry to a dict whose timestamp is an ISO-8601 string."""
            payload = log_entry.dict()
            payload["timestamp"] = payload["timestamp"].isoformat()
            return payload

        try:
            logger.reason(
                "Starting task log stream replay and live forwarding",
                extra={"task_id": task_id},
            )

            # Phase 1: replay the logs already recorded for the task.
            history = manager.get_task_logs(task_id)
            replayed = 0
            for past_entry in history:
                if not matches_filters(past_entry):
                    continue
                await websocket.send_json(to_payload(past_entry))
                replayed += 1

            logger.reflect(
                "Initial task log replay completed",
                extra={
                    "task_id": task_id,
                    "replayed_logs": replayed,
                    "total_available_logs": len(history),
                },
            )

            # A task that is waiting for input gets a synthetic entry carrying
            # its pending input request, sent regardless of the filters.
            task = manager.get_task(task_id)
            if task and task.status == "AWAITING_INPUT" and task.input_request:
                if task.logs:
                    prompt_timestamp = task.logs[-1].timestamp.isoformat()
                else:
                    prompt_timestamp = "2024-01-01T00:00:00"
                await websocket.send_json(
                    {
                        "timestamp": prompt_timestamp,
                        "level": "INFO",
                        "message": "Task paused for user input (Connection Re-established)",
                        "context": {"input_request": task.input_request},
                    }
                )
                logger.reason(
                    "Replayed awaiting-input prompt to restored WebSocket client",
                    extra={"task_id": task_id, "task_status": task.status},
                )

            # Phase 2: forward live entries. The loop has no break; it ends only
            # when an exception propagates out of it.
            while True:
                live_entry = await log_queue.get()
                if not matches_filters(live_entry):
                    continue

                payload = to_payload(live_entry)
                await websocket.send_json(payload)
                logger.reflect(
                    "Forwarded task log entry to WebSocket client",
                    extra={
                        "task_id": task_id,
                        "level": payload.get("level"),
                    },
                )

                is_terminal = any(
                    marker in live_entry.message
                    for marker in ("Task completed successfully", "Task failed")
                )
                if is_terminal:
                    # NOTE(review): if logger.reason forwards `extra` to the
                    # standard logging module, the "message" key collides with a
                    # reserved LogRecord attribute and raises KeyError - confirm.
                    logger.reason(
                        "Observed terminal task log entry; delaying to preserve client visibility",
                        extra={"task_id": task_id, "message": live_entry.message},
                    )
                    await asyncio.sleep(2)

        except WebSocketDisconnect:
            logger.reason(
                "WebSocket client disconnected from task log stream",
                extra={"task_id": task_id},
            )
        except Exception as exc:
            logger.explore(
                "WebSocket log streaming encountered an unexpected failure",
                extra={"task_id": task_id, "error": str(exc)},
            )
            raise
        finally:
            # Runs on every exit path from the try block above.
            manager.unsubscribe_logs(task_id, log_queue)
            logger.reflect(
                "Released WebSocket log queue subscription",
                extra={"task_id": task_id},
            )


# [/DEF:websocket_endpoint:Function]

# [DEF:StaticFiles:Mount]
# @COMPLEXITY: 1
# @SEMANTICS: static, frontend, spa
# @PURPOSE: Mounts the frontend build directory to serve static assets.
frontend_path = project_root / "frontend" / "build"
if frontend_path.exists():
    app.mount(
        "/_app", StaticFiles(directory=str(frontend_path / "_app")), name="static"
    )

    # [DEF:serve_spa:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Serves the SPA frontend for any path not matched by API routes.
    # @PRE: frontend_path exists.
    # @POST: Returns the requested file when it lies inside frontend_path, otherwise index.html.
    @app.get("/{file_path:path}", include_in_schema=False)
    async def serve_spa(file_path: str):
        """Serve a file from the frontend build, falling back to ``index.html``.

        Args:
            file_path: The request path captured by the catch-all route.

        Returns:
            A ``FileResponse`` for the requested file when it is a regular file
            located inside ``frontend_path``; otherwise a ``FileResponse`` for
            ``index.html``.

        Raises:
            HTTPException: 404 when the path is ``api`` or starts with ``api/``
                or ``/api/``, so unknown API paths never receive HTML.
        """
        with belief_scope("serve_spa"):
            # Only serve SPA for non-API paths
            # API routes are registered separately and should be matched by FastAPI first
            if file_path and (
                file_path.startswith(("api/", "/api/")) or file_path == "api"
            ):
                # This should not happen if API routers are properly registered
                # Return 404 instead of serving HTML
                raise HTTPException(
                    status_code=404, detail=f"API endpoint not found: {file_path}"
                )

            if file_path:
                # file_path comes straight from the URL. Joining it unchecked
                # would let ".." segments or an absolute path select files
                # outside the build directory, so resolve it and require the
                # result to live under the resolved build root.
                try:
                    build_root = frontend_path.resolve()
                    candidate = (build_root / file_path).resolve()
                    is_servable = (
                        build_root in candidate.parents and candidate.is_file()
                    )
                except (OSError, ValueError):
                    # Unresolvable paths (e.g. embedded NUL) get the fallback.
                    is_servable = False
                if is_servable:
                    return FileResponse(str(candidate))

            return FileResponse(str(frontend_path / "index.html"))

    # [/DEF:serve_spa:Function]
else:
    # [DEF:read_root:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: A simple root endpoint to confirm that the API is running when frontend is missing.
    # @PRE: None.
    # @POST: Returns a JSON message indicating API status.
    @app.get("/")
    async def read_root():
        """Return a fixed status message; registered only when no frontend build exists."""
        with belief_scope("read_root"):
            return {
                "message": "Superset Tools API is running (Frontend build not found)"
            }

    # [/DEF:read_root:Function]
# [/DEF:StaticFiles:Mount]
# [/DEF:AppModule:Module]