# [DEF:AppModule:Module]
# @COMPLEXITY: 5
# @SEMANTICS: app, main, entrypoint, fastapi
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
# @LAYER: UI (API)
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @INVARIANT: Only one FastAPI app instance exists per process.
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
# @PRE: Python environment and dependencies installed; configuration database available.
# @POST: FastAPI app instance is created, middleware configured, and routes registered.
# @SIDE_EFFECT: Starts background scheduler and binds network ports for HTTP/WS traffic.
# @DATA_CONTRACT: [HTTP Request | WS Message] -> [HTTP Response | JSON Log Stream]
import asyncio
import os
from pathlib import Path

# project_root is used for static files mounting
project_root = Path(__file__).resolve().parent.parent.parent

from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from starlette.middleware.sessions import SessionMiddleware

from .dependencies import get_task_manager, get_scheduler_service
from .core.encryption_key import ensure_encryption_key
from .core.utils.network import NetworkError
from .core.logger import logger, belief_scope
from .core.database import AuthSessionLocal
from .core.auth.security import get_password_hash
from .models.auth import User, Role
from .api.routes import (
    plugins,
    tasks,
    settings,
    environments,
    mappings,
    migration,
    connections,
    git,
    storage,
    admin,
    llm,
    dashboards,
    datasets,
    reports,
    assistant,
    clean_release,
    clean_release_v2,
    profile,
    health,
    dataset_review,
)
from .api import auth

# [DEF:FastAPI_App:Global]
# @COMPLEXITY: 3
# @SEMANTICS: app,
# fastapi, instance, route-registry
# @PURPOSE: Canonical FastAPI application instance for route, middleware, and websocket registration.
# @RELATION: DEPENDS_ON -> [ApiRoutesModule]
# @RELATION: BINDS_TO -> [API_Routes]
app = FastAPI(
    title="Superset Tools API",
    description="API for managing Superset automation tools and plugins.",
    version="1.0.0",
)
# [/DEF:FastAPI_App:Global]


# [DEF:ensure_initial_admin_user:Function]
# @COMPLEXITY: 3
# @PURPOSE: Ensures initial admin user exists when bootstrap env flags are enabled.
def ensure_initial_admin_user() -> None:
    """Create the bootstrap admin account described by environment variables.

    Reads ``INITIAL_ADMIN_CREATE``, ``INITIAL_ADMIN_USERNAME`` and
    ``INITIAL_ADMIN_PASSWORD``. Nothing happens unless the create flag is one
    of ``1``/``true``/``yes``/``on`` (case-insensitive). When enabled, the
    "Admin" role is created if it is missing, and a LOCAL user with that role
    is inserted unless a user with the same username already exists.

    Raises:
        Exception: any error raised while talking to the auth database is
            logged, the session is rolled back, and the error is re-raised.
    """
    bootstrap_flag = os.getenv("INITIAL_ADMIN_CREATE", "false").strip().lower()
    if bootstrap_flag not in {"1", "true", "yes", "on"}:
        return

    admin_name = os.getenv("INITIAL_ADMIN_USERNAME", "").strip()
    admin_secret = os.getenv("INITIAL_ADMIN_PASSWORD", "").strip()
    if not (admin_name and admin_secret):
        logger.warning(
            "INITIAL_ADMIN_CREATE is enabled but INITIAL_ADMIN_USERNAME/INITIAL_ADMIN_PASSWORD is missing; skipping bootstrap."
        )
        return

    session = AuthSessionLocal()
    try:
        # The role is ensured first, so it is committed even when the user
        # turns out to exist already.
        role = session.query(Role).filter(Role.name == "Admin").first()
        if not role:
            role = Role(name="Admin", description="System Administrator")
            session.add(role)
            session.commit()
            session.refresh(role)

        already_present = (
            session.query(User).filter(User.username == admin_name).first()
        )
        if already_present:
            logger.info(
                "Initial admin bootstrap skipped: user '%s' already exists.",
                admin_name,
            )
            return

        account = User(
            username=admin_name,
            email=None,
            password_hash=get_password_hash(admin_secret),
            auth_source="LOCAL",
            is_active=True,
        )
        account.roles.append(role)
        session.add(account)
        session.commit()
        logger.info(
            "Initial admin user '%s' created from environment bootstrap.",
            admin_name,
        )
    except Exception as exc:
        session.rollback()
        logger.error("Failed to bootstrap initial admin user: %s", exc)
        raise
    finally:
        # Runs on every path out of the try block, including the early return.
        session.close()


# [/DEF:ensure_initial_admin_user:Function]


# [DEF:startup_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application startup tasks, such as starting the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is started.
# Startup event
@app.on_event("startup")
async def startup_event():
    """Prepare process-wide state and start the scheduler service.

    Ensures the encryption key, runs the optional initial-admin bootstrap,
    then starts the scheduler returned by ``get_scheduler_service()``.
    """
    with belief_scope("startup_event"):
        ensure_encryption_key()
        ensure_initial_admin_user()
        scheduler = get_scheduler_service()
        scheduler.start()


# [/DEF:startup_event:Function]


# [DEF:shutdown_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application shutdown tasks, such as stopping the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is stopped.
# Shutdown event
@app.on_event("shutdown")
async def shutdown_event():
    """Stop the scheduler service when the application shuts down."""
    with belief_scope("shutdown_event"):
        scheduler = get_scheduler_service()
        scheduler.stop()


# [/DEF:shutdown_event:Function]

# [DEF:app_middleware:Block]
# @PURPOSE: Configure application-wide middleware (Session, CORS).
# Configure Session Middleware (required by Authlib for OAuth2 flow)
from .core.auth.config import auth_config

app.add_middleware(SessionMiddleware, secret_key=auth_config.SECRET_KEY)

# Configure CORS
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; restrict allow_origins to the known frontend origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Adjust this in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# [/DEF:app_middleware:Block]


def _environment_unavailable_response() -> JSONResponse:
    """Build the 503 response sent when the Superset environment is unreachable.

    The body uses the same ``{"detail": ...}`` shape FastAPI produces for an
    ``HTTPException`` so clients see a uniform error format.
    """
    return JSONResponse(
        status_code=503,
        content={
            "detail": "Environment unavailable. Please check if the Superset instance is running."
        },
    )


# [DEF:network_error_handler:Function]
# @COMPLEXITY: 1
# @PURPOSE: Global exception handler for NetworkError.
# @PRE: request is a FastAPI Request object.
# @POST: Returns a 503 JSON response.
# @PARAM: request (Request) - The incoming request object.
# @PARAM: exc (NetworkError) - The exception instance.
@app.exception_handler(NetworkError)
async def network_error_handler(request: Request, exc: NetworkError):
    """Translate a ``NetworkError`` raised by a route into an HTTP 503 response.

    An exception handler must return a response object. Returning an
    ``HTTPException`` instance (as this handler previously did) is not a valid
    response, so the client would get a server error instead of the 503.
    """
    with belief_scope("network_error_handler"):
        logger.error("Network error: %s", exc)
        return _environment_unavailable_response()


# [/DEF:network_error_handler:Function]


# [DEF:log_requests:Function]
# @COMPLEXITY: 3
# @PURPOSE: Middleware to log incoming HTTP requests and their response status.
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: request is a FastAPI Request object.
# @POST: Logs request and response details.
# @PARAM: request (Request) - The incoming request object.
# @PARAM: call_next (Callable) - The next middleware or route handler.
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log each request and its response status, skipping task-polling GETs.

    If a ``NetworkError`` propagates out of the downstream application, a 503
    JSON response is returned. The response is returned rather than raising
    ``HTTPException`` because an exception raised from HTTP middleware is not
    processed by the application's exception handlers.
    """
    with belief_scope("log_requests"):
        # Avoid spamming logs for polling endpoints
        is_polling = request.method == "GET" and request.url.path.endswith(
            "/api/tasks"
        )

        if not is_polling:
            logger.info(f"Incoming request: {request.method} {request.url.path}")

        try:
            response = await call_next(request)
        except NetworkError as e:
            logger.error(f"Network error caught in middleware: {e}")
            return _environment_unavailable_response()

        if not is_polling:
            logger.info(
                f"Response status: {response.status_code} for {request.url.path}"
            )
        return response


# [/DEF:log_requests:Function]

# [DEF:API_Routes:Block]
# @COMPLEXITY: 3
# @PURPOSE: Register all FastAPI route groups exposed by the application entrypoint.
# @RELATION: DEPENDS_ON -> [FastAPI_App]
# @RELATION: DEPENDS_ON -> [Route_Group_Contracts]
# @RELATION: DEPENDS_ON -> [AuthApi]
# @RELATION: DEPENDS_ON -> [AdminApi]
# @RELATION: DEPENDS_ON -> [PluginsRouter]
# @RELATION: DEPENDS_ON -> [TasksRouter]
# @RELATION: DEPENDS_ON -> [SettingsRouter]
# @RELATION: DEPENDS_ON -> [ConnectionsRouter]
# @RELATION: DEPENDS_ON -> [ReportsRouter]
# @RELATION: DEPENDS_ON -> [LlmRoutes]
# @RELATION: DEPENDS_ON -> [CleanReleaseV2Api]
# Include API routes
# Each entry pairs a router with the keyword arguments handed to
# app.include_router. Routers listed with an empty mapping are included
# without a prefix or tags here. The tuple order is the registration order.
_ROUTE_GROUPS = (
    (auth.router, {}),
    (admin.router, {}),
    (plugins.router, {"prefix": "/api/plugins", "tags": ["Plugins"]}),
    (tasks.router, {"prefix": "/api/tasks", "tags": ["Tasks"]}),
    (settings.router, {"prefix": "/api/settings", "tags": ["Settings"]}),
    (
        connections.router,
        {"prefix": "/api/settings/connections", "tags": ["Connections"]},
    ),
    (environments.router, {"tags": ["Environments"]}),
    (mappings.router, {"prefix": "/api/mappings", "tags": ["Mappings"]}),
    (migration.router, {}),
    (git.router, {"prefix": "/api/git", "tags": ["Git"]}),
    (llm.router, {"prefix": "/api/llm", "tags": ["LLM"]}),
    (storage.router, {"prefix": "/api/storage", "tags": ["Storage"]}),
    (dashboards.router, {}),
    (datasets.router, {}),
    (reports.router, {}),
    (assistant.router, {"prefix": "/api/assistant", "tags": ["Assistant"]}),
    (clean_release.router, {}),
    (clean_release_v2.router, {}),
    (profile.router, {}),
    (dataset_review.router, {}),
    (health.router, {}),
)
for _router, _include_options in _ROUTE_GROUPS:
    app.include_router(_router, **_include_options)
# [/DEF:API_Routes:Block]


# [DEF:api.include_routers:Action]
# @COMPLEXITY: 1
# @PURPOSE: Registers all API routers with the FastAPI application.
# @LAYER: API
# @SEMANTICS: routes, registration, api
# [/DEF:api.include_routers:Action]


# [DEF:websocket_endpoint:Function]
# @COMPLEXITY: 5
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
# @RELATION: [CALLS] ->[TaskManagerPackage]
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: task_id must be a valid task ID.
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
# @SIDE_EFFECT: Subscribes to TaskManager log queue and broadcasts messages over network.
# @DATA_CONTRACT: [task_id: str, source: str, level: str] -> [JSON log entry objects]
# @INVARIANT: Every accepted WebSocket subscription is unsubscribed exactly once even when streaming fails or the client disconnects.
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
#
# @TEST_CONTRACT: WebSocketLogStreamApi ->
# {
#   required_fields: {websocket: WebSocket, task_id: str},
#   optional_fields: {source: str, level: str},
#   invariants: [
#     "Accepts the WebSocket connection",
#     "Applies source and level filters correctly to streamed logs",
#     "Cleans up subscriptions on disconnect"
#   ]
# }
# @TEST_FIXTURE: valid_ws_connection -> {"task_id": "test_1", "source": "plugin"}
# @TEST_EDGE: task_not_found_ws -> closes connection or sends error
# @TEST_EDGE: empty_task_logs -> waits for new logs
# @TEST_INVARIANT: consistent_streaming -> verifies: [valid_ws_connection]
@app.websocket("/ws/logs/{task_id}")
async def websocket_endpoint(
    websocket: WebSocket, task_id: str, source: str = None, level: str = None
):
    """
    WebSocket endpoint for real-time log streaming with optional server-side filtering.

    Existing task logs are replayed first, then new entries from the task's
    log queue are forwarded until the client disconnects. The queue
    subscription is released in all cases.

    Query Parameters:
        source: Filter logs by source component (e.g., "plugin", "superset_api")
        level: Filter logs by minimum level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            An unrecognized level applies no level filtering.
    """
    with belief_scope("websocket_endpoint", f"task_id={task_id}"):
        await websocket.accept()

        source_filter = source.lower() if source else None
        level_filter = level.upper() if level else None
        # CRITICAL is ranked above ERROR so critical entries are not treated
        # as an unknown (lowest) level and dropped by a WARNING/ERROR filter.
        level_hierarchy = {
            "DEBUG": 0,
            "INFO": 1,
            "WARNING": 2,
            "ERROR": 3,
            "CRITICAL": 4,
        }
        min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0

        logger.reason(
            "Accepted WebSocket log stream connection",
            extra={
                "task_id": task_id,
                "source_filter": source_filter,
                "level_filter": level_filter,
                "min_level": min_level,
            },
        )

        def matches_filters(log_entry) -> bool:
            """Check if log entry matches the filter criteria."""
            log_source = getattr(log_entry, "source", None)
            if source_filter and str(log_source or "").lower() != source_filter:
                return False

            if level_filter:
                # Entries with an unrecognized level rank lowest (0).
                log_level = level_hierarchy.get(str(log_entry.level).upper(), 0)
                if log_level < min_level:
                    return False

            return True

        def to_payload(log_entry) -> dict:
            """Convert a log entry to a JSON-serializable dict."""
            payload = log_entry.dict()
            payload["timestamp"] = payload["timestamp"].isoformat()
            return payload

        task_manager = get_task_manager()
        queue = await task_manager.subscribe_logs(task_id)

        # Everything after a successful subscribe runs inside try/finally so
        # the subscription is always released, whatever fails afterwards.
        try:
            logger.reason(
                "Subscribed WebSocket client to task log queue",
                extra={"task_id": task_id},
            )
            logger.reason(
                "Starting task log stream replay and live forwarding",
                extra={"task_id": task_id},
            )

            initial_logs = task_manager.get_task_logs(task_id)
            initial_sent = 0
            for log_entry in initial_logs:
                if matches_filters(log_entry):
                    await websocket.send_json(to_payload(log_entry))
                    initial_sent += 1

            logger.reflect(
                "Initial task log replay completed",
                extra={
                    "task_id": task_id,
                    "replayed_logs": initial_sent,
                    "total_available_logs": len(initial_logs),
                },
            )

            # A client that reconnects while the task waits for input gets the
            # pending input request again via a synthetic log entry.
            task = task_manager.get_task(task_id)
            if task and task.status == "AWAITING_INPUT" and task.input_request:
                synthetic_log = {
                    # Fixed placeholder timestamp when the task has no logs yet.
                    "timestamp": task.logs[-1].timestamp.isoformat()
                    if task.logs
                    else "2024-01-01T00:00:00",
                    "level": "INFO",
                    "message": "Task paused for user input (Connection Re-established)",
                    "context": {"input_request": task.input_request},
                }
                await websocket.send_json(synthetic_log)
                logger.reason(
                    "Replayed awaiting-input prompt to restored WebSocket client",
                    extra={"task_id": task_id, "task_status": task.status},
                )

            while True:
                log_entry = await queue.get()
                if not matches_filters(log_entry):
                    continue

                log_dict = to_payload(log_entry)
                await websocket.send_json(log_dict)
                logger.reflect(
                    "Forwarded task log entry to WebSocket client",
                    extra={
                        "task_id": task_id,
                        "level": log_dict.get("level"),
                    },
                )

                if (
                    "Task completed successfully" in log_entry.message
                    or "Task failed" in log_entry.message
                ):
                    # The key is "log_message" rather than "message" because
                    # the standard logging module rejects "message" as an
                    # `extra` key.
                    logger.reason(
                        "Observed terminal task log entry; delaying to preserve client visibility",
                        extra={"task_id": task_id, "log_message": log_entry.message},
                    )
                    # Deliberately no break: the stream stays open after the
                    # terminal entry until the client disconnects.
                    await asyncio.sleep(2)

        except WebSocketDisconnect:
            logger.reason(
                "WebSocket client disconnected from task log stream",
                extra={"task_id": task_id},
            )
        except Exception as exc:
            logger.explore(
                "WebSocket log streaming encountered an unexpected failure",
                extra={"task_id": task_id, "error": str(exc)},
            )
            raise
        finally:
            task_manager.unsubscribe_logs(task_id, queue)
            logger.reflect(
                "Released WebSocket log queue subscription",
                extra={"task_id": task_id},
            )


# [/DEF:websocket_endpoint:Function]


def _resolve_frontend_asset(frontend_root, file_path):
    """Return the real path of a file inside *frontend_root*, or ``None``.

    ``file_path`` comes straight from the request URL, so it may contain ``..``
    segments or be absolute. The candidate is resolved and accepted only if it
    still lies under the resolved frontend root and is a regular file.
    """
    if not file_path:
        return None
    try:
        root = frontend_root.resolve()
        candidate = (root / file_path).resolve()
        # Raises ValueError when the candidate escapes the frontend root.
        candidate.relative_to(root)
    except (OSError, RuntimeError, ValueError):
        return None
    return candidate if candidate.is_file() else None


# [DEF:StaticFiles:Mount]
# @COMPLEXITY: 1
# @SEMANTICS: static, frontend, spa
# @PURPOSE: Mounts the frontend build directory to serve static assets.
frontend_path = project_root / "frontend" / "build"
if frontend_path.exists():
    app.mount(
        "/_app", StaticFiles(directory=str(frontend_path / "_app")), name="static"
    )

    # [DEF:serve_spa:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: Serves the SPA frontend for any path not matched by API routes.
    # @PRE: frontend_path exists.
    # @POST: Returns the requested file or index.html.
    @app.get("/{file_path:path}", include_in_schema=False)
    async def serve_spa(file_path: str):
        """Serve a built frontend file, falling back to ``index.html``.

        Paths under ``api`` get a 404. Any other path that does not name a
        file inside the frontend build directory, including paths that would
        escape that directory, receives ``index.html``.

        Raises:
            HTTPException: 404 when the path targets the API namespace.
        """
        with belief_scope("serve_spa"):
            # Only serve SPA for non-API paths
            # API routes are registered separately and should be matched by FastAPI first
            if file_path and (
                file_path.startswith(("api/", "/api/")) or file_path == "api"
            ):
                # This should not happen if API routers are properly registered
                # Return 404 instead of serving HTML
                raise HTTPException(
                    status_code=404, detail=f"API endpoint not found: {file_path}"
                )

            asset_path = _resolve_frontend_asset(frontend_path, file_path)
            if asset_path is not None:
                return FileResponse(str(asset_path))
            return FileResponse(str(frontend_path / "index.html"))

    # [/DEF:serve_spa:Function]
else:
    # [DEF:read_root:Function]
    # @COMPLEXITY: 1
    # @PURPOSE: A simple root endpoint to confirm that the API is running when frontend is missing.
    # @PRE: None.
    # @POST: Returns a JSON message indicating API status.
    @app.get("/")
    async def read_root():
        """Report that the API is up when no frontend build is present."""
        with belief_scope("read_root"):
            return {
                "message": "Superset Tools API is running (Frontend build not found)"
            }

    # [/DEF:read_root:Function]
# [/DEF:StaticFiles:Mount]
# [/DEF:AppModule:Module]