feat(us1): add dataset review orchestration automatic review slice
This commit is contained in:
@@ -3,8 +3,8 @@
|
||||
# @SEMANTICS: app, main, entrypoint, fastapi
|
||||
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
|
||||
# @LAYER: UI (API)
|
||||
# @RELATION: DEPENDS_ON ->[AppDependencies]
|
||||
# @RELATION: DEPENDS_ON ->[backend.src.api.routes]
|
||||
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
|
||||
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
|
||||
# @INVARIANT: Only one FastAPI app instance exists per process.
|
||||
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
|
||||
# @PRE: Python environment and dependencies installed; configuration database available.
|
||||
@@ -28,7 +28,7 @@ from .dependencies import get_task_manager, get_scheduler_service
|
||||
from .core.encryption_key import ensure_encryption_key
|
||||
from .core.utils.network import NetworkError
|
||||
from .core.logger import logger, belief_scope
|
||||
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health
|
||||
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health, dataset_review
|
||||
from .api import auth
|
||||
|
||||
# [DEF:App:Global]
|
||||
@@ -45,6 +45,7 @@ app = FastAPI(
|
||||
# [DEF:startup_event:Function]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Handles application startup tasks, such as starting the scheduler.
|
||||
# @RELATION: [CALLS] ->[AppDependencies]
|
||||
# @PRE: None.
|
||||
# @POST: Scheduler is started.
|
||||
# Startup event
|
||||
@@ -59,6 +60,7 @@ async def startup_event():
|
||||
# [DEF:shutdown_event:Function]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Handles application shutdown tasks, such as stopping the scheduler.
|
||||
# @RELATION: [CALLS] ->[AppDependencies]
|
||||
# @PRE: None.
|
||||
# @POST: Scheduler is stopped.
|
||||
# Shutdown event
|
||||
@@ -106,6 +108,7 @@ async def network_error_handler(request: Request, exc: NetworkError):
|
||||
# [DEF:log_requests:Function]
|
||||
# @COMPLEXITY: 3
|
||||
# @PURPOSE: Middleware to log incoming HTTP requests and their response status.
|
||||
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
|
||||
# @PRE: request is a FastAPI Request object.
|
||||
# @POST: Logs request and response details.
|
||||
# @PARAM: request (Request) - The incoming request object.
|
||||
@@ -154,6 +157,7 @@ app.include_router(assistant.router, prefix="/api/assistant", tags=["Assistant"]
|
||||
app.include_router(clean_release.router)
|
||||
app.include_router(clean_release_v2.router)
|
||||
app.include_router(profile.router)
|
||||
app.include_router(dataset_review.router)
|
||||
app.include_router(health.router)
|
||||
# [/DEF:api_routes:Block]
|
||||
|
||||
@@ -168,10 +172,13 @@ app.include_router(health.router)
|
||||
# [DEF:websocket_endpoint:Function]
|
||||
# @COMPLEXITY: 5
|
||||
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
|
||||
# @RELATION: [CALLS] ->[TaskManagerPackage]
|
||||
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
|
||||
# @PRE: task_id must be a valid task ID.
|
||||
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
|
||||
# @SIDE_EFFECT: Subscribes to TaskManager log queue and broadcasts messages over network.
|
||||
# @DATA_CONTRACT: [task_id: str, source: str, level: str] -> [JSON log entry objects]
|
||||
# @INVARIANT: Every accepted WebSocket subscription is unsubscribed exactly once even when streaming fails or the client disconnects.
|
||||
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
|
||||
#
|
||||
# @TEST_CONTRACT: WebSocketLogStreamApi ->
|
||||
@@ -204,85 +211,121 @@ async def websocket_endpoint(
|
||||
"""
|
||||
with belief_scope("websocket_endpoint", f"task_id={task_id}"):
|
||||
await websocket.accept()
|
||||
|
||||
# Normalize filter parameters
|
||||
source_filter = source.lower() if source else None
|
||||
level_filter = level.upper() if level else None
|
||||
|
||||
# Level hierarchy for filtering
|
||||
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
|
||||
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
|
||||
|
||||
logger.info(f"WebSocket connection accepted for task {task_id} (source={source_filter}, level={level_filter})")
|
||||
task_manager = get_task_manager()
|
||||
queue = await task_manager.subscribe_logs(task_id)
|
||||
|
||||
def matches_filters(log_entry) -> bool:
|
||||
"""Check if log entry matches the filter criteria."""
|
||||
# Check source filter
|
||||
if source_filter and log_entry.source.lower() != source_filter:
|
||||
return False
|
||||
|
||||
# Check level filter
|
||||
if level_filter:
|
||||
log_level = level_hierarchy.get(log_entry.level.upper(), 0)
|
||||
if log_level < min_level:
|
||||
|
||||
source_filter = source.lower() if source else None
|
||||
level_filter = level.upper() if level else None
|
||||
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
|
||||
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
|
||||
|
||||
logger.reason(
|
||||
"Accepted WebSocket log stream connection",
|
||||
extra={
|
||||
"task_id": task_id,
|
||||
"source_filter": source_filter,
|
||||
"level_filter": level_filter,
|
||||
"min_level": min_level,
|
||||
},
|
||||
)
|
||||
|
||||
task_manager = get_task_manager()
|
||||
queue = await task_manager.subscribe_logs(task_id)
|
||||
logger.reason(
|
||||
"Subscribed WebSocket client to task log queue",
|
||||
extra={"task_id": task_id},
|
||||
)
|
||||
|
||||
def matches_filters(log_entry) -> bool:
|
||||
"""Check if log entry matches the filter criteria."""
|
||||
log_source = getattr(log_entry, "source", None)
|
||||
if source_filter and str(log_source or "").lower() != source_filter:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
try:
|
||||
# Stream new logs
|
||||
logger.info(f"Starting log stream for task {task_id}")
|
||||
|
||||
# Send initial logs first to build context (apply filters)
|
||||
initial_logs = task_manager.get_task_logs(task_id)
|
||||
for log_entry in initial_logs:
|
||||
if matches_filters(log_entry):
|
||||
|
||||
if level_filter:
|
||||
log_level = level_hierarchy.get(str(log_entry.level).upper(), 0)
|
||||
if log_level < min_level:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
try:
|
||||
logger.reason(
|
||||
"Starting task log stream replay and live forwarding",
|
||||
extra={"task_id": task_id},
|
||||
)
|
||||
|
||||
initial_logs = task_manager.get_task_logs(task_id)
|
||||
initial_sent = 0
|
||||
for log_entry in initial_logs:
|
||||
if matches_filters(log_entry):
|
||||
log_dict = log_entry.dict()
|
||||
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
|
||||
await websocket.send_json(log_dict)
|
||||
initial_sent += 1
|
||||
|
||||
logger.reflect(
|
||||
"Initial task log replay completed",
|
||||
extra={
|
||||
"task_id": task_id,
|
||||
"replayed_logs": initial_sent,
|
||||
"total_available_logs": len(initial_logs),
|
||||
},
|
||||
)
|
||||
|
||||
task = task_manager.get_task(task_id)
|
||||
if task and task.status == "AWAITING_INPUT" and task.input_request:
|
||||
synthetic_log = {
|
||||
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
|
||||
"level": "INFO",
|
||||
"message": "Task paused for user input (Connection Re-established)",
|
||||
"context": {"input_request": task.input_request},
|
||||
}
|
||||
await websocket.send_json(synthetic_log)
|
||||
logger.reason(
|
||||
"Replayed awaiting-input prompt to restored WebSocket client",
|
||||
extra={"task_id": task_id, "task_status": task.status},
|
||||
)
|
||||
|
||||
while True:
|
||||
log_entry = await queue.get()
|
||||
|
||||
if not matches_filters(log_entry):
|
||||
continue
|
||||
|
||||
log_dict = log_entry.dict()
|
||||
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
|
||||
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
|
||||
await websocket.send_json(log_dict)
|
||||
logger.reflect(
|
||||
"Forwarded task log entry to WebSocket client",
|
||||
extra={
|
||||
"task_id": task_id,
|
||||
"level": log_dict.get("level"),
|
||||
},
|
||||
)
|
||||
|
||||
# Force a check for AWAITING_INPUT status immediately upon connection
|
||||
# This ensures that if the task is already waiting when the user connects, they get the prompt.
|
||||
task = task_manager.get_task(task_id)
|
||||
if task and task.status == "AWAITING_INPUT" and task.input_request:
|
||||
# Construct a synthetic log entry to trigger the frontend handler
|
||||
# This is a bit of a hack but avoids changing the websocket protocol significantly
|
||||
synthetic_log = {
|
||||
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
|
||||
"level": "INFO",
|
||||
"message": "Task paused for user input (Connection Re-established)",
|
||||
"context": {"input_request": task.input_request}
|
||||
}
|
||||
await websocket.send_json(synthetic_log)
|
||||
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
|
||||
logger.reason(
|
||||
"Observed terminal task log entry; delaying to preserve client visibility",
|
||||
extra={"task_id": task_id, "message": log_entry.message},
|
||||
)
|
||||
await asyncio.sleep(2)
|
||||
|
||||
while True:
|
||||
log_entry = await queue.get()
|
||||
|
||||
# Apply server-side filtering
|
||||
if not matches_filters(log_entry):
|
||||
continue
|
||||
|
||||
log_dict = log_entry.dict()
|
||||
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
|
||||
await websocket.send_json(log_dict)
|
||||
|
||||
# If task is finished, we could potentially close the connection
|
||||
# but let's keep it open for a bit or until the client disconnects
|
||||
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
|
||||
# Wait a bit to ensure client receives the last message
|
||||
await asyncio.sleep(2)
|
||||
# DO NOT BREAK here - allow client to keep connection open if they want to review logs
|
||||
# or until they disconnect. Breaking closes the socket immediately.
|
||||
# break
|
||||
|
||||
except WebSocketDisconnect:
|
||||
logger.info(f"WebSocket connection disconnected for task {task_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket error for task {task_id}: {e}")
|
||||
finally:
|
||||
task_manager.unsubscribe_logs(task_id, queue)
|
||||
except WebSocketDisconnect:
|
||||
logger.reason(
|
||||
"WebSocket client disconnected from task log stream",
|
||||
extra={"task_id": task_id},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.explore(
|
||||
"WebSocket log streaming encountered an unexpected failure",
|
||||
extra={"task_id": task_id, "error": str(exc)},
|
||||
)
|
||||
raise
|
||||
finally:
|
||||
task_manager.unsubscribe_logs(task_id, queue)
|
||||
logger.reflect(
|
||||
"Released WebSocket log queue subscription",
|
||||
extra={"task_id": task_id},
|
||||
)
|
||||
# [/DEF:websocket_endpoint:Function]
|
||||
|
||||
# [DEF:StaticFiles:Mount]
|
||||
|
||||
Reference in New Issue
Block a user