feat(us1): add automatic-review slice to dataset review orchestration

This commit is contained in:
2026-03-17 10:57:49 +03:00
parent e916cb1f17
commit 023bacde39
24 changed files with 4870 additions and 131 deletions

View File

@@ -3,8 +3,8 @@
# @SEMANTICS: app, main, entrypoint, fastapi
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
# @LAYER: UI (API)
# @RELATION: DEPENDS_ON ->[AppDependencies]
# @RELATION: DEPENDS_ON ->[backend.src.api.routes]
# @RELATION: [DEPENDS_ON] ->[AppDependencies]
# @RELATION: [DEPENDS_ON] ->[ApiRoutesModule]
# @INVARIANT: Only one FastAPI app instance exists per process.
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
# @PRE: Python environment and dependencies installed; configuration database available.
@@ -28,7 +28,7 @@ from .dependencies import get_task_manager, get_scheduler_service
from .core.encryption_key import ensure_encryption_key
from .core.utils.network import NetworkError
from .core.logger import logger, belief_scope
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm, dashboards, datasets, reports, assistant, clean_release, clean_release_v2, profile, health, dataset_review
from .api import auth
# [DEF:App:Global]
@@ -45,6 +45,7 @@ app = FastAPI(
# [DEF:startup_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application startup tasks, such as starting the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is started.
# Startup event
@@ -59,6 +60,7 @@ async def startup_event():
# [DEF:shutdown_event:Function]
# @COMPLEXITY: 3
# @PURPOSE: Handles application shutdown tasks, such as stopping the scheduler.
# @RELATION: [CALLS] ->[AppDependencies]
# @PRE: None.
# @POST: Scheduler is stopped.
# Shutdown event
@@ -106,6 +108,7 @@ async def network_error_handler(request: Request, exc: NetworkError):
# [DEF:log_requests:Function]
# @COMPLEXITY: 3
# @PURPOSE: Middleware to log incoming HTTP requests and their response status.
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: request is a FastAPI Request object.
# @POST: Logs request and response details.
# @PARAM: request (Request) - The incoming request object.
@@ -154,6 +157,7 @@ app.include_router(assistant.router, prefix="/api/assistant", tags=["Assistant"]
app.include_router(clean_release.router)
app.include_router(clean_release_v2.router)
app.include_router(profile.router)
app.include_router(dataset_review.router)
app.include_router(health.router)
# [/DEF:api_routes:Block]
@@ -168,10 +172,13 @@ app.include_router(health.router)
# [DEF:websocket_endpoint:Function]
# @COMPLEXITY: 5
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
# @RELATION: [CALLS] ->[TaskManagerPackage]
# @RELATION: [DEPENDS_ON] ->[LoggerModule]
# @PRE: task_id must be a valid task ID.
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
# @SIDE_EFFECT: Subscribes to TaskManager log queue and broadcasts messages over network.
# @DATA_CONTRACT: [task_id: str, source: str, level: str] -> [JSON log entry objects]
# @INVARIANT: Every accepted WebSocket subscription is unsubscribed exactly once even when streaming fails or the client disconnects.
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
#
# @TEST_CONTRACT: WebSocketLogStreamApi ->
@@ -204,85 +211,121 @@ async def websocket_endpoint(
"""
with belief_scope("websocket_endpoint", f"task_id={task_id}"):
await websocket.accept()
# Normalize filter parameters
source_filter = source.lower() if source else None
level_filter = level.upper() if level else None
# Level hierarchy for filtering
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
logger.info(f"WebSocket connection accepted for task {task_id} (source={source_filter}, level={level_filter})")
task_manager = get_task_manager()
queue = await task_manager.subscribe_logs(task_id)
def matches_filters(log_entry) -> bool:
"""Check if log entry matches the filter criteria."""
# Check source filter
if source_filter and log_entry.source.lower() != source_filter:
return False
# Check level filter
if level_filter:
log_level = level_hierarchy.get(log_entry.level.upper(), 0)
if log_level < min_level:
source_filter = source.lower() if source else None
level_filter = level.upper() if level else None
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
logger.reason(
"Accepted WebSocket log stream connection",
extra={
"task_id": task_id,
"source_filter": source_filter,
"level_filter": level_filter,
"min_level": min_level,
},
)
task_manager = get_task_manager()
queue = await task_manager.subscribe_logs(task_id)
logger.reason(
"Subscribed WebSocket client to task log queue",
extra={"task_id": task_id},
)
def matches_filters(log_entry) -> bool:
"""Check if log entry matches the filter criteria."""
log_source = getattr(log_entry, "source", None)
if source_filter and str(log_source or "").lower() != source_filter:
return False
return True
try:
# Stream new logs
logger.info(f"Starting log stream for task {task_id}")
# Send initial logs first to build context (apply filters)
initial_logs = task_manager.get_task_logs(task_id)
for log_entry in initial_logs:
if matches_filters(log_entry):
if level_filter:
log_level = level_hierarchy.get(str(log_entry.level).upper(), 0)
if log_level < min_level:
return False
return True
try:
logger.reason(
"Starting task log stream replay and live forwarding",
extra={"task_id": task_id},
)
initial_logs = task_manager.get_task_logs(task_id)
initial_sent = 0
for log_entry in initial_logs:
if matches_filters(log_entry):
log_dict = log_entry.dict()
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
await websocket.send_json(log_dict)
initial_sent += 1
logger.reflect(
"Initial task log replay completed",
extra={
"task_id": task_id,
"replayed_logs": initial_sent,
"total_available_logs": len(initial_logs),
},
)
task = task_manager.get_task(task_id)
if task and task.status == "AWAITING_INPUT" and task.input_request:
synthetic_log = {
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
"level": "INFO",
"message": "Task paused for user input (Connection Re-established)",
"context": {"input_request": task.input_request},
}
await websocket.send_json(synthetic_log)
logger.reason(
"Replayed awaiting-input prompt to restored WebSocket client",
extra={"task_id": task_id, "task_status": task.status},
)
while True:
log_entry = await queue.get()
if not matches_filters(log_entry):
continue
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
log_dict["timestamp"] = log_dict["timestamp"].isoformat()
await websocket.send_json(log_dict)
logger.reflect(
"Forwarded task log entry to WebSocket client",
extra={
"task_id": task_id,
"level": log_dict.get("level"),
},
)
# Force a check for AWAITING_INPUT status immediately upon connection
# This ensures that if the task is already waiting when the user connects, they get the prompt.
task = task_manager.get_task(task_id)
if task and task.status == "AWAITING_INPUT" and task.input_request:
# Construct a synthetic log entry to trigger the frontend handler
# This is a bit of a hack but avoids changing the websocket protocol significantly
synthetic_log = {
"timestamp": task.logs[-1].timestamp.isoformat() if task.logs else "2024-01-01T00:00:00",
"level": "INFO",
"message": "Task paused for user input (Connection Re-established)",
"context": {"input_request": task.input_request}
}
await websocket.send_json(synthetic_log)
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
logger.reason(
"Observed terminal task log entry; delaying to preserve client visibility",
extra={"task_id": task_id, "message": log_entry.message},
)
await asyncio.sleep(2)
while True:
log_entry = await queue.get()
# Apply server-side filtering
if not matches_filters(log_entry):
continue
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
await websocket.send_json(log_dict)
# If task is finished, we could potentially close the connection
# but let's keep it open for a bit or until the client disconnects
if "Task completed successfully" in log_entry.message or "Task failed" in log_entry.message:
# Wait a bit to ensure client receives the last message
await asyncio.sleep(2)
# DO NOT BREAK here - allow client to keep connection open if they want to review logs
# or until they disconnect. Breaking closes the socket immediately.
# break
except WebSocketDisconnect:
logger.info(f"WebSocket connection disconnected for task {task_id}")
except Exception as e:
logger.error(f"WebSocket error for task {task_id}: {e}")
finally:
task_manager.unsubscribe_logs(task_id, queue)
except WebSocketDisconnect:
logger.reason(
"WebSocket client disconnected from task log stream",
extra={"task_id": task_id},
)
except Exception as exc:
logger.explore(
"WebSocket log streaming encountered an unexpected failure",
extra={"task_id": task_id, "error": str(exc)},
)
raise
finally:
task_manager.unsubscribe_logs(task_id, queue)
logger.reflect(
"Released WebSocket log queue subscription",
extra={"task_id": task_id},
)
# [/DEF:websocket_endpoint:Function]
# [DEF:StaticFiles:Mount]