Files
ss-tools/backend/src/api/routes/__tests__/test_datasets.py
busya ed3d5f3039 feat(027): Final Phase T038-T043 implementation
- T038: SessionEvent logger and persistence logic
  - Added SessionEventLogger service with explicit audit event persistence
  - Added SessionEvent model with events relationship on DatasetReviewSession
  - Integrated event logging into orchestrator flows and API mutation endpoints

- T039: Semantic source version propagation
  - Added source_version column to SemanticFieldEntry
  - Added propagate_source_version_update() to SemanticResolver
  - Preserves locked/manual field invariants during propagation

- T040: Batch approval API and UI actions
  - Added batch semantic approval endpoint (/fields/semantic/approve-batch)
  - Added batch mapping approval endpoint (/mappings/approve-batch)
  - Added batch approval actions to SemanticLayerReview and ExecutionMappingReview components
  - Aligned batch semantics with single-item approval contracts

- T041: Superset compatibility matrix tests
  - Added test_superset_matrix.py with preview and SQL Lab fallback coverage
  - Tests verify client method preference and matrix fallback behavior

- T042: RBAC audit sweep on session-mutation endpoints
  - Added _require_owner_mutation_scope() helper
  - Applied owner guards to update_session, delete_session, and all mutation endpoints
  - Ensured no bypass of existing permission checks

- T043: i18n coverage for dataset-review UI
  - Added workspace state labels (empty/importing/review) to en.json and ru.json
  - Added batch action labels for semantics and mappings
  - Fixed workspace state comparison to lowercase strings
  - Removed hardcoded workspace state display strings

Signed-off-by: Implementation Specialist <impl@ss-tools>
2026-03-17 14:29:33 +03:00

309 lines
11 KiB
Python

# [DEF:DatasetsApiTests:Module]
# @COMPLEXITY: 3
# @SEMANTICS: datasets, api, tests, pagination, mapping, docs
# @PURPOSE: Unit tests for datasets API endpoints.
# @LAYER: API
# @RELATION: DEPENDS_ON -> backend.src.api.routes.datasets
# @INVARIANT: Endpoint contracts remain stable for success and validation failure paths.
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
from fastapi.testclient import TestClient
from src.app import app
from src.api.routes.datasets import DatasetsResponse, DatasetDetailResponse
from src.dependencies import get_current_user, has_permission, get_config_manager, get_task_manager, get_resource_service, get_mapping_service
# Global mock user for get_current_user dependency overrides
mock_user = MagicMock()
mock_user.username = "testuser"
mock_user.roles = []
admin_role = MagicMock()
admin_role.name = "Admin"
mock_user.roles.append(admin_role)
@pytest.fixture(autouse=True)
def mock_deps():
config_manager = MagicMock()
task_manager = MagicMock()
resource_service = MagicMock()
mapping_service = MagicMock()
app.dependency_overrides[get_config_manager] = lambda: config_manager
app.dependency_overrides[get_task_manager] = lambda: task_manager
app.dependency_overrides[get_resource_service] = lambda: resource_service
app.dependency_overrides[get_mapping_service] = lambda: mapping_service
app.dependency_overrides[get_current_user] = lambda: mock_user
app.dependency_overrides[has_permission("plugin:migration", "READ")] = lambda: mock_user
app.dependency_overrides[has_permission("plugin:migration", "EXECUTE")] = lambda: mock_user
app.dependency_overrides[has_permission("plugin:backup", "EXECUTE")] = lambda: mock_user
app.dependency_overrides[has_permission("tasks", "READ")] = lambda: mock_user
yield {
"config": config_manager,
"task": task_manager,
"resource": resource_service,
"mapping": mapping_service
}
app.dependency_overrides.clear()
client = TestClient(app)
# [DEF:test_get_datasets_success:Function]
# @PURPOSE: Validate successful datasets listing contract for an existing environment.
# @TEST: GET /api/datasets returns 200 and valid schema
# @PRE: env_id exists
# @POST: Response matches DatasetsResponse schema
def test_get_datasets_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock resource service response
mock_deps["resource"].get_datasets_with_status = AsyncMock(
return_value=[
{
"id": 1,
"table_name": "sales_data",
"schema": "public",
"database": "sales_db",
"mapped_fields": {"total": 10, "mapped": 5},
"last_task": {"task_id": "task-1", "status": "SUCCESS"}
}
]
)
response = client.get("/api/datasets?env_id=prod")
assert response.status_code == 200
data = response.json()
assert "datasets" in data
assert len(data["datasets"]) >= 0
# Validate against Pydantic model
DatasetsResponse(**data)
# [/DEF:test_get_datasets_success:Function]
# [DEF:test_get_datasets_env_not_found:Function]
# @PURPOSE: Validate datasets listing returns 404 when the requested environment does not exist.
# @TEST: GET /api/datasets returns 404 if env_id missing
# @PRE: env_id does not exist
# @POST: Returns 404 error
def test_get_datasets_env_not_found(mock_deps):
mock_deps["config"].get_environments.return_value = []
response = client.get("/api/datasets?env_id=nonexistent")
assert response.status_code == 404
assert "Environment not found" in response.json()["detail"]
# [/DEF:test_get_datasets_env_not_found:Function]
# [DEF:test_get_datasets_invalid_pagination:Function]
# @PURPOSE: Validate datasets listing rejects invalid pagination parameters with 400 responses.
# @TEST: GET /api/datasets returns 400 for invalid page/page_size
# @PRE: page < 1 or page_size > 100
# @POST: Returns 400 error
def test_get_datasets_invalid_pagination(mock_deps):
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Invalid page
response = client.get("/api/datasets?env_id=prod&page=0")
assert response.status_code == 400
assert "Page must be >= 1" in response.json()["detail"]
# Invalid page_size (too small)
response = client.get("/api/datasets?env_id=prod&page_size=0")
assert response.status_code == 400
assert "Page size must be between 1 and 100" in response.json()["detail"]
# @TEST_EDGE: page_size > 100 exceeds max
response = client.get("/api/datasets?env_id=prod&page_size=101")
assert response.status_code == 400
assert "Page size must be between 1 and 100" in response.json()["detail"]
# [/DEF:test_get_datasets_invalid_pagination:Function]
# [DEF:test_map_columns_success:Function]
# @PURPOSE: Validate map-columns request creates an async mapping task and returns its identifier.
# @TEST: POST /api/datasets/map-columns creates mapping task
# @PRE: Valid env_id, dataset_ids, source_type
# @POST: Returns task_id
def test_map_columns_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock task manager
mock_task = MagicMock()
mock_task.id = "task-123"
mock_deps["task"].create_task = AsyncMock(return_value=mock_task)
response = client.post(
"/api/datasets/map-columns",
json={
"env_id": "prod",
"dataset_ids": [1, 2, 3],
"source_type": "postgresql"
}
)
assert response.status_code == 200
data = response.json()
assert "task_id" in data
# @POST/@SIDE_EFFECT: create_task was called
mock_deps["task"].create_task.assert_called_once()
# [/DEF:test_map_columns_success:Function]
# [DEF:test_map_columns_invalid_source_type:Function]
# @PURPOSE: Validate map-columns rejects unsupported source types with a 400 contract response.
# @TEST: POST /api/datasets/map-columns returns 400 for invalid source_type
# @PRE: source_type is not 'postgresql' or 'xlsx'
# @POST: Returns 400 error
def test_map_columns_invalid_source_type(mock_deps):
response = client.post(
"/api/datasets/map-columns",
json={
"env_id": "prod",
"dataset_ids": [1],
"source_type": "invalid"
}
)
assert response.status_code == 400
assert "Source type must be 'postgresql' or 'xlsx'" in response.json()["detail"]
# [/DEF:test_map_columns_invalid_source_type:Function]
# [DEF:test_generate_docs_success:Function]
# @TEST: POST /api/datasets/generate-docs creates doc generation task
# @PRE: Valid env_id, dataset_ids, llm_provider
# @PURPOSE: Validate generate-docs request creates an async documentation task and returns its identifier.
# @POST: Returns task_id
def test_generate_docs_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock task manager
mock_task = MagicMock()
mock_task.id = "task-456"
mock_deps["task"].create_task = AsyncMock(return_value=mock_task)
response = client.post(
"/api/datasets/generate-docs",
json={
"env_id": "prod",
"dataset_ids": [1],
"llm_provider": "openai"
}
)
assert response.status_code == 200
data = response.json()
assert "task_id" in data
# @POST/@SIDE_EFFECT: create_task was called
mock_deps["task"].create_task.assert_called_once()
# [/DEF:test_generate_docs_success:Function]
# [DEF:test_map_columns_empty_ids:Function]
# @PURPOSE: Validate map-columns rejects empty dataset identifier lists.
# @TEST: POST /api/datasets/map-columns returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
def test_map_columns_empty_ids(mock_deps):
"""@PRE: dataset_ids must be non-empty."""
response = client.post(
"/api/datasets/map-columns",
json={
"env_id": "prod",
"dataset_ids": [],
"source_type": "postgresql"
}
)
assert response.status_code == 400
assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_map_columns_empty_ids:Function]
# [DEF:test_generate_docs_empty_ids:Function]
# @PURPOSE: Validate generate-docs rejects empty dataset identifier lists.
# @TEST: POST /api/datasets/generate-docs returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
def test_generate_docs_empty_ids(mock_deps):
"""@PRE: dataset_ids must be non-empty."""
response = client.post(
"/api/datasets/generate-docs",
json={
"env_id": "prod",
"dataset_ids": [],
"llm_provider": "openai"
}
)
assert response.status_code == 400
assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_generate_docs_empty_ids:Function]
# [DEF:test_generate_docs_env_not_found:Function]
# @TEST: POST /api/datasets/generate-docs returns 404 for missing env
# @PRE: env_id does not exist
# @PURPOSE: Validate generate-docs returns 404 when the requested environment cannot be resolved.
# @POST: Returns 404 error
def test_generate_docs_env_not_found(mock_deps):
"""@PRE: env_id must be a valid environment."""
mock_deps["config"].get_environments.return_value = []
response = client.post(
"/api/datasets/generate-docs",
json={
"env_id": "ghost",
"dataset_ids": [1],
"llm_provider": "openai"
}
)
assert response.status_code == 404
assert "Environment not found" in response.json()["detail"]
# [/DEF:test_generate_docs_env_not_found:Function]
# [DEF:test_get_datasets_superset_failure:Function]
# @PURPOSE: Validate datasets listing surfaces a 503 contract when Superset access fails.
# @TEST_EDGE: external_superset_failure -> {status: 503}
def test_get_datasets_superset_failure(mock_deps):
"""@TEST_EDGE: external_superset_failure -> {status: 503}"""
mock_env = MagicMock()
mock_env.id = "bad_conn"
mock_deps["config"].get_environments.return_value = [mock_env]
mock_deps["task"].get_all_tasks.return_value = []
mock_deps["resource"].get_datasets_with_status = AsyncMock(
side_effect=Exception("Connection refused")
)
response = client.get("/api/datasets?env_id=bad_conn")
assert response.status_code == 503
assert "Failed to fetch datasets" in response.json()["detail"]
# [/DEF:test_get_datasets_superset_failure:Function]
# [/DEF:DatasetsApiTests:Module]