fix: finalize semantic repair and test updates

This commit is contained in:
2026-03-21 15:07:06 +03:00
parent 005797334b
commit 9b47b9b667
99 changed files with 2484 additions and 985 deletions

View File

@@ -3,7 +3,7 @@
# @SEMANTICS: datasets, api, tests, pagination, mapping, docs
# @PURPOSE: Unit tests for datasets API endpoints.
# @LAYER: API
# @RELATION: DEPENDS_ON -> backend.src.api.routes.datasets
# @RELATION: DEPENDS_ON -> [src.api.routes.datasets:Module]
# @INVARIANT: Endpoint contracts remain stable for success and validation failure paths.
import pytest
@@ -11,7 +11,14 @@ from unittest.mock import MagicMock, patch, AsyncMock
from fastapi.testclient import TestClient
from src.app import app
from src.api.routes.datasets import DatasetsResponse, DatasetDetailResponse
from src.dependencies import get_current_user, has_permission, get_config_manager, get_task_manager, get_resource_service, get_mapping_service
from src.dependencies import (
get_current_user,
has_permission,
get_config_manager,
get_task_manager,
get_resource_service,
get_mapping_service,
)
# Global mock user for get_current_user dependency overrides
mock_user = MagicMock()
@@ -21,49 +28,58 @@ admin_role = MagicMock()
admin_role.name = "Admin"
mock_user.roles.append(admin_role)
@pytest.fixture(autouse=True)
def mock_deps():
    """Install mocked dependencies on the FastAPI app for every test.

    The config manager, task manager, resource service and mapping service
    providers are overridden with ``MagicMock`` instances, and the
    current-user and permission dependencies are overridden to return the
    module-level ``mock_user``.

    Yields:
        dict: The four service mocks under the keys ``"config"``, ``"task"``,
        ``"resource"`` and ``"mapping"`` so tests can program return values.

    Every override is removed again after the test has run.
    """
    # @INVARIANT: unconstrained mock — no spec= enforced; attribute typos will silently pass
    config_manager = MagicMock()
    # @INVARIANT: unconstrained mock — no spec= enforced; attribute typos will silently pass
    task_manager = MagicMock()
    # @INVARIANT: unconstrained mock — no spec= enforced; attribute typos will silently pass
    resource_service = MagicMock()
    # @INVARIANT: unconstrained mock — no spec= enforced; attribute typos will silently pass
    mapping_service = MagicMock()

    app.dependency_overrides[get_config_manager] = lambda: config_manager
    app.dependency_overrides[get_task_manager] = lambda: task_manager
    app.dependency_overrides[get_resource_service] = lambda: resource_service
    app.dependency_overrides[get_mapping_service] = lambda: mapping_service
    app.dependency_overrides[get_current_user] = lambda: mock_user

    # NOTE(review): the override key is whatever has_permission(...) returns
    # here; it only replaces the route's dependency if has_permission hands
    # back the same callable for equal arguments — confirm against
    # src.dependencies.
    for resource, action in (
        ("plugin:migration", "READ"),
        ("plugin:migration", "EXECUTE"),
        ("plugin:backup", "EXECUTE"),
        ("tasks", "READ"),
    ):
        app.dependency_overrides[has_permission(resource, action)] = (
            lambda: mock_user
        )

    yield {
        "config": config_manager,
        "task": task_manager,
        "resource": resource_service,
        "mapping": mapping_service,
    }

    # Teardown: drop all overrides so they do not leak into other tests.
    app.dependency_overrides.clear()
client = TestClient(app)
# [DEF:test_get_datasets_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate successful datasets listing contract for an existing environment.
# @TEST: GET /api/datasets returns 200 and valid schema
# @PRE: env_id exists
# @POST: Response matches DatasetsResponse schema
# [DEF:test_get_datasets_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_get_datasets_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock resource service response
mock_deps["resource"].get_datasets_with_status = AsyncMock(
return_value=[
@@ -73,13 +89,13 @@ def test_get_datasets_success(mock_deps):
"schema": "public",
"database": "sales_db",
"mapped_fields": {"total": 10, "mapped": 5},
"last_task": {"task_id": "task-1", "status": "SUCCESS"}
"last_task": {"task_id": "task-1", "status": "SUCCESS"},
}
]
)
response = client.get("/api/datasets?env_id=prod")
assert response.status_code == 200
data = response.json()
assert "datasets" in data
@@ -92,20 +108,16 @@ def test_get_datasets_success(mock_deps):
# [DEF:test_get_datasets_env_not_found:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate datasets listing returns 404 when the requested environment does not exist.
# @TEST: GET /api/datasets returns 404 if env_id missing
# @PRE: env_id does not exist
# @POST: Returns 404 error
# [/DEF:test_get_datasets_success:Function]
# [DEF:test_get_datasets_env_not_found:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_get_datasets_env_not_found(mock_deps):
    """Listing datasets for an unknown environment yields a 404 response."""
    # The config mock reports no environments at all.
    mock_deps["config"].get_environments.return_value = []

    result = client.get("/api/datasets?env_id=nonexistent")

    assert result.status_code == 404
    detail = result.json()["detail"]
    assert "Environment not found" in detail
@@ -114,15 +126,11 @@ def test_get_datasets_env_not_found(mock_deps):
# [DEF:test_get_datasets_invalid_pagination:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate datasets listing rejects invalid pagination parameters with 400 responses.
# @TEST: GET /api/datasets returns 400 for invalid page/page_size
# @PRE: page < 1 or page_size > 100
# @POST: Returns 400 error
# [/DEF:test_get_datasets_env_not_found:Function]
# [DEF:test_get_datasets_invalid_pagination:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_get_datasets_invalid_pagination(mock_deps):
mock_env = MagicMock()
mock_env.id = "prod"
@@ -132,7 +140,7 @@ def test_get_datasets_invalid_pagination(mock_deps):
response = client.get("/api/datasets?env_id=prod&page=0")
assert response.status_code == 400
assert "Page must be >= 1" in response.json()["detail"]
# Invalid page_size (too small)
response = client.get("/api/datasets?env_id=prod&page_size=0")
assert response.status_code == 400
@@ -148,21 +156,17 @@ def test_get_datasets_invalid_pagination(mock_deps):
# [DEF:test_map_columns_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate map-columns request creates an async mapping task and returns its identifier.
# @TEST: POST /api/datasets/map-columns creates mapping task
# @PRE: Valid env_id, dataset_ids, source_type
# @POST: Returns task_id
# [/DEF:test_get_datasets_invalid_pagination:Function]
# [DEF:test_map_columns_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_map_columns_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock task manager
mock_task = MagicMock()
mock_task.id = "task-123"
@@ -170,13 +174,9 @@ def test_map_columns_success(mock_deps):
response = client.post(
"/api/datasets/map-columns",
json={
"env_id": "prod",
"dataset_ids": [1, 2, 3],
"source_type": "postgresql"
}
json={"env_id": "prod", "dataset_ids": [1, 2, 3], "source_type": "postgresql"},
)
assert response.status_code == 200
data = response.json()
assert "task_id" in data
@@ -188,25 +188,17 @@ def test_map_columns_success(mock_deps):
# [DEF:test_map_columns_invalid_source_type:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate map-columns rejects unsupported source types with a 400 contract response.
# @TEST: POST /api/datasets/map-columns returns 400 for invalid source_type
# @PRE: source_type is not 'postgresql' or 'xlsx'
# @POST: Returns 400 error
# [/DEF:test_map_columns_success:Function]
# [DEF:test_map_columns_invalid_source_type:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_map_columns_invalid_source_type(mock_deps):
    """POST /api/datasets/map-columns with an unsupported source_type returns 400."""
    response = client.post(
        "/api/datasets/map-columns",
        # Single request payload; "invalid" is the unsupported source type.
        json={"env_id": "prod", "dataset_ids": [1], "source_type": "invalid"},
    )

    assert response.status_code == 400
    assert "Source type must be 'postgresql' or 'xlsx'" in response.json()["detail"]
@@ -215,21 +207,17 @@ def test_map_columns_invalid_source_type(mock_deps):
# [DEF:test_generate_docs_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @TEST: POST /api/datasets/generate-docs creates doc generation task
# @PRE: Valid env_id, dataset_ids, llm_provider
# @PURPOSE: Validate generate-docs request creates an async documentation task and returns its identifier.
# @POST: Returns task_id
# [/DEF:test_map_columns_invalid_source_type:Function]
# [DEF:test_generate_docs_success:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_generate_docs_success(mock_deps):
# Mock environment
mock_env = MagicMock()
mock_env.id = "prod"
mock_deps["config"].get_environments.return_value = [mock_env]
# Mock task manager
mock_task = MagicMock()
mock_task.id = "task-456"
@@ -237,13 +225,9 @@ def test_generate_docs_success(mock_deps):
response = client.post(
"/api/datasets/generate-docs",
json={
"env_id": "prod",
"dataset_ids": [1],
"llm_provider": "openai"
}
json={"env_id": "prod", "dataset_ids": [1], "llm_provider": "openai"},
)
assert response.status_code == 200
data = response.json()
assert "task_id" in data
@@ -255,87 +239,68 @@ def test_generate_docs_success(mock_deps):
# [DEF:test_map_columns_empty_ids:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate map-columns rejects empty dataset identifier lists.
# @TEST: POST /api/datasets/map-columns returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
# [/DEF:test_generate_docs_success:Function]
# [DEF:test_map_columns_empty_ids:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_map_columns_empty_ids(mock_deps):
    """@PRE: dataset_ids must be non-empty.

    POST /api/datasets/map-columns with an empty ``dataset_ids`` list is
    expected to be rejected with a 400 response.
    """
    response = client.post(
        "/api/datasets/map-columns",
        # Single request payload; the empty list is the condition under test.
        json={"env_id": "prod", "dataset_ids": [], "source_type": "postgresql"},
    )

    assert response.status_code == 400
    assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_map_columns_empty_ids:Function]
# [DEF:test_generate_docs_empty_ids:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate generate-docs rejects empty dataset identifier lists.
# @TEST: POST /api/datasets/generate-docs returns 400 for empty dataset_ids
# @PRE: dataset_ids is empty
# @POST: Returns 400 error
# [/DEF:test_map_columns_empty_ids:Function]
# [DEF:test_generate_docs_empty_ids:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_generate_docs_empty_ids(mock_deps):
    """@PRE: dataset_ids must be non-empty.

    POST /api/datasets/generate-docs with an empty ``dataset_ids`` list is
    expected to be rejected with a 400 response.
    """
    response = client.post(
        "/api/datasets/generate-docs",
        # Single request payload; the empty list is the condition under test.
        json={"env_id": "prod", "dataset_ids": [], "llm_provider": "openai"},
    )

    assert response.status_code == 400
    assert "At least one dataset ID must be provided" in response.json()["detail"]
# [/DEF:test_generate_docs_empty_ids:Function]
# [DEF:test_generate_docs_env_not_found:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @TEST: POST /api/datasets/generate-docs returns 404 for missing env
# @PRE: env_id does not exist
# @PURPOSE: Validate generate-docs returns 404 when the requested environment cannot be resolved.
# @POST: Returns 404 error
# [/DEF:test_generate_docs_empty_ids:Function]
# [DEF:test_generate_docs_env_not_found:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
def test_generate_docs_env_not_found(mock_deps):
    """@PRE: env_id must be a valid environment.

    POST /api/datasets/generate-docs for an environment that is not
    configured is expected to return a 404 response.
    """
    # The config mock reports no environments, so "ghost" cannot match any.
    mock_deps["config"].get_environments.return_value = []

    response = client.post(
        "/api/datasets/generate-docs",
        json={"env_id": "ghost", "dataset_ids": [1], "llm_provider": "openai"},
    )

    assert response.status_code == 404
    assert "Environment not found" in response.json()["detail"]
# [/DEF:test_generate_docs_env_not_found:Function]
# [DEF:test_get_datasets_superset_failure:Function]
# @RELATION: BINDS_TO -> DatasetsApiTests
# @RELATION: BINDS_TO -> [DatasetsApiTests:Module]
# @PURPOSE: Validate datasets listing surfaces a 503 contract when Superset access fails.
# @TEST_EDGE: external_superset_failure -> {status: 503}
# [/DEF:test_generate_docs_env_not_found:Function]
# @POST: Returns 503 with stable error detail when upstream dataset fetch fails.
def test_get_datasets_superset_failure(mock_deps):
"""@TEST_EDGE: external_superset_failure -> {status: 503}"""
mock_env = MagicMock()
@@ -349,7 +314,9 @@ def test_get_datasets_superset_failure(mock_deps):
response = client.get("/api/datasets?env_id=bad_conn")
assert response.status_code == 503
assert "Failed to fetch datasets" in response.json()["detail"]
# [/DEF:test_get_datasets_superset_failure:Function]
# [/DEF:DatasetsApiTests:Module]
# [/DEF:DatasetsApiTests:Module]