diff --git a/.gitignore b/.gitignore index fd6bff94..bd9e47ad 100755 --- a/.gitignore +++ b/.gitignore @@ -65,13 +65,15 @@ backend/mappings.db backend/tasks.db -backend/logs -backend/auth.db -semantics/reports -backend/tasks.db - -# Universal / tooling -node_modules/ +backend/logs +backend/auth.db +semantics/reports +backend/tasks.db +backend/**/*.db +backend/**/*.sqlite + +# Universal / tooling +node_modules/ .venv/ coverage/ *.tmp diff --git a/README.md b/README.md index 17988389..cd361041 100755 --- a/README.md +++ b/README.md @@ -151,8 +151,10 @@ cd backend source .venv/bin/activate python src/scripts/init_auth_db.py +# При первом запуске будет создан backend/.env с ENCRYPTION_KEY + # Создание администратора -python src/scripts/create_admin.py --username admin --password admin +python src/scripts/create_admin.py --username admin --password '' ``` ## 🏢 Enterprise Clean Deployment (internal-only) diff --git a/backend/src/scripts/init_auth_db.py b/backend/src/scripts/init_auth_db.py index 7845079e..587c7994 100644 --- a/backend/src/scripts/init_auth_db.py +++ b/backend/src/scripts/init_auth_db.py @@ -8,8 +8,10 @@ # @INVARIANT: Safe to run multiple times (idempotent). # [SECTION: IMPORTS] +import os import sys from pathlib import Path +from cryptography.fernet import Fernet # Add src to path sys.path.append(str(Path(__file__).parent.parent.parent)) @@ -19,6 +21,41 @@ from src.core.logger import logger, belief_scope from src.scripts.seed_permissions import seed_permissions # [/SECTION] +ENV_FILE_PATH = Path(__file__).resolve().parents[2] / ".env" + + +# [DEF:ensure_encryption_key:Function] +# @PURPOSE: Ensure backend runtime has a persistent Fernet encryption key during first-time installation. +# @PRE: Backend root is writable or ENCRYPTION_KEY is already provided via environment. +# @POST: ENCRYPTION_KEY exists in process environment or backend/.env. 
def ensure_encryption_key(env_file_path: Path = ENV_FILE_PATH) -> str:
    """Return a valid Fernet key, generating and persisting one if none exists.

    Resolution order:
      1. ``ENCRYPTION_KEY`` from the process environment (the file is not touched).
      2. The first non-empty ``ENCRYPTION_KEY`` assignment found in
         ``env_file_path``; it is exported into ``os.environ``.
      3. A freshly generated key, appended to ``env_file_path`` and exported
         into ``os.environ``.

    Args:
        env_file_path: dotenv-style file used to persist the key.

    Returns:
        The resolved key as text.

    Raises:
        ValueError: A configured key (environment or file) is not a valid
            Fernet key. The message names the source of the bad value.
        OSError: ``env_file_path`` cannot be read or written.
    """

    def _validated(candidate: str, origin: str) -> str:
        # Fernet() rejects malformed keys with ValueError; re-raise the same
        # exception type with the origin attached so the operator knows which
        # setting to fix.
        try:
            Fernet(candidate.encode())
        except ValueError as err:
            raise ValueError(
                f"ENCRYPTION_KEY from {origin} is not a valid Fernet key"
            ) from err
        return candidate

    existing_key = os.getenv("ENCRYPTION_KEY", "").strip()
    if existing_key:
        _validated(existing_key, "the process environment")
        logger.info("ENCRYPTION_KEY already provided via environment; skipping generation.")
        return existing_key

    existing_text = ""
    if env_file_path.exists():
        existing_text = env_file_path.read_text(encoding="utf-8")
        for raw_line in existing_text.splitlines():
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            # Accept the common dotenv spellings ("export KEY=...", spaces
            # around "=", quoted values). Missing any of them would make us
            # append a second, different key next to the one already in use.
            if entry.startswith("export "):
                entry = entry[len("export "):].lstrip()
            name, separator, value = entry.partition("=")
            if not separator or name.strip() != "ENCRYPTION_KEY":
                continue
            persisted_key = value.strip()
            if (
                len(persisted_key) >= 2
                and persisted_key[0] == persisted_key[-1]
                and persisted_key[0] in "'\""
            ):
                persisted_key = persisted_key[1:-1].strip()
            if persisted_key:
                _validated(persisted_key, str(env_file_path))
                os.environ["ENCRYPTION_KEY"] = persisted_key
                logger.info(f"Loaded existing ENCRYPTION_KEY from {env_file_path}.")
                return persisted_key

    generated_key = Fernet.generate_key().decode()
    # Create the file owner-read/write only: it holds a secret. The mode is
    # applied only when the file is created; an existing file keeps its
    # current permissions.
    descriptor = os.open(
        env_file_path, os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600
    )
    with os.fdopen(descriptor, "a", encoding="utf-8") as env_file:
        # Only add a separator when the existing content lacks a final newline.
        if existing_text and not existing_text.endswith("\n"):
            env_file.write("\n")
        env_file.write(f"ENCRYPTION_KEY={generated_key}\n")

    os.environ["ENCRYPTION_KEY"] = generated_key
    logger.info(f"Generated ENCRYPTION_KEY and persisted it to {env_file_path}.")
    return generated_key
# [/DEF:ensure_encryption_key:Function]


# [DEF:run_init:Function]
# @PURPOSE: Main entry point for the initialization script.
# @POST: auth.db is initialized with the correct schema and seeded permissions.
@@ -26,6 +63,7 @@ def run_init(): with belief_scope("init_auth_db"): logger.info("Initializing authentication database...") try: + ensure_encryption_key() init_db() logger.info("Authentication database initialized successfully.") @@ -40,4 +78,4 @@ def run_init(): if __name__ == "__main__": run_init() -# [/DEF:backend.src.scripts.init_auth_db:Module] \ No newline at end of file +# [/DEF:backend.src.scripts.init_auth_db:Module] diff --git a/backend/src/services/__tests__/test_encryption_manager.py b/backend/src/services/__tests__/test_encryption_manager.py index 6deb64fb..284c5db1 100644 --- a/backend/src/services/__tests__/test_encryption_manager.py +++ b/backend/src/services/__tests__/test_encryption_manager.py @@ -26,8 +26,7 @@ class TestEncryptionManager: """Construct EncryptionManager directly using Fernet (avoids relative import chain).""" # Re-implement the same logic as EncryptionManager to avoid import issues # with the llm_provider module's relative imports - import os - key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode() + key = Fernet.generate_key() fernet = Fernet(key) class EncryptionManager: @@ -99,6 +98,18 @@ class TestEncryptionManager: assert decrypted == "" # [/DEF:test_encrypt_empty_string:Function] + # [DEF:test_missing_key_fails_fast:Function] + # @PURPOSE: Missing ENCRYPTION_KEY must abort initialization instead of using a fallback secret. + # @PRE: ENCRYPTION_KEY is unset. + # @POST: RuntimeError raised during EncryptionManager construction. + def test_missing_key_fails_fast(self): + from src.services.llm_provider import EncryptionManager + + with patch.dict("os.environ", {}, clear=True): + with pytest.raises(RuntimeError, match="ENCRYPTION_KEY must be set"): + EncryptionManager() + # [/DEF:test_missing_key_fails_fast:Function] + # [DEF:test_custom_key_roundtrip:Function] # @PURPOSE: Custom Fernet key produces valid roundtrip. # @PRE: Generated Fernet key. 
diff --git a/backend/src/services/llm_provider.py b/backend/src/services/llm_provider.py index f43d3c08..c81a9949 100644 --- a/backend/src/services/llm_provider.py +++ b/backend/src/services/llm_provider.py @@ -6,18 +6,35 @@ # @RELATION: DEPENDS_ON -> backend.src.core.database # @RELATION: DEPENDS_ON -> backend.src.models.llm -from typing import List, Optional +from typing import List, Optional, TYPE_CHECKING from sqlalchemy.orm import Session from ..models.llm import LLMProvider -from ..plugins.llm_analysis.models import LLMProviderConfig from ..core.logger import belief_scope, logger from cryptography.fernet import Fernet import os +if TYPE_CHECKING: + from ..plugins.llm_analysis.models import LLMProviderConfig + +# [DEF:_require_fernet_key:Function] +# @TIER: CRITICAL +# @PURPOSE: Load and validate the Fernet key used for secret encryption. +# @PRE: ENCRYPTION_KEY environment variable must be set to a valid Fernet key. +# @POST: Returns validated key bytes ready for Fernet initialization. +def _require_fernet_key() -> bytes: + raw_key = os.getenv("ENCRYPTION_KEY", "").strip() + if not raw_key: + raise RuntimeError("ENCRYPTION_KEY must be set to a valid Fernet key") + + key = raw_key.encode() + Fernet(key) + return key +# [/DEF:_require_fernet_key:Function] + # [DEF:EncryptionManager:Class] # @TIER: CRITICAL # @PURPOSE: Handles encryption and decryption of sensitive data like API keys. -# @INVARIANT: Uses a secret key from environment or a default one (fallback only for dev). +# @INVARIANT: Uses only a validated secret key from environment. # # @TEST_CONTRACT: EncryptionManagerModel -> # { @@ -33,10 +50,10 @@ import os class EncryptionManager: # [DEF:EncryptionManager.__init__:Function] # @PURPOSE: Initialize the encryption manager with a Fernet key. - # @PRE: ENCRYPTION_KEY env var must be set or use default dev key. + # @PRE: ENCRYPTION_KEY env var must be set to a valid Fernet key. # @POST: Fernet instance ready for encryption/decryption. 
def __init__(self): - self.key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode() + self.key = _require_fernet_key() self.fernet = Fernet(self.key) # [/DEF:EncryptionManager.__init__:Function] @@ -97,7 +114,7 @@ class LLMProviderService: # @PURPOSE: Creates a new LLM provider with encrypted API key. # @PRE: config must contain valid provider configuration. # @POST: New provider created and persisted to database. - def create_provider(self, config: LLMProviderConfig) -> LLMProvider: + def create_provider(self, config: "LLMProviderConfig") -> LLMProvider: with belief_scope("create_provider"): encrypted_key = self.encryption.encrypt(config.api_key) db_provider = LLMProvider( @@ -119,7 +136,7 @@ class LLMProviderService: # @PURPOSE: Updates an existing LLM provider. # @PRE: provider_id must exist, config must be valid. # @POST: Provider updated and persisted to database. - def update_provider(self, provider_id: str, config: LLMProviderConfig) -> Optional[LLMProvider]: + def update_provider(self, provider_id: str, config: "LLMProviderConfig") -> Optional[LLMProvider]: with belief_scope("update_provider"): db_provider = self.get_provider(provider_id) if not db_provider: @@ -180,4 +197,4 @@ class LLMProviderService: # [/DEF:LLMProviderService:Class] -# [/DEF:backend.src.services.llm_provider:Module] \ No newline at end of file +# [/DEF:backend.src.services.llm_provider:Module] diff --git a/backend/test_auth_debug.py b/backend/test_auth_debug.py deleted file mode 100644 index bba50b3e..00000000 --- a/backend/test_auth_debug.py +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env python3 -"""Debug script to test Superset API authentication""" - -from pprint import pprint -from src.core.superset_client import SupersetClient -from src.core.config_manager import ConfigManager - - -def main(): - print("Debugging Superset API authentication...") - - config = ConfigManager() - - # Select first available environment - environments = 
config.get_environments() - - if not environments: - print("No environments configured") - return - - env = environments[0] - print(f"\nTesting environment: {env.name}") - print(f"URL: {env.url}") - - try: - # Test API client authentication - print("\n--- Testing API Authentication ---") - client = SupersetClient(env) - tokens = client.authenticate() - - print("\nAPI Auth Success!") - print(f"Access Token: {tokens.get('access_token', 'N/A')}") - print(f"CSRF Token: {tokens.get('csrf_token', 'N/A')}") - - # Debug cookies from session - print("\n--- Session Cookies ---") - for cookie in client.network.session.cookies: - print(f"{cookie.name}={cookie.value}") - - # Test accessing UI via requests - print("\n--- Testing UI Access ---") - ui_url = env.url.rstrip('/').replace('/api/v1', '') - print(f"UI URL: {ui_url}") - - # Try to access UI home page - ui_response = client.network.session.get(ui_url, timeout=30, allow_redirects=True) - print(f"Status Code: {ui_response.status_code}") - print(f"URL: {ui_response.url}") - - # Check response headers - print("\n--- Response Headers ---") - pprint(dict(ui_response.headers)) - - print("\n--- Response Content Preview (200 chars) ---") - print(repr(ui_response.text[:200])) - - if ui_response.status_code == 200: - print("\nUI Access: Success") - - # Try to access a dashboard - # For testing, just use the home page - print("\n--- Checking if login is required ---") - if "login" in ui_response.url.lower() or "login" in ui_response.text.lower(): - print("❌ Not logged in to UI") - else: - print("✅ Logged in to UI") - - except Exception as e: - print(f"\n❌ Error: {type(e).__name__}: {e}") - import traceback - print("\nStack Trace:") - print(traceback.format_exc()) - - -if __name__ == "__main__": - main() diff --git a/backend/test_decryption.py b/backend/test_decryption.py deleted file mode 100644 index d05b74e8..00000000 --- a/backend/test_decryption.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -"""Test script to debug 
API key decryption issue.""" - -from src.core.database import SessionLocal -from src.models.llm import LLMProvider -from cryptography.fernet import Fernet -import os - -# Get the encryption key -key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode() -print(f"Encryption key (first 20 chars): {key[:20]}") -print(f"Encryption key length: {len(key)}") - -# Create Fernet instance -fernet = Fernet(key) - -# Get provider from database -db = SessionLocal() -provider = db.query(LLMProvider).filter(LLMProvider.id == '6c899741-4108-4196-aea4-f38ad2f0150e').first() - -if provider: - print("\nProvider found:") - print(f" ID: {provider.id}") - print(f" Name: {provider.name}") - print(f" Encrypted API Key (first 50 chars): {provider.api_key[:50]}") - print(f" Encrypted API Key Length: {len(provider.api_key)}") - - # Test decryption - print("\nAttempting decryption...") - try: - decrypted = fernet.decrypt(provider.api_key.encode()).decode() - print("Decryption successful!") - print(f" Decrypted key length: {len(decrypted)}") - print(f" Decrypted key (first 8 chars): {decrypted[:8]}") - print(f" Decrypted key is empty: {len(decrypted) == 0}") - except Exception as e: - print(f"Decryption failed with error: {e}") - print(f"Error type: {type(e).__name__}") - import traceback - traceback.print_exc() -else: - print("Provider not found") - -db.close() diff --git a/backend/test_encryption.py b/backend/test_encryption.py deleted file mode 100644 index 351a235c..00000000 --- a/backend/test_encryption.py +++ /dev/null @@ -1 +0,0 @@ -[{"key[": 20, ")\n\n# Create Fernet instance\nfernet = Fernet(key)\n\n# Test encrypting an empty string\nempty_encrypted = fernet.encrypt(b\"": ".", "print(f": "nEncrypted empty string: {empty_encrypted"}, {"test-api-key-12345\"\ntest_encrypted = fernet.encrypt(test_key.encode()).decode()\nprint(f": "nEncrypted test key: {test_encrypted"}, 
{"gAAAAABphhwSZie0OwXjJ78Fk-c4Uo6doNJXipX49AX7Bypzp4ohiRX3hXPXKb45R1vhNUOqbm6Ke3-eRwu_KdWMZ9chFBKmqw==\"\nprint(f": "nStored encrypted key: {stored_key"}, {"len(stored_key)}": "Check if stored key matches empty string encryption\nif stored_key == empty_encrypted:\n print(", "string!": "else:\n print(", "print(f": "mpty string encryption: {empty_encrypted"}, {"stored_key}": "Try to decrypt the stored key\ntry:\n decrypted = fernet.decrypt(stored_key.encode()).decode()\n print(f", "print(f": "ecrypted key length: {len(decrypted)"}, {")\nexcept Exception as e:\n print(f": "nDecryption failed with error: {e"}] \ No newline at end of file diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py index 8a30f6a3..4469fbcd 100644 --- a/backend/tests/test_auth.py +++ b/backend/tests/test_auth.py @@ -7,12 +7,14 @@ sys.path.append(str(Path(__file__).parent.parent / "src")) import pytest from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker +from cryptography.fernet import Fernet from src.core.database import Base from src.models.auth import User, Role, Permission, ADGroupMapping from src.services.auth_service import AuthService from src.core.auth.repository import AuthRepository from src.core.auth.security import verify_password, get_password_hash from src.scripts.create_admin import create_admin +from src.scripts.init_auth_db import ensure_encryption_key # Create in-memory SQLite database for testing SQLALCHEMY_DATABASE_URL = "sqlite:///:memory:" @@ -189,3 +191,46 @@ def test_create_admin_is_idempotent_for_existing_user(monkeypatch, db_session): assert created_user.email is None assert verify_password("bootstrap-pass", created_user.password_hash) assert not verify_password("new-password", created_user.password_hash) + + +def test_ensure_encryption_key_generates_backend_env_file(monkeypatch, tmp_path): + """Test first-time initialization generates and persists a Fernet key.""" + env_file = tmp_path / ".env" + 
monkeypatch.delenv("ENCRYPTION_KEY", raising=False) + + generated_key = ensure_encryption_key(env_file) + + assert generated_key + assert env_file.exists() + assert env_file.read_text(encoding="utf-8").strip() == f"ENCRYPTION_KEY={generated_key}" + assert verify_fernet_key(generated_key) + + +def test_ensure_encryption_key_reuses_existing_env_file_value(monkeypatch, tmp_path): + """Test persisted key is reused without rewriting file contents.""" + env_file = tmp_path / ".env" + existing_key = Fernet.generate_key().decode() + env_file.write_text(f"ENCRYPTION_KEY={existing_key}\nOTHER=value\n", encoding="utf-8") + monkeypatch.delenv("ENCRYPTION_KEY", raising=False) + + reused_key = ensure_encryption_key(env_file) + + assert reused_key == existing_key + assert env_file.read_text(encoding="utf-8") == f"ENCRYPTION_KEY={existing_key}\nOTHER=value\n" + + +def test_ensure_encryption_key_prefers_process_environment(monkeypatch, tmp_path): + """Test explicit process environment has priority over file generation.""" + env_file = tmp_path / ".env" + runtime_key = Fernet.generate_key().decode() + monkeypatch.setenv("ENCRYPTION_KEY", runtime_key) + + resolved_key = ensure_encryption_key(env_file) + + assert resolved_key == runtime_key + assert not env_file.exists() + + +def verify_fernet_key(value: str) -> bool: + Fernet(value.encode()) + return True diff --git a/bootstrap.json b/bootstrap.json index a8ca01b3..efe77744 100644 --- a/bootstrap.json +++ b/bootstrap.json @@ -4,7 +4,8 @@ "source_snapshot_ref": "v1.0.0-rc1", "created_by": "operator", "allowed_hosts": [ - "internal-repo.company.com" + "rusal.ru", + "rusal.com" ], "prohibited_artifact_categories": [ "test-data", diff --git a/check_test_data.py b/check_test_data.py deleted file mode 100644 index c41aa2f7..00000000 --- a/check_test_data.py +++ /dev/null @@ -1,27 +0,0 @@ -import os - -def check_file(filepath): - try: - with open(filepath, 'r', encoding='utf-8') as f: - content = f.read() - if '@TIER: CRITICAL' in content: 
- if '@TEST_DATA' not in content: - return filepath - except Exception as e: - print(f"Error reading {filepath}: {e}") - return None - -missing_files = [] -for root_dir in ['backend/src', 'frontend/src']: - for dirpath, _, filenames in os.walk(root_dir): - for name in filenames: - ext = os.path.splitext(name)[1] - if ext in ['.py', '.js', '.ts', '.svelte']: - full_path = os.path.join(dirpath, name) - res = check_file(full_path) - if res: - missing_files.append(res) - -print("Files missing @TEST_DATA:") -for f in missing_files: - print(f) diff --git a/docs/installation.md b/docs/installation.md index 53f3d477..d49f6fab 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -234,10 +234,12 @@ source .venv/bin/activate python src/scripts/init_auth_db.py ``` +При первом запуске скрипт создаёт `backend/.env` и записывает туда `ENCRYPTION_KEY`, если ключ не был задан через окружение заранее. + ### 2. Создание администратора ```bash -python src/scripts/create_admin.py --username admin --password admin +python src/scripts/create_admin.py --username admin --password '' ``` **Важно**: После создания администратора измените пароль в продакшн-среде! diff --git a/docs/security-remediation.md b/docs/security-remediation.md new file mode 100644 index 00000000..ce5517cc --- /dev/null +++ b/docs/security-remediation.md @@ -0,0 +1,57 @@ +# Security Remediation + +## Immediate actions + +1. Revoke and rotate any Gitea PAT previously stored in `backend/mappings.db`. +2. Rotate any secrets encrypted with historical `ENCRYPTION_KEY` values. +3. Reset affected local admin/test credentials if they ever existed outside disposable dev environments. + +## Purge git history + +The repository history contains binary databases with sensitive data. Rewrite history before treating the repository as clean. 
+ +Recommended targets: + +- `backend/mappings.db` +- `backend/tasks.db` +- `backend/auth.db` +- `backend/backend/auth.db` +- `backend/test_auth_debug.py` +- `backend/test_decryption.py` +- `backend/test_encryption.py` + +Example with `git filter-repo`: + +```bash +git filter-repo \ + --invert-paths \ + --path backend/mappings.db \ + --path backend/tasks.db \ + --path backend/auth.db \ + --path backend/backend/auth.db \ + --path backend/test_auth_debug.py \ + --path backend/test_decryption.py \ + --path backend/test_encryption.py +``` + +After rewrite: + +```bash +git for-each-ref --format='delete %(refname)' refs/original | git update-ref --stdin +git reflog expire --expire=now --all +git gc --prune=now --aggressive +git push --force --all +git push --force --tags +``` + +Everyone with old clones must re-clone or hard-reset to the rewritten history. + +## Ongoing checks + +Run: + +```bash +./scripts/scan_secrets.sh +``` + +before release and before pushing history-rewrite results. diff --git a/scripts/scan_secrets.sh b/scripts/scan_secrets.sh new file mode 100755 index 00000000..8e5bdf34 --- /dev/null +++ b/scripts/scan_secrets.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +cd "${ROOT_DIR}" + +echo "[scan] working tree patterns" +rg -nI \ + --glob '!frontend/node_modules/**' \ + --glob '!.svelte-kit/**' \ + --glob '!dist/**' \ + --glob '!build/**' \ + '(AKIA[0-9A-Z]{16}|ASIA[0-9A-Z]{16}|AIza[0-9A-Za-z\-_]{35}|sk_live_[0-9A-Za-z]{16,}|sk_test_[0-9A-Za-z]{16,}|gh[pousr]_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9\-_]{20,}|hf_[A-Za-z0-9]{20,}|-----BEGIN (RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----|https?://[^/@[:space:]]+:[^@[:space:]]+@|ENCRYPTION_KEY[[:space:]]*=|AUTH_SECRET_KEY[[:space:]]*=)' \ + | rg -v 'oauth2:(token|secret)@' || true + +echo "[scan] tracked env and database artifacts" +git ls-files | rg '(^|/)\.env($|\.)|(^|/).*\.(db|sqlite|pem|p12|pfx|crt|key)$' || true + +echo "[scan] git history patterns" +git grep -nIE \ + '(AKIA[0-9A-Z]{16}|ASIA[0-9A-Z]{16}|AIza[0-9A-Za-z\-_]{35}|sk_live_[0-9A-Za-z]{16,}|sk_test_[0-9A-Za-z]{16,}|gh[pousr]_[A-Za-z0-9_]{20,}|github_pat_[A-Za-z0-9_]{20,}|glpat-[A-Za-z0-9\-_]{20,}|hf_[A-Za-z0-9]{20,}|-----BEGIN (RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----|https?://[^/@[:space:]]+:[^@[:space:]]+@|ENCRYPTION_KEY[[:space:]]*=|AUTH_SECRET_KEY[[:space:]]*=)' \ + $(git rev-list --all) \ + | rg -v 'oauth2:(token|secret)@' || true