# [DEF:NetworkModule:Module] # # @COMPLEXITY: 3 # @SEMANTICS: network, http, client, api, requests, session, authentication # @PURPOSE: Инкапсулирует низкоуровневую HTTP-логику для взаимодействия с Superset API, включая аутентификацию, управление сессией, retry-логику и обработку ошибок. # @LAYER: Infra # @RELATION: [DEPENDS_ON] ->[LoggerModule] # @PUBLIC_API: APIClient # [SECTION: IMPORTS] from typing import Optional, Dict, Any, List, Union, cast, Tuple import json import io from pathlib import Path import threading import time import requests from requests.adapters import HTTPAdapter import urllib3 from urllib3.util.retry import Retry from ..logger import logger as app_logger, belief_scope # [/SECTION] # [DEF:SupersetAPIError:Class] # @COMPLEXITY: 1 # @PURPOSE: Base exception for all Superset API related errors. class SupersetAPIError(Exception): # [DEF:__init__:Function] # @COMPLEXITY: 1 # @PURPOSE: Initializes the exception with a message and context. # @PRE: message is a string, context is a dict. # @POST: Exception is initialized with context. def __init__(self, message: str = "Superset API error", **context: Any): with belief_scope("SupersetAPIError.__init__"): self.context = context super().__init__(f"[API_FAILURE] {message} | Context: {self.context}") # [/DEF:__init__:Function] # [/DEF:SupersetAPIError:Class] # [DEF:AuthenticationError:Class] # @COMPLEXITY: 1 # @PURPOSE: Exception raised when authentication fails. class AuthenticationError(SupersetAPIError): # [DEF:__init__:Function] # @COMPLEXITY: 1 # @PURPOSE: Initializes the authentication error. # @PRE: message is a string, context is a dict. # @POST: AuthenticationError is initialized. def __init__(self, message: str = "Authentication failed", **context: Any): with belief_scope("AuthenticationError.__init__"): super().__init__(message, type="authentication", **context) # [/DEF:__init__:Function] # [/DEF:AuthenticationError:Class] # [DEF:PermissionDeniedError:Class] # @PURPOSE: Exception raised when access is denied. class PermissionDeniedError(AuthenticationError): # [DEF:__init__:Function] # @PURPOSE: Initializes the permission denied error. # @PRE: message is a string, context is a dict. # @POST: PermissionDeniedError is initialized. def __init__(self, message: str = "Permission denied", **context: Any): with belief_scope("PermissionDeniedError.__init__"): super().__init__(message, **context) # [/DEF:__init__:Function] # [/DEF:PermissionDeniedError:Class] # [DEF:DashboardNotFoundError:Class] # @PURPOSE: Exception raised when a dashboard cannot be found. class DashboardNotFoundError(SupersetAPIError): # [DEF:__init__:Function] # @PURPOSE: Initializes the not found error with resource ID. # @PRE: resource_id is provided. # @POST: DashboardNotFoundError is initialized. def __init__(self, resource_id: Union[int, str], message: str = "Dashboard not found", **context: Any): with belief_scope("DashboardNotFoundError.__init__"): super().__init__(f"Dashboard '{resource_id}' {message}", subtype="not_found", resource_id=resource_id, **context) # [/DEF:__init__:Function] # [/DEF:DashboardNotFoundError:Class] # [DEF:NetworkError:Class] # @PURPOSE: Exception raised when a network level error occurs. class NetworkError(Exception): # [DEF:NetworkError.__init__:Function] # @PURPOSE: Initializes the network error. # @PRE: message is a string. # @POST: NetworkError is initialized. def __init__(self, message: str = "Network connection failed", **context: Any): with belief_scope("NetworkError.__init__"): self.context = context super().__init__(f"[NETWORK_FAILURE] {message} | Context: {self.context}") # [/DEF:NetworkError.__init__:Function] # [/DEF:NetworkError:Class] # [DEF:SupersetAuthCache:Class] # @PURPOSE: Process-local cache for Superset access/csrf tokens keyed by environment credentials. # @PRE: base_url and username are stable strings. # @POST: Cached entries expire automatically by TTL and can be reused across requests. class SupersetAuthCache: TTL_SECONDS = 300 _lock = threading.Lock() _entries: Dict[Tuple[str, str, bool], Dict[str, Any]] = {} @classmethod def build_key(cls, base_url: str, auth: Optional[Dict[str, Any]], verify_ssl: bool) -> Tuple[str, str, bool]: username = "" if isinstance(auth, dict): username = str(auth.get("username") or "").strip() return (str(base_url or "").strip(), username, bool(verify_ssl)) @classmethod # [DEF:SupersetAuthCache.get:Function] def get(cls, key: Tuple[str, str, bool]) -> Optional[Dict[str, str]]: now = time.time() with cls._lock: payload = cls._entries.get(key) if not payload: return None expires_at = float(payload.get("expires_at") or 0) if expires_at <= now: cls._entries.pop(key, None) return None tokens = payload.get("tokens") if not isinstance(tokens, dict): cls._entries.pop(key, None) return None return { "access_token": str(tokens.get("access_token") or ""), "csrf_token": str(tokens.get("csrf_token") or ""), } # [/DEF:SupersetAuthCache.get:Function] @classmethod # [DEF:SupersetAuthCache.set:Function] def set(cls, key: Tuple[str, str, bool], tokens: Dict[str, str], ttl_seconds: Optional[int] = None) -> None: normalized_ttl = max(int(ttl_seconds or cls.TTL_SECONDS), 1) with cls._lock: cls._entries[key] = { "tokens": { "access_token": str(tokens.get("access_token") or ""), "csrf_token": str(tokens.get("csrf_token") or ""), }, "expires_at": time.time() + normalized_ttl, } # [/DEF:SupersetAuthCache.set:Function] @classmethod def invalidate(cls, key: Tuple[str, str, bool]) -> None: with cls._lock: cls._entries.pop(key, None) # [/DEF:SupersetAuthCache:Class] # [DEF:APIClient:Class] # @COMPLEXITY: 3 # @PURPOSE: Synchronous Superset API client with process-local auth token caching. # @RELATION: [DEPENDS_ON] ->[SupersetAuthCache] # @RELATION: [DEPENDS_ON] ->[LoggerModule] class APIClient: DEFAULT_TIMEOUT = 30 # [DEF:APIClient.__init__:Function] # @PURPOSE: Инициализирует API клиент с конфигурацией, сессией и логгером. # @PARAM: config (Dict[str, Any]) - Конфигурация. # @PARAM: verify_ssl (bool) - Проверять ли SSL. # @PARAM: timeout (int) - Таймаут запросов. # @PRE: config must contain 'base_url' and 'auth'. # @POST: APIClient instance is initialized with a session. def __init__(self, config: Dict[str, Any], verify_ssl: bool = True, timeout: int = DEFAULT_TIMEOUT): with belief_scope("__init__"): app_logger.info("[APIClient.__init__][Entry] Initializing APIClient.") self.base_url: str = self._normalize_base_url(config.get("base_url", "")) self.api_base_url: str = f"{self.base_url}/api/v1" self.auth = config.get("auth") self.request_settings = {"verify_ssl": verify_ssl, "timeout": timeout} self.session = self._init_session() self._tokens: Dict[str, str] = {} self._auth_cache_key = SupersetAuthCache.build_key( self.base_url, self.auth, verify_ssl, ) self._authenticated = False app_logger.info("[APIClient.__init__][Exit] APIClient initialized.") # [/DEF:APIClient.__init__:Function] # [DEF:_init_session:Function] # @PURPOSE: Создает и настраивает `requests.Session` с retry-логикой. # @PRE: self.request_settings must be initialized. # @POST: Returns a configured requests.Session instance. # @RETURN: requests.Session - Настроенная сессия. def _init_session(self) -> requests.Session: with belief_scope("_init_session"): session = requests.Session() # Create a custom adapter that handles TLS issues class TLSAdapter(HTTPAdapter): def init_poolmanager(self, connections, maxsize, block=False): from urllib3.poolmanager import PoolManager import ssl # Create an SSL context that ignores TLSv1 unrecognized name errors ctx = ssl.create_default_context() ctx.set_ciphers('HIGH:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!SRP:!CAMELLIA') # Ignore TLSV1_UNRECOGNIZED_NAME errors by disabling hostname verification # This is safe when verify_ssl is false (we're already not verifying the certificate) ctx.check_hostname = False self.poolmanager = PoolManager( num_pools=connections, maxsize=maxsize, block=block, ssl_context=ctx ) retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504]) adapter = TLSAdapter(max_retries=retries) session.mount('http://', adapter) session.mount('https://', adapter) if not self.request_settings["verify_ssl"]: urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) app_logger.warning("[_init_session][State] SSL verification disabled.") # When verify_ssl is false, we should also disable hostname verification session.verify = False else: session.verify = True return session # [/DEF:_init_session:Function] # [DEF:_normalize_base_url:Function] # @PURPOSE: Normalize Superset environment URL to base host/path without trailing slash and /api/v1 suffix. # @PRE: raw_url can be empty. # @POST: Returns canonical base URL suitable for building API endpoints. # @RETURN: str def _normalize_base_url(self, raw_url: str) -> str: normalized = str(raw_url or "").strip().rstrip("/") if normalized.lower().endswith("/api/v1"): normalized = normalized[:-len("/api/v1")] return normalized.rstrip("/") # [/DEF:_normalize_base_url:Function] # [DEF:_build_api_url:Function] # @PURPOSE: Build absolute Superset API URL for endpoint using canonical /api/v1 base. # @PRE: endpoint is relative path or absolute URL. # @POST: Returns full URL without accidental duplicate slashes. # @RETURN: str def _build_api_url(self, endpoint: str) -> str: normalized_endpoint = str(endpoint or "").strip() if normalized_endpoint.startswith("http://") or normalized_endpoint.startswith("https://"): return normalized_endpoint if not normalized_endpoint.startswith("/"): normalized_endpoint = f"/{normalized_endpoint}" if normalized_endpoint.startswith("/api/v1/") or normalized_endpoint == "/api/v1": return f"{self.base_url}{normalized_endpoint}" return f"{self.api_base_url}{normalized_endpoint}" # [/DEF:_build_api_url:Function] # [DEF:APIClient.authenticate:Function] # @PURPOSE: Выполняет аутентификацию в Superset API и получает access и CSRF токены. # @PRE: self.auth and self.base_url must be valid. # @POST: `self._tokens` заполнен, `self._authenticated` установлен в `True`. # @RETURN: Dict[str, str] - Словарь с токенами. # @THROW: AuthenticationError, NetworkError - при ошибках. # @RELATION: [CALLS] ->[SupersetAuthCache.get] # @RELATION: [CALLS] ->[SupersetAuthCache.set] def authenticate(self) -> Dict[str, str]: with belief_scope("authenticate"): app_logger.info("[authenticate][Enter] Authenticating to %s", self.base_url) cached_tokens = SupersetAuthCache.get(self._auth_cache_key) if cached_tokens and cached_tokens.get("access_token") and cached_tokens.get("csrf_token"): self._tokens = cached_tokens self._authenticated = True app_logger.info("[authenticate][CacheHit] Reusing cached Superset auth tokens for %s", self.base_url) return self._tokens try: login_url = f"{self.api_base_url}/security/login" # Log the payload keys and values (masking password) masked_auth = {k: ("******" if k == "password" else v) for k, v in self.auth.items()} app_logger.info(f"[authenticate][Debug] Login URL: {login_url}") app_logger.info(f"[authenticate][Debug] Auth payload: {masked_auth}") response = self.session.post(login_url, json=self.auth, timeout=self.request_settings["timeout"]) if response.status_code != 200: app_logger.error(f"[authenticate][Error] Status: {response.status_code}, Response: {response.text}") response.raise_for_status() access_token = response.json()["access_token"] csrf_url = f"{self.api_base_url}/security/csrf_token/" csrf_response = self.session.get(csrf_url, headers={"Authorization": f"Bearer {access_token}"}, timeout=self.request_settings["timeout"]) csrf_response.raise_for_status() self._tokens = {"access_token": access_token, "csrf_token": csrf_response.json()["result"]} self._authenticated = True SupersetAuthCache.set(self._auth_cache_key, self._tokens) app_logger.info("[authenticate][Exit] Authenticated successfully.") return self._tokens except requests.exceptions.HTTPError as e: SupersetAuthCache.invalidate(self._auth_cache_key) status_code = e.response.status_code if e.response is not None else None if status_code in [502, 503, 504]: raise NetworkError(f"Environment unavailable during authentication (Status {status_code})", status_code=status_code) from e raise AuthenticationError(f"Authentication failed: {e}") from e except (requests.exceptions.RequestException, KeyError) as e: SupersetAuthCache.invalidate(self._auth_cache_key) raise NetworkError(f"Network or parsing error during authentication: {e}") from e # [/DEF:APIClient.authenticate:Function] @property # [DEF:headers:Function] # @PURPOSE: Возвращает HTTP-заголовки для аутентифицированных запросов. # @PRE: APIClient is initialized and authenticated or can be authenticated. # @POST: Returns headers including auth tokens. def headers(self) -> Dict[str, str]: if not self._authenticated: self.authenticate() return { "Authorization": f"Bearer {self._tokens['access_token']}", "X-CSRFToken": self._tokens.get("csrf_token", ""), "Referer": self.base_url, "Content-Type": "application/json" } # [/DEF:headers:Function] # [DEF:request:Function] # @PURPOSE: Выполняет универсальный HTTP-запрос к API. # @PARAM: method (str) - HTTP метод. # @PARAM: endpoint (str) - API эндпоинт. # @PARAM: headers (Optional[Dict]) - Дополнительные заголовки. # @PARAM: raw_response (bool) - Возвращать ли сырой ответ. # @PRE: method and endpoint must be strings. # @POST: Returns response content or raw Response object. # @RETURN: `requests.Response` если `raw_response=True`, иначе `dict`. # @THROW: SupersetAPIError, NetworkError и их подклассы. def request(self, method: str, endpoint: str, headers: Optional[Dict] = None, raw_response: bool = False, **kwargs) -> Union[requests.Response, Dict[str, Any]]: full_url = self._build_api_url(endpoint) _headers = self.headers.copy() if headers: _headers.update(headers) try: response = self.session.request(method, full_url, headers=_headers, **kwargs) response.raise_for_status() return response if raw_response else response.json() except requests.exceptions.HTTPError as e: if e.response is not None and e.response.status_code == 401: self._authenticated = False self._tokens = {} SupersetAuthCache.invalidate(self._auth_cache_key) self._handle_http_error(e, endpoint) except requests.exceptions.RequestException as e: self._handle_network_error(e, full_url) # [/DEF:request:Function] # [DEF:_handle_http_error:Function] # @PURPOSE: (Helper) Преобразует HTTP ошибки в кастомные исключения. # @PARAM: e (requests.exceptions.HTTPError) - Ошибка. # @PARAM: endpoint (str) - Эндпоинт. # @PRE: e must be a valid HTTPError with a response. # @POST: Raises a specific SupersetAPIError or subclass. def _handle_http_error(self, e: requests.exceptions.HTTPError, endpoint: str): with belief_scope("_handle_http_error"): status_code = e.response.status_code if status_code == 502 or status_code == 503 or status_code == 504: raise NetworkError(f"Environment unavailable (Status {status_code})", status_code=status_code) from e if status_code == 404: if self._is_dashboard_endpoint(endpoint): raise DashboardNotFoundError(endpoint) from e raise SupersetAPIError( f"API resource not found at endpoint '{endpoint}'", status_code=status_code, endpoint=endpoint, subtype="not_found", ) from e if status_code == 403: raise PermissionDeniedError() from e if status_code == 401: raise AuthenticationError() from e raise SupersetAPIError(f"API Error {status_code}: {e.response.text}") from e # [/DEF:_handle_http_error:Function] # [DEF:_is_dashboard_endpoint:Function] # @PURPOSE: Determine whether an API endpoint represents a dashboard resource for 404 translation. # @PRE: endpoint may be relative or absolute. # @POST: Returns true only for dashboard-specific endpoints. def _is_dashboard_endpoint(self, endpoint: str) -> bool: normalized_endpoint = str(endpoint or "").strip().lower() if not normalized_endpoint: return False if normalized_endpoint.startswith("http://") or normalized_endpoint.startswith("https://"): try: normalized_endpoint = "/" + normalized_endpoint.split("/api/v1", 1)[1].lstrip("/") except IndexError: return False if normalized_endpoint.startswith("/api/v1/"): normalized_endpoint = normalized_endpoint[len("/api/v1"):] return normalized_endpoint.startswith("/dashboard/") or normalized_endpoint == "/dashboard" # [/DEF:_is_dashboard_endpoint:Function] # [DEF:_handle_network_error:Function] # @PURPOSE: (Helper) Преобразует сетевые ошибки в `NetworkError`. # @PARAM: e (requests.exceptions.RequestException) - Ошибка. # @PARAM: url (str) - URL. # @PRE: e must be a RequestException. # @POST: Raises a NetworkError. def _handle_network_error(self, e: requests.exceptions.RequestException, url: str): with belief_scope("_handle_network_error"): if isinstance(e, requests.exceptions.Timeout): msg = "Request timeout" elif isinstance(e, requests.exceptions.ConnectionError): msg = "Connection error" else: msg = f"Unknown network error: {e}" raise NetworkError(msg, url=url) from e # [/DEF:_handle_network_error:Function] # [DEF:upload_file:Function] # @PURPOSE: Загружает файл на сервер через multipart/form-data. # @PARAM: endpoint (str) - Эндпоинт. # @PARAM: file_info (Dict[str, Any]) - Информация о файле. # @PARAM: extra_data (Optional[Dict]) - Дополнительные данные. # @PARAM: timeout (Optional[int]) - Таймаут. # @PRE: file_info must contain 'file_obj' and 'file_name'. # @POST: File is uploaded and response returned. # @RETURN: Ответ API в виде словаря. # @THROW: SupersetAPIError, NetworkError, TypeError. def upload_file(self, endpoint: str, file_info: Dict[str, Any], extra_data: Optional[Dict] = None, timeout: Optional[int] = None) -> Dict: with belief_scope("upload_file"): full_url = self._build_api_url(endpoint) _headers = self.headers.copy() _headers.pop('Content-Type', None) file_obj, file_name, form_field = file_info.get("file_obj"), file_info.get("file_name"), file_info.get("form_field", "file") files_payload = {} if isinstance(file_obj, (str, Path)): with open(file_obj, 'rb') as f: files_payload = {form_field: (file_name, f.read(), 'application/x-zip-compressed')} elif isinstance(file_obj, io.BytesIO): files_payload = {form_field: (file_name, file_obj.getvalue(), 'application/x-zip-compressed')} else: raise TypeError(f"Unsupported file_obj type: {type(file_obj)}") return self._perform_upload(full_url, files_payload, extra_data, _headers, timeout) # [/DEF:upload_file:Function] # [DEF:_perform_upload:Function] # @PURPOSE: (Helper) Выполняет POST запрос с файлом. # @PARAM: url (str) - URL. # @PARAM: files (Dict) - Файлы. # @PARAM: data (Optional[Dict]) - Данные. # @PARAM: headers (Dict) - Заголовки. # @PARAM: timeout (Optional[int]) - Таймаут. # @PRE: url, files, and headers must be provided. # @POST: POST request is performed and JSON response returned. # @RETURN: Dict - Ответ. def _perform_upload(self, url: str, files: Dict, data: Optional[Dict], headers: Dict, timeout: Optional[int]) -> Dict: with belief_scope("_perform_upload"): try: response = self.session.post(url, files=files, data=data or {}, headers=headers, timeout=timeout or self.request_settings["timeout"]) response.raise_for_status() if response.status_code == 200: try: return response.json() except Exception as json_e: app_logger.debug(f"[_perform_upload][Debug] Response is not valid JSON: {response.text[:200]}...") raise SupersetAPIError(f"API error during upload: Response is not valid JSON: {json_e}") from json_e return response.json() except requests.exceptions.HTTPError as e: raise SupersetAPIError(f"API error during upload: {e.response.text}") from e except requests.exceptions.RequestException as e: raise NetworkError(f"Network error during upload: {e}", url=url) from e # [/DEF:_perform_upload:Function] # [DEF:fetch_paginated_count:Function] # @PURPOSE: Получает общее количество элементов для пагинации. # @PARAM: endpoint (str) - Эндпоинт. # @PARAM: query_params (Dict) - Параметры запроса. # @PARAM: count_field (str) - Поле с количеством. # @PRE: query_params must be a dictionary. # @POST: Returns total count of items. # @RETURN: int - Количество. def fetch_paginated_count(self, endpoint: str, query_params: Dict, count_field: str = "count") -> int: with belief_scope("fetch_paginated_count"): response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query_params)})) return response_json.get(count_field, 0) # [/DEF:fetch_paginated_count:Function] # [DEF:fetch_paginated_data:Function] # @PURPOSE: Автоматически собирает данные со всех страниц пагинированного эндпоинта. # @PARAM: endpoint (str) - Эндпоинт. # @PARAM: pagination_options (Dict[str, Any]) - Опции пагинации. # @PRE: pagination_options must contain 'base_query', 'results_field'. 'total_count' is optional. # @POST: Returns all items across all pages. # @RETURN: List[Any] - Список данных. def fetch_paginated_data(self, endpoint: str, pagination_options: Dict[str, Any]) -> List[Any]: with belief_scope("fetch_paginated_data"): base_query = pagination_options["base_query"] total_count = pagination_options.get("total_count") results_field = pagination_options["results_field"] count_field = pagination_options.get("count_field", "count") page_size = base_query.get('page_size', 1000) assert page_size > 0, "'page_size' must be a positive number." results = [] page = 0 # Fetch first page to get data and total count if not provided query = {**base_query, 'page': page} response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)})) first_page_results = response_json.get(results_field, []) results.extend(first_page_results) if total_count is None: total_count = response_json.get(count_field, len(first_page_results)) app_logger.debug(f"[fetch_paginated_data][State] Total count resolved from first page: {total_count}") # Fetch remaining pages total_pages = (total_count + page_size - 1) // page_size for page in range(1, total_pages): query = {**base_query, 'page': page} response_json = cast(Dict[str, Any], self.request("GET", endpoint, params={"q": json.dumps(query)})) results.extend(response_json.get(results_field, [])) return results # [/DEF:fetch_paginated_data:Function] # [/DEF:APIClient:Class] # [/DEF:NetworkModule:Module]