# Source code for scr_financial.data.collectors.eba_collector

"""
EBA data collector for financial network analysis.

This module provides the EBACollector class for retrieving data from the
European Banking Authority (EBA) transparency exercises and stress tests.
"""

import logging
import pandas as pd
import numpy as np
from typing import List, Optional
from datetime import datetime

logger = logging.getLogger(__name__)

# Per-bank solvency baselines consumed by the sample data generators.
# Each row is (bank identifier, base CET1 ratio, base Tier 1 leverage ratio);
# the rows are expanded into a mapping keyed by bank identifier.
_BANK_SOLVENCY_PARAMS: dict[str, dict[str, float]] = {
    bank_id: {"base_cet1": cet1, "base_t1_leverage": t1_leverage}
    for bank_id, cet1, t1_leverage in (
        ("DE_DBK", 13.2, 4.5),   # Deutsche Bank
        ("FR_BNP", 12.8, 4.7),   # BNP Paribas
        ("ES_SAN", 11.9, 5.1),   # Santander
        ("IT_UCG", 13.5, 5.3),   # UniCredit
        ("NL_ING", 14.2, 4.9),   # ING
        ("SE_NDA", 16.1, 5.5),   # Nordea
        ("CH_UBS", 14.5, 5.2),   # UBS
        ("UK_BARC", 13.8, 4.8),  # Barclays
        ("UK_HSBC", 15.2, 5.5),  # HSBC
        ("FR_ACA", 13.1, 4.6),   # Credit Agricole
    )
}

# Per-bank liquidity baselines consumed by the sample data generators.
# Each row is (bank identifier, base LCR, base NSFR).
_BANK_LIQUIDITY_PARAMS: dict[str, dict[str, float]] = {
    bank_id: {"base_lcr": lcr, "base_nsfr": nsfr}
    for bank_id, lcr, nsfr in (
        ("DE_DBK", 145.0, 115.0),   # Deutsche Bank
        ("FR_BNP", 152.0, 118.0),   # BNP Paribas
        ("ES_SAN", 158.0, 112.0),   # Santander
        ("IT_UCG", 165.0, 110.0),   # UniCredit
        ("NL_ING", 138.0, 115.0),   # ING
        ("SE_NDA", 172.0, 120.0),   # Nordea
        ("CH_UBS", 168.0, 117.0),   # UBS
        ("UK_BARC", 156.0, 113.0),  # Barclays
        ("UK_HSBC", 160.0, 116.0),  # HSBC
        ("FR_ACA", 149.0, 114.0),   # Credit Agricole
    )
}

# Fallback baselines for bank identifiers missing from the tables above.
_DEFAULT_BASE_SOLVENCY: dict[str, float] = dict(base_cet1=12.0, base_t1_leverage=4.5)
_DEFAULT_BASE_LIQUIDITY: dict[str, float] = dict(base_lcr=150.0, base_nsfr=110.0)


class EBACollector:
    """Collector for European Banking Authority (EBA) data.

    This class provides methods for retrieving data from EBA transparency
    exercises and stress tests, which include key bank metrics like CET1
    ratio, leverage ratio, and liquidity coverage ratio.

    No network access is performed yet: every public method currently returns
    synthetic sample data drawn from a random generator seeded in
    ``__init__``, so a fresh instance produces reproducible output.
    """

    # Banks used by the sample generators when the caller passes no bank_list.
    _DEFAULT_BANKS: tuple[str, ...] = (
        "DE_DBK",
        "FR_BNP",
        "ES_SAN",
        "IT_UCG",
        "NL_ING",
        "SE_NDA",
        "CH_UBS",
        "UK_BARC",
        "UK_HSBC",
        "FR_ACA",
    )

    # Transparency exercises known to the collector; an exercise is selected
    # when its 31 December reference date lies inside the requested range.
    _EXERCISE_YEARS: tuple[int, ...] = (2020, 2021, 2022, 2023, 2024)

    # Column layouts of the sample frames. Passing them explicitly keeps the
    # schema stable even when the requested range contains no reporting date.
    _TRANSPARENCY_COLUMNS: tuple[str, ...] = (
        "date",
        "bank_id",
        "CET1_ratio",
        "Tier1_leverage_ratio",
        "total_assets",
        "risk_weighted_assets",
    )
    _AGGREGATED_COLUMNS: tuple[str, ...] = (
        "date",
        "bank_id",
        "LCR",
        "NSFR",
        "liquid_assets",
        "net_cash_outflows",
    )

    def __init__(self) -> None:
        """Initialize the EBA data collector."""
        self.base_url = "https://www.eba.europa.eu/sites/default/documents/files/"
        self.transparency_url = self.base_url + "transparency-exercise/"
        self.stress_test_url = self.base_url + "eu-wide-stress-testing/"
        # Fixed seed so that sample data is reproducible per instance.
        self._rng = np.random.default_rng(seed=42)

    def __repr__(self) -> str:
        """Return a string representation of this collector.

        Returns:
            str: Class name and base URL.
        """
        return f"{self.__class__.__name__}(base_url={self.base_url!r})"

    def collect_transparency_data(
        self,
        start_date: str,
        end_date: str,
        bank_list: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Collect data from EBA transparency exercises.

        Args:
            start_date: Start date for data collection in 'YYYY-MM-DD' format.
            end_date: End date for data collection in 'YYYY-MM-DD' format.
            bank_list: List of bank identifiers to include.

        Returns:
            DataFrame containing bank solvency metrics. When at least one
            exercise falls inside the date range, the frame holds one copy of
            the sample data per matching exercise, tagged in an ``exercise``
            column, with a fresh 0..n-1 index. When no exercise matches, the
            plain sample data is returned without an ``exercise`` column.
        """
        # TODO: replace with real EBA API call when credentials are available
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)

        # Determine which transparency exercises to include based on dates.
        exercises = [
            str(year)
            for year in self._EXERCISE_YEARS
            if start <= pd.to_datetime(f"{year}-12-31") <= end
        ]

        # If no exercises match the date range, use sample data.
        if not exercises:
            return self._get_sample_transparency_data(start_date, end_date, bank_list)

        # Gather the per-exercise frames and concatenate once at the end:
        # growing a DataFrame inside the loop copies it on every iteration and
        # begins by concatenating an empty frame.
        frames = []
        for exercise in exercises:
            try:
                # In a real implementation, this would download from the EBA
                # website. For now, we use sample data.
                exercise_data = self._get_sample_transparency_data(
                    start_date, end_date, bank_list
                )
                exercise_data["exercise"] = exercise
            except Exception as exc:
                # Best effort: a failing exercise is logged and skipped so the
                # remaining exercises are still returned.
                logger.error(
                    "Error collecting %s transparency data: %s", exercise, exc
                )
            else:
                frames.append(exercise_data)

        if not frames:
            return pd.DataFrame()

        # ignore_index avoids repeating the labels 0..n-1 once per exercise.
        all_data = pd.concat(frames, ignore_index=True)

        # Filter by bank list if provided.
        if bank_list is not None and not all_data.empty:
            all_data = all_data[all_data["bank_id"].isin(bank_list)]

        return all_data

    def collect_aggregated_data(
        self,
        start_date: str,
        end_date: str,
        bank_list: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Collect aggregated data from EBA.

        Args:
            start_date: Start date for data collection in 'YYYY-MM-DD' format.
            end_date: End date for data collection in 'YYYY-MM-DD' format.
            bank_list: List of bank identifiers to include.

        Returns:
            DataFrame containing bank liquidity metrics.
        """
        # TODO: replace with real EBA API call when credentials are available
        # For now, use sample data.
        return self._get_sample_aggregated_data(start_date, end_date, bank_list)

    def _get_sample_transparency_data(
        self,
        start_date: str,
        end_date: str,
        bank_list: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Generate sample transparency exercise data for testing.

        Args:
            start_date: Start date for data collection in 'YYYY-MM-DD' format.
            end_date: End date for data collection in 'YYYY-MM-DD' format.
            bank_list: List of bank identifiers to include.

        Returns:
            DataFrame containing sample bank solvency metrics, one row per
            bank and calendar quarter end inside the range. The frame is empty
            (but keeps its columns) when the range holds no quarter end.
        """
        banks = self._DEFAULT_BANKS if bank_list is None else bank_list

        # Quarter-end dates within the range. An offset object is used rather
        # than the "Q" string alias, which newer pandas releases deprecate.
        date_range = pd.date_range(
            start=start_date,
            end=end_date,
            freq=pd.offsets.QuarterEnd(startingMonth=12),
        )
        origin = pd.to_datetime(start_date)

        rows = []
        for bank_id in banks:
            params = _BANK_SOLVENCY_PARAMS.get(bank_id, _DEFAULT_BASE_SOLVENCY)
            base_cet1 = params["base_cet1"]
            base_t1_leverage = params["base_t1_leverage"]

            for date in date_range:
                # Slight upward trend over time plus Gaussian noise.
                time_factor = (date - origin).days / 365
                cet1 = base_cet1 + 0.5 * time_factor + self._rng.normal(0, 0.3)
                t1_leverage = (
                    base_t1_leverage + 0.2 * time_factor + self._rng.normal(0, 0.15)
                )

                rows.append(
                    {
                        "date": date,
                        "bank_id": bank_id,
                        "CET1_ratio": round(cet1, 2),
                        "Tier1_leverage_ratio": round(t1_leverage, 2),
                        "total_assets": round(
                            self._rng.uniform(500, 2000) * 1e9, 0
                        ),  # in EUR
                        "risk_weighted_assets": round(
                            self._rng.uniform(200, 800) * 1e9, 0
                        ),  # in EUR
                    }
                )

        return pd.DataFrame(rows, columns=list(self._TRANSPARENCY_COLUMNS))

    def _get_sample_aggregated_data(
        self,
        start_date: str,
        end_date: str,
        bank_list: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Generate sample aggregated data for testing.

        Args:
            start_date: Start date for data collection in 'YYYY-MM-DD' format.
            end_date: End date for data collection in 'YYYY-MM-DD' format.
            bank_list: List of bank identifiers to include.

        Returns:
            DataFrame containing sample bank liquidity metrics, one row per
            bank and semi-annual month end inside the range. The frame is
            empty (but keeps its columns) when the range holds no such date.
        """
        banks = self._DEFAULT_BANKS if bank_list is None else bank_list

        # Semi-annual month-end dates within the range. An offset object is
        # used rather than the "6M" string alias, which newer pandas releases
        # deprecate.
        date_range = pd.date_range(
            start=start_date,
            end=end_date,
            freq=pd.offsets.MonthEnd(6),
        )
        origin = pd.to_datetime(start_date)

        rows = []
        for bank_id in banks:
            params = _BANK_LIQUIDITY_PARAMS.get(bank_id, _DEFAULT_BASE_LIQUIDITY)
            base_lcr = params["base_lcr"]
            base_nsfr = params["base_nsfr"]

            for date in date_range:
                # Slight upward trend over time plus Gaussian noise.
                time_factor = (date - origin).days / 365
                lcr = base_lcr + 5 * time_factor + self._rng.normal(0, 5)
                nsfr = base_nsfr + 2 * time_factor + self._rng.normal(0, 3)

                rows.append(
                    {
                        "date": date,
                        "bank_id": bank_id,
                        "LCR": round(lcr, 1),
                        "NSFR": round(nsfr, 1),
                        "liquid_assets": round(
                            self._rng.uniform(100, 500) * 1e9, 0
                        ),  # in EUR
                        "net_cash_outflows": round(
                            self._rng.uniform(50, 300) * 1e9, 0
                        ),  # in EUR
                    }
                )

        return pd.DataFrame(rows, columns=list(self._AGGREGATED_COLUMNS))