Source code for scr_financial.data.collectors.ecb_collector

"""
ECB data collector for financial network analysis.

This module provides the ECBCollector class for retrieving data from the
European Central Bank (ECB) Statistical Data Warehouse and other ECB sources.
"""

import logging
import pandas as pd
import numpy as np
from typing import List, Optional

logger = logging.getLogger(__name__)


[docs] class ECBCollector: """ Collector for European Central Bank (ECB) data. This class provides methods for retrieving data from ECB sources, including TARGET2 interbank payment data, CISS systemic stress indicator, and G-SIB data. """
[docs] def __init__(self) -> None: """Initialize the ECB data collector.""" self.sdw_base_url = "https://sdw.ecb.europa.eu/quickview.do?SERIES_KEY=" self.target2_base_url = ( "https://www.ecb.europa.eu/paym/target/target2/profuse/download/html/" ) self._rng = np.random.default_rng(seed=42)
[docs] def __repr__(self) -> str: """Return a string representation of this collector. Returns: str: Class name and base URL. """ return f"{self.__class__.__name__}(sdw_base_url={self.sdw_base_url!r})"
[docs] def collect_target2_data( self, start_date: str, end_date: str, bank_list: Optional[List[str]] = None, ) -> pd.DataFrame: """Collect TARGET2 interbank payment data. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. bank_list: List of bank identifiers to include. Returns: DataFrame containing interbank payment volumes. """ # TODO: replace with real EBA API call when credentials are available # For now, use sample data return self._get_sample_target2_data(start_date, end_date, bank_list)
[docs] def collect_ciss_data(self, start_date: str, end_date: str) -> pd.DataFrame: """Collect ECB Composite Indicator of Systemic Stress (CISS) data. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. Returns: DataFrame containing CISS values. """ # TODO: replace with real EBA API call when credentials are available # For now, use sample data return self._get_sample_ciss_data(start_date, end_date)
[docs] def collect_gsib_data( self, start_date: str, end_date: str, bank_list: Optional[List[str]] = None, ) -> pd.DataFrame: """Collect G-SIB (Global Systemically Important Bank) indicator data. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. bank_list: List of bank identifiers to include. Returns: DataFrame containing G-SIB indicators. """ # TODO: replace with real EBA API call when credentials are available # For now, use sample data return self._get_sample_gsib_data(start_date, end_date, bank_list)
def _get_sample_target2_data( self, start_date: str, end_date: str, bank_list: Optional[List[str]] = None, ) -> pd.DataFrame: """Generate sample TARGET2 data for testing. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. bank_list: List of bank identifiers to include. Returns: DataFrame containing sample interbank payment volumes. """ # Default bank list if none provided if bank_list is None: bank_list = [ "DE_DBK", "FR_BNP", "ES_SAN", "IT_UCG", "NL_ING", "SE_NDA", "CH_UBS", "UK_BARC", "UK_HSBC", "FR_ACA", ] # Generate dates within the range (monthly) date_range = pd.date_range(start=start_date, end=end_date, freq="M") # Create sample data data = [] # Base exposure matrix (symmetric) base_exposures = { "DE_DBK": { "FR_BNP": 15, "ES_SAN": 8, "IT_UCG": 12, "NL_ING": 10, "SE_NDA": 5, "CH_UBS": 7, "UK_BARC": 9, "UK_HSBC": 11, "FR_ACA": 6, }, "FR_BNP": { "DE_DBK": 15, "ES_SAN": 14, "IT_UCG": 9, "NL_ING": 8, "SE_NDA": 4, "CH_UBS": 6, "UK_BARC": 10, "UK_HSBC": 12, "FR_ACA": 18, }, "ES_SAN": { "DE_DBK": 8, "FR_BNP": 14, "IT_UCG": 13, "NL_ING": 7, "SE_NDA": 3, "CH_UBS": 5, "UK_BARC": 8, "UK_HSBC": 9, "FR_ACA": 7, }, "IT_UCG": { "DE_DBK": 12, "FR_BNP": 9, "ES_SAN": 13, "NL_ING": 6, "SE_NDA": 4, "CH_UBS": 5, "UK_BARC": 7, "UK_HSBC": 8, "FR_ACA": 6, }, "NL_ING": { "DE_DBK": 10, "FR_BNP": 8, "ES_SAN": 7, "IT_UCG": 6, "SE_NDA": 9, "CH_UBS": 8, "UK_BARC": 6, "UK_HSBC": 7, "FR_ACA": 5, }, "SE_NDA": { "DE_DBK": 5, "FR_BNP": 4, "ES_SAN": 3, "IT_UCG": 4, "NL_ING": 9, "CH_UBS": 7, "UK_BARC": 4, "UK_HSBC": 5, "FR_ACA": 3, }, "CH_UBS": { "DE_DBK": 7, "FR_BNP": 6, "ES_SAN": 5, "IT_UCG": 5, "NL_ING": 8, "SE_NDA": 7, "UK_BARC": 9, "UK_HSBC": 10, "FR_ACA": 4, }, "UK_BARC": { "DE_DBK": 9, "FR_BNP": 10, "ES_SAN": 8, "IT_UCG": 7, "NL_ING": 6, "SE_NDA": 4, "CH_UBS": 9, "UK_HSBC": 15, "FR_ACA": 7, }, "UK_HSBC": { "DE_DBK": 11, "FR_BNP": 12, "ES_SAN": 9, "IT_UCG": 8, "NL_ING": 7, "SE_NDA": 5, "CH_UBS": 10, "UK_BARC": 15, "FR_ACA": 8, }, "FR_ACA": { "DE_DBK": 6, "FR_BNP": 18, "ES_SAN": 7, "IT_UCG": 6, "NL_ING": 5, "SE_NDA": 3, "CH_UBS": 4, "UK_BARC": 7, "UK_HSBC": 8, }, } for date in date_range: # Time factor for trend time_factor = (date - pd.to_datetime(start_date)).days / 365 for source in bank_list: for target in bank_list: if source != target: # Get base exposure with some time trend and noise if ( source in base_exposures and target in base_exposures[source] ): base = base_exposures[source][target] # Add trend (increasing over time) and noise exposure = ( base * (1 + 0.1 * time_factor) * (1 + self._rng.normal(0, 0.15)) ) # Convert to billions of EUR exposure = exposure * 1e9 data.append( { "date": date, "source": source, "target": target, "weight": round(exposure, 0), } ) else: logger.debug( "No base exposure defined for pair %s%s, skipping", source, target, ) return pd.DataFrame(data) def _get_sample_ciss_data(self, start_date: str, end_date: str) -> pd.DataFrame: """Generate sample CISS data for testing. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. Returns: DataFrame containing sample CISS values. """ # Generate dates within the range (weekly) date_range = pd.date_range(start=start_date, end=end_date, freq="W") # Create sample data data = [] # Base CISS value base_ciss = 0.15 # Add some random variation and events for date in date_range: # Time factor time_factor = (date - pd.to_datetime(start_date)).days / 365 # Add some stress events if _is_stress_period(date, "2008-09") or _is_stress_period( date, "2008-10" ): # Financial crisis ciss = 0.8 + self._rng.normal(0, 0.05) elif _is_stress_period(date, "2011-08") or _is_stress_period( date, "2011-09" ): # European debt crisis ciss = 0.6 + self._rng.normal(0, 0.05) elif _is_stress_period(date, "2020-03") or _is_stress_period( date, "2020-04" ): # COVID-19 ciss = 0.7 + self._rng.normal(0, 0.05) else: # Normal times with slight trend and noise ciss = base_ciss + 0.02 * time_factor + self._rng.normal(0, 0.03) # Ensure CISS is between 0 and 1 ciss = max(0, min(1, ciss)) data.append({"date": date, "CISS": round(ciss, 4)}) return pd.DataFrame(data) def _get_sample_gsib_data( self, start_date: str, end_date: str, bank_list: Optional[List[str]] = None, ) -> pd.DataFrame: """Generate sample G-SIB data for testing. Args: start_date: Start date for data collection in 'YYYY-MM-DD' format. end_date: End date for data collection in 'YYYY-MM-DD' format. bank_list: List of bank identifiers to include. Returns: DataFrame containing sample G-SIB indicators. """ # Default bank list if none provided if bank_list is None: bank_list = [ "DE_DBK", "FR_BNP", "ES_SAN", "IT_UCG", "NL_ING", "SE_NDA", "CH_UBS", "UK_BARC", "UK_HSBC", "FR_ACA", ] # Generate dates within the range (annual) date_range = pd.date_range(start=start_date, end=end_date, freq="A") # Create sample data data = [] # Base G-SIB scores for each bank base_scores = { "DE_DBK": 210, # Deutsche Bank "FR_BNP": 230, # BNP Paribas "ES_SAN": 190, # Santander "IT_UCG": 170, # UniCredit "NL_ING": 160, # ING "SE_NDA": 120, # Nordea "CH_UBS": 200, # UBS "UK_BARC": 180, # Barclays "UK_HSBC": 250, # HSBC "FR_ACA": 170, # Credit Agricole } # Base cross-jurisdictional claims and liabilities base_cross_claims = { "DE_DBK": 450, # Deutsche Bank "FR_BNP": 480, # BNP Paribas "ES_SAN": 320, # Santander "IT_UCG": 280, # UniCredit "NL_ING": 250, # ING "SE_NDA": 180, # Nordea "CH_UBS": 420, # UBS "UK_BARC": 350, # Barclays "UK_HSBC": 520, # HSBC "FR_ACA": 290, # Credit Agricole } # Base intra-financial assets and liabilities base_intra_assets = { "DE_DBK": 380, # Deutsche Bank "FR_BNP": 410, # BNP Paribas "ES_SAN": 290, # Santander "IT_UCG": 250, # UniCredit "NL_ING": 220, # ING "SE_NDA": 160, # Nordea "CH_UBS": 370, # UBS "UK_BARC": 320, # Barclays "UK_HSBC": 450, # HSBC "FR_ACA": 260, # Credit Agricole } for date in date_range: # Time factor for trend time_factor = (date - pd.to_datetime(start_date)).days / 365 for bank_id in bank_list: # Get base values base_score = base_scores.get(bank_id, 150) base_cross = base_cross_claims.get(bank_id, 300) base_intra = base_intra_assets.get(bank_id, 250) # Add trend and noise gsib_score = ( base_score * (1 + 0.05 * time_factor) * (1 + self._rng.normal(0, 0.05)) ) cross_claims = ( base_cross * (1 + 0.08 * time_factor) * (1 + self._rng.normal(0, 0.08)) ) intra_assets = ( base_intra * (1 + 0.06 * time_factor) * (1 + self._rng.normal(0, 0.07)) ) # Convert to billions of EUR cross_claims = cross_claims * 1e9 intra_assets = intra_assets * 1e9 data.append( { "date": date, "bank_id": bank_id, "gsib_score": round(gsib_score, 0), "cross_jurisdictional_claims": round(cross_claims, 0), "intra_financial_assets": round(intra_assets, 0), } ) return pd.DataFrame(data)
def _is_stress_period(date: pd.Timestamp, year_month: str) -> bool: """Check whether a timestamp falls in a given year-month stress period. Args: date: The timestamp to test. year_month: A string of the form 'YYYY-MM'. Returns: True if the timestamp's year and month match *year_month*. """ return date.strftime("%Y-%m") == year_month