import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the Bloomberg-extracted data (data_root from config, or repo data/)
data_path = data_root / "bloomberg_database/uk_banks_2005_2010_with_failures.csv"
if not data_path.exists():
data_path = Path("../data/bloomberg_database/uk_banks_2005_2010_with_failures.csv")
# For this lab, we'll create example data matching real patterns
# In practice, you would load your Bloomberg extraction
# Real approximate price paths (indexed to 100 in Jan 2005)
dates = pd.date_range('2005-01-01', '2010-12-31', freq='ME')
# Survivors (simplified paths based on actual performance)
np.random.seed(42)
# Create realistic price paths
def create_price_path(start, crisis_low, end, crisis_month=44):
"""Create a price path with a crisis dip."""
n = len(dates)
pre_crisis = np.linspace(start, start * 1.3, crisis_month)
crisis = np.linspace(start * 1.3, crisis_low, 6)
recovery = np.linspace(crisis_low, end, n - crisis_month - 6)
return np.concatenate([pre_crisis, crisis, recovery])
prices = pd.DataFrame({
'Date': dates,
# Survivors
'BARC': create_price_path(100, 15, 45),
'LLOY': create_price_path(100, 12, 35),
'HSBA': create_price_path(100, 55, 85),
'STAN': create_price_path(100, 45, 90),
'RBS': create_price_path(100, 8, 25),
# Failed banks - prices go to near zero at delisting
'NRK': np.concatenate([create_price_path(100, 5, 5)[:38], [np.nan]*34]), # Delisted Feb 2008
'BB': np.concatenate([create_price_path(100, 2, 2)[:45], [np.nan]*27]), # Delisted Sep 2008
'HBOS': np.concatenate([create_price_path(100, 18, 18)[:48], [np.nan]*24]), # Merged Jan 2009
}).set_index('Date')
print("Data loaded successfully")
print(f"Date range: {prices.index[0].strftime('%Y-%m')} to {prices.index[-1].strftime('%Y-%m')}")
print(f"Securities: {list(prices.columns)}")Lab 02: Survivorship Bias in UK Banking
Using Bloomberg Data to Quantify Selection Bias
Lab Overview
Duration: 2 hours (1 hour Bloomberg session + 1 hour analysis)
Learning Objectives:
- Extract historical price data from Bloomberg Terminal
- Understand how to handle delisted securities
- Quantify survivorship bias using real financial data
- Appreciate the magnitude of bias in crisis periods
Prerequisites:
- Read Chapter 02: Data and Measurement in Finance
- Basic familiarity with Bloomberg Terminal navigation
Background: The UK Banking Crisis of 2008
The 2008 financial crisis provides a stark illustration of survivorship bias. If you look at UK banking returns today using only currently-listed banks, you miss the catastrophic losses from institutions that failed or were rescued.
| Bank | What Happened | Approximate Loss |
|---|---|---|
| Northern Rock | Nationalised Feb 2008 | ~95% loss |
| Bradford & Bingley | Nationalised Sep 2008 | ~98% loss |
| HBOS | Acquired by Lloyds Jan 2009 | ~82% loss before rescue |
| Royal Bank of Scotland | Government bailout | ~90% loss at nadir |
Students analysing “UK bank performance” using only current FTSE 100 banks would completely miss these catastrophic events.
Part 1: Bloomberg Data Extraction (60 minutes)
Step 1: Identify Current UK Banks
First, let’s establish which UK banks survive today and are commonly studied:
Bloomberg: UKX Index MEMB
Filter: GICS Sector = Financials, Sub-Industry = Diversified Banks
Current survivors typically include:
- BARC LN : Barclays
- LLOY LN : Lloyds Banking Group
- HSBA LN : HSBC Holdings
- STAN LN : Standard Chartered
- NWG LN : NatWest Group (formerly RBS)
Step 2: Extract Historical Prices for Failed Banks
Using the Bloomberg Excel Add-in, extract monthly adjusted prices for the period January 2005 to December 2010:
Securities to extract:
| Ticker | Name | Notes |
|---|---|---|
NRK LN Equity |
Northern Rock | Delisted Feb 2008 |
BB/ LN Equity |
Bradford & Bingley | Delisted Sep 2008 |
HBOS LN Equity |
HBOS | Merged into Lloyds Jan 2009 |
Bloomberg Excel Formula:
=BDH("NRK LN Equity","PX_LAST","2005-01-01","2008-02-22","Per=M","Days=A")
For delisted securities, Bloomberg retains historical data but you must specify the correct date range. Use the delisting date as your end date, not today’s date.
Step 3: Extract Prices for Survivors
Extract the same date range for current UK banks:
=BDH("BARC LN Equity","PX_LAST","2005-01-01","2010-12-31","Per=M","Days=A")
=BDH("LLOY LN Equity","PX_LAST","2005-01-01","2010-12-31","Per=M","Days=A")
=BDH("HSBA LN Equity","PX_LAST","2005-01-01","2010-12-31","Per=M","Days=A")
=BDH("STAN LN Equity","PX_LAST","2005-01-01","2010-12-31","Per=M","Days=A")
=BDH("RBS LN Equity","PX_LAST","2005-01-01","2010-12-31","Per=M","Days=A")
Step 4: Save Your Data
Export to CSV with the following structure:
Date, BARC, LLOY, HSBA, STAN, RBS, NRK, BB, HBOS
2005-01-31, 523.5, 445.2, 847.0, 1023.5, 1845.0, 987.5, 423.0, 845.0
...
Save as: uk_banks_2005_2010_with_failures.csv
Part 2: Survivorship Bias Analysis (60 minutes)
Load and Prepare Data
Calculate Returns
# Calculate monthly returns
returns = prices.pct_change().dropna()
# Identify survivor vs failed groups
survivors = ['BARC', 'LLOY', 'HSBA', 'STAN', 'RBS']
failed = ['NRK', 'BB', 'HBOS']
print("\nMonthly Return Statistics (Full Period):")
print("=" * 50)
print("\nSurvivors:")
print(returns[survivors].describe().round(4))
print("\nFailed Banks (until delisting):")
print(returns[failed].describe().round(4))Quantify Survivorship Bias
def calculate_portfolio_return(returns_df, columns, equal_weight=True):
"""
Calculate portfolio returns, handling missing values properly.
When a stock delists, we assume:
- Final return is the loss to delisting price
- Capital is then reallocated to remaining stocks
"""
portfolio_returns = []
for date in returns_df.index:
# Get returns for this date (only non-null values)
available = returns_df.loc[date, columns].dropna()
if len(available) > 0:
if equal_weight:
portfolio_returns.append(available.mean())
else:
portfolio_returns.append(available.sum() / len(columns))
else:
portfolio_returns.append(np.nan)
return pd.Series(portfolio_returns, index=returns_df.index)
# Calculate returns for different portfolios
survivors_only = calculate_portfolio_return(returns, survivors)
all_banks = calculate_portfolio_return(returns, survivors + failed)
# Cumulative wealth (starting with £100)
wealth_survivors = 100 * (1 + survivors_only).cumprod()
wealth_all = 100 * (1 + all_banks).cumprod()
print("\nSurvivorship Bias Analysis")
print("=" * 50)
print(f"\nCumulative Return (Jan 2005 - Dec 2010):")
print(f" Survivors only: {(wealth_survivors.iloc[-1]/100 - 1)*100:.1f}%")
print(f" Including failed: {(wealth_all.iloc[-1]/100 - 1)*100:.1f}%")
print(f"\nSurvivorship Bias: {(wealth_survivors.iloc[-1] - wealth_all.iloc[-1]):.1f} percentage points")
print(f"\nAnnualised Return:")
years = 6
print(f" Survivors only: {((wealth_survivors.iloc[-1]/100)**(1/years) - 1)*100:.2f}% p.a.")
print(f" Including failed: {((wealth_all.iloc[-1]/100)**(1/years) - 1)*100:.2f}% p.a.")Visualise the Difference
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
# 1. Individual stock prices (normalised)
ax1 = axes[0, 0]
normalised = prices.div(prices.iloc[0]) * 100
for col in survivors:
normalised[col].plot(ax=ax1, label=col, alpha=0.7)
for col in failed:
normalised[col].dropna().plot(ax=ax1, label=f'{col} (failed)', linestyle='--', alpha=0.7)
ax1.axhline(y=100, color='gray', linestyle=':', alpha=0.5)
ax1.set_ylabel('Price (Jan 2005 = 100)')
ax1.set_title('UK Bank Stock Prices 2005-2010')
ax1.legend(loc='upper left', fontsize=8)
ax1.grid(alpha=0.3)
# 2. Cumulative portfolio wealth
ax2 = axes[0, 1]
wealth_survivors.plot(ax=ax2, label='Survivors Only', color='blue', lw=2)
wealth_all.plot(ax=ax2, label='Including Failed Banks', color='red', lw=2)
ax2.axhline(y=100, color='gray', linestyle=':', alpha=0.5)
ax2.fill_between(wealth_survivors.index, wealth_survivors, wealth_all,
alpha=0.3, color='coral', label='Survivorship Bias')
ax2.set_ylabel('Portfolio Value (£100 initial)')
ax2.set_title('The Cost of Survivorship Bias')
ax2.legend()
ax2.grid(alpha=0.3)
# 3. Rolling 12-month return difference
ax3 = axes[1, 0]
rolling_diff = (survivors_only.rolling(12).mean() - all_banks.rolling(12).mean()) * 100
rolling_diff.plot(ax=ax3, color='coral', lw=2)
ax3.axhline(y=0, color='gray', linestyle='-', lw=0.5)
ax3.fill_between(rolling_diff.index, 0, rolling_diff, alpha=0.3, color='coral')
ax3.set_ylabel('Monthly Return Difference (%)')
ax3.set_title('Rolling 12-Month Survivorship Bias')
ax3.grid(alpha=0.3)
# 4. Return distributions
ax4 = axes[1, 1]
ax4.hist(survivors_only.dropna() * 100, bins=30, alpha=0.6, label='Survivors Only', color='blue')
ax4.hist(all_banks.dropna() * 100, bins=30, alpha=0.6, label='Including Failed', color='red')
ax4.axvline(x=survivors_only.mean()*100, color='blue', linestyle='--', lw=2)
ax4.axvline(x=all_banks.mean()*100, color='red', linestyle='--', lw=2)
ax4.set_xlabel('Monthly Return (%)')
ax4.set_ylabel('Frequency')
ax4.set_title('Return Distribution Comparison')
ax4.legend()
ax4.grid(alpha=0.3)
plt.tight_layout()
plt.savefig('survivorship_bias_uk_banks.png', dpi=150, bbox_inches='tight')
plt.show()
print("\nFigure saved as 'survivorship_bias_uk_banks.png'")Discussion Questions
Magnitude of Bias: How does the survivorship bias you calculated compare to the ~1% often cited in academic studies? Why might crisis periods show larger bias?
Practical Implications: If you were backtesting a UK banking sector strategy, how would ignoring failed banks affect your:
- Estimated Sharpe ratio?
- Value-at-Risk calculations?
- Confidence in the strategy?
Data Challenges: What practical difficulties did you encounter when trying to get data for delisted securities? How might this affect academic research?
Beyond Banks: Where else might survivorship bias be particularly severe? Consider:
- Emerging market equities
- Cryptocurrency
- Hedge funds
- Small-cap stocks
Deliverables
Submit a short report (max 2 pages) containing:
Data Summary: Table showing the securities analysed, date ranges, and data sources
Key Calculation: Your estimate of survivorship bias in UK banking returns 2005-2010 (annualised percentage)
Visualisation: One chart showing the cumulative wealth paths with and without failed banks
Reflection: One paragraph on implications for your own future analysis work
Extension: Automating Survivorship-Free Databases
For students interested in production systems:
class SurvivorshipFreeDatabase:
"""
Framework for maintaining survivorship-free financial databases.
Key principles:
1. Capture all securities at point-in-time
2. Record delisting events with final prices
3. Maintain audit trail of universe changes
"""
def __init__(self):
self.securities = {} # ticker -> metadata
self.prices = pd.DataFrame()
self.events = [] # (date, ticker, event_type, details)
def add_security(self, ticker, metadata):
"""Add a new security to track."""
self.securities[ticker] = metadata
self.events.append((pd.Timestamp.now(), ticker, 'ADDED', metadata))
def record_delisting(self, ticker, date, reason, final_price):
"""Record a delisting event with proper handling."""
self.events.append((date, ticker, 'DELISTED', {
'reason': reason,
'final_price': final_price
}))
# Mark security as inactive but retain all historical data
self.securities[ticker]['status'] = 'DELISTED'
self.securities[ticker]['delisting_date'] = date
def get_universe_at_date(self, date):
"""Return the set of active securities at a specific date."""
active = []
for ticker, meta in self.securities.items():
if meta.get('status') != 'DELISTED':
active.append(ticker)
elif meta.get('delisting_date', pd.Timestamp.max) > date:
active.append(ticker)
return active
def calculate_returns(self, start_date, end_date, survivorship_free=True):
"""
Calculate portfolio returns with proper handling of delistings.
If survivorship_free=True, uses point-in-time universe.
If False, uses only securities that survived to end_date.
"""
pass # Implementation left as exercise
# This framework ensures you never accidentally introduce survivorship biasReferences
Elton, E.J., Gruber, M.J., & Blake, C.R. (1996). Survivor Bias and Mutual Fund Performance. Review of Financial Studies, 9(4), 1097-1120.
Brown, S.J., Goetzmann, W., Ibbotson, R.G., & Ross, S.A. (1992). Survivorship Bias in Performance Studies. Review of Financial Studies, 5(4), 553-580.
Rohleder, M., Scholz, H., & Wilkens, M. (2011). Survivorship Bias and Mutual Fund Performance: Relevance, Significance, and Methodical Differences. Review of Finance, 15(2), 441-474.