Add Support for Fama-French 3/5 Factor Model to Expected Return Module
Enhance the expected_returns module by adding a new function ff_expected_return() that computes expected returns using the Fama-French 3-factor and 5-factor models via OLS regression.
This will expand the current set of return estimation methods (mean_historical_return, capm_return, etc.) with a more advanced and academically grounded model, improving flexibility for users in quantitative finance applications.
def ff_expected_return( prices: pd.DataFrame, factor_data: pd.DataFrame, returns_data: bool = False, model: str = "ff3", compounding: bool = True, frequency: int = 252, log_returns: bool = False, ) -> pd.Series:
def ff_expected_return(
    prices: pd.DataFrame,
    factor_data: pd.DataFrame,
    returns_data: bool = False,
    model: str = "ff3",
    compounding: bool = True,
    frequency: int = 252,
    log_returns: bool = False,
) -> pd.Series:
    """
    Estimate expected returns using the Fama-French 3- or 5-Factor model.

    Each asset's excess return (return minus 'RF') is regressed by OLS on a
    constant plus the selected factors; the in-sample fitted values are then
    annualised, either geometrically or arithmetically.

    NOTE(review): the figure returned is an annualised *excess* return -- 'RF'
    is subtracted before the regression and never added back. Confirm this is
    the intended convention.

    :param prices: asset prices or returns if returns_data=True.
    :type prices: pd.DataFrame
    :param factor_data: DataFrame of Fama-French factors. Must include 'RF' and:
        - ff3: 'Mkt-RF', 'SMB', 'HML'
        - ff5: also 'RMW', 'CMA'
    :type factor_data: pd.DataFrame
    :param returns_data: If True, 'prices' is treated as returns data.
    :type returns_data: bool
    :param model: 'ff3' or 'ff5'.
    :type model: str
    :param compounding: Use geometric average if True, arithmetic if False.
    :type compounding: bool
    :param frequency: Annualization factor.
    :type frequency: int
    :param log_returns: Forwarded to returns_from_prices when returns_data is
        False. NOTE(review): described as unsupported, but True is not
        rejected here -- confirm whether it should be.
    :type log_returns: bool
    :raises ValueError: if ``model`` is not 'ff3' or 'ff5', if a required
        factor column is missing, or if the returns and the factor data share
        no index labels.
    :return: Expected returns per asset.
    :rtype: pd.Series
    """
    # Reject unknown model names instead of silently falling back to ff3.
    if model not in ("ff3", "ff5"):
        raise ValueError(f"model must be 'ff3' or 'ff5', got {model!r}")

    if not isinstance(prices, pd.DataFrame):
        warnings.warn("Input prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)
    if not isinstance(factor_data, pd.DataFrame):
        warnings.warn("Input factor_data is not in a dataframe", RuntimeWarning)
        # Convert as is done for prices; without this the column check below
        # fails with AttributeError on anything that is not a DataFrame.
        factor_data = pd.DataFrame(factor_data)

    # One regressor list drives both the validation and the regression.
    reg_factors = ["Mkt-RF", "SMB", "HML"]
    if model == "ff5":
        reg_factors += ["RMW", "CMA"]
    for col in ["RF"] + reg_factors:
        if col not in factor_data.columns:
            raise ValueError(f"Factor data must include {col}")

    # Compute asset returns
    if returns_data:
        returns = prices.copy()
    else:
        returns = returns_from_prices(prices, log_returns)
    _check_returns(returns)

    # Keep only the index labels present in both the returns and the factors.
    common_index = returns.index.intersection(factor_data.index)
    if common_index.empty:
        raise ValueError("No overlapping dates between returns and factor data")
    returns = returns.loc[common_index]
    factors = factor_data.loc[common_index]

    # Excess returns over the risk-free rate are the regression targets.
    excess_returns = returns.sub(factors["RF"], axis=0)
    X = sm.add_constant(factors[reg_factors])

    expected = {}
    for asset in excess_returns.columns:
        # Named `fit` so the `model` argument is not overwritten mid-function.
        fit = sm.OLS(excess_returns[asset], X).fit()
        predicted = fit.predict(X)
        if compounding:
            growth = (1 + predicted).prod()
            expected[asset] = growth ** (frequency / len(predicted)) - 1
        else:
            expected[asset] = predicted.mean() * frequency
    return pd.Series(expected)
Tests for the enhancement
def test_ff3_expected_return_valid():
    """The ff3 model yields a fully populated Series with one entry per asset."""
    # Limit assets and rows for speed.
    prices = get_data().iloc[:100, :3]

    # Mock factor series, given as (mean, std) and drawn in column order.
    factor_params = {
        "RF": (0.0001, 0.00001),
        "Mkt-RF": (0.0005, 0.001),
        "SMB": (0.0002, 0.0005),
        "HML": (0.0001, 0.0004),
    }
    factors = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, size=100)
            for name, (loc, scale) in factor_params.items()
        },
        index=prices.index,
    )

    mu = expected_returns.ff_expected_return(prices, factors, model="ff3")

    assert isinstance(mu, pd.Series)
    assert len(mu) == prices.shape[1]
    assert not mu.isnull().any()
def test_ff5_expected_return_valid():
    """The ff5 model yields a fully populated Series with one entry per asset."""
    prices = get_data().iloc[:100, :3]

    # Generate mock Fama-French 5-factor data: (column, mean, std) per factor,
    # drawn in this order.
    factor_specs = [
        ("RF", 0.0001, 0.00001),
        ("Mkt-RF", 0.0005, 0.001),
        ("SMB", 0.0002, 0.0005),
        ("HML", 0.0001, 0.0004),
        ("RMW", 0.0001, 0.0003),
        ("CMA", 0.0001, 0.0003),
    ]
    columns = {}
    for name, loc, scale in factor_specs:
        columns[name] = np.random.normal(loc, scale, size=100)
    factors = pd.DataFrame(columns, index=prices.index)

    mu = expected_returns.ff_expected_return(prices, factors, model="ff5")

    assert isinstance(mu, pd.Series)
    assert len(mu) == prices.shape[1]
    assert not mu.isnull().any()
def test_ff_expected_return_missing_factors():
    """A factor table lacking a required column is rejected with ValueError."""
    df = get_data().iloc[:100, :3]
    dates = df.index
    # Missing HML factor
    factors = pd.DataFrame(
        {
            "RF": np.random.normal(0.0001, 0.00001, size=100),
            "Mkt-RF": np.random.normal(0.0005, 0.001, size=100),
            "SMB": np.random.normal(0.0002, 0.0005, size=100),
        },
        index=dates,
    )
    # `match` checks the message as part of the raises context. An assert
    # placed after the raising call inside the `with` body would never run.
    # The expected text is the message ff_expected_return actually raises.
    with pytest.raises(ValueError, match="Factor data must include HML"):
        expected_returns.ff_expected_return(df, factors, model="ff3")
def test_ff_expected_return_no_overlap():
    """Returns and factors with disjoint indexes are rejected with ValueError."""
    df = get_data().iloc[:100, :3]
    # Shift factor index so no overlap
    # (presumably the fixture's first 100 dates all precede 2020 -- verify
    # against the data behind get_data()).
    factors = pd.DataFrame(
        {
            "RF": np.random.normal(0.0001, 0.00001, size=100),
            "Mkt-RF": np.random.normal(0.0005, 0.001, size=100),
            "SMB": np.random.normal(0.0002, 0.0005, size=100),
            "HML": np.random.normal(0.0001, 0.0004, size=100),
        },
        index=pd.date_range("2020-01-01", periods=100, freq="B"),
    )
    # `match` checks the message as part of the raises context. An assert
    # placed after the raising call inside the `with` body would never run.
    with pytest.raises(ValueError, match="No overlapping dates"):
        expected_returns.ff_expected_return(df, factors, model="ff3")
def test_ff_expected_return_compounding_toggle():
    """Geometric and arithmetic annualisation must not give identical output."""
    prices = get_data().iloc[:100, :3]

    # Mock factor columns; the draws happen in this order.
    rf = np.random.normal(0.0001, 0.00001, size=100)
    mkt_rf = np.random.normal(0.0005, 0.001, size=100)
    smb = np.random.normal(0.0002, 0.0005, size=100)
    hml = np.random.normal(0.0001, 0.0004, size=100)
    factors = pd.DataFrame(
        {"RF": rf, "Mkt-RF": mkt_rf, "SMB": smb, "HML": hml},
        index=prices.index,
    )

    # Same inputs, with only the compounding flag flipped between the calls.
    mu_geom = expected_returns.ff_expected_return(prices, factors, compounding=True)
    mu_arith = expected_returns.ff_expected_return(prices, factors, compounding=False)

    assert not mu_geom.equals(mu_arith)
def test_ff3_expected_return_static():
    """ff3 on a small hand-written data set gives sane, labelled output."""
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    # Sample asset prices for 3 assets, one row per date.
    price_rows = [
        [100, 50, 200],
        [100.5, 50.25, 199.5],
        [101, 50.5, 199.8],
        [101.5, 50.75, 200.1],
        [102, 51, 200.5],
    ]
    prices = pd.DataFrame(price_rows, columns=asset_names, index=dates)

    # Matching FF3 data on the same dates.
    factors = pd.DataFrame(
        {
            "RF": [0.0001] * 5,
            "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
            "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
            "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0],
        },
        index=dates,
    )

    mu = expected_returns.ff_expected_return(
        prices, factors, model="ff3", compounding=False, frequency=252
    )

    assert isinstance(mu, pd.Series)
    assert len(mu) == 3
    assert mu.index.tolist() == asset_names
    assert not mu.isnull().any()
    # Check rough bounds for sanity
    assert mu.min() > -0.5
    assert mu.max() < 10
def test_ff5_expected_return_static():
    """ff5 on a small hand-written data set gives sane, labelled output."""
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    # Price paths keyed by asset, in the order the result is expected to keep.
    price_paths = [
        [100, 100.5, 101, 101.5, 102],
        [50, 50.25, 50.5, 50.75, 51],
        [200, 199.5, 199.8, 200.1, 200.5],
    ]
    prices = pd.DataFrame(dict(zip(asset_names, price_paths)), index=dates)

    # Five-factor table on the same dates, with a constant risk-free rate.
    factor_columns = {
        "RF": [0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
        "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
        "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
        "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0],
        "RMW": [0.0002, 0.0001, 0.0003, -0.0001, 0.0002],
        "CMA": [0.0003, -0.0001, 0.0001, 0.0002, 0.0003],
    }
    factors = pd.DataFrame(factor_columns, index=dates)

    mu = expected_returns.ff_expected_return(
        prices, factors, model="ff5", compounding=False, frequency=252
    )

    assert isinstance(mu, pd.Series)
    assert len(mu) == 3
    assert mu.index.tolist() == asset_names
    assert not mu.isnull().any()
    assert mu.min() > -0.5
    assert mu.max() < 10