PyPortfolioOpt icon indicating copy to clipboard operation
PyPortfolioOpt copied to clipboard

Add Support for Fama-French 3/5 Factor Model to Expected Return Module

Open DSM2499 opened this issue 7 months ago • 2 comments

Enhance the expected_returns module by adding a new function ff_expected_return() that computes expected returns using the Fama-French 3-factor and 5-factor models via OLS regression.

This will expand the current set of return estimation methods (mean_historical_return, capm_return, etc.) with a more advanced and academically grounded model, improving flexibility for users in quantitative finance applications.

def ff_expected_return( prices: pd.DataFrame, factor_data: pd.DataFrame, returns_data: bool = False, model: str = "ff3", compounding: bool = True, frequency: int = 252, log_returns: bool = False, ) -> pd.Series:

DSM2499 avatar May 16 '25 02:05 DSM2499

def ff_expected_return(
    prices,
    factor_data,
    returns_data=False,
    model="ff3",
    compounding=True,
    frequency=252,
    log_returns=False,
):
    """
    Estimate annualised expected returns using the Fama-French 3- or 5-factor model.

    For every asset, the excess return (asset return minus ``RF``) is regressed
    by OLS, with an intercept, on the selected factors over the dates shared by
    the asset returns and ``factor_data``. The in-sample fitted values are then
    annualised. Because ``RF`` is subtracted before the regression and is not
    added back afterwards, the result is an expected *excess* return.

    :param prices: asset prices, or asset returns if ``returns_data=True``.
                   Each column is an asset, each row a date.
    :type prices: pd.DataFrame
    :param factor_data: Fama-French factors indexed like ``prices``. Must include
                        'RF' and:
                        - ff3: 'Mkt-RF', 'SMB', 'HML'
                        - ff5: also 'RMW', 'CMA'
    :type factor_data: pd.DataFrame
    :param returns_data: if True, ``prices`` is treated as returns data.
    :type returns_data: bool
    :param model: 'ff3' or 'ff5'.
    :type model: str
    :param compounding: use the geometric average of the fitted values if True,
                        the arithmetic average if False.
    :type compounding: bool
    :param frequency: annualisation factor (number of periods per year).
    :type frequency: int
    :param log_returns: forwarded to ``returns_from_prices`` when prices are
                        supplied; has no effect if ``returns_data=True``.
    :type log_returns: bool
    :raises ValueError: if ``model`` is not 'ff3' or 'ff5', if a required factor
                        column is missing, or if the returns and the factor data
                        share no dates.
    :return: annualised expected excess return per asset.
    :rtype: pd.Series
    """
    if not isinstance(prices, pd.DataFrame):
        warnings.warn("Input prices are not in a dataframe", RuntimeWarning)
        prices = pd.DataFrame(prices)

    if not isinstance(factor_data, pd.DataFrame):
        warnings.warn("Input factor_data is not in a dataframe", RuntimeWarning)
        # Convert, as is done for prices, so the column checks below can run.
        factor_data = pd.DataFrame(factor_data)

    # Reject unknown model names instead of silently falling back to ff3.
    if model not in ("ff3", "ff5"):
        raise ValueError("model must be 'ff3' or 'ff5'")

    # Single source of truth for the regressors; RF is only needed to build
    # the excess returns and is therefore not a regressor.
    regressors = ["Mkt-RF", "SMB", "HML"]
    if model == "ff5":
        regressors += ["RMW", "CMA"]

    for col in ["RF"] + regressors:
        if col not in factor_data.columns:
            raise ValueError(f"Factor data must include {col}")

    # Compute asset returns
    if returns_data:
        returns = prices.copy()
    else:
        returns = returns_from_prices(prices, log_returns)

    _check_returns(returns)

    # Keep only the dates present in both the returns and the factor data.
    common_index = returns.index.intersection(factor_data.index)
    if len(common_index) == 0:
        raise ValueError("No overlapping dates between returns and factor data")

    returns = returns.loc[common_index]
    factors = factor_data.loc[common_index]

    # Excess returns: subtract the risk-free rate row by row from every asset.
    excess_returns = returns.sub(factors["RF"], axis=0)

    # The design matrix is identical for all assets, so build it once.
    # `sm` is expected to be statsmodels.api (imported at module level).
    design = sm.add_constant(factors[regressors])

    expected = {}
    for asset in excess_returns.columns:
        # Use a dedicated name for the fit so the `model` argument is not
        # overwritten by the regression results.
        fit = sm.OLS(excess_returns[asset], design).fit()
        predicted = fit.predict(design)
        if compounding:
            # NOTE(review): this treats the fitted values as simple returns;
            # confirm it is appropriate before combining with log_returns=True.
            expected[asset] = (1 + predicted).prod() ** (
                frequency / len(predicted)
            ) - 1
        else:
            expected[asset] = predicted.mean() * frequency

    return pd.Series(expected)

DSM2499 avatar May 16 '25 02:05 DSM2499

Tests for the enhancement

def test_ff3_expected_return_valid():
    """The FF3 estimate is a Series with one non-null entry per asset."""
    n_rows = 100
    # Restrict to a few assets and rows to keep the test fast.
    prices = get_data().iloc[:n_rows, :3]

    # (mean, standard deviation) of each mock factor; drawn in the listed order.
    factor_params = {
        "RF": (0.0001, 0.00001),
        "Mkt-RF": (0.0005, 0.001),
        "SMB": (0.0002, 0.0005),
        "HML": (0.0001, 0.0004),
    }
    factors = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, size=n_rows)
            for name, (loc, scale) in factor_params.items()
        },
        index=prices.index,
    )

    result = expected_returns.ff_expected_return(prices, factors, model="ff3")

    assert isinstance(result, pd.Series)
    assert len(result) == prices.shape[1]
    assert result.notnull().all()

def test_ff5_expected_return_valid():
    """The FF5 estimate is a Series with one non-null entry per asset."""
    n_rows = 100
    prices = get_data().iloc[:n_rows, :3]

    # Mock Fama-French 5-factor data: (mean, standard deviation) per factor,
    # drawn in the listed order.
    factor_params = {
        "RF": (0.0001, 0.00001),
        "Mkt-RF": (0.0005, 0.001),
        "SMB": (0.0002, 0.0005),
        "HML": (0.0001, 0.0004),
        "RMW": (0.0001, 0.0003),
        "CMA": (0.0001, 0.0003),
    }
    factors = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, size=n_rows)
            for name, (loc, scale) in factor_params.items()
        },
        index=prices.index,
    )

    result = expected_returns.ff_expected_return(prices, factors, model="ff5")

    assert isinstance(result, pd.Series)
    assert len(result) == prices.shape[1]
    assert result.notnull().all()

def test_ff_expected_return_missing_factors():
    """A factor table lacking a required column is rejected with a ValueError."""
    df = get_data().iloc[:100, :3]
    dates = df.index

    # Missing HML factor
    factors = pd.DataFrame(
        {
            "RF": np.random.normal(0.0001, 0.00001, size=100),
            "Mkt-RF": np.random.normal(0.0005, 0.001, size=100),
            "SMB": np.random.normal(0.0002, 0.0005, size=100),
        },
        index=dates,
    )

    with pytest.raises(ValueError) as excinfo:
        expected_returns.ff_expected_return(df, factors, model="ff3")
    # Must match the text ff_expected_return actually raises
    # ("Factor data must include <column>"), otherwise this test cannot pass.
    assert "Factor data must include HML" in str(excinfo.value)

def test_ff_expected_return_no_overlap():
    """Factor data that shares no dates with the prices raises a ValueError."""
    n_rows = 100
    prices = get_data().iloc[:n_rows, :3]

    # Index the factors on their own business-day calendar so that it does not
    # overlap with the price index.
    factor_dates = pd.date_range("2020-01-01", periods=n_rows, freq="B")
    factor_params = {
        "RF": (0.0001, 0.00001),
        "Mkt-RF": (0.0005, 0.001),
        "SMB": (0.0002, 0.0005),
        "HML": (0.0001, 0.0004),
    }
    factors = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, size=n_rows)
            for name, (loc, scale) in factor_params.items()
        },
        index=factor_dates,
    )

    with pytest.raises(ValueError) as excinfo:
        expected_returns.ff_expected_return(prices, factors, model="ff3")
    message = str(excinfo.value)
    assert "No overlapping dates" in message

def test_ff_expected_return_compounding_toggle():
    """Geometric and arithmetic annualisation give different estimates."""
    n_rows = 100
    prices = get_data().iloc[:n_rows, :3]

    factor_params = {
        "RF": (0.0001, 0.00001),
        "Mkt-RF": (0.0005, 0.001),
        "SMB": (0.0002, 0.0005),
        "HML": (0.0001, 0.0004),
    }
    factors = pd.DataFrame(
        {
            name: np.random.normal(loc, scale, size=n_rows)
            for name, (loc, scale) in factor_params.items()
        },
        index=prices.index,
    )

    # Same inputs, only the compounding flag differs (geometric first).
    geometric = expected_returns.ff_expected_return(
        prices, factors, compounding=True
    )
    arithmetic = expected_returns.ff_expected_return(
        prices, factors, compounding=False
    )
    assert not geometric.equals(arithmetic)

def test_ff3_expected_return_static():
    """FF3 on a small fixed data set returns sane values for all three assets."""
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    # Sample asset prices for 3 assets
    price_data = {
        "Asset A": [100, 100.5, 101, 101.5, 102],
        "Asset B": [50, 50.25, 50.5, 50.75, 51],
        "Asset C": [200, 199.5, 199.8, 200.1, 200.5],
    }
    prices = pd.DataFrame(price_data, index=dates)

    # Matching FF3 data
    factor_values = {
        "RF": [0.0001, 0.0001, 0.0001, 0.0001, 0.0001],
        "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
        "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
        "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0000],
    }
    factors = pd.DataFrame(factor_values, index=dates)

    result = expected_returns.ff_expected_return(
        prices, factors, model="ff3", compounding=False, frequency=252
    )

    assert isinstance(result, pd.Series)
    assert len(result) == 3
    assert result.index.tolist() == asset_names
    assert result.notnull().all()
    # Loose sanity bounds only; no exact values are pinned here.
    assert result.min() > -0.5
    assert result.max() < 10

def test_ff5_expected_return_static():
    """FF5 on a small fixed data set returns sane values for all three assets."""
    # NOTE(review): with 5 dates there are at most 5 observations for the
    # 6 fitted coefficients (intercept + 5 factors), so the regression is
    # underdetermined; consider a longer fixture if exact values are ever pinned.
    asset_names = ["Asset A", "Asset B", "Asset C"]
    dates = pd.date_range("2022-01-01", periods=5, freq="D")

    price_data = {
        "Asset A": [100, 100.5, 101, 101.5, 102],
        "Asset B": [50, 50.25, 50.5, 50.75, 51],
        "Asset C": [200, 199.5, 199.8, 200.1, 200.5],
    }
    prices = pd.DataFrame(price_data, index=dates)

    factor_values = {
        "RF": [0.0001] * 5,
        "Mkt-RF": [0.001, 0.0015, -0.0005, 0.0003, 0.0007],
        "SMB": [0.0002, 0.0001, -0.0001, 0.0002, 0.0001],
        "HML": [0.0003, -0.0002, 0.0004, 0.0001, 0.0000],
        "RMW": [0.0002, 0.0001, 0.0003, -0.0001, 0.0002],
        "CMA": [0.0003, -0.0001, 0.0001, 0.0002, 0.0003],
    }
    factors = pd.DataFrame(factor_values, index=dates)

    result = expected_returns.ff_expected_return(
        prices, factors, model="ff5", compounding=False, frequency=252
    )

    assert isinstance(result, pd.Series)
    assert len(result) == 3
    assert result.index.tolist() == asset_names
    assert result.notnull().all()
    # Loose sanity bounds only; no exact values are pinned here.
    assert result.min() > -0.5
    assert result.max() < 10

DSM2499 avatar May 16 '25 02:05 DSM2499