Source code for macrosynergy.pnl.sharpe_stability_ratio

"""Sharpe Stability Ratio: HAC-robust t-stat for the mean rolling Sharpe, accounting for sample size and serial dependence."""

import warnings
from typing import Optional, Union

import numpy as np
import pandas as pd
from scipy.stats import norm
from statsmodels.tsa.stattools import acovf


[docs]def sharpe_stability_ratio(
    returns: Union[pd.Series, np.ndarray],
    window: int = 252,
    benchmark_sr: float = 0.0,
    annualization_factor: int = 252,
    min_periods: Optional[int] = None,
    probability: bool = False,
) -> float:
    """
    The SSR is the ratio between an average (rolling) Sharpe ratio and its
    estimated deviation from a true mean ("estimated parameter error"). Thus,
    it accounts not only for the risk-adjusted return of a strategy but also
    for its uncertainty given the sample size, seasonality, and autocorrelation.

    By default, Sharpe ratios and their estimated errors are calculated using
    rolling 252-trading-day windows on a daily return series. The error is
    estimated using the HAC (Heteroskedasticity and Autocorrelation Consistent)
    Newey-West (1987) variance estimator, which adjusts the variance of the
    sample mean for serial dependence in the rolling Sharpe series.

    The SSR can be interpreted as a signal-to-noise measure, indicating how
    strongly the Sharpe ratios deviate from zero after accounting for their
    serial dependence. If the SSR is above 1, its value exceeds its estimated
    noise, providing evidence of a non-zero mean. Under standard regularity
    conditions, the SSR approximates a t-statistic for the mean of the Sharpe
    ratio: a value of 1 corresponds to ~68% confidence in a non-zero mean,
    1.64 to ~90%, 1.96 to ~95%, and 2.58 to ~99%.

    For non-daily inputs, ``window`` and ``annualization_factor`` must be set
    together to match the input frequency (monthly: 12/12, weekly: 52/52).

    Parameters
    ----------
    returns : pd.Series or np.ndarray
        Return series at the frequency implied by ``window`` /
        ``annualization_factor``. NaNs are dropped.
    window : int, default 252
        Rolling window length (number of observations) for computing the
        Sharpe ratio series. Must be >= 2.
    benchmark_sr : float, default 0.0
        Benchmark Sharpe ratio (SR*) the mean rolling Sharpe is tested against.
    annualization_factor : int, default 252
        Periods per year. Must match input frequency.
    min_periods : int or None, default None
        Minimum non-NaN observations per window. Defaults to ``window``.
    probability : bool, default False
        If True, return the one-sided asymptotic normal probability that the
        mean rolling Sharpe ratio is above ``benchmark_sr``. If False, return
        the Sharpe Stability Ratio t-statistic.

    Returns
    -------
    float
        SSR t-statistic, or probability if ``probability`` is True. Returns NaN
        if data is insufficient or the rolling Sharpe series has zero variance.
    """
    if not isinstance(window, int) or window < 2:
        raise ValueError("window must be an integer >= 2")
    if not isinstance(annualization_factor, int) or annualization_factor <= 0:
        raise ValueError("annualization_factor must be a positive integer")

    # Coerce to numpy, drop NaN
    if isinstance(returns, pd.Series):
        ret = returns.dropna().values.astype(float)
    else:
        ret = np.asarray(returns, dtype=float)
        ret = ret[~np.isnan(ret)]

    if len(ret) < window + 2:
        return float("nan")

    # Rolling Sharpe series (annualized)
    ret_s = pd.Series(ret)
    _min_p = min_periods if min_periods is not None else window
    roll = ret_s.rolling(window=window, min_periods=_min_p)
    z_series = (roll.mean() / roll.std()) * np.sqrt(annualization_factor)
    z = z_series.values.astype(float)
    z = z[np.isfinite(z)]  # drop NaN and ±inf (e.g. from zero-variance windows)

    N = len(z)
    if N < 3:
        return float("nan")

    # Set bandwidth to window; acovf requires nlag < nobs - 1
    L = window
    if N <= L + 1:
        min_obs = 2 * window + 2
        warnings.warn(
            f"Insufficient data: need at least {min_obs} observations "
            f"(~{min_obs / annualization_factor:.1f} years); returning NaN.",
            UserWarning,
            stacklevel=2,
        )
        return float("nan")

    z_bar = float(np.mean(z))

    # Newey-West long-run variance
    lrv = _newey_west_lrv(z, L)

    # HAC standard error of the mean
    hac_se = np.sqrt(lrv / N)
    if hac_se == 0.0:
        return float("nan")

    t_stat = float((z_bar - benchmark_sr) / hac_se)
    if probability:
        return float(norm.cdf(t_stat))

    return t_stat


def _newey_west_lrv(z: np.ndarray, L: int) -> float:
    """Newey-West (1987) Bartlett-kernel long-run variance with bandwidth ``L``."""
    gammas = acovf(z, nlag=L, fft=True, demean=True)
    lrv = gammas[0]
    for k in range(1, L + 1):
        weight = 1.0 - k / (L + 1.0)
        lrv += 2.0 * weight * gammas[k]
    return max(float(lrv), 0.0)


def _newey_west_heuristic_bandwidth(N: int) -> int:
    """Newey-West heuristic bandwidth: ``L = floor(4*(N/100)**(2/9))``, fixed in N."""
    if N < 3:
        return 1
    L = int(np.floor(4.0 * (N / 100.0) ** (2.0 / 9.0)))
    return max(1, min(L, N - 1))


def _andrews_ar1_bandwidth(z: np.ndarray) -> int:
    """Andrews (1991) AR(1) plug-in bandwidth — adapts to series persistence."""
    N = len(z)
    if N < 3:
        return 1

    z_dm = z - z.mean()

    # OLS AR(1): regress z_dm[1:] on z_dm[:-1]
    denom = float(z_dm[:-1] @ z_dm[:-1])
    if denom == 0.0:
        return 1
    rho = float(z_dm[:-1] @ z_dm[1:]) / denom
    rho = float(np.clip(rho, -0.99, 0.99))

    # Andrews (1991) Table 1, Bartlett kernel
    a1 = 4.0 * rho**2 / (1.0 - rho**2) ** 2
    L = int(np.floor(1.1447 * (a1 * N) ** (1.0 / 3.0)))

    return max(1, min(L, N - 1))


if __name__ == "__main__":


    def _sharpe(r: pd.Series, ann: int = 252) -> float:
        return float(r.mean() / r.std() * np.sqrt(ann))

    N = 3000  # ~12y of daily obs

    # ---- Pair A: smooth drift vs multi-year regime switching ----
    rng = np.random.default_rng(0)

    a_consistent = pd.Series(rng.normal(0.0008, 0.01, N))

    rng = np.random.default_rng(1)
    regime = np.zeros(N)
    regime_len = 504  # ~2y on / ~2y off
    for i, start in enumerate(range(0, N, regime_len)):
        if i % 2 == 0:
            regime[start:start + regime_len] = 0.0016  # 2x drift when "on"
    a_episodic = pd.Series(regime + rng.normal(0.0, 0.01, N))

    # ---- Pair B: lower-Sharpe pair, same idea ----
    rng = np.random.default_rng(2)
    b_consistent = pd.Series(rng.normal(0.0004, 0.01, N))

    rng = np.random.default_rng(3)
    regime = np.zeros(N)
    for i, start in enumerate(range(0, N, regime_len)):
        if i % 2 == 0:
            regime[start:start + regime_len] = 0.0008
    b_episodic = pd.Series(regime + rng.normal(0.0, 0.01, N))

    cases = [
        ("Pair A consistent (smooth drift)", a_consistent),
        ("Pair A episodic   (2y on/off)   ", a_episodic),
        ("Pair B consistent (smooth drift)", b_consistent),
        ("Pair B episodic   (2y on/off)   ", b_episodic),
    ]

    print(f"{'case':38s}   {'SR':>6s}   {'SSR':>7s}")
    print("-" * 60)
    for label, series in cases:
        sr = _sharpe(series)
        ssr = sharpe_stability_ratio(series)
        print(f"{label:38s}   {sr:6.3f}   {ssr:7.3f}")