"""
Functions for calculating the hedge ratios of a panel of returns with respect to a
single return.
"""
import warnings
import numpy as np
import pandas as pd
from typing import List, Tuple
import statsmodels.api as sm
from statsmodels.regression.linear_model import RegressionResults
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.management.simulate import make_qdf
from macrosynergy.management.utils import (
reduce_df,
_map_to_business_day_frequency,
standardise_dataframe,
)
import matplotlib.pyplot as plt
def date_alignment(
    unhedged_return: pd.Series, benchmark_return: pd.Series
) -> Tuple[pd.Timestamp, pd.Timestamp]:
    """
    Find the window of dates shared by two return series.

    The endogenous and exogenous samples must cover the same timestamps
    throughout the re-estimation calculation, so the window runs from the later
    of the two first index entries to the earlier of the two last index entries.
    Only the first and last index entries are inspected; the indexes are not
    sorted or otherwise validated here.

    Parameters
    ----------
    unhedged_return : ~pandas.Series
        the return series of the asset that is being hedged.
    benchmark_return : ~pandas.Series
        the return series of the asset being used to hedge against the main asset.

    Returns
    -------
    ~pandas.Timestamp, ~pandas.Timestamp
        the shared start and end date across the two series.
    """
    first_unhedged = unhedged_return.index[0]
    last_unhedged = unhedged_return.index[-1]
    first_benchmark = benchmark_return.index[0]
    last_benchmark = benchmark_return.index[-1]

    # Latest of the two starting points.
    start_date = first_unhedged if first_unhedged > first_benchmark else first_benchmark
    # Earliest of the two end points.
    end_date = last_benchmark if last_unhedged > last_benchmark else last_unhedged

    return start_date, end_date
def hedge_calculator(
    unhedged_return: pd.Series,
    benchmark_return: pd.Series,
    rdates: List[pd.Timestamp],
    meth: str = "ols",
    half_life: int = 21 * 12,
    min_obs: int = 24,
    max_obs: int = 1000,
) -> pd.DataFrame:
    """
    Calculate the hedge ratios of a single cross-section against the benchmark.

    Both series are trimmed to their first/last valid observation and truncated
    to their common date window. On every re-estimation date that lies strictly
    after the ``(min_obs + 1)``-th date of that window, the unhedged return is
    regressed on a constant and the benchmark return; the slope is the hedge
    ratio. The ratios are then mapped onto the daily dates of the unhedged
    series, forward filled and shifted by one row, so that a ratio estimated on
    date ``d`` is first applied on the following date.

    Parameters
    ----------
    unhedged_return : ~pandas.Series
        the return series of the asset that is being hedged. Its index must be
        named ``'real_date'`` because the ratios are merged back on that column.
    benchmark_return : ~pandas.Series
        the return series of the asset being used to hedge against the main asset.
    rdates : List[~pandas.Timestamp]
        the dates controlling the frequency of re-estimation.
    meth : str
        method to estimate the hedge ratio. Valid options are ``'ols'`` for ordinary
        least squares and ``'twls'`` for time-weighted least squares.
    half_life: int
        half-life of the exponential decay function used to calculate the time weights
        when ``meth='twls'``: an observation ``k`` rows before the re-estimation
        date receives weight ``2 ** (-k / half_life)``. Must be positive.
    min_obs : int
        a hedge ratio will only be computed if the number of days has surpassed the
        integer held by the parameter.
    max_obs : int
        look-back limit of the estimation sample. The default value is 1000. Note
        that the limit is applied to positions in ``rdates``: the sample for the
        re-estimation date at position ``i`` starts at ``rdates[max(0, i - max_obs)]``.

    Returns
    -------
    ~pandas.DataFrame
        dataframe with columns ``'real_date'`` and ``'value'`` holding the hedge
        ratios for the respective cross-section. Because of the one-row shift the
        first row's value is NaN. The frame is empty if the aligned sample holds
        ``min_obs`` observations or fewer.

    Raises
    ------
    ValueError
        if ``meth`` is unknown, ``min_obs < 1``, ``max_obs < min_obs`` or, for
        ``meth='twls'``, ``half_life`` is not positive.
    """
    # Input validation
    if meth not in ("ols", "twls"):
        raise ValueError(f"meth must be 'ols' or 'twls', got {meth!r}")
    if min_obs < 1:
        raise ValueError(f"min_obs must be >= 1, got {min_obs}")
    if max_obs < min_obs:
        raise ValueError(f"max_obs ({max_obs}) must be >= min_obs ({min_obs})")
    if meth == "twls" and not half_life > 0:
        # A zero or negative half-life yields infinite / inverted decay weights.
        raise ValueError(f"half_life must be > 0 when meth='twls', got {half_life}")

    benchmark_return = benchmark_return[
        benchmark_return.first_valid_index() : benchmark_return.last_valid_index()
    ]
    unhedged_return = unhedged_return[
        unhedged_return.first_valid_index() : unhedged_return.last_valid_index()
    ]

    s_date, e_date = date_alignment(
        unhedged_return=unhedged_return, benchmark_return=benchmark_return
    )
    unhedged_return = unhedged_return.truncate(before=s_date, after=e_date)
    benchmark_return = benchmark_return.truncate(before=s_date, after=e_date)

    # The date series will be adjusted to each cross-section. Daily dates each return
    # series is defined over.
    date_series = unhedged_return.index

    # Too short a sample to ever estimate a ratio: hand back an empty frame with
    # the regular output schema instead of failing on the positional lookup below.
    if len(date_series) <= min_obs:
        return pd.DataFrame(
            {
                "real_date": pd.DatetimeIndex([]),
                "value": np.array([], dtype="float64"),
            }
        )

    df_ur = unhedged_return.to_frame(name="returns")
    df_ur = df_ur.reset_index()

    # Access the minimum date from the adjusted series: having aligned the unhedged asset
    # and the benchmark return. Both series will be defined over the same timestamps.
    min_obs_date = date_series[min_obs]

    # Storage dataframe defined over the re-balancing dates.
    rdates_idx = pd.DatetimeIndex(rdates)
    df_hrat = pd.DataFrame(data=np.nan, index=rdates_idx, columns=["value"])

    for pos, d in enumerate(rdates_idx):
        if not d > min_obs_date:
            continue

        # NOTE(review): the look-back is counted in re-estimation dates, not in
        # observations as the parameter name suggests - confirm this is intended.
        curr_start_date: pd.Timestamp = rdates_idx[max(0, pos - max_obs)]

        # Inclusive of the re-estimation date.
        yvar = unhedged_return.loc[curr_start_date:d]
        xvar = benchmark_return.loc[curr_start_date:d]

        if meth == "ols":
            weights = np.ones(yvar.shape[0])
        else:
            # Exponential decay, reversed so the latest observation has weight 1.
            weights = np.power(2, -np.arange(yvar.shape[0]) / half_life)[::-1]

        betas = weighted_least_squares(
            X=np.column_stack((np.ones(xvar.shape[0]), xvar)),
            y=yvar,
            weights=weights,
        )
        # betas[0] is the intercept, betas[1] the slope on the benchmark return.
        df_hrat.loc[d] = betas[1]

    # Any dates prior to the minimum observation which would be classified by NaN values
    # remove from the DataFrame.
    df_hrat = df_hrat.dropna(axis=0, how="all")
    df_hrat.index.name = "real_date"
    df_hrat = df_hrat.reset_index(level=0)

    # Merge to convert to the re-estimation frequency. The intermediary dates, daily
    # business days between re-estimation dates, will be populated with np.nan values.
    df_hr = df_ur.merge(df_hrat, on="real_date", how="left")
    df_hr = df_hr.drop("returns", axis=1)
    df_hr = df_hr.ffill()

    # Accounts for the application of the minimum number of observations required and
    # merging the two DataFrames. Drop the np.nan values prior to the application of the
    # shift (able to validate the logic).
    df_hr = df_hr.dropna(axis=0, how="any")
    df_hr = df_hr.set_index("real_date", drop=True).shift(1).reset_index(level=0)

    return df_hr
def adjusted_returns(
    benchmark_return: pd.Series, df_hedge: pd.DataFrame, dfw: pd.DataFrame
) -> pd.DataFrame:
    """
    Compute the hedged (adjusted) returns for every cross-section in the panel.

    The hedge ratio of each cross-section is multiplied by the benchmark return
    and the product is subtracted from the cross-section's own return. For
    instance, if using US Equity to hedge Australia FX: AUD_FXXR_NSA_H =
    AUD_FXXR_NSA - HR_AUD * USD_EQXR_NSA.

    Parameters
    ----------
    benchmark_return : ~pandas.Series
        the return series of the asset being used to hedge against the main asset.
    df_hedge : ~pandas.DataFrame
        standardised dataframe with the hedge ratios. The columns 'real_date',
        'cid' and 'value' are read.
    dfw : ~pandas.DataFrame
        pivoted dataframe of the relevant returns.

    Returns
    -------
    ~pandas.DataFrame
        long dataframe of adjusted returns with the columns 'real_date', 'cid'
        and 'value'.
    """
    ratios_wide = df_hedge.pivot(index="real_date", columns="cid", values="value")
    n_assets = len(ratios_wide.columns)

    # One copy of the benchmark return per hedged asset, laid out column-wise so
    # the frame lines up with the pivoted hedge ratios.
    benchmark_matrix = np.tile(
        benchmark_return.to_numpy().reshape(-1, 1), (1, n_assets)
    )
    benchmark_wide = pd.DataFrame(
        data=benchmark_matrix,
        index=benchmark_return.index,
        columns=ratios_wide.columns,
    )

    # Return of the hedge leg, then the return net of that leg.
    hedge_leg = ratios_wide.multiply(benchmark_wide)
    adj_rets = dfw - hedge_leg

    # Carry a categorical 'cid' dtype over to the output columns.
    if isinstance(df_hedge["cid"].dtype, pd.CategoricalDtype):
        adj_rets.columns = pd.Categorical(adj_rets.columns)

    stacked = adj_rets.stack().reset_index(name="value")
    stacked.columns = ["real_date", "cid", "value"]

    return stacked
def return_beta(
    df: QuantamentalDataFrame,
    xcat: str = None,
    cids: List[str] = None,
    benchmark_return: str = None,
    start: str = None,
    end: str = None,
    blacklist: dict = None,
    meth: str = "ols",
    oos: bool = True,
    refreq: str = "M",
    min_obs: int = 24,
    max_obs: int = 1000,
    hedged_returns: bool = False,
    ratio_name: str = "_HR",
    hr_name: str = "H",
) -> QuantamentalDataFrame:
    """
    Estimate sensitivities (betas) of return category with respect to single return.

    Parameters
    ----------
    df : QuantamentalDataFrame
        standardized DataFrame with the necessary columns: 'cid', 'xcat', 'real_date'
        and 'value'.
    xcat : str
        return category based on the type of positions that are to be hedged.
    cids : List[str]
        cross-sections of the returns for which hedge ratios are to be calculated.
        Default is all that are available in the dataframe for `xcat`. The list
        passed in is never modified.
    benchmark_return : str
        ticker of return of the hedge asset or basket. This is a single series, e.g.
        U.S. equity index returns ("USD_EQXR_NSA").
    start : str
        earliest date in ISO format. Default is None: earliest date in df is used.
    end : str
        latest date in ISO format. Default is None: latest date in df is used.
    blacklist : dict
        cross-sections with date ranges that should be excluded from the sample of data
        used for estimating hedge ratios. The estimated ratios during blacklist periods will
        be set equal to the last valid estimate.
    meth : str
        method used to estimate hedge ratio. The value is passed on to
        `hedge_calculator`, which accepts 'ols' (ordinary least squares, default)
        and 'twls' (time-weighted least squares).
    oos : bool
        if True (default) hedge ratios are calculated out-of-sample, i.e. for the period
        following the estimation period at the given re-estimation frequency.
        Currently not implemented. So will always be out of sample.
    refreq : str
        re-estimation frequency. This is period after which hedge ratios are re-
        estimated. The re-estimation is conducted at the end of the period and used as hedge
        ratio for all days of the following period. Re-estimation can have weekly, monthly,
        and quarterly frequency with the notations 'W', 'M', and 'Q' respectively. The
        default frequency is monthly.
    min_obs : int
        the minimum number of observations required in order to estimate a hedge ratio.
        The default value is 24 days. The permissible minimum is 10.
    max_obs : int
        the maximum number of latest observations allowed in order to estimate a hedge
        ratio. The default value is 1000.
    hedged_returns : bool
        If True the function appends the hedged returns to the dataframe of hedge
        ratios. Default is False.
    ratio_name : str
        hedge ratio label that will be appended to the category name. The default is
        "_HR". For instance, 'xcat' + "_HR".
    hr_name : str
        label used to distinguish the hedged returns in the DataFrame. The label is
        appended to the category being hedged. The default is "H".

    Returns
    -------
    QuantamentalDataFrame
        DataFrame with hedge ratio estimates that update at the chosen re-estimation
        frequency. Additionally, the dataframe can include the hedged returns if the
        parameter `hedged_returns` has been set to True.

    Raises
    ------
    ValueError
        if the benchmark ticker or `xcat` is not in the dataframe, `xcat` is not a
        string, `min_obs` is not an integer of at least 10, or `max_obs` is not an
        integer of at least `min_obs`.


    .. note::
        Each cross-section of this category uses the same hedge asset/basket.

    .. note::
        A return beta is the estimated sensitivity of the main return with respect to the
        asset used for hedging. The ratio is recorded for the period after the estimation
        sample up until the next re-estimation date.
    """
    # Value checks
    df = QuantamentalDataFrame(df)
    all_tix = df.list_tickers()

    bm_error = f"Benchmark return ticker {benchmark_return} is not in the DataFrame."
    if benchmark_return not in all_tix:
        raise ValueError(bm_error)

    error_xcat = (
        f"The field, xcat, must be a string but received <{type(xcat)}>. Only"
        f" a single category is used to hedge against the main asset."
    )
    if not isinstance(xcat, str):
        raise ValueError(error_xcat)

    available_categories = df["xcat"].unique()
    error_hedging = (
        f"The return category used to be hedged, {xcat}, is "
        f"not defined in the dataframe."
    )
    if xcat not in list(available_categories):
        raise ValueError(error_hedging)

    min_obs_error = (
        "The number of minimum observations required to compute a hedge "
        "ratio is 10 business days, or two weeks. Please provide an integer "
        "value greater than 10."
    )
    if not isinstance(min_obs, int) or min_obs < 10:
        raise ValueError(min_obs_error)

    if not isinstance(max_obs, int) or max_obs < min_obs:
        raise ValueError("`max_obs` must be an integer >= `min_obs`.")

    # Resolve the panel. `None` means every cross-section that has data for
    # `xcat`; otherwise work on a private copy so the caller's list is untouched.
    cids_defaulted = cids is None
    if cids_defaulted:
        cids = list(df.loc[df["xcat"] == xcat, "cid"].unique())
    elif isinstance(cids, str):
        cids = [cids]
    else:
        cids = list(cids)

    # Information on hedge return and potential panel adjustment.
    post_fix = benchmark_return.split("_")
    xcat_hedge = "_".join(post_fix[1:])
    cid_hedge = post_fix[0]

    # A return cannot be hedged against itself.
    if xcat_hedge == xcat and cid_hedge in cids:
        cids.remove(cid_hedge)
        warnings.warn(
            f"Return to be hedged for cross section {cid_hedge} is the hedge "
            f"return and has been removed from the panel."
        )

    # Wide time series DataFrame of unhedged and benchmark returns.
    # --- Time series DataFrame of unhedged returns.
    dfp = reduce_df(
        df, xcats=[xcat], cids=cids, start=start, end=end, blacklist=blacklist
    )
    dfp_w = dfp.pivot(index="real_date", columns="cid", values="value")
    dfp_w = dfp_w.dropna(axis=0, how="all")

    # --- Time series DataFrame of benchmark return for relevant dates.
    # The asset being used as the hedge could only be defined over a shorter time-period.
    dfh = reduce_df(
        df,
        xcats=[xcat_hedge],
        cids=cid_hedge,
        start=dfp_w.index[0],
        end=dfp_w.index[-1],
    )
    dfh_w = dfh.pivot(index="real_date", columns="cid", values="value")
    dfh_w.columns = ["hedge"]

    # --- Merge time series and calculate re-balancing dates.
    if isinstance(dfp_w.columns.dtype, pd.CategoricalDtype):
        dfp_w.columns = dfp_w.columns.astype("object")
    dfw = pd.merge(dfp_w, dfh_w, how="inner", on="real_date")
    br = dfw["hedge"]

    # Cross-sections picked up by default may have no data left after the
    # start/end/blacklist filters; only keep those present in the wide frame.
    if cids_defaulted:
        cids = [c for c in cids if c in dfw.columns]

    rf = _map_to_business_day_frequency(freq=refreq, valid_freqs=["W", "M", "Q"])
    dates_re = dfw.asfreq(rf).index
    if isinstance(dates_re, pd.DatetimeIndex):
        dates_re: List[pd.Timestamp] = dates_re.to_list()

    # Cross-section-wise hedge ratio estimation.
    aggregate = []
    for c in cids:
        df_hr = hedge_calculator(
            unhedged_return=dfw[c],
            benchmark_return=br,
            rdates=dates_re,
            meth=meth,
            min_obs=min_obs,
            max_obs=max_obs,
        )
        df_hr = QuantamentalDataFrame.from_long_df(df_hr, cid=c, xcat=xcat + ratio_name)
        aggregate.append(df_hr)

    df_hedge = QuantamentalDataFrame.from_qdf_list(aggregate)

    if hedged_returns:
        df_hreturn = adjusted_returns(df_hedge=df_hedge, dfw=dfw, benchmark_return=br)
        df_hreturn = QuantamentalDataFrame.from_long_df(
            df=df_hreturn, xcat=xcat + "_" + hr_name
        )
        df_hedge = df_hedge.update_df(df_hreturn)

    return standardise_dataframe(df_hedge)
def beta_display(df_hedge: pd.DataFrame, subplots: bool = False, hr_name: str = "H"):
    """
    Plot the hedge ratios of every cross-section in the panel.

    Rows whose category ends in the ``"_" + hr_name`` token (the hedged returns)
    are left out; the remaining rows are pivoted to one column per cross-section
    and drawn as line charts. Assumes a single category is used to hedge the
    primary asset.

    Parameters
    ----------
    df_hedge : ~pandas.DataFrame
        DataFrame with hedge ratios. The columns 'xcat', 'cid', 'real_date' and
        'value' are read.
    subplots : bool
        matplotlib parameter to determine if each hedging series is displayed on
        separate subplots.
    hr_name : str
        label used to distinguish the hedged returns in the DataFrame. Comparable to
        return_beta() method, the default is "H".
    """
    # True for every row that is not a hedged-return row.
    is_ratio_row = [
        category.split("_")[-1] != hr_name for category in df_hedge["xcat"]
    ]
    ratios_long = df_hedge[is_ratio_row]

    ratios_wide = ratios_long.pivot(index="real_date", columns="cid", values="value")
    ratios_wide.plot(subplots=subplots, title="Hedging Ratios.", legend=True)

    plt.xlabel("real_date, years")
    plt.show()
def weighted_least_squares(X, y, weights):
    """
    Find the coefficient vector ``beta`` that minimizes the weighted
    sum of squared residuals::

        sum_i weights[i] * (y[i] - X[i, :] @ beta) ** 2

    The problem is solved as an ordinary least-squares problem after scaling
    every row of ``X`` and every entry of ``y`` by the square root of its weight.

    Parameters
    ----------
    X : array-like of shape (n, p)
        Design matrix, where ``n`` is the number of observations and
        ``p`` is the number of predictors (including any intercept column).
    y : array-like of shape (n,)
        Response vector.
    weights : array-like of shape (n,)
        Non-negative weight for each observation. A higher weight gives
        that observation more influence on the fitted coefficients.

    Returns
    -------
    beta : ndarray of shape (p,)
        Estimated coefficient vector.

    Raises
    ------
    ValueError
        if ``X`` is not two-dimensional, if ``y`` or ``weights`` do not hold
        exactly one entry per row of ``X``, or if any weight is negative.
    """
    # Coerce up front so lists and pandas objects behave like plain arrays
    # (a pandas Series does not support the 2-D indexing used below).
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    weights = np.asarray(weights, dtype=float)

    if X.ndim != 2:
        raise ValueError(f"X must be two-dimensional, got shape {X.shape}")
    n_obs = X.shape[0]
    if y.shape != (n_obs,):
        raise ValueError(f"y must have shape ({n_obs},), got {y.shape}")
    if weights.shape != (n_obs,):
        raise ValueError(f"weights must have shape ({n_obs},), got {weights.shape}")
    if np.any(weights < 0):
        raise ValueError("weights must be non-negative")

    W = np.sqrt(weights)  # square-root weights
    X_w = X * W[:, np.newaxis]  # scale each row of X
    y_w = y * W  # scale y the same way

    beta, _, _, _ = np.linalg.lstsq(X_w, y_w, rcond=None)
    return beta
if __name__ == "__main__":
    # Simulation set-up: one row per cross-section with
    # [earliest, latest, mean_add, sd_mult]. Emerging Market Asian countries
    # first, followed by the US, which is used as the hedging asset.
    cid_specs = {
        "IDR": ["2010-01-01", "2020-12-31", 0.5, 2],
        "INR": ["2011-01-01", "2020-11-30", 0, 1],
        "KRW": ["2012-01-01", "2020-11-30", -0.2, 0.5],
        "MYR": ["2013-01-01", "2020-09-30", -0.2, 0.5],
        "PHP": ["2002-01-01", "2020-09-30", -0.1, 2],
        "USD": ["2000-01-01", "2022-03-14", 0, 1.25],
    }
    # One row per category with
    # [earliest, latest, mean_add, sd_mult, ar_coef, back_coef].
    xcat_specs = {
        "FXXR_NSA": ["2010-01-01", "2020-10-30", 1, 2, 0.9, 1],
        "GROWTHXR_NSA": ["2012-01-01", "2020-10-30", 1, 2, 0.9, 1],
        "INFLXR_NSA": ["2013-01-01", "2020-10-30", 1, 2, 0.8, 0.5],
        "EQXR_NSA": ["2010-01-01", "2022-03-14", 0.5, 2, 0, 0.2],
    }

    cids = list(cid_specs)
    xcats = list(xcat_specs)

    df_cids = pd.DataFrame(
        index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
    )
    for cid, spec in cid_specs.items():
        df_cids.loc[cid] = spec

    df_xcats = pd.DataFrame(
        index=xcats,
        columns=["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"],
    )
    for category, spec in xcat_specs.items():
        df_xcats.loc[category] = spec

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    black = {"IDR": ["2010-01-01", "2014-01-04"], "INR": ["2010-01-01", "2013-12-31"]}

    # Example 1: hedge the equity returns of the Asian panel against the
    # S&P500 ("USD_EQXR_NSA"), re-estimated weekly, and plot the ratios.
    xcat_hedge = "EQXR_NSA"
    benchmark_return = "USD_EQXR_NSA"
    df_hedge = return_beta(
        df=dfd,
        xcat=xcat_hedge,
        cids=["IDR", "INR", "KRW", "MYR", "PHP"],
        benchmark_return=benchmark_return,
        start="2010-01-01",
        end="2020-10-30",
        blacklist=black,
        meth="ols",
        oos=True,
        refreq="w",
        min_obs=24,
        hedged_returns=True,
    )
    print(df_hedge)
    beta_display(df_hedge=df_hedge, subplots=False)

    # Example 2: beta of the USD FX return with respect to the US equity return
    # (long position in S&P500 or the Nasdaq, hedged with US FX), re-estimated
    # monthly.
    xcats = "FXXR_NSA"
    cids = ["USD"]
    benchmark_return = "USD_EQXR_NSA"
    xcat_hedge_two = return_beta(
        df=dfd,
        xcat=xcats,
        cids=cids,
        benchmark_return=benchmark_return,
        start="2010-01-01",
        end="2020-10-30",
        blacklist=black,
        meth="ols",
        oos=True,
        refreq="m",
        min_obs=24,
    )
    print(xcat_hedge_two)