"""
"Naive" profit-and-loss (PnL) calculations with basic signal options, disregarding transaction costs.
"""
from dataclasses import dataclass
import warnings
from itertools import product
from typing import Dict, List, Optional, Tuple, Union
from numbers import Number
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import seaborn as sns
from macrosynergy import PYTHON_3_8_OR_LATER
from macrosynergy.management.simulate import make_qdf
from macrosynergy.management.utils import (
reduce_df,
update_df,
_map_to_business_day_frequency,
)
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.panel.make_zn_scores import make_zn_scores
from macrosynergy.pnl.sharpe_stability_ratio import sharpe_stability_ratio
from macrosynergy.signal import SignalReturnRelations
[docs]class NaivePnL:
"""
Computes and collects illustrative PnLs with limited signal options and disregarding
transaction costs.
Parameters
----------
df : ~pandas.Dataframe
standardized DataFrame with the following necessary columns: 'cid', 'xcat',
'real_date' and 'value'.
ret : str
return category.
sigs : List[str]
signal categories. Able to pass in multiple possible signals to the Class'
constructor and their respective vintages will be held on the instance's DataFrame.
The signals can subsequently be referenced through the self.make_pnl() method which
receives a single signal per call.
cids : List[str]
cross sections that are traded. Default is all in the dataframe.
bms : str, List[str]
list of benchmark tickers for which correlations are displayed against PnL
strategies.
start : str
earliest date in ISO format. Default is None and earliest date in df is used.
end : str
latest date in ISO format. Default is None and latest date in df is used.
blacklist : dict
cross-sections with date ranges that should be excluded from the dataframe.
"""
def __init__(
    self,
    df: pd.DataFrame,
    ret: str,
    sigs: List[str],
    cids: List[str] = None,
    bms: Union[str, List[str]] = None,
    start: str = None,
    end: str = None,
    blacklist: dict = None,
):
    """
    Prepare the return/signal panel and, if requested, the benchmark series.

    The input is narrowed to the return category, the signal categories and the
    categories of any benchmark tickers before it is wrapped in a
    QuantamentalDataFrame. The reduced panel is stored on ``self.df``; the
    wider frame that may still hold benchmark categories is stored on
    ``self.dfd``.
    """
    assert isinstance(ret, str), "The return category expects a single <str>."
    self.ret = ret

    qdf_cols = ["cid", "xcat", "real_date", "value"]
    panel_xcats = [ret] + sigs

    # Only the categories that are actually used are kept, so that the
    # QuantamentalDataFrame is built from as few rows as possible.
    wanted_xcats = list(panel_xcats)
    if bms is not None:
        for bm in ([bms] if isinstance(bms, str) else bms):
            # Benchmark tickers are "<cid>_<xcat>"; keep the category part.
            wanted_xcats.append(bm.split("_", 1)[1])

    df = df[df["xcat"].isin(wanted_xcats)]
    df = QuantamentalDataFrame(df[qdf_cols])
    self._as_categorical = df.InitializedAsCategorical

    # Frame that hosts the benchmark categories.
    self.dfd = df

    # The reduced frame may exclude the benchmarks; those stay reachable
    # through self.dfd.
    self.df, self.xcats, self.cids = reduce_df(
        df[qdf_cols], panel_xcats, cids, start, end, blacklist, out_all=True
    )
    self.sigs = sigs
    self.tickers = [
        cid + "_" + xcat for cid, xcat in product(self.cids, self.xcats)
    ]

    # Book-keeping for the PnLs produced by make_pnl().
    self.pnl_names = []
    self.signal_df = {}
    self.black = blacklist

    self.bm_bool = isinstance(bms, (str, list))
    if self.bm_bool:
        bm_tickers = [bms] if isinstance(bms, str) else bms
        self.dfd = reduce_df(self.dfd, start=start, end=end, blacklist=blacklist)
        # The benchmarks are read from the original values: a negated signal
        # has not been applied to this frame.
        self._bm_dict = self.add_bm(
            df=self.dfd, bms=bm_tickers, tickers=self.tickers
        )

    self.pnl_params = {}
    self.start = start
    self.end = end
    self.blacklist = blacklist
def make_pnl(
    self,
    sig: str,
    sig_op: str = "zn_score_pan",
    sig_add: float = 0,
    sig_mult: float = 1,
    sig_neg: bool = False,
    pnl_name: Optional[str] = None,
    rebal_freq: str = "daily",
    rebal_slip: int = 0,
    vol_scale: Optional[float] = None,
    leverage: float = 1.0,
    min_obs: int = 261,
    iis: bool = True,
    sequential: bool = True,
    neutral: str = "zero",
    thresh: float = None,
    entry_barrier: float = None,
    exit_barrier: float = None,
    winsorize_first: bool = False,
    normalized_weights: bool = False,
):
    """
    Calculate daily PnL and add to class instance.

    Parameters
    ----------
    sig : str
        name of raw signal that is basis for positioning. The signal is assumed to
        be recorded at the end of the day prior to position taking.
    sig_op : str
        signal transformation options; must be one of 'zn_score_pan', 'zn_score_cs',
        'binary', or 'raw'. The default is 'zn_score_pan'. See notes below for
        further details.
    sig_add : float
        add a constant to the signal after initial transformation. This allows to
        give PnLs a long or short bias relative to the signal score. Default is 0.
    sig_mult : float
        multiply a constant to the signal after initial transformation and after
        sig_add has been added too. Default is 1.
    sig_neg : bool
        if True the PnL is based on the negative value of the transformed signal.
        Default is False.
    pnl_name : str
        name of the PnL to be generated and stored. Default is None, i.e. a default
        name is given. The default name will be: 'PNL_<signal name>[_<NEG>]', with the
        last part added if sig_neg has been set to True. Previously calculated PnLs of
        the same name will be overwritten. This means that if a set of PnLs are to be
        compared, each PnL requires a distinct name.
    rebal_freq : str
        re-balancing frequency for positions according to signal must be one of
        'daily' (default), 'weekly', 'monthly', 'quarterly', or 'annual'. The
        re-balancing is only concerned with the signal value on the re-balancing date
        which is delimited by the frequency chosen.
    rebal_slip : int
        re-balancing slippage in days. Default is 0. A value of 1 means that it
        takes one day to re-balance the position and that the new positions produce
        PnL from the second day after the signal has been recorded.
    vol_scale : float
        ex-post scaling of PnL to annualized volatility given. This is for
        comparative visualization and not out-of-sample. Default is None.
    leverage : float
        leverage applied to the raw signal when a `vol_scale` is not defined.
        Default is 1.0, i.e., position size is 1 or 100% of implied risk capital.
    min_obs : int
        the minimum number of observations required to calculate zn_scores. Default
        is 261.
    iis : bool
        if True (default) zn-scores are also calculated for the initial sample
        period defined by min_obs, on an in-sample basis, to avoid losing history.
    sequential : bool
        if True (default) score parameters (neutral level and standard deviations)
        are estimated sequentially with concurrently available information only.
    neutral : str
        method to determine neutral level. Default is 'zero'. Alternatives are
        'mean' and "median".
    thresh : float
        threshold value beyond which scores are winsorized, i.e. contained at that
        threshold. Therefore, the threshold is the maximum absolute score value that the
        function is allowed to produce. The minimum threshold is one standard deviation.
        Default is no threshold.
    entry_barrier : float
        Threshold in terms of absolute signal value to enter a position in a binary
        strategy. This prevents binary strategies from excessive position flipping.
        Default is None. Barriers are only applied when both `entry_barrier` and
        `exit_barrier` are set; if only one is passed a warning is issued and
        the barrier is ignored.
    exit_barrier : float
        Threshold in terms of absolute signal value to exit a position in a binary
        strategy. In conjunction with an entry barrier, this determines the
        probability of position liquidations when the sign of the signal does not
        change. The value must not exceed the entry barrier. Default is None.
    winsorize_first : bool
        if True, the signal is winsorized before any signal manipulation is applied.
        Default is False, meaning that the signal is winsorized after all other
        transformations have been applied.
    normalized_weights : bool
        if True, the signal of each cross-section is divided by the number of
        cross-sections with a signal on that date before it is applied to the
        returns. Default is False, meaning that the panel PnL is the plain sum of
        the signal-adjusted returns across all cross-sections.

    Raises
    ------
    TypeError
        if an argument has the wrong type.
    ValueError
        if an argument has an unsupported value.

    Notes
    -----
    When `sig_op = "zn_score_pan"`, raw signals are transformed into zn-scores
    around a neutral value where pooled panel statistics are calculated.

    When `sig_op = "zn_score_cs"`, raw signals are transformed into zn-scores
    around a neutral value where statistics are calculated by cross section alone.

    When `sig_op = "binary"`, transforms signals into uniform long/shorts (1/-1)
    across all cross sections.

    When `sig_op = "raw"`, no transformation is applied to the signal.

    Entry and exit barriers are only applicable when the signal operation is binary.

    See Also
    --------
    macrosynergy.panel.make_zn_scores : compute zn-scores for a panel.
    """
    # A. Argument validation.
    for varx, name, typex in (
        (sig, "sig", str),
        (sig_op, "sig_op", str),
        (sig_add, "sig_add", (Number)),  # Number instead of (float, int)
        (sig_mult, "sig_mult", (Number)),
        (sig_neg, "sig_neg", bool),
        (pnl_name, "pnl_name", (str, type(None))),
        (rebal_freq, "rebal_freq", str),
        (rebal_slip, "rebal_slip", int),
        (vol_scale, "vol_scale", (Number, type(None))),
        (leverage, "leverage", (Number)),
        (min_obs, "min_obs", int),
        (iis, "iis", bool),
        (sequential, "sequential", bool),
        (neutral, "neutral", str),
        (thresh, "thresh", (Number, type(None))),
    ):
        if not isinstance(varx, typex):
            raise TypeError(f"{name} must be a {typex}.")

    error_sig = (
        f"Signal category missing from the options defined on the class: "
        f"{self.sigs}. "
    )
    if sig not in self.sigs:
        raise ValueError(error_sig)

    sig_options = ["zn_score_pan", "zn_score_cs", "binary", "raw"]
    error_sig_method = (
        f"The signal transformation method, {sig_op}, is not one of "
        f"the options specified: {sig_options}."
    )
    if sig_op not in sig_options:
        raise ValueError(error_sig_method)

    freq_params = ["daily", "weekly", "monthly", "quarterly", "annual"]
    freq_error = f"Re-balancing frequency must be one of: {freq_params}."
    if rebal_freq not in freq_params:
        raise ValueError(freq_error)

    if thresh is not None and thresh < 1:
        raise ValueError("thresh must be greater than or equal to one.")

    err_lev = "`leverage` must be a numerical value greater than 0."
    if leverage <= 0:
        raise ValueError(err_lev)

    err_vol = "`vol_scale` must be a numerical value greater than 0."
    if vol_scale is not None and (vol_scale <= 0):
        raise ValueError(err_vol)

    if entry_barrier is not None and exit_barrier is not None:
        if not isinstance(entry_barrier, (float, int)) or not isinstance(
            exit_barrier, (float, int)
        ):
            raise TypeError(
                "Entry and exit barriers must be numerical values >= 0."
            )
        if sig_op != "binary":
            raise ValueError(
                "Entry and exit barriers are only applicable when the signal "
                "operation is binary."
            )
        if entry_barrier <= 0 or exit_barrier < 0 or entry_barrier < exit_barrier:
            raise ValueError(
                "Please ensure that: 0 <= exit_barrier < entry_barrier"
            )
    elif entry_barrier is not None or exit_barrier is not None:
        # The barrier logic further down only runs when both values are set.
        # Say so explicitly instead of dropping the argument without notice.
        warnings.warn(
            "Both `entry_barrier` and `exit_barrier` must be set for barriers to "
            "take effect; the single barrier passed is ignored.",
            stacklevel=2,
        )

    # B. Extract DataFrame of exclusively return and signal categories in time
    # series format.
    dfx = self.df[self.df["xcat"].isin([self.ret, sig])]

    # The threshold is handed to the signal construction only when
    # winsorization is requested up front; otherwise it is applied after the
    # sign / add / multiply adjustments below.
    thresh_sig = thresh if winsorize_first else None
    dfw = self._make_signal(
        dfx=dfx,
        sig=sig,
        sig_op=sig_op,
        min_obs=min_obs,
        iis=iis,
        sequential=sequential,
        neutral=neutral,
        thresh=thresh_sig,
    )

    if sig_neg:
        dfw["psig"] *= -1
        neg = "_NEG"
    else:
        neg = ""

    dfw["psig"] += sig_add
    dfw["psig"] *= sig_mult

    if not winsorize_first:
        self._winsorize(df=dfw["psig"], thresh=thresh)

    # Multi-index DataFrame with a natural minimum lag applied.
    dfw["psig"] = dfw["psig"].groupby(level=0, observed=True).shift(1)
    dfw.reset_index(inplace=True)
    dfw = dfw.rename_axis(None, axis=1)

    dfw = dfw.sort_values(["cid", "real_date"])

    sig_series = self.rebalancing(
        dfw=dfw, rebal_freq=rebal_freq, rebal_slip=rebal_slip
    )
    dfw["sig"] = np.squeeze(sig_series.to_numpy())

    # History-dependent binary PnL: the position taken depends on the
    # previously re-balanced signal as well as on the current raw signal.
    if (
        entry_barrier is not None
        and exit_barrier is not None
        and sig_op == "binary"
    ):
        dfw.rename({"sig": "prev_sig"}, axis=1, inplace=True)
        dfw["psig"] = dfw.apply(
            self._apply_barriers,
            axis=1,
            sig=sig,
            entry_barrier=entry_barrier,
            exit_barrier=exit_barrier,
        )
        dfw["psig"] = dfw.groupby("cid", observed=True)["psig"].shift(1)

        dfw = dfw.sort_values(["cid", "real_date"])

        sig_series = self.rebalancing(
            dfw=dfw, rebal_freq=rebal_freq, rebal_slip=rebal_slip
        )
        dfw["sig"] = np.squeeze(sig_series.to_numpy())

    # The signals are generated across the panel.
    if normalized_weights:
        # Normalize signal weights by the number of available cids for each
        # real_date.
        dfw["sig"] = dfw.groupby("real_date")["sig"].transform(
            lambda x: x / x.count() if x.count() > 0 else x
        )

    dfw["value"] = dfw[self.ret] * dfw["sig"]

    df_pnl = dfw.loc[:, ["cid", "real_date", "value"]]

    # Compute the return across the panel. The returns are still computed daily
    # regardless of the re-balancing frequency potentially occurring weekly or
    # monthly.
    df_pnl_all = df_pnl.groupby(["real_date"]).sum(numeric_only=True)
    df_pnl_all = df_pnl_all[df_pnl_all["value"].cumsum() != 0]

    # Returns are computed for each cross-section and across the panel.
    if df_pnl["cid"].dtype.name == "category":
        df_pnl_all["cid"] = pd.Categorical.from_codes(
            codes=[0] * len(df_pnl_all), categories=["ALL"]
        )
    else:
        df_pnl_all["cid"] = "ALL"

    df_pnl_all = df_pnl_all.reset_index()[df_pnl.columns]

    # Will be inclusive of each individual cross-section's signal-adjusted
    # return and the aggregated panel return.
    pnn = ("PNL_" + sig + neg) if pnl_name is None else pnl_name
    df_pnl = QuantamentalDataFrame.from_long_df(df_pnl, xcat=pnn)
    df_pnl_all = QuantamentalDataFrame.from_long_df(df_pnl_all, xcat=pnn)
    df_pnl = QuantamentalDataFrame.from_qdf_list([df_pnl, df_pnl_all])

    if vol_scale is not None:
        # Ex-post leverage that brings the panel PnL to the requested
        # annualized volatility (261 trading days per year).
        leverage = vol_scale * (df_pnl_all["value"].std() * np.sqrt(261)) ** (-1)
        assert isinstance(leverage, (float, int)), err_lev  # sanity check

    df_pnl["value"] = df_pnl["value"] * leverage

    # Populating the signal dictionary is required for the display methods.
    self.signal_df[pnn] = dfw.loc[:, ["cid", "real_date", "sig"]]

    # A PnL of the same name is overwritten rather than duplicated.
    if pnn in self.pnl_names:
        self.df = self.df[~(self.df["xcat"] == pnn)]
    else:
        self.pnl_names = self.pnl_names + [pnn]

    agg_df = QuantamentalDataFrame.from_qdf_list([self.df, df_pnl[self.df.columns]])
    self.df = agg_df.reset_index(drop=True)
    self.df = QuantamentalDataFrame(
        self.df, _initialized_as_categorical=self._as_categorical
    )

    self.pnl_params[pnn] = PnLParams(
        pnl_name=pnn,
        signal=sig,
        sig_op=sig_op,
        sig_add=sig_add,
        sig_neg=sig_neg,
        rebal_freq=rebal_freq,
        rebal_slip=rebal_slip,
        vol_scale=vol_scale,
        neutral=neutral,
        thresh=thresh,
    )
def make_long_pnl(
    self,
    vol_scale: Optional[float] = None,
    label: Optional[str] = None,
    leverage: float = 1.0,
    normalized_weights: bool = False,
):
    """
    Computes long-only returns which may act as a basis for comparison
    against the signal-adjusted returns. Will take a long-only position in the
    category passed to the parameter 'self.ret'.

    Calling the method again with the same label replaces the earlier long-only
    PnL instead of appending a second copy of it.

    Parameters
    ----------
    vol_scale : float
        ex-post scaling of PnL to annualized volatility given. This is for
        comparative visualization and not out-of-sample, and is applied to the
        long-only position. Default is None.
    label : str
        associated label that will be mapped to the long-only DataFrame. If a
        label is not defined, the default will be the name of the return category.
    leverage : float
        leverage applied to the raw signal when a `vol_scale` is not defined.
        Default is 1.0, i.e., position size is 1 or 100% of implied risk capital.
    normalized_weights : bool
        passed on to `long_only_pnl`; if True the panel return is the mean rather
        than the sum across cross-sections. Default is False.

    Raises
    ------
    TypeError
        if `vol_scale` (when given) or `leverage` (when `vol_scale` is None) is
        not a float or int.
    ValueError
        if the validated value is not greater than 0.
    """
    if vol_scale is not None:
        vol_err = (
            "The parameter `vol_scale` must be a numerical value greater than 0."
        )
        if not isinstance(vol_scale, (float, int)):
            raise TypeError(vol_err)
        elif vol_scale <= 0:
            raise ValueError(vol_err)
    else:
        # Leverage is only used when no volatility target is given.
        err_lev = "`leverage` must be a numerical value greater than 0."
        if not isinstance(leverage, (float, int)):
            raise TypeError(err_lev)
        elif leverage <= 0:
            raise ValueError(err_lev)

    if label is None:
        label = self.ret

    # Remove the panel rows written by an earlier call under the same label.
    # Without this the rows are duplicated and, when the label equals the
    # return category, the old "ALL" rows would be summed into the new PnL.
    if label in self.pnl_names:
        stale = (self.df["xcat"] == label) & (self.df["cid"] == "ALL")
        self.df = self.df[~stale]

    dfx = self.df[self.df["xcat"].isin([self.ret])]

    df_long = self.long_only_pnl(
        dfw=dfx,
        vol_scale=vol_scale,
        label=label,
        leverage=leverage,
        normalized_weights=normalized_weights,
    )

    self.df = QuantamentalDataFrame.from_qdf_list([self.df, df_long])

    if label not in self.pnl_names:
        self.pnl_names = self.pnl_names + [label]

    self.df = self.df.reset_index(drop=True)
def add_bm(self, df: pd.DataFrame, bms: List[str], tickers: List[str]):
    """
    Collect the benchmark return series into a dictionary.

    Parameters
    ----------
    df : ~pandas.DataFrame
        aggregate DataFrame passed into the Class.
    bms : List[str]
        benchmark return tickers, each of the form '<cid>_<xcat>'.
    tickers : List[str]
        the available tickers held in the reduced DataFrame. The reduced DataFrame
        consists exclusively of the signal & return categories.

    Returns
    -------
    dict
        maps each benchmark ticker that has observations to a single-column
        DataFrame indexed by 'real_date'. Benchmarks that are not among
        `tickers` are also merged into `self.df`.
    """
    benchmarks = {}

    for bm in bms:
        # Split on the first underscore only, so the category keeps any
        # further underscores (e.g. an appended "_NEG").
        parts = bm.split("_", maxsplit=1)
        cid, xcat = parts[0], parts[1]

        rows = df[(df["cid"] == cid) & (df["xcat"] == xcat)]
        if rows.shape[0] == 0:
            print(f"{bm} has no observations in the DataFrame.")
            continue

        series = rows.pivot(index="real_date", columns="xcat", values="value")
        series.columns = [bm]
        benchmarks[bm] = series

        if bm not in tickers:
            self.df = update_df(self.df, rows)

    return benchmarks
@staticmethod
def _apply_barriers(row, sig, entry_barrier, exit_barrier):
if abs(row[sig]) >= entry_barrier:
return np.sign(row[sig])
elif (
row["prev_sig"] is None or np.isnan(row["prev_sig"]) or row["prev_sig"] == 0
):
# No position taken last rebalancing period
return 0
elif abs(row[sig]) > exit_barrier and np.sign(row[sig]) == np.sign(
row["prev_sig"]
):
# Position taken and current signal is same as previous rebalanced signal
return np.sign(row[sig])
else:
# Position taken and current signal is different sign from previous rebalanced signal
return 0
@staticmethod
def _make_signal(
dfx: pd.DataFrame,
sig: str,
sig_op: str = "zn_score_pan",
min_obs: int = 252,
iis: bool = True,
sequential: bool = True,
neutral: str = "zero",
thresh: float = None,
):
"""
Helper function used to produce the raw signal that forms the basis for
positioning.
Parameters
----------
dfx : ~pandas.DataFrame
DataFrame defined over the return & signal category.
sig : str
name of the raw signal.
sig_op : str
signal transformation.
min_obs : int
the minimum number of observations required to calculate zn_scores. Default
is 252.
iis : bool
if True (default) zn-scores are also calculated for the initial sample
period defined by min_obs, on an in-sample basis, to avoid losing history.
sequential : bool
if True (default) score parameters are estimated sequentially with
concurrently available information only.
neutral : str
method to determine neutral level.
thresh : float
threshold value beyond which scores are winsorized,
"""
if sig_op == "binary":
dfw = dfx.pivot(index=["cid", "real_date"], columns="xcat", values="value")
dfw["psig"] = np.sign(dfw[sig])
elif sig_op == "raw":
dfw = dfx.pivot(index=["cid", "real_date"], columns="xcat", values="value")
dfw["psig"] = dfw[sig]
else:
panw = 1 if sig_op == "zn_score_pan" else 0
# The re-estimation frequency for the neutral level and standard deviation
# will be the same as the re-balancing frequency. For instance, if the
# neutral level is computed weekly, a material change in the signal will only
# manifest along a similar timeline. Therefore, re-estimation and
# re-balancing frequencies match.
df_ms = make_zn_scores(
dfx,
xcat=sig,
neutral=neutral,
pan_weight=panw,
sequential=sequential,
min_obs=min_obs,
iis=iis,
thresh=thresh,
)
df_ms = df_ms.drop("xcat", axis=1)
df_ms = QuantamentalDataFrame.from_long_df(df_ms, xcat="psig")
dfx_concat = pd.concat([dfx, df_ms])
dfw = dfx_concat.pivot(
index=["cid", "real_date"], columns="xcat", values="value"
)
# Reconstruct the DataFrame to recognise the signal's start date for each
# individual cross-section
dfw_list = []
for c, cid_df in dfw.groupby(level=0, observed=True):
first_date = cid_df.loc[:, "psig"].first_valid_index()
cid_df = cid_df.loc[first_date:, :]
dfw_list.append(cid_df)
return pd.concat(dfw_list)
[docs] @staticmethod
def rebalancing(dfw: pd.DataFrame, rebal_freq: str = "daily", rebal_slip=0):
"""
The signals are calculated daily and for each individual cross-section defined
in the panel. However, re-balancing a position can occur more infrequently than
daily. Therefore, produce the re-balancing values according to the more
infrequent timeline (weekly or monthly).
Parameters
----------
dfw : ~pandas.Dataframe
DataFrame with each category represented by a column and the daily signal is
also included with the column name 'psig'.
rebal_freq : str
re-balancing frequency for positions according to signal must be one of
'daily' (default), 'weekly', 'monthly', 'quarterly', or 'annual'.
rebal_slip : str
re-balancing slippage in days.
Returns
-------
~pandas.Series
will return a pd.Series containing the associated signals according to the
re-balancing frequency.
"""
# The re-balancing days are the first of the respective time-periods because of
# the shift forward by one day applied earlier in the code. Therefore, only
# concerned with the minimum date of each re-balance period.
dfw["year"] = dfw["real_date"].dt.year
if rebal_freq == "annual":
rebal_dates = dfw.groupby(["cid", "year"], observed=True)["real_date"].min()
elif rebal_freq == "quarterly":
dfw["quarter"] = dfw["real_date"].dt.quarter
rebal_dates = dfw.groupby(
["cid", "year", "quarter"],
observed=True,
)["real_date"].min()
elif rebal_freq == "monthly":
dfw["month"] = dfw["real_date"].dt.month
rebal_dates = dfw.groupby(
["cid", "year", "month"],
observed=True,
)["real_date"].min()
elif rebal_freq == "weekly":
dfw["week"] = dfw["real_date"].apply(lambda x: x.week)
rebal_dates = dfw.groupby(["cid", "year", "week"], observed=True)[
"real_date"
].min()
elif rebal_freq == "daily":
rebal_dates = dfw.groupby(["cid", "year", "real_date"], observed=True)[
"real_date"
].min()
else:
raise ValueError(
"Re-balancing frequency must be one of: daily, weekly, monthly, quarterly or annual."
)
# Convert the index, 'cid', to a formal column aligned to the re-balancing dates.
r_dates_df = rebal_dates.reset_index(level=0)
r_dates_df.reset_index(drop=True, inplace=True)
dfw = dfw[["real_date", "psig", "cid"]]
# Isolate the required signals on the re-balancing dates. Only concerned with the
# respective signal on the re-balancing date. However, the produced DataFrame
# will only be defined over the re-balancing dates. Therefore, merge the
# aforementioned DataFrame with the original DataFrame such that all business
# days are included. The intermediary dates, dates between re-balancing dates,
# will initially be populated by NA values. To ensure the signal is used for the
# duration between re-balancing dates, forward fill the computed signal over the
# associated dates.
# The signal is computed for each individual cross-section. Therefore, merge on
# the real_date and the cross-section.
rebal_merge = r_dates_df.merge(dfw, how="left", on=["real_date", "cid"])
# Re-establish the daily date series index where the intermediary dates, between
# the re-balancing dates, will be populated using a forward fill.
rebal_merge = dfw[["real_date", "cid"]].merge(
rebal_merge, how="left", on=["real_date", "cid"]
)
rebal_merge["psig"] = (
rebal_merge.groupby("cid", observed=True)["psig"].ffill().shift(rebal_slip)
)
rebal_merge = rebal_merge.sort_values(["cid", "real_date"])
rebal_merge = rebal_merge.set_index("real_date")
sig_series = rebal_merge.drop(["cid"], axis=1)
return sig_series
@staticmethod
def long_only_pnl(
    dfw: pd.DataFrame,
    vol_scale: float = None,
    label: str = None,
    leverage: float = 1.0,
    normalized_weights: bool = False,
):
    """
    Compute the panel PnL of a long-only position in the passed return rows.
    The returns are not predicated on any exogenous signal.

    Parameters
    ----------
    dfw : ~pandas.DataFrame
        long DataFrame of the return category.
    vol_scale : float
        ex-post scaling of PnL to annualized volatility given. This is for
        comparative visualization and not out-of-sample. Default is None.
    label : str
        category name given to the long-only PnL.
    leverage : float
        leverage applied when a `vol_scale` is not defined. Default is 1.0.
    normalized_weights : bool
        if True the daily panel return is the mean across rows of that date,
        otherwise the sum. Default is False.

    Returns
    -------
    ~pandas.DataFrame
        standardised dataframe with the long-only panel return under the
        cross-section 'ALL' and the category `label`.

    Raises
    ------
    TypeError
        if the leverage in force is not a number greater than 0.
    """
    lev_err = "`leverage` must be a numerical value greater than 0."

    by_date = dfw.reset_index(drop=True).groupby(["real_date"])
    if normalized_weights:
        daily = by_date.mean(numeric_only=True)
    else:
        daily = by_date.sum(numeric_only=True)

    panel_pnl = QuantamentalDataFrame.from_long_df(
        daily.reset_index(level=0), cid="ALL", xcat=label
    )

    if vol_scale is not None:
        # Leverage that scales the PnL to the requested annualized volatility
        # (261 trading days per year).
        annualised_vol = panel_pnl["value"].std() * np.sqrt(261)
        leverage = vol_scale * annualised_vol ** (-1)

    if not isinstance(leverage, Number) or leverage <= 0:
        raise TypeError(lev_err)

    panel_pnl["value"] = panel_pnl["value"] * leverage

    result = QuantamentalDataFrame(
        panel_pnl[["cid", "xcat", "real_date", "value"]],
        _initialized_as_categorical=True,
    )
    return result.to_original_dtypes()
def plot_pnls(
    self,
    pnl_cats: List[str] = None,
    pnl_cids: List[str] = None,
    start: str = None,
    end: str = None,
    compounding: bool = False,
    facet: bool = False,
    ncol: int = 3,
    same_y: bool = True,
    title: str = "Cumulative Naive PnL",
    title_fontsize: int = 20,
    tick_fontsize: int = 12,
    xcat_labels: Union[List[str], dict] = None,
    xlab: str = "",
    ylab: str = "% of risk capital",
    label_fontsize: int = 12,
    share_axis_labels: bool = True,
    figsize: Tuple = (12, 7),
    aspect: float = 1.7,
    height: float = 3,
    label_adj: float = 0.05,
    title_adj: float = 0.95,
    y_label_adj: float = 0.95,
    legend_fontsize: int = None,
    return_fig: bool = False,
):
    """
    Plot line chart of cumulative PnLs, single PnL, multiple PnL types per cross
    section, or multiple cross sections per PnL type.

    Parameters
    ----------
    pnl_cats : List[str]
        list of PnL categories that should be plotted. Default is None, i.e.
        all PnLs defined on the instance.
    pnl_cids : List[str]
        list of cross sections to be plotted; default is None, which plots
        'ALL' (global PnL). Note: one can only have multiple PnL categories or
        multiple cross sections, not both.
    start : str
        earliest date in ISO format. Default is None and earliest date in df is
        used.
    end : str
        latest date in ISO format. Default is None and latest date in df is used.
    compounding : bool
        parameter to control whether the PnLs are compounded daily. Default is
        False.
    facet : bool
        parameter to control whether each PnL series is plotted on its own
        respective grid using Seaborn's FacetGrid. Default is False and all series
        will be plotted in the same graph.
    ncol : int
        number of columns in facet grid. Default is 3. If the total number of PnLs
        is less than ncol, the number of columns will be adjusted on runtime.
    same_y : bool
        if True (default) all plots in facet grid share same y axis.
    title : str
        allows entering text for a custom chart header.
    title_fontsize : int
        font size for the title. Default is 20.
    tick_fontsize : int
        font size for the tick labels. Default is 12.
    xcat_labels : Union[List[str], dict]
        custom labels to be used for the PnLs; a dictionary mapping each entry of
        `pnl_cats` to its label. A list of labels is still accepted but triggers
        a warning.
    xlab : str
        label for x-axis of the plot (or subplots if faceted), default is an
        empty string.
    ylab : str
        label for y-axis of the plot (or subplots if faceted), default is '% of risk
        capital' with a note on compounding.
    label_fontsize : int
        font size for the axis labels of the non-faceted plot. Default is 12.
    share_axis_labels : bool
        if True (default) the axis labels are shared by all subplots in the facet
        grid.
    figsize : tuple
        tuple of plot width and height. Default is (12 , 7).
    aspect : float
        width-height ratio for plots in facet. Default is 1.7.
    height : float
        height of plots in facet. Default is 3.
    label_adj : float
        parameter that sets bottom of figure to fit the label. Default is 0.05.
    title_adj : float
        parameter that sets top of figure to accommodate title. Default is 0.95.
    y_label_adj : float
        parameter that sets left of figure to fit the y-label.
    legend_fontsize : int
        font size for the legend and its title in the non-faceted plot.
    return_fig : bool
        if True the plot object is returned instead of being shown. Default is
        False.

    Returns
    -------
    The seaborn FacetGrid (faceted) or the object returned by
    seaborn.lineplot (non-faceted) if `return_fig` is True, otherwise None.

    Raises
    ------
    TypeError
        if `pnl_cats` is not a list, or `xcat_labels` has an unsupported type.
    ValueError
        if none of the requested PnLs exists, if both `pnl_cats` and `pnl_cids`
        hold more than one entry, or if the number of labels does not match.
    """
    default_ylab: str = "% of risk capital"

    # A fresh list per call instead of a shared mutable default argument.
    if pnl_cids is None:
        pnl_cids = ["ALL"]

    if pnl_cats is None:
        pnl_cats = self.pnl_names
    else:
        pnl_cats_error = (
            f"List of PnL categories expected - received {type(pnl_cats)}."
        )
        if not isinstance(pnl_cats, list):
            raise TypeError(pnl_cats_error)

        requested = pnl_cats
        pnl_cats = [pnl for pnl in requested if pnl in self.pnl_names]
        dif = set(requested).difference(pnl_cats)
        if dif:
            print(
                f"The PnL(s) requested, {dif}, have not been defined on the "
                f"Class. The defined PnL(s) are {self.pnl_names}."
            )

    # Checked independently of the message above: a request made up solely of
    # unknown PnLs must stop here rather than plot an empty selection.
    if len(pnl_cats) == 0:
        raise ValueError(
            "There are not any valid PnL(s) to display given the request."
        )

    error_message = "Either pnl_cats or pnl_cids must be a list of length 1"
    if len(pnl_cats) != 1 and len(pnl_cids) != 1:
        raise ValueError(error_message)

    # Never use more facet columns than there are series to plot.
    ncol = min(ncol, max(len(pnl_cats), len(pnl_cids)))

    dfx = reduce_df(
        self.df, pnl_cats, pnl_cids, start, end, self.black, out_all=False
    )

    error_message = (
        "The number of custom labels must match the defined number of "
        "categories in pnl_cats."
    )
    if xcat_labels is None:
        xcat_labels = pnl_cats.copy()
    elif isinstance(xcat_labels, dict):
        if not len(xcat_labels) == len(pnl_cats):
            raise ValueError(error_message)
        xcat_labels = [xcat_labels[pnl] for pnl in pnl_cats]
    elif isinstance(xcat_labels, list) and all(
        isinstance(item, str) for item in xcat_labels
    ):
        warnings.warn(
            "xcat_labels should be a dictionary with keys as pnl_cats and values as "
            "the custom labels. This will be enforced in a future version.",
        )
        if len(xcat_labels) != len(pnl_cats):
            raise ValueError(error_message)
    else:
        raise TypeError(
            "xcat_labels should be a dictionary with keys as pnl_cats and values as "
            "the custom labels."
        )

    no_cids = len(pnl_cids)

    sns.set_theme(
        style="whitegrid", palette="colorblind", rc={"figure.figsize": figsize}
    )

    # Either several PnL categories for one cross-section, or one PnL category
    # for several cross-sections.
    if no_cids == 1:
        plot_by = "xcat"
        col_order = pnl_cats
        labels = xcat_labels
        legend_title = "PnL Category(s)"
    else:
        plot_by = "cid"
        col_order = pnl_cids
        labels = pnl_cids.copy()
        legend_title = "Cross Section(s)"

    df_grouped = dfx.groupby(plot_by, observed=True)
    if compounding:
        # Values are in percent: compound the daily returns per series.
        dfx["cum_value"] = (
            df_grouped["value"].transform(lambda x: (x / 100 + 1).cumprod()) - 1
        ) * 100
    else:
        dfx["cum_value"] = df_grouped["value"].cumsum(numeric_only=True)

    if ylab == default_ylab:
        ylab += ", with daily compounding" if compounding else ", no compounding"

    if facet:
        fg = sns.FacetGrid(
            data=dfx,
            col=plot_by,
            col_wrap=ncol,
            sharey=same_y,
            aspect=aspect,
            height=height,
            col_order=col_order,
            legend_out=True,
        )
        fg.fig.suptitle(
            title,
            fontsize=title_fontsize,
        )

        fg.fig.subplots_adjust(top=title_adj, bottom=label_adj, left=y_label_adj)

        fg.map_dataframe(
            sns.lineplot,
            x="real_date",
            y="cum_value",
            hue=plot_by,
            hue_order=col_order,
            estimator=None,
            lw=1,
        )
        for ix, ax in enumerate(fg.axes.flat):
            ax.axhline(y=0, color="black", linestyle="--", linewidth=1)
            if no_cids == 1:
                ax.set_title(xcat_labels[ix])

        if no_cids > 1:
            fg.set_titles(row_template="", col_template="{col_name}")

        if share_axis_labels:
            fg.set_axis_labels("", "")
            fg.fig.supxlabel(xlab)
            fg.fig.supylabel(ylab)
        else:
            fg.set_axis_labels(xlab, ylab)

    else:
        fg = sns.lineplot(
            data=dfx,
            x="real_date",
            y="cum_value",
            hue=plot_by,
            hue_order=col_order,
            estimator=None,
            lw=1,
        )

        plt.title(title, fontsize=title_fontsize)
        plt.legend(
            labels=labels,
            title=legend_title,
            title_fontsize=legend_fontsize,
            fontsize=legend_fontsize,
        )

        plt.xlabel(xlab, fontsize=label_fontsize)
        plt.ylabel(ylab, fontsize=label_fontsize)

    fg.tick_params(axis="both", labelsize=tick_fontsize)
    plt.axhline(y=0, color="black", linestyle="--", lw=1)

    if return_fig:
        return fg

    plt.show()
def get_generated_signals(self) -> pd.DataFrame:
    """
    Collect the signals that were generated to produce the PnLs.

    Every frame stored in ``self.signal_df`` is tagged with its key in an
    ``xcat`` column, the frames are stacked, rows with missing values are
    dropped and the ``sig`` column is renamed to ``value``.

    Returns
    -------
    ~pandas.DataFrame
        DataFrame containing the generated signals.
    """
    tagged_frames = [
        frame.assign(xcat=name) for name, frame in self.signal_df.items()
    ]
    stacked = pd.concat(tagged_frames).dropna().reset_index(drop=True)
    return QuantamentalDataFrame(stacked.rename(columns={"sig": "value"}))
def get_asset_returns_data(self) -> pd.DataFrame:
    """
    Return the asset returns that the PnLs are based on.

    The instance DataFrame is reduced to the return category ``self.ret`` for
    the cross-sections in ``self.cids``.

    Returns
    -------
    ~pandas.DataFrame
        DataFrame containing the returns used to generate the PnLs.
    """
    qdf = QuantamentalDataFrame(self.df, self._as_categorical)
    return qdf.reduce_df(cids=self.cids, xcats=[self.ret])
def get_pnls_returns_data(self) -> pd.DataFrame:
    """
    Return the PnL series that have been generated so far.

    The instance DataFrame is reduced to the categories listed in
    ``self.pnl_names`` for the cross-sections in ``self.cids``.

    Returns
    -------
    ~pandas.DataFrame
        DataFrame containing the PnLs generated.
    """
    qdf = QuantamentalDataFrame(self.df, self._as_categorical)
    return qdf.reduce_df(cids=self.cids, xcats=self.pnl_names)
def signal_heatmap(
    self,
    pnl_name: str,
    pnl_cids: List[str] = None,
    start: str = None,
    end: str = None,
    freq: str = "M",
    title: str = "Average applied signal values",
    title_fontsize: int = None,
    x_label: str = "",
    y_label: str = "",
    figsize: Optional[Tuple[float, float]] = None,
    tick_fontsize: int = None,
    return_fig: bool = False,
):
    """
    Display heatmap of signals across times and cross-sections.

    Parameters
    ----------
    pnl_name : str
        name of naive PnL whose signals are displayed.
    pnl_cids : List[str]
        cross-sections. Default is all available.
    start : str
        earliest date in ISO format. Default is None and the earliest date of the
        signal panel is used.
    end : str
        latest date in ISO format. Default is None and the latest date of the
        signal panel is used.
    freq : str
        frequency for which signal average is displayed. Default is monthly ('M').
        The only alternative is quarterly ('Q').
    title : str
        allows entering text for a custom chart header.
    title_fontsize : int
        font size of the title. Default is None, uses matplotlib default.
    x_label : str
        label for the x-axis. Default is an empty string.
    y_label : str
        label for the y-axis. Default is an empty string.
    figsize : (float, float)
        width and height in inches. Default is (14, number of cross sections).
    tick_fontsize : int
        font size for the ticks. Default is None.
    return_fig : bool
        if True the figure is returned and not shown. Default is False.

    Returns
    -------
    matplotlib.figure.Figure or None
        the figure if `return_fig` is True, otherwise None after the chart has
        been shown.

    Raises
    ------
    TypeError
        if `pnl_name`, `x_label` or `y_label` is not a string.
    ValueError
        if the PnL is unknown, the frequency is neither monthly nor quarterly, or
        `pnl_cids` contains cross-sections that are not in `self.cids`.


    .. note::
        Signal here is the value that actually determines the concurrent PnL.
    """
    if not isinstance(pnl_name, str):
        raise TypeError("The method expects to receive a single PnL name.")

    if pnl_name not in self.pnl_names:
        raise ValueError(
            f"The PnL passed to 'pnl_name' parameter is not defined. The "
            f"possible options are {self.pnl_names}."
        )

    freq = _map_to_business_day_frequency(freq)
    if not freq.startswith(("BQ", "BM")):
        raise ValueError(
            "Defined time-period must be monthly ('m') or quarterly ('q')"
        )

    if pnl_cids is None:
        pnl_cids = self.cids
    elif not set(pnl_cids) <= set(self.cids):
        raise ValueError(
            f"Cross-sections not available. Available cids are:{self.cids}."
        )

    for vname, lbl in zip(["x_label", "y_label"], [x_label, y_label]):
        if not isinstance(lbl, str):
            raise TypeError(f"<str> expected for `{vname}` - received {type(lbl)}.")

    dfx: pd.DataFrame = self.signal_df[pnl_name]
    dfw: pd.DataFrame = dfx.pivot(index="real_date", columns="cid", values="sig")
    dfw = dfw[pnl_cids]

    # Resolve each bound independently: the former `if ... elif ...` left `end`
    # unset whenever both bounds were omitted.
    if start is None:
        start = dfw.index[0]
    if end is None:
        end = dfw.index[-1]

    dfw = dfw.truncate(before=start, after=end)
    dfw = dfw.resample(freq).mean()

    if figsize is None:
        figsize = (14, len(pnl_cids))

    fig, ax = plt.subplots(figsize=figsize)

    # One row per cross-section, one column per period.
    dfw = dfw.transpose()
    dfw.columns = [str(d.strftime("%d-%m-%Y")) for d in dfw.columns]

    # Draw on the axes created above rather than on the implicit current axes.
    ax = sns.heatmap(dfw, cmap="vlag_r", center=0, ax=ax)
    ax.set(xlabel=x_label, ylabel=y_label)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
    ax.set_title(title, fontsize=title_fontsize)
    ax.tick_params(axis="x", labelsize=tick_fontsize)
    ax.tick_params(axis="y", labelsize=tick_fontsize)

    if return_fig:
        return fig
    plt.show()
def agg_signal_bars(
    self,
    pnl_name: str,
    freq: str = "m",
    metric: str = "direction",
    title: str = None,
    title_fontsize: int = None,
    y_label: str = "Sum of Std. across the Panel",
    return_fig: bool = False,
):
    """
    Bar chart of the signal aggregated across the panel, per period.

    Signals are averaged per cross-section over each period and then summed
    across cross-sections. With `metric="strength"` absolute signal values are
    used, so only the magnitude is shown.

    Parameters
    ----------
    pnl_name : str
        name of the PnL whose signal is to be visualized.
    freq : str
        frequency at which the signal is visualized. Default is monthly ('m'). The
        alternative is quarterly ('q').
    metric : str
        the type of signal value. Default is "direction". Alternative is "strength".
    title : str
        allows entering text for a custom chart header. Default will be "Directional
        Bar Chart of <pnl_name>.".
    title_fontsize : int
        font size of the title. Default is None, uses matplotlib default.
    y_label : str
        label for the y-axis. Default is the sum of standard deviations across the
        panel corresponding to the default signal transformation: 'zn_score_pan'.
    return_fig : bool
        if True the figure is returned and not shown. Default is False.


    .. note::
        The referenced signal corresponds to the series that determines the concurrent
        PnL.
    """
    assert isinstance(
        pnl_name, str
    ), "The method expects to receive a single PnL name."
    assert pnl_name in self.pnl_names, (
        f"The PnL passed to 'pnl_name' parameter is not defined. The "
        f"possible options are {self.pnl_names}."
    )

    bd_freq = _map_to_business_day_frequency(freq)
    if not bd_freq.startswith(("BQ", "BM")):
        raise ValueError(
            "Defined time-period must either be monthly, m, or quarterly, q."
        )

    assert metric in [
        "direction",
        "strength",
    ], "The metric must either be 'direction' or 'strength'."

    chart_title = title
    if chart_title is None:
        chart_title = f"Directional Bar Chart of {pnl_name}."

    # Wide panel: dates in rows, cross-sections in columns.
    panel: pd.DataFrame = self.signal_df[pnl_name].pivot(
        index="real_date", columns="cid", values="sig"
    )
    if metric == "strength":
        panel = panel.abs()

    aggregate = panel.resample(bd_freq).mean().sum(axis=1)

    df_signal = pd.DataFrame(
        data=aggregate.to_numpy(),
        columns=["aggregate_signal"],
        index=np.array(aggregate.index),
    )
    # The period end dates end up in a column with an empty name.
    df_signal = df_signal.reset_index(level=0).rename(
        {"index": ""}, axis="columns"
    )
    dates = [pd.Timestamp(stamp) for stamp in df_signal[""]]
    df_signal[""] = np.array(dates)

    plt.style.use("ggplot")
    fig, ax = plt.subplots()
    df_signal.plot.bar(
        x="",
        y="aggregate_signal",
        ax=ax,
        ylabel=y_label,
        legend=False,
    )
    ax.set_title(chart_title, fontsize=title_fontsize)

    # Label roughly twelve bars; all other tick labels stay empty.
    n_bars = len(df_signal)
    ticklabels = [""] * n_bars
    skip = max(1, n_bars // 12)  # avoid division by zero
    ticklabels[::skip] = df_signal[""].iloc[::skip].dt.strftime("%Y-%m-%d")
    ax.xaxis.set_major_formatter(mticker.FixedFormatter(ticklabels))
    fig.autofmt_xdate()

    def fmt(x, pos=0, max_i=len(ticklabels) - 1):
        # Clamp the bar position into the valid index range before the lookup.
        i = int(x)
        if i < 0:
            i = 0
        elif i > max_i:
            i = max_i
        return dates[i]

    ax.fmt_xdata = fmt

    if return_fig:
        return fig
    plt.show()
def evaluate_pnls(
    self,
    pnl_cats: Optional[List[str]] = None,
    pnl_cids: List[str] = ["ALL"],
    start: Optional[str] = None,
    end: Optional[str] = None,
    label_dict: Optional[Dict[str, str]] = None,
):
    """
    Returns a table of PnL statistics containing the following metrics:

    - Return - percentage, annualized (daily mean times 261)
    - Standard Deviation - percentage, annualized (daily std times sqrt(261))
    - Sharpe Ratio
    - Sortino Ratio
    - Max 21-Day Draw - percentage (lowest rolling 21-observation sum)
    - Max 6-Month Draw - percentage (lowest rolling 126-observation sum)
    - Peak to Trough Draw - percentage
    - Top 5% Monthly PnL Share
    - one "<ticker> correl" row per benchmark, if benchmarks are available
    - Sharpe Stability Ratio - HAC-robust t-stat for the mean rolling
      Sharpe ratio (see ``sharpe_stability_ratio``); accounts for
      sample size and serial dependence
    - Traded Months

    Parameters
    ----------
    pnl_cats : List[str], optional
        list of PnL categories that should be evaluated. Default is None and all
        available PnL categories are used.
    pnl_cids : List[str]
        list of cross-sections to be evaluated; default is 'ALL' (global PnL). Note:
        one can only have multiple PnL categories or multiple cross-sections, not both.
    start : str
        earliest date in ISO format. Default is None and earliest date in df is
        used.
    end : str
        latest date in ISO format. Default is None and latest date in df is used.
    label_dict : dict[str, str]
        dictionary with keys as pnl_cats and values as new labels for the PnLs.
        The columns of the result are renamed and ordered accordingly.

    Returns
    -------
    ~pandas.DataFrame
        standardized DataFrame with key PnL performance statistics.

    Raises
    ------
    TypeError
        if `pnl_cids`, `pnl_cats` or `label_dict` have the wrong type.
    ValueError
        if a requested PnL category has not been produced, or if `label_dict`
        does not have one entry per column of the result.
    AssertionError
        if both several PnL categories and several cross-sections are requested.
    """
    error_cids = "List of cross-sections expected."
    error_xcats = "List of PnL categories expected."
    if not isinstance(pnl_cids, list):
        raise TypeError(error_cids)
    if not isinstance(pnl_cats, (list, type(None))):
        raise TypeError(error_xcats)
    if pnl_cats is not None:
        if not all(isinstance(elem, str) for elem in pnl_cats):
            raise TypeError(error_xcats)
    if not all(isinstance(elem, str) for elem in pnl_cids):
        raise TypeError(error_cids)

    if pnl_cats is None:
        # self.pnl_names holds the category name of every PnL produced so far.
        pnl_cats = self.pnl_names

    missing = [pnl for pnl in pnl_cats if pnl not in self.pnl_names]
    if missing:
        raise ValueError(
            f"Received PnL categories have not been defined. The PnL "
            f"category(s) which has not been defined is: {missing}. "
            f"The produced PnL category(s) are {self.pnl_names}."
        )

    assert (len(pnl_cats) == 1) or (len(pnl_cids) == 1), (
        "Either a single PnL category or a single cross-section is expected."
    )

    # Type-check the labels before any statistic is computed, so that a malformed
    # argument is reported without doing the work first.
    if label_dict is not None:
        if not isinstance(label_dict, dict):
            raise TypeError("label_dict must be a dictionary.")
        if not all(isinstance(k, str) for k in label_dict.keys()):
            raise TypeError("Keys in label_dict must be strings.")
        if not all(isinstance(v, str) for v in label_dict.values()):
            raise TypeError("Values in label_dict must be strings.")

    dfx = reduce_df(
        self.df, pnl_cats, pnl_cids, start, end, self.black, out_all=False
    )

    groups = "xcat" if len(pnl_cids) == 1 else "cid"

    # Benchmark correlations are only reported if benchmarks were passed to the
    # class and are available.
    benchmark_tickers = []
    if self.bm_bool and bool(self._bm_dict):
        benchmark_tickers = list(self._bm_dict.keys())

    stats = (
        [
            "Return %",
            "St. Dev. %",
            "Sharpe Ratio",
            "Sortino Ratio",
            "Max 21-Day Draw %",
            "Max 6-Month Draw %",
            "Peak to Trough Draw %",
            "Top 5% Monthly PnL Share",
        ]
        + [f"{bm} correl" for bm in benchmark_tickers]
        + ["Sharpe Stability Ratio", "Traded Months"]
    )

    dfw = dfx.pivot(index="real_date", columns=groups, values="value")
    df = pd.DataFrame(columns=dfw.columns, index=stats)

    # Rows are addressed by label so that the table does not depend on the
    # position of a statistic inside `stats`.
    df.loc["Return %", :] = dfw.mean(axis=0) * 261
    df.loc["St. Dev. %", :] = dfw.std(axis=0) * np.sqrt(261)
    df.loc["Sharpe Ratio", :] = df.loc["Return %", :] / df.loc["St. Dev. %", :]

    # Annualized downside deviation.
    # NOTE(review): len(x) counts rows with missing values too, unlike mean/std
    # above which skip them - confirm this is intended for unbalanced panels.
    dsd = dfw.apply(lambda x: np.sqrt(np.sum(x[x < 0] ** 2) / len(x))) * np.sqrt(
        261
    )
    df.loc["Sortino Ratio", :] = df.loc["Return %", :] / dsd
    df.loc["Max 21-Day Draw %", :] = dfw.rolling(21).sum().min()
    df.loc["Max 6-Month Draw %", :] = dfw.rolling(6 * 21).sum().min()

    # Largest distance between the running maximum of cumulative PnL and the
    # cumulative PnL itself, reported as a negative number.
    cum_pnl = dfw.cumsum()
    high_watermark = cum_pnl.cummax()
    drawdown = high_watermark - cum_pnl
    df.loc["Peak to Trough Draw %", :] = -drawdown.max()

    # Share of total PnL earned in the best 5% of months (rounded up).
    mfreq = _map_to_business_day_frequency("M")
    monthly_pnl = dfw.resample(mfreq).sum()
    total_pnl = monthly_pnl.sum(axis=0)
    top_5_percent_cutoff = int(np.ceil(len(monthly_pnl) * 0.05))
    top_months = pd.DataFrame(columns=monthly_pnl.columns)
    for column in monthly_pnl.columns:
        top_months[column] = (
            monthly_pnl[column]
            .nlargest(top_5_percent_cutoff)
            .reset_index(drop=True)
        )
    df.loc["Top 5% Monthly PnL Share", :] = top_months.sum() / total_pnl

    if len(benchmark_tickers) > 0:
        bm_df = pd.concat(list(self._bm_dict.values()), axis=1)
        for i, bm in enumerate(benchmark_tickers):
            # Correlate on the dates that PnL and benchmark have in common.
            index = dfw.index.intersection(bm_df.index)
            correlation = dfw.loc[index].corrwith(
                bm_df.loc[index].iloc[:, i], axis=0, method="pearson", drop=True
            )
            df.loc[f"{bm} correl", :] = correlation

    for col in dfw.columns:
        df.loc["Sharpe Stability Ratio", col] = sharpe_stability_ratio(
            dfw[col].dropna(),
            window=252,
            benchmark_sr=0.0,
            annualization_factor=252,
        )

    # Months with at least one non-missing PnL observation.
    df.loc["Traded Months", :] = dfw.notna().resample(mfreq).sum().ne(0).sum()

    if label_dict is not None:
        # The number of columns is only known once the data has been pivoted.
        if len(label_dict) != len(df.columns):
            raise ValueError(
                "label_dict must have the same number of keys as columns in the "
                "DataFrame."
            )
        df.rename(columns=label_dict, inplace=True)
        df = df[list(label_dict.values())]

    return df
def print_pnl_names(self):
    """
    Write the names of the PnLs held by this instance to standard output.
    """
    available = self.pnl_names
    print(available)
def pnl_df(self, pnl_names: List[str] = None, cs: bool = False):
    """
    Return dataframe with PnLs.

    Parameters
    ----------
    pnl_names : List[str]
        list of names of PnLs to be returned. Default is None, in which case all
        PnLs in ``self.pnl_names`` are returned.
    cs : bool
        inclusion of cross section PnLs. Default is False, in which case only rows
        with cross-section 'ALL' are returned.

    Returns
    -------
    ~pandas.DataFrame
        custom DataFrame with PnLs
    """
    if pnl_names is None:
        wanted = self.pnl_names
    else:
        wanted = pnl_names

    xcat_mask = self.df["xcat"].isin(wanted)
    if cs:
        # Keep every cross-section.
        cid_mask = True
    else:
        cid_mask = self.df["cid"] == "ALL"

    selection = self.df[xcat_mask & cid_mask]
    qdf = QuantamentalDataFrame(
        selection,
        _initialized_as_categorical=self._as_categorical,
    )
    return qdf.to_original_dtypes()
def _winsorize(self, df: pd.DataFrame, thresh: float):
if thresh is not None:
df.clip(lower=-thresh, upper=thresh, inplace=True)
def create_results_dataframe(
    title: str, pnl: NaivePnL, sigs_renamed: dict = None, **srr_kwargs
):
    """
    Create a DataFrame with key performance metrics for signals and PnLs
    from a precomputed NaivePnL object.

    Signal-return relation metrics (accuracy, balanced accuracy, Pearson and
    Kendall correlation) are combined with the Sharpe ratio, the Sortino ratio
    and, if the NaivePnL object holds benchmarks, the benchmark correlations of
    the corresponding PnLs.

    Parameters
    ----------
    title : str
        Title of the DataFrame; used as caption of the styled table.
    pnl : NaivePnL
        NaivePnL object with pnl.make_pnl() already applied.
    sigs_renamed : dict, optional
        Dictionary for renaming signals in the output.
    **srr_kwargs
        Additional keyword arguments passed on to SignalReturnRelations.

    Returns
    -------
    ~pandas.DataFrame or Styler
        Table with performance metrics, one row per signal. A Styler is returned
        if PYTHON_3_8_OR_LATER is true, otherwise a plain DataFrame.

    Raises
    ------
    ValueError
        If the NaivePnL object holds no PnL parameters.
    """
    # Covers both a missing (None) and an empty parameter store.
    if not pnl.pnl_params:
        raise ValueError(
            "PnL parameters not found. Ensure pnl.make_pnl() has been run."
        )

    sigs = [params.signal for params in pnl.pnl_params.values()]
    sig_neg = [params.sig_neg for params in pnl.pnl_params.values()]

    # Get relation metrics (SignalReturnRelations works on pnl.df).
    # NOTE(review): the rebalancing frequencies of the PnLs are not forwarded
    # here; pass a frequency through srr_kwargs if that is required - confirm.
    srr = SignalReturnRelations(
        df=pnl.df,
        rets=pnl.ret,
        sigs=sigs,
        sig_neg=sig_neg,
        cids=pnl.cids,
        start=pnl.start,
        end=pnl.end,
        blacklist=pnl.blacklist,
        **srr_kwargs,
    )
    sigs_df = (
        srr.multiple_relations_table()
        .astype("float")[["accuracy", "bal_accuracy", "pearson", "kendall"]]
        .round(3)
    )

    # Get evaluated PnL statistics; benchmark correlations are appended if the
    # NaivePnL object holds benchmarks.
    bms = list(pnl._bm_dict.keys())
    pnl_columns = ["Sharpe Ratio", "Sortino Ratio"] + [f"{bm} correl" for bm in bms]
    pnl_df = (
        pnl.evaluate_pnls().transpose()[pnl_columns].astype("float").round(3)
    )
    pnl_df = pnl_df.reindex(list(pnl.pnl_params))

    # Label every PnL row with the signal it is based on. A single mapping is
    # applied at once, so a new label can never be renamed a second time.
    signal_labels = {
        name: f"{params.signal}_NEG" if params.sig_neg else params.signal
        for name, params in pnl.pnl_params.items()
    }
    pnl_df = pnl_df.rename(index=signal_labels)

    # Reset redundant indices in sigs_df
    sigs_df = sigs_df.reset_index(
        level=["Return", "Frequency", "Aggregation"], drop=True
    )

    # Combine them
    res_df = pd.concat([sigs_df, pnl_df], axis=1)

    metric_map = {
        "Sharpe Ratio": "Sharpe",
        "Sortino Ratio": "Sortino",
        "accuracy": "Accuracy",
        "bal_accuracy": "Bal. Accuracy",
        "pearson": "Pearson",
        "kendall": "Kendall",
    }
    res_df.rename(columns=metric_map, inplace=True)

    if sigs_renamed:
        res_df.rename(index=sigs_renamed, inplace=True)

    if PYTHON_3_8_OR_LATER:
        res_df = (
            res_df.style.format("{:.3f}")
            .set_caption(title)
            .set_table_styles(
                [
                    {
                        "selector": "caption",
                        "props": [("text-align", "center"), ("font-weight", "bold")],
                    }
                ]
            )
        )

    return res_df
@dataclass
class PnLParams:
    """
    Record of the settings that one PnL was created with.

    Attributes
    ----------
    signal : str
        signal category the PnL is based on.
    sig_op : str
        signal transformation, e.g. 'binary' or 'zn_score_pan'.
    sig_neg : bool
        whether the PnL is flagged as using the negative of the signal.
    sig_add : float
        presumably a constant added to the signal - confirm against make_pnl.
    pnl_name : str
        name of the PnL.
    rebal_freq : str
        re-balancing frequency, e.g. 'daily', 'weekly', 'monthly', 'quarterly'
        or 'annual'.
    rebal_slip : int
        presumably the re-balancing slippage in days - confirm against make_pnl.
    vol_scale : int
        presumably the volatility target of the PnL - confirm against make_pnl.
    neutral : str
        presumably the neutral level used in the signal transformation - confirm
        against make_pnl.
    thresh : float
        presumably the winsorization threshold of the signal - confirm against
        make_pnl.
    """

    signal: str
    sig_op: str
    sig_neg: bool
    sig_add: float
    pnl_name: str
    rebal_freq: str
    rebal_slip: int
    vol_scale: int
    neutral: str
    thresh: float
if __name__ == "__main__":
    # Demonstration: simulate a small panel, build a few PnLs and run every
    # evaluation and plotting method once.
    cids = ["AUD", "CAD", "GBP", "NZD", "USD", "EUR"]
    xcats = ["EQXR_NSA", "CRY", "GROWTH", "INFL", "DUXR"]

    # Simulation settings per cross-section.
    cols_1 = ["earliest", "latest", "mean_add", "sd_mult"]
    cid_specs = {
        "AUD": ["2008-01-03", "2020-12-31", 0.5, 2],
        "CAD": ["2010-01-03", "2020-11-30", 0, 1],
        "GBP": ["2012-01-03", "2020-11-30", -0.2, 0.5],
        "NZD": ["2002-01-03", "2020-09-30", -0.1, 2],
        "USD": ["2015-01-03", "2020-12-31", 0.2, 2],
        "EUR": ["2008-01-03", "2020-12-31", 0.1, 2],
    }
    df_cids = pd.DataFrame(index=cids, columns=cols_1)
    for cid, spec in cid_specs.items():
        df_cids.loc[cid] = spec

    # Simulation settings per category.
    cols_2 = cols_1 + ["ar_coef", "back_coef"]
    xcat_specs = {
        "EQXR_NSA": ["2000-01-03", "2020-12-31", 0.1, 1, 0, 0.3],
        "CRY": ["2000-01-01", "2020-10-30", 1, 2, 0.95, 1],
        "GROWTH": ["2010-01-03", "2020-10-30", 1, 2, 0.9, 1],
        "INFL": ["2001-01-01", "2020-10-30", 1, 2, 0.8, 0.5],
        "DUXR": ["2000-01-01", "2020-12-31", 0.1, 0.5, 0, 0.1],
    }
    df_xcats = pd.DataFrame(index=xcats, columns=cols_2)
    for xcat, spec in xcat_specs.items():
        df_xcats.loc[xcat] = spec

    black = {"AUD": ["2006-01-01", "2015-12-31"], "GBP": ["2022-01-01", "2100-01-01"]}

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    # Instantiate a new instance to test the long-only functionality.
    # Benchmarks are used to calculate correlation against PnL series.
    pnl = NaivePnL(
        dfd,
        ret="EQXR_NSA",
        sigs=["CRY", "GROWTH", "INFL"],
        cids=cids,
        start="2000-01-01",
        blacklist=black,
        bms=["EUR_EQXR_NSA", "USD_EQXR_NSA"],
    )

    # Settings shared by all signal-based PnLs below.
    shared = dict(vol_scale=5, rebal_slip=1, min_obs=250, thresh=2)

    pnl.make_pnl(
        sig="GROWTH",
        sig_op="binary",
        entry_barrier=1,
        exit_barrier=0.3,
        rebal_freq="quarterly",
        normalized_weights=True,
        **shared,
    )
    pnl.make_pnl(
        sig="GROWTH",
        sig_op="binary",
        sig_neg=True,
        rebal_freq="annual",
        **shared,
    )
    pnl.make_pnl(
        sig="INFL",
        sig_op="zn_score_pan",
        sig_neg=True,
        sig_add=0.5,
        rebal_freq="annual",
        **shared,
    )
    pnl.make_long_pnl(vol_scale=10, label="Long")

    df_eval = pnl.evaluate_pnls(
        pnl_cats=["PNL_GROWTH_NEG", "PNL_INFL_NEG"],
        start="2015-01-01",
        end="2020-12-31",
    )
    heatmap = pnl.signal_heatmap(
        pnl_name="PNL_GROWTH_NEG",
        pnl_cids=cids,
        freq="m",
        return_fig=True,
        title="Heatmap Example",
    )
    print(df_eval)

    pnl.agg_signal_bars(
        pnl_name="PNL_GROWTH_NEG",
        freq="m",
        metric="direction",
        title=None,
    )

    # Several PnL categories for the global cross-section.
    two_pnls = ["PNL_GROWTH_NEG", "Long"]
    pnl.plot_pnls(pnl_cats=two_pnls, title_fontsize=60, xlab="date", ylab="%")
    pnl.plot_pnls(
        pnl_cats=two_pnls,
        facet=False,
        xcat_labels=["S_1", "S_2"],
        xlab="date",
        ylab="%",
    )
    pnl.plot_pnls(pnl_cats=two_pnls, facet=True, xcat_labels=["S_1", "S_2"])
    pnl.plot_pnls(pnl_cats=two_pnls, facet=True)

    # One PnL category across several cross-sections.
    one_pnl = ["PNL_GROWTH_NEG"]
    pnl.plot_pnls(pnl_cats=one_pnl, pnl_cids=cids, xcat_labels=None)
    pnl.plot_pnls(pnl_cats=one_pnl, pnl_cids=cids, facet=True, xcat_labels=None)
    pnl.plot_pnls(
        pnl_cats=one_pnl,
        pnl_cids=cids,
        same_y=True,
        facet=True,
        xcat_labels=None,
        share_axis_labels=False,
        xlab="Date",
        ylab="PnL",
        y_label_adj=0.1,
    )

    results_eq_ols = create_results_dataframe(
        title="Performance metrics, PARITY vs OLS, equity",
        pnl=pnl,
    )
    print(results_eq_ols.data)