# Source code for macrosynergy.pnl.multi_pnl

"""
The `MultiPnL` class allows for the visualization and analysis of PnLs across multiple return categories.
It also provides functionality to calculate a weighted aggregate PnL based on user-defined weights
for each PnL.
"""

from numbers import Number
from typing import Dict, List, Optional, Tuple, Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from macrosynergy.management.simulate import make_qdf
from macrosynergy.management.utils import update_df, _map_to_business_day_frequency
from macrosynergy.pnl import NaivePnL
from macrosynergy.pnl.sharpe_stability_ratio import sharpe_stability_ratio
from macrosynergy.management.types import QuantamentalDataFrame


class MultiPnL:
    """
    Manages multiple `NaivePnL` instances, enabling combined PnL analysis and visualization.

    PnLs are stored in a single long-format dataframe (``pnls_df``) under the
    name ``'<pnl_xcat>/<return_xcat>'``. Composite PnLs created with
    :meth:`combine_pnls` are stored under the plain name chosen by the user.
    """

    def __init__(
        self,
        df: Optional[pd.DataFrame] = None,
        bms: Optional[Union[str, List[str]]] = None,
    ):
        """
        Parameters
        ----------
        df : ~pandas.DataFrame, optional
            Dataframe containing benchmark return series. Required if
            ``bms`` is specified.
        bms : str or List[str], optional
            Benchmark ticker(s) in the format ``'cid_xcat'`` (e.g.
            ``'USD_EQXR_NSA'``). Correlations with each benchmark are included as
            rows in :meth:`evaluate_pnls`. Requires ``df`` to be provided.

        Raises
        ------
        ValueError
            If only one of ``df`` and ``bms`` is given, if a benchmark ticker
            contains no ``'_'`` separator, or if a benchmark has no observations
            in ``df``.
        """
        self.pnls_df = QuantamentalDataFrame(
            pd.DataFrame(columns=["real_date", "xcat", "value", "cid"])
        )
        # Maps 'pnl_xcat/return' -> the NaivePnL object it was taken from.
        self.single_return_pnls: Dict[str, NaivePnL] = {}
        # Names of the composite PnLs created with combine_pnls().
        self.composite_pnl_xcats: List[str] = []
        # Maps a bare PnL name -> set of return categories it was added with.
        self.xcat_to_ret: Dict[str, set] = {}
        # Maps a benchmark ticker -> single-column wide dataframe of its returns.
        self._bm_dict: Dict[str, pd.DataFrame] = {}

        if (df is None) != (bms is None):
            raise ValueError(
                "Both `df` and `bms` must be provided together, or neither."
            )
        if bms is not None:
            bms = [bms] if isinstance(bms, str) else bms
            for bm in bms:
                cid, sep, xcat = bm.partition("_")
                if not sep:
                    raise ValueError(
                        f"{bm} is not a valid benchmark ticker. "
                        "Expected the format 'cid_xcat'."
                    )
                dfa = df[(df["cid"] == cid) & (df["xcat"] == xcat)]
                if dfa.shape[0] == 0:
                    raise ValueError(f"{bm} has no observations in the DataFrame.")
                bm_series = dfa.pivot(
                    index="real_date", columns="xcat", values="value"
                )
                bm_series.columns = [bm]
                self._bm_dict[bm] = bm_series
        self.bm_bool = bool(self._bm_dict)

    def add_pnl(self, pnl: "NaivePnL", pnl_xcats: List[str]):
        """
        Add PnL(s) from a NaivePnL object. PnL categories will be ingested into
        the MultiPnL object as 'pnl_xcat/return'.

        Parameters
        ----------
        pnl : NaivePnL
            NaivePnL object.
        pnl_xcats : List[str]
            List of PnLs to add from the NaivePnL object.

        Raises
        ------
        ValueError
            If ``pnl`` is not a NaivePnL object or ``pnl_xcats`` is not a list of
            PnL names present in it.
        """

        self._validate_pnl(pnl, pnl_xcats)

        pnl_df = pnl.pnl_df(pnl_xcats)
        pnl_df = QuantamentalDataFrame(pnl_df).rename_xcats(postfix=f"/{pnl.ret}")
        self.pnls_df = update_df(self.pnls_df, pnl_df)
        for xcat in pnl_df.xcat.unique():
            self.single_return_pnls[xcat] = pnl

        for xcat in pnl_xcats:
            self.xcat_to_ret.setdefault(xcat, set()).add(pnl.ret)

    def combine_pnls(
        self,
        pnl_xcats: List[str],
        composite_pnl_xcat: str,
        weights: Optional[Dict[str, float]] = None,
    ):
        """
        Combine PnLs with optional weighting.

        Parameters
        ----------
        pnl_xcats : List[str]
            List of PnLs to combine. Must be in the format 'xcat' or 'xcat/return'
            and added using `add_pnl()`. The list is not modified.
        composite_pnl_xcat : str
            xcat for the combined PnL.
        weights : Optional[Dict[str, float]]
            Weights for each PnL, by default None (equal weights). Must be in the
            format {'xcat': weight} or {'xcat/return': weight}.

        Raises
        ------
        ValueError
            If fewer than two PnLs have been added, if a PnL name cannot be
            resolved, if ``weights`` omits one of the PnLs to combine, or if the
            weights sum to zero.
        """

        self._check_pnls_added(min_pnls=2)
        pnl_xcats = self._resolve_pnl_xcats(pnl_xcats)

        # Default weights
        if weights is None:
            weights = {pnl_name: 1 for pnl_name in pnl_xcats}
        else:
            weights = {self._infer_return_by_xcat(k): v for k, v in weights.items()}
            missing = [name for name in pnl_xcats if name not in weights]
            if missing:
                raise ValueError(f"No weight provided for PnL(s): {missing}.")
        weights = self._normalize_weights(weights)

        multiasset_df = []
        for pnl_xcat in pnl_xcats:
            single_asset_df = self.pnls_df[self.pnls_df["xcat"] == pnl_xcat].assign(
                asset=pnl_xcat
            )
            multiasset_df.append(single_asset_df)

        multiasset_df = QuantamentalDataFrame.from_qdf_list(multiasset_df)

        raw_pnls = multiasset_df.set_index(["real_date", "xcat"])["value"].unstack()

        # Default weights for each strategy
        start_weights = pd.DataFrame(
            {asset_name: weights[asset_name] for asset_name in raw_pnls.columns},
            index=raw_pnls.index,
        )

        # Daily change in portfolio weights due to previous returns since the last rebalancing
        mfreq = _map_to_business_day_frequency("M")
        weights_change = (
            (1 + raw_pnls / 100).groupby(pd.Grouper(freq=mfreq)).cumprod()
        )  # in decimals, not percentage, gross amount
        # Lag by one day within each month so that a day's weight only reflects
        # returns realised before that day; the first day of a month is reset to 1.
        weights_change = (
            weights_change.groupby(pd.Grouper(freq=mfreq))
            .shift(periods=1)
            .fillna(value=1)
        )

        # Dynamic weights, re-normalised to sum to one on every date
        final_weights = start_weights * weights_change
        final_weights = final_weights.div(final_weights.sum(axis=1), axis=0)

        # final calculation
        multiasset_rets = (final_weights * raw_pnls).sum(axis=1)

        multiasset_rets.name = composite_pnl_xcat

        multi_asset_pnl = QuantamentalDataFrame.from_long_df(
            multiasset_rets.reset_index().melt(
                id_vars=["real_date"], var_name="xcat", value_name="value"
            ),
            cid="ALL",
        )
        multi_asset_pnl = multi_asset_pnl.sort_values(by=["xcat", "real_date"])

        self.pnls_df = update_df(self.pnls_df, multi_asset_pnl).sort_values(
            by=["xcat", "real_date"]
        )
        # Re-combining under an existing name must not register it twice.
        if composite_pnl_xcat not in self.composite_pnl_xcats:
            self.composite_pnl_xcats.append(composite_pnl_xcat)

    def plot_pnls(
        self,
        pnl_xcats: Optional[List[str]] = None,
        title: Optional[str] = None,
        title_fontsize: int = 20,
        xcat_labels: Optional[Union[List[str], dict]] = None,
        figsize: Tuple = (12, 7),
        tick_fontsize: int = 12,
        label_fontsize: int = 12,
        legend_fontsize: Optional[int] = None,
    ):
        """
        Creates a plot of PnLs from added NaivePnL objects and/or
        combined PnLs created with `combine_pnls()`.

        The stored PnL dataframe and the ``pnl_xcats`` list passed in are left
        untouched; all plotting transformations happen on a copy.

        Parameters
        ----------
        pnl_xcats : List[str]
            List of PnLs to plot. If None, all PnLs are plotted. Must be in the format
            'xcat', or 'xcat/return_xcat'.
        title : str
            Title of the plot.
        title_fontsize : int
            font size for the title. Default is 20.
        xcat_labels : Union[List[str], dict]
            custom labels to be used for the PnLs. A list must have one label per
            PnL, in the same order as the PnLs. A dictionary may cover only some
            PnLs; the remaining ones keep their own name.
        figsize : Tuple
            tuple of plot width and height. Default is (12, 7).
        tick_fontsize : int
            font size for the tick labels. Default is 12.
        label_fontsize : int
            font size for the axis labels. Default is 12.
        legend_fontsize : int
            font size for the legend. Default is None (uses matplotlib default).
        """

        self._check_pnls_added()

        # Always work on a copy: relabelling and the helper column below must
        # never leak into the stored dataframe.
        if pnl_xcats is None:
            pnl_xcats = self.pnl_xcats
            pnl_df = self.pnls_df.copy()
        else:
            pnl_xcats = self._resolve_pnl_xcats(pnl_xcats)
            pnl_df = self.pnls_df[self.pnls_df["xcat"].isin(pnl_xcats)].copy()

        for col in ("cid", "xcat"):
            if pnl_df[col].dtype.name == "category":
                pnl_df[col] = pnl_df[col].cat.remove_unused_categories()

        if xcat_labels is not None:
            labels = self._check_xcat_labels(pnl_xcats, xcat_labels)
            # PnLs without a custom label keep their own name instead of being
            # mapped to NaN and dropped from the plot.
            labels = {**{name: name for name in pnl_xcats}, **labels}
            pnl_df["xcat"] = pnl_df["xcat"].map(labels)

        pnl_df["cumulative pnl"] = pnl_df.groupby("xcat")["value"].cumsum()

        sns.set_theme(
            style="whitegrid", palette="colorblind", rc={"figure.figsize": figsize}
        )

        sns.lineplot(
            data=pnl_df,
            x="real_date",
            y="cumulative pnl",
            hue="xcat",
            estimator=None,
            lw=1,
        )
        plt.axhline(y=0, color="black", linestyle="--", lw=1)
        plt.title(title, fontsize=title_fontsize)
        plt.xlabel(None, fontsize=label_fontsize)
        plt.ylabel("% risk capital, no compounding", fontsize=label_fontsize)
        plt.legend(
            title="PnL Category(s)",
            title_fontsize=legend_fontsize,
            fontsize=legend_fontsize,
        )
        plt.tick_params(axis="both", labelsize=tick_fontsize)
        plt.show()

    def evaluate_pnls(
        self,
        pnl_xcats: Optional[List[str]] = None,
        sr_thresholds: Optional[List[float]] = None,
    ) -> pd.DataFrame:
        """
        Returns a DataFrame containing the following evaluation metrics for specified PnLs:

        - Return %
        - St. Dev. %
        - Sharpe Ratio
        - Sortino Ratio
        - Max 21-Day Draw %
        - Max 6-Month Draw %
        - Peak to Trough Draw %
        - Top 5% Monthly PnL Share
        - Correlation with benchmarks (if available)
        - Sharpe Stability Ratio - HAC-robust t-stat for the mean rolling
          Sharpe ratio (see :func:`sharpe_stability_ratio`); accounts for
          sample size and serial dependence
        - Prob. Sharpe Ratio > {threshold} - one-sided asymptotic probability
          that the mean rolling Sharpe ratio is above each threshold in
          ``sr_thresholds``, if provided
        - Traded Months

        .. note::
            The evaluation metrics are calculated in a manner similar to NaivePnL's `evaluate_pnls()`.

            Benchmark correlations are included when ``df`` and ``bms`` are passed to the
            :class:`MultiPnL` constructor. They apply uniformly to all PnLs in the table,
            including composite PnLs.

        Parameters
        ----------
        pnl_xcats : List[str]
            List of PnLs to evaluate. If None, all PnLs are evaluated. Must be in the
            format 'xcat', or 'xcat/return_xcat'. The list is not modified.
        sr_thresholds : List[float], optional
            Sharpe ratio thresholds for which one-sided probabilities are
            reported. Default is None, in which case ``[0.25, 0.5, 0.75]`` is
            used. Pass an empty list to suppress the threshold probability rows.

        Returns
        -------
        ~pandas.DataFrame
            DataFrame containing evaluation metrics for the specified PnLs.

        Raises
        ------
        TypeError
            If ``sr_thresholds`` is not a list of (non-boolean) numbers.
        ValueError
            If no PnL has been added or a PnL name cannot be resolved.
        """

        self._check_pnls_added()
        if sr_thresholds is None:
            sr_thresholds = [0.25, 0.5, 0.75]
        if not isinstance(sr_thresholds, list):
            raise TypeError("sr_thresholds must be a list of numbers.")
        if not all(
            isinstance(elem, Number) and not isinstance(elem, bool)
            for elem in sr_thresholds
        ):
            raise TypeError("sr_thresholds must be a list of numbers.")

        if pnl_xcats is None:
            pnl_xcats = self.pnl_xcats
        else:
            pnl_xcats = self._resolve_pnl_xcats(pnl_xcats)

        pnl_evals = []
        for pnl_xcat in pnl_xcats:
            # Composite PnLs have no NaivePnL behind them; and whenever benchmarks
            # are configured every PnL goes through the internal evaluation so
            # that all columns carry the same rows.
            if pnl_xcat in self.composite_pnl_xcats or self._bm_dict:
                eval_df = self._evaluate_pnl_stats(
                    pnl_xcat, sr_thresholds=sr_thresholds
                )
            else:
                pnl = self.single_return_pnls[pnl_xcat]
                eval_df = pnl.evaluate_pnls(
                    [pnl_xcat.split("/")[0]], sr_thresholds=sr_thresholds
                )
            eval_df.columns = [pnl_xcat]
            pnl_evals.append(eval_df)

        return pd.concat(pnl_evals, axis=1, ignore_index=False, sort=False)

    def _evaluate_pnl_stats(
        self,
        pnl_xcat: str,
        sr_thresholds: List[float],
    ) -> pd.DataFrame:
        """
        Evaluate a PnL in a manner similar to NaivePnL's ``evaluate_pnls()``.

        Works for both single-return and composite PnLs. Benchmark correlation rows
        are included when ``self._bm_dict`` is populated.

        Parameters
        ----------
        pnl_xcat : str
            Fully resolved PnL name as stored in ``pnls_df``.
        sr_thresholds : List[float]
            Sharpe ratio thresholds; one probability row is added per threshold.

        Returns
        -------
        ~pandas.DataFrame
            One column (the PnL) with one row per statistic.
        """
        sr_prob_rows = [f"Prob. Sharpe Ratio > {float(sr):g}" for sr in sr_thresholds]
        stats = [
            "Return %",
            "St. Dev. %",
            "Sharpe Ratio",
            "Sortino Ratio",
            "Max 21-Day Draw %",
            "Max 6-Month Draw %",
            "Peak to Trough Draw %",
            "Top 5% Monthly PnL Share",
            *(f"{bm} correl" for bm in self._bm_dict),
            "Sharpe Stability Ratio",
            *sr_prob_rows,
            "Traded Months",
        ]

        pnl_df = self.pnls_df[self.pnls_df["xcat"] == pnl_xcat].copy()
        if pnl_df["xcat"].dtype.name == "category":
            pnl_df["xcat"] = pnl_df["xcat"].cat.remove_unused_categories()
        dfw = pnl_df.pivot(index="real_date", columns="xcat", values="value")
        df = pd.DataFrame(columns=dfw.columns, index=stats)

        # Annualisation uses 261 periods per year throughout.
        ann_return = dfw.mean(axis=0) * 261
        ann_stdev = dfw.std(axis=0) * np.sqrt(261)
        # Downside deviation: root mean square of the negative returns, where
        # the mean is taken over the full length of the series.
        downside_dev = dfw.apply(
            lambda x: np.sqrt(np.sum(x[x < 0] ** 2) / len(x))
        ) * np.sqrt(261)

        df.loc["Return %", :] = ann_return
        df.loc["St. Dev. %", :] = ann_stdev
        df.loc["Sharpe Ratio", :] = ann_return / ann_stdev
        df.loc["Sortino Ratio", :] = ann_return / downside_dev
        df.loc["Max 21-Day Draw %", :] = dfw.rolling(21).sum().min()
        df.loc["Max 6-Month Draw %", :] = dfw.rolling(6 * 21).sum().min()

        cum_pnl = dfw.cumsum()
        drawdown = cum_pnl.cummax() - cum_pnl
        df.loc["Peak to Trough Draw %", :] = -drawdown.max()

        mfreq = _map_to_business_day_frequency("M")
        monthly_pnl = dfw.resample(mfreq).sum()
        # Share of the total PnL contributed by the best 5% of months.
        n_top_months = int(np.ceil(len(monthly_pnl) * 0.05))
        top_months_pnl = monthly_pnl.apply(lambda s: s.nlargest(n_top_months).sum())
        df.loc["Top 5% Monthly PnL Share", :] = top_months_pnl / monthly_pnl.sum(
            axis=0
        )

        if self._bm_dict:
            bm_df = pd.concat(list(self._bm_dict.values()), axis=1)
            # Correlate only over the dates present in both PnL and benchmarks.
            common_dates = dfw.index.intersection(bm_df.index)
            for i, bm in enumerate(self._bm_dict):
                correlation = dfw.loc[common_dates].corrwith(
                    bm_df.loc[common_dates].iloc[:, i],
                    axis=0,
                    method="pearson",
                    drop=True,
                )
                df.loc[f"{bm} correl", :] = correlation

        for col in dfw.columns:
            returns = dfw[col].dropna()
            df.loc["Sharpe Stability Ratio", col] = sharpe_stability_ratio(
                returns,
                window=252,
                benchmark_sr=0.0,
                annualization_factor=261,
            )
            for sr, row in zip(sr_thresholds, sr_prob_rows):
                df.loc[row, col] = sharpe_stability_ratio(
                    returns,
                    window=252,
                    benchmark_sr=float(sr),
                    annualization_factor=261,
                    probability=True,
                )

        df.loc["Traded Months", :] = monthly_pnl.count()
        return df

    def get_pnls(self, pnl_xcats: Optional[List[str]] = None) -> pd.DataFrame:
        """
        Returns a DataFrame containing specified PnLs.

        Parameters
        ----------
        pnl_xcats : List[str]
            List of PnLs to return. If None, all PnLs are returned. Must be in the
            format 'xcat', or 'xcat/return_xcat'. The list is not modified.

        Returns
        -------
        ~pandas.DataFrame
            DataFrame containing the specified PnLs.

        Raises
        ------
        ValueError
            If a requested PnL name cannot be resolved.
        """

        if self.pnls_df is None:
            raise ValueError("No PnLs have been added. Use add_pnl() first.")

        if pnl_xcats is None:
            return QuantamentalDataFrame(self.pnls_df, categorical=False)

        resolved = self._resolve_pnl_xcats(pnl_xcats)
        return (
            QuantamentalDataFrame(self.pnls_df, _initialized_as_categorical=False)
            .reduce_df(xcats=resolved)
            .to_original_dtypes()
        )

    def _normalize_weights(self, weights: dict) -> dict:
        """
        Normalize the weights to sum up to 1.

        An empty dictionary is returned unchanged.

        Raises
        ------
        ValueError
            If the weights sum to zero, in which case they cannot be normalised.
        """
        weights_sum = sum(weights.values())
        if weights and weights_sum == 0:
            raise ValueError("The weights must not sum to zero.")
        return {k: v / weights_sum for k, v in weights.items()}

    def _validate_pnl(self, pnl: "NaivePnL", pnl_xcats: List[str]):
        """
        Validate the PnL and PnL categories.

        Returns True when ``pnl`` is a NaivePnL object and ``pnl_xcats`` is a
        collection of strings that are all PnL names of that object; raises
        ValueError otherwise.
        """
        if not isinstance(pnl, NaivePnL):
            raise ValueError("The pnl must be a NaivePnL object.")
        # A bare string would otherwise be treated as a collection of characters.
        if isinstance(pnl_xcats, str):
            raise ValueError("The pnl_xcats must be a list of strings.")
        # Type check comes first so that the subset test below only ever
        # compares strings.
        if not all(isinstance(x, str) for x in pnl_xcats):
            raise ValueError("All elements in the list must be strings.")
        if not set(pnl_xcats).issubset(pnl.pnl_names):
            raise ValueError("The pnl_xcats must be in the NaivePnL object.")
        return True

    @property
    def pnl_xcats(self):
        """
        List of all unique PnL categories stored in the MultiPnL object.

        Returns
        -------
        List[str]
            A list of unique category names from the "xcat" column in `pnls_df`.
        """
        return self.pnls_df["xcat"].unique().tolist()

    @property
    def return_xcats(self):
        """
        List of all unique return categories associated with PnLs.

        Returns
        -------
        List[str]
            A sorted list of unique return category names derived from the
            `xcat_to_ret` mapping.
        """
        # The mapping's values are sets of return names, so they are merged
        # rather than hashed.
        return sorted(set().union(*self.xcat_to_ret.values()))

    def _check_pnls_added(self, min_pnls: int = 1):
        """
        Check if at least `min_pnls` PnLs have been added.

        Raises
        ------
        ValueError
            If fewer than `min_pnls` PnLs are stored.
        """
        if len(self.pnl_xcats) < min_pnls:
            raise ValueError(
                f"At least {min_pnls} PnL must be added with add_pnl() first."
            )

    def _resolve_pnl_xcats(self, pnl_xcats: List[str]) -> List[str]:
        """
        Resolve every name in `pnl_xcats` to its stored form and return the
        result as a new list, leaving the input untouched.
        """
        return [self._infer_return_by_xcat(pnl_xcat) for pnl_xcat in pnl_xcats]

    def _infer_return_by_xcat(self, pnl_xcat):
        """
        Infer the return category from the xcat if not provided. Throws an error if there
        are multiple return categories for the xcat.

        Composite PnL names are returned as they are. Names already containing
        a '/' are returned unchanged once confirmed to be stored.
        """

        if pnl_xcat in self.composite_pnl_xcats:
            return pnl_xcat

        if "/" in pnl_xcat:
            if pnl_xcat not in self.pnl_xcats:
                raise ValueError(f"{pnl_xcat} has not been added with add_pnl() yet.")
            return pnl_xcat

        if pnl_xcat not in self.xcat_to_ret:
            raise ValueError(f"{pnl_xcat} has not been added with add_pnl() yet.")
        returns = self.xcat_to_ret[pnl_xcat]
        if len(returns) > 1:
            raise ValueError(
                f"{pnl_xcat} corresponds to multiple return categories: {returns}. "
                "Must append return to xcat in the format 'xcat/return'."
            )
        return f"{pnl_xcat}/{next(iter(returns))}"

    def _check_xcat_labels(self, pnl_xcats, xcat_labels):
        """
        Turn user-supplied labels into a {stored PnL name: label} dictionary.

        A dictionary has its keys resolved to stored PnL names; a list is paired
        with `pnl_xcats` by position and must have the same length.

        Raises
        ------
        ValueError
            If `xcat_labels` is neither a list nor a dictionary, or if a list has
            a different length than `pnl_xcats`.
        """
        if isinstance(xcat_labels, dict):
            return {self._infer_return_by_xcat(k): v for k, v in xcat_labels.items()}
        if isinstance(xcat_labels, list):
            if len(pnl_xcats) != len(xcat_labels):
                raise ValueError(
                    "If using a list, the number of labels must match the number of PnLs."
                )
            return dict(zip(pnl_xcats, xcat_labels))
        raise ValueError("xcat_labels must be a list or a dictionary.")


if __name__ == "__main__":
    # Demo: build two NaivePnL objects on simulated data, merge them into a
    # MultiPnL, create a composite PnL and evaluate/plot the results.
    np.random.seed(0)

    cids = ["AUD", "CAD", "GBP", "NZD", "USD", "EUR"]
    xcats = ["EQXR_NSA", "FXXR", "GROWTH", "INFL", "DUXR"]

    # Simulation parameters per cross-section.
    cols_1 = ["earliest", "latest", "mean_add", "sd_mult"]
    df_cids = pd.DataFrame(index=cids, columns=cols_1)
    df_cids.loc["AUD", :] = ["2008-01-03", "2020-12-31", 0.5, 2]
    df_cids.loc["CAD", :] = ["2010-01-03", "2020-11-30", 0, 1]
    df_cids.loc["GBP", :] = ["2012-01-03", "2020-11-30", -0.2, 0.5]
    df_cids.loc["NZD"] = ["2002-01-03", "2020-09-30", -0.1, 2]
    df_cids.loc["USD"] = ["2015-01-03", "2020-12-31", 0.2, 2]
    df_cids.loc["EUR"] = ["2008-01-03", "2020-12-31", 0.1, 2]

    # Simulation parameters per category.
    cols_2 = cols_1 + ["ar_coef", "back_coef"]

    df_xcats = pd.DataFrame(index=xcats, columns=cols_2)
    df_xcats.loc["EQXR_NSA"] = ["2000-01-03", "2020-12-31", 0.1, 1, 0, 0.3]
    df_xcats.loc["FXXR"] = ["2000-01-01", "2020-10-30", 1, 2, 0.95, 1]
    df_xcats.loc["GROWTH"] = ["2000-01-03", "2020-10-30", 1, 2, 0.9, 1]
    df_xcats.loc["INFL"] = ["2001-01-01", "2020-10-30", 1, 2, 0.8, 0.5]
    df_xcats.loc["DUXR"] = ["2000-01-01", "2020-12-31", 0.1, 0.5, 0, 0.1]

    black = {"AUD": ["2006-01-01", "2015-12-31"], "GBP": ["2022-01-01", "2100-01-01"]}
    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    # Equity PnL driven by the growth signal.
    pnl_eq = NaivePnL(
        dfd,
        ret="EQXR_NSA",
        sigs=["GROWTH"],
        cids=cids,
        start="2000-01-01",
        blacklist=black,
    )

    pnl_eq.make_pnl(
        sig="GROWTH",
        sig_op="zn_score_pan",
        sig_neg=False,
        sig_add=0.5,
        rebal_freq="monthly",
        vol_scale=5,
        rebal_slip=1,
        min_obs=250,
        thresh=2,
        pnl_name="PNL_EQ",
    )

    pnl_eq.make_long_pnl(vol_scale=10, label="LONG")

    # FX PnL driven by the (negated) inflation signal.
    pnl_fx = NaivePnL(
        dfd,
        ret="FXXR",
        sigs=["INFL"],
        cids=cids,
        start="2000-01-01",
        blacklist=black,
    )

    pnl_fx.make_pnl(
        sig="INFL",
        sig_op="zn_score_pan",
        sig_neg=True,
        sig_add=0.5,
        rebal_freq="monthly",
        vol_scale=5,
        rebal_slip=1,
        min_obs=250,
        thresh=2,
        pnl_name="PNL_FX",
    )

    pnl_fx.make_long_pnl(vol_scale=10, label="LONG")

    print(pnl_eq.pnl_names)

    # Pass df and benchmark tickers so evaluate_pnls() includes correlation rows
    # for all PnLs, including composite ones.
    mapnl = MultiPnL(dfd, bms=["EUR_EQXR_NSA", "USD_EQXR_NSA"])

    mapnl.add_pnl(pnl_fx, ["PNL_FX", "LONG"])
    mapnl.add_pnl(pnl_eq, ["PNL_EQ", "LONG"])

    mapnl.combine_pnls(
        ["PNL_EQ", "PNL_FX"],
        composite_pnl_xcat="EQ_FX_LONG",
    )
    print(mapnl.evaluate_pnls(["PNL_FX", "PNL_EQ", "EQ_FX_LONG"]))
    # List labels are matched to the PnLs by position, so they must follow the
    # same order as the PnL names.
    mapnl.plot_pnls(["PNL_FX", "PNL_EQ"], xcat_labels=["FX", "EQ"], title="PnLs")
    print(mapnl.evaluate_pnls())