# Source code for macrosynergy.panel.basket

"""
Basket class for calculating the returns and carries of baskets of financial contracts
using various weighting methods.
"""

import numpy as np
import pandas as pd
import seaborn as sns
import warnings

import random
from typing import List, Union, Tuple, Dict
from macrosynergy.panel.historic_vol import expo_weights, expo_std, flat_std
from macrosynergy.management.utils import reduce_df_by_ticker
from macrosynergy.panel.converge_row import ConvergeRow
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.management.simulate import make_qdf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates


[docs]class Basket(object):
    """
    Calculates the returns and carries of baskets of financial contracts using various
    weighting methods.

    Parameters
    ----------
    df : ~pandas.Dataframe
        standardized DataFrame containing the columns: 'cid', 'xcat', 'real_date' and
        'value'.
    contracts : List[str]
        base tickers (combinations of cross-sections and base categories) that define
        the contracts that go into the basket.
    ret : str
        return category postfix to be appended to the contract base; default is
        "XR_NSA".
    cry : List[str] or str
        carry category postfix; default is None. The field can either be a single carry
        or multiple carries defined in a List.
    start : str
        earliest date in ISO 8601 format. Default is None.
    end : str
        latest date in ISO 8601 format. Default is None.
    blacklist : dict
        cross-sections with date ranges that should be excluded from the DataFrame. If
        one cross-section has several blacklist periods append numbers to the cross-section
        code.
    ewgts : List[str]
        one or more postfixes that may identify exogenous weight categories. Similar to
        return postfixes they are appended to base tickers.


    .. note::
        Each instance of the class will update associated standardised DataFrames,
        containing return and carry categories, and external weights.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        contracts: List[str],
        ret: str = "XR_NSA",
        cry: Union[str, List[str]] = None,
        start: str = None,
        end: str = None,
        blacklist: dict = None,
        ewgts: List[str] = None,
    ):
        """
        Validates the arguments and prepares the wide panels used by the basket
        calculations (returns, and optionally carries and external weights).
        """
        qdf = QuantamentalDataFrame(df[["cid", "xcat", "real_date", "value"]])
        # Remembered so that `return_basket` can hand back the caller's dtypes.
        self._as_categorical = qdf.InitializedAsCategorical

        assert isinstance(contracts, list)
        c_error = "Contracts must be a list of strings."
        assert all(isinstance(c, str) for c in contracts), c_error
        assert isinstance(ret, str), "`ret` must be a string"
        # A single weight postfix is accepted and wrapped into a list.
        if isinstance(ewgts, str):
            ewgts = [ewgts]

        # Apply the date window and the blacklist before any pivoting.
        qdf = reduce_df_by_ticker(
            qdf, start=start, end=end, ticks=None, blacklist=blacklist
        )

        self.contracts = contracts
        self.ret = ret
        self.ticks_ret = [f"{base}{ret}" for base in contracts]
        # Wide return panel without the dates on which every contract is missing.
        self.dfw_ret = self.pivot_dataframe(qdf, self.ticks_ret).dropna(
            axis=0, how="all"
        )

        # Sets `<name>_flag`, `ticks_<name>`, `dfws_<name>` (and `<name>` if given).
        self.store_attributes(qdf, cry, "cry")
        self.store_attributes(qdf, ewgts, "wgt")

        self.tickers = self.ticks_ret + self.ticks_cry + self.ticks_wgt
        self.start = self.date_check(start)
        self.end = self.date_check(end)
        self.dfx = reduce_df_by_ticker(qdf, ticks=self.tickers)
        self.dict_retcry = {}  # basket name -> wide frame of basket return/carry.
        self.dict_wgs = {}  # basket name -> wide frame of contract weights.

[docs]    def store_attributes(self, df: pd.DataFrame, pfx: List[str], pf_name: str):
        """
        Adds multiple attributes to class based on postfixes that denote carry or
        external weight types.

        Parameters
        ----------
        df : ~pandas.DataFrame
            original, standardised DataFrame.
        pfx : List[str]
            category postfixes involved in the basket calculation.
        pf_name : str
            associated name of the postfix "cry" or "wgt".  Note: These are [1] flags of
            existence of carry and weight strings in class, [2] lists of tickers related to
            all postfixes, [3] a dictionary of wide time series panel dataframes for all
            postfixes.
        """

        pfx_flag = pfx is not None
        self.__dict__[pf_name + "_flag"] = pfx_flag
        self.__dict__["ticks_" + pf_name] = []
        if pfx_flag:
            error = f"'{pf_name}' must be a <str> or a <List[str]>."
            assert isinstance(pfx, (list, str)), error
            pfx = [pfx] if isinstance(pfx, str) else pfx
            self.__dict__[pf_name] = pfx

            dfws_pfx: Dict[str, pd.DataFrame] = {}
            for cat in pfx:
                ticks = [con + cat for con in self.contracts]
                self.__dict__["ticks_" + pf_name] += ticks
                dfws_pfx[cat] = self.pivot_dataframe(df, ticks)
                if dfws_pfx[cat].empty:
                    raise ValueError(f"Empty dataframe for contract-type: {cat}")
                missing = set(ticks) - set(dfws_pfx[cat].columns)
                if missing:
                    raise ValueError(
                        f"Missing tickers in dataframe for contract-type: {cat}: {missing}"
                    )

        else:
            dfws_pfx = None

        self.__dict__["dfws_" + pf_name] = dfws_pfx

[docs]    @staticmethod
    def pivot_dataframe(df: QuantamentalDataFrame, tick_list: List[str]):
        """
        Reduces the standardised DataFrame to include a subset of the possible tickers
        and, subsequently returns a wide dataframe: each column corresponds to a ticker.

        Parameters
        ----------
        tick_list : List[str]
            list of the respective tickers.
        df : ~pandas.DataFrame
            standardised dataframe.

        Returns
        -------
        ~pandas.DataFrame
            wide dataframe.
        """

        return (
            QuantamentalDataFrame(df=df)
            .reduce_df_by_ticker(tickers=tick_list)
            .to_wide()
        )

[docs]    @staticmethod
    def date_check(date_string):
        """
        Validates that the dates passed are valid timestamp expressions and will convert
        to the required form '%Y-%m-%d'. Will raise an assertion if not in the expected
        form.

        Parameters
        ----------
        date_string : str
            valid date expression. For instance, "1st January, 2000."
        """

        date_error = "Expected form of string: '%Y-%m-%d'."
        if date_string is not None:
            try:
                pd.Timestamp(date_string).strftime("%Y-%m-%d")
            except ValueError:
                raise AssertionError(date_error)
            else:
                return pd.Timestamp(date_string).strftime("%Y-%m-%d")

[docs]    @staticmethod
    def check_weights(weight: pd.DataFrame):
        """
        Checks if all rows in dataframe add up to roughly 1.

        Parameters
        ----------
        weight : ~pandas.DataFrame
            weight dataframe.
        """

        check = weight.sum(axis=1)
        c = ~((abs(check - 1) < 1e-6) | (abs(check) < 1e-6))
        assert not any(c), f"weights must sum to one (or zero), not: {check[c]}"

[docs]    @staticmethod
    def max_weight_func(weights: pd.DataFrame, max_weight: float):
        """
        Enforces maximum weight caps or - if impossible applies equal weight.

        Parameters
        ----------
        weights : ~pandas.DataFrame
            Corresponding weight matrix. Multidimensional.
        max_weight : float
            Upper-bound on the weight allowed for each cross-section.

        Returns
        -------
        ~pandas.DataFrame
            Will return the modified weight DataFrame.


        .. note::
            If the maximum weight is
            less than the equal weight weight, this replaces the computed weight with the
            equal weight. For instance, [np.nan, 0.63, np.nan, np.nan, 0.27] becomes
            [np.nan, 0.5, np.nan, np.nan, 0.5]. Otherwise, the function calls the
            ConvergeRow Class to ensure all weights "converge" to a value within the upper-
            bound. Allow for a margin of error set to 0.001.
        """

        dfw_wgs = weights.to_numpy()

        for i, row in enumerate(dfw_wgs):
            row = ConvergeRow.application(row, max_weight)
            weights.iloc[i, :] = row

        return weights

[docs]    def equal_weight(self, df_ret: pd.DataFrame) -> pd.DataFrame:
        """
        Calculates dataframe of equal weights based on available return data.

        Parameters
        ----------
        df_ret : ~pandas.DataFrame
            wide time-indexed data frame of returns.

        Returns
        -------
        ~pandas.DataFrame
            dataframe of weights.


        Notes
        -----
        The method determines the number of non-NA cross-sections per timestamp, and
        subsequently distributes the weights evenly across non-NA cross-sections.
        """

        act_cross = ~df_ret.isnull()
        uniform = (1 / act_cross.sum(axis=1)).values
        uniform = uniform[:, np.newaxis]

        broadcast = np.repeat(uniform, df_ret.shape[1], axis=1)

        weight = act_cross.multiply(broadcast)
        cols = weight.columns
        # Zeroes treated as NaNs.
        weight[cols] = weight[cols].replace({0: np.nan})

        self.check_weights(weight=weight)

        return weight

[docs]    def fixed_weight(self, df_ret: pd.DataFrame, weights: List[float]):
        """
        Calculates fixed weights based on a single list of values and a corresponding
        return panel dataframe.

        Parameters
        ----------
        df_ret : ~pandas.DataFrame
            Return series matrix. Multidimensional.
        weights : List[float]
            List of floats determining weight allocation.

        Returns
        -------
        ~pandas.DataFrame
            panel of weights
        """

        act_cross = ~df_ret.isnull()

        weights = np.array(weights, dtype=np.float32)
        rows = act_cross.shape[0]
        broadcast = np.tile(weights, (rows, 1))  # Constructs Array by row repetition.

        # Replaces weight factors with zeroes if concurrent return unavailable.
        weight = act_cross.multiply(broadcast)
        cols = weight.columns
        # Zeroes treated as NaNs.
        weight[cols] = weight[cols].replace({0: np.nan})

        weight_arr = weight.to_numpy()  # convert df to np array.
        weight[weight.columns] = weight_arr / np.sum(weight_arr, axis=1)[:, np.newaxis]
        self.check_weights(weight)

        return weight

[docs]    def inverse_weight(
        self,
        dfw_ret: pd.DataFrame,
        lback_meth: str = "xma",
        lback_periods: int = 21,
        remove_zeros: bool = True,
    ):
        """
        Calculates weights inversely proportionate to recent return standard deviations.

        Parameters
        ----------
        dfw_ret : ~pandas.DataFrame
            panel dataframe of returns.
        lback_meth : str
            Lookback method for "invsd" weighting method. Default is "xma".
        lback_periods : int
            Lookback periods. Default is 21.  Half-time for "xma" and full lookback
            period for "ma".
        remove_zeros : Bool
            Any returns that are exact zeros will not be included in the lookback window
            and prior non-zero values are added to the window instead.

        Returns
        -------
        ~pandas.DataFrame
            Dataframe of weights.


        .. note::
            The rolling standard deviation will be calculated either using the standard
            moving average (ma) or the exponential moving average (xma). Both will require
            returns before a first weight can be computed.
        """

        if lback_meth == "ma":
            dfwa = dfw_ret.rolling(window=lback_periods).agg(flat_std, remove_zeros)
            dfwa *= np.sqrt(252)

        else:
            half_life = lback_periods
            weights = expo_weights(lback_periods * 2, half_life)
            dfwa = dfw_ret.rolling(window=lback_periods * 2).agg(
                expo_std, w=weights, remove_zeros=remove_zeros
            )

        cols = dfwa.columns
        # Zeroes treated as NaNs.
        dfwa[cols] = dfwa[cols].replace({0: np.nan})

        df_isd = 1 / dfwa
        df_wgts = df_isd / df_isd.sum(axis=1).values[:, np.newaxis]
        self.check_weights(df_wgts)

        return df_wgts

[docs]    def values_weight(
        self, dfw_ret: pd.DataFrame, dfw_wgt: pd.DataFrame, weight_meth: str
    ):
        """
        Returns weights based on an external weighting category.

        Parameters
        ----------
        dfw_ret : ~pandas.DataFrame
            Standard wide dataframe of returns across time and contracts.
        dfw_wgt : ~pandas.DataFrame
            Standard wide dataframe of weight category values across time and contracts.
        weight_meth : str


        Returns
        -------
        ~pandas.DataFrame
            Dataframe of weights.
        """

        negative_condition = np.any((dfw_wgt < 0).to_numpy())
        if negative_condition:
            dfw_wgt[dfw_wgt < 0] = 0.0
            warnings.warn("Negative values in the weight matrix set to zero.")

        exo_array = dfw_wgt.to_numpy()
        df_bool = ~dfw_ret.isnull()

        weights_df = df_bool.multiply(exo_array)
        cols = weights_df.columns

        # Zeroes treated as NaNs.
        weights_df[cols] = weights_df[cols].replace({0: np.nan})

        if weight_meth != "values":
            weights_df = 1 / weights_df

        weights = weights_df.divide(weights_df.sum(axis=1), axis=0)
        self.check_weights(weights)

        return weights

[docs]    def make_weights(
        self,
        weight_meth: str = "equal",
        weights: List[float] = None,
        lback_meth: str = "xma",
        lback_periods: int = 21,
        ewgt: str = None,
        max_weight: float = 1.0,
        remove_zeros: bool = True,
    ):
        """
        Returns wide dataframe of weights to be used for basket series.

        Parameters
        ----------
        weight_meth : str
            method used for weighting constituent returns and carry. The parameter can
            receive either a single weight method or multiple weighting methods. See
            `make_basket` docstring.
        weights : List[float]
            single list of weights corresponding to the base tickers in `contracts`
            argument. This is only relevant for the fixed weight method.
        lback_meth : str
            look-back method for "invsd" weighting method. Default is Exponential MA,
            "ema". The alternative is simple moving average, "ma".
        lback_periods : int
            look-back periods for "invsd" weighting method. Default is 21.  Half-time
            for "xma" and full lookback period for "ma".
        ewgt : str
            Exogenous weight postfix that defines the weight value panel. Only needed
            for the 'values' or 'inv_values' method.
        max_weight : float
            maximum weight of a single contract. Default is 1, i.e zero restrictions.
            The purpose of the restriction is to limit concentration within the basket.
        remove_zeros : bool
            removes the zeros. Default is set to True.

        Returns
        -------
        pd.DataFrame
            wide dataframe of contract weights across time.
        """

        assert 0.0 < max_weight <= 1.0
        assert weight_meth in ["equal", "fixed", "values", "inv_values", "invsd"]

        # Apply weight method.

        if weight_meth == "equal":
            dfw_wgs = self.equal_weight(df_ret=self.dfw_ret)

        elif weight_meth == "fixed":
            message = "Expects a list of weights."
            message_2 = "List of weights must be equal to the number of contracts."
            assert isinstance(weights, list), message
            assert self.dfw_ret.shape[1] == len(weights), message_2
            message_3 = "Expects a list of floating point values."
            assert all(isinstance(w, (int, float)) for w in weights), message_3

            dfw_wgs = self.fixed_weight(df_ret=self.dfw_ret, weights=weights)

        elif weight_meth == "invsd":
            error_message = "Lookback method method must be 'ma' or 'xma'."
            assert lback_meth in ["xma", "ma"], error_message
            assert isinstance(lback_periods, int), "Expects <int>."
            dfw_wgs = self.inverse_weight(
                dfw_ret=self.dfw_ret,
                lback_meth=lback_meth,
                lback_periods=lback_periods,
                remove_zeros=remove_zeros,
            )

        elif weight_meth in ["values", "inv_values"]:
            assert ewgt in self.wgt, f"{ewgt} is not defined on the instance."
            # Lag by one day to be used as weights.
            try:
                dfw_wgt = self.dfws_wgt[ewgt].shift(1)
            except KeyError as e:
                print(f"Basket not found: {e}.")
            else:
                cols = sorted(dfw_wgt.columns)
                dfw_ret = dfw_wgt.reindex(cols, axis=1)
                dfw_wgt = dfw_wgt.reindex(cols, axis=1)
                dfw_wgs = self.values_weight(dfw_ret, dfw_wgt, weight_meth)

        else:
            raise NotImplementedError(f"Weight method unknown {weight_meth}")

        # Remove leading NA rows.

        fvi = max(dfw_wgs.first_valid_index(), self.dfw_ret.first_valid_index())
        dfw_wgs = dfw_wgs.loc[fvi:]

        # Impose cap on cross-section weights.

        if max_weight < 1.0:
            dfw_wgs = self.max_weight_func(weights=dfw_wgs, max_weight=max_weight)

        return dfw_wgs

[docs]    @staticmethod
    def column_manager(df_cat: pd.DataFrame, dfw_wgs: pd.DataFrame):
        """
        Will match the column names of the two dataframes involved in the computation:
        either the return & weight dataframes or the carry & weight dataframes. The
        pandas multiply operation requires the column names, of both dataframes involved
        in the binary operation, to be identical.

        Parameters
        ----------
        df_cat : ~pandas.DataFrame
            return or carry dataframe.
        dfw_wgs : ~pandas.DataFrame
            weight dataframe.

        Returns
        -------
        ~pandas.DataFrame
            modified weight dataframe (column names will map to the other dataframe
            received).
        """

        df_cat = df_cat.reindex(sorted(df_cat.columns), axis=1)
        dfw_wgs = dfw_wgs.reindex(sorted(dfw_wgs.columns), axis=1)

        ret_cols = df_cat.columns
        weight_cols = dfw_wgs.columns

        if all(ret_cols != weight_cols):
            dfw_wgs.columns = ret_cols

        return dfw_wgs

[docs]    def column_weights(self, dfw_wgs: pd.DataFrame):
        """
        The weight dataframe is used to compute the basket performance for returns,
        carries etc. Therefore, with their broad application, the column names of the
        dataframe should correspond to the ticker postfix of each contract.

        Parameters
        ----------
        dfw_wgs : ~pandas.DataFrame
            weight dataframe.

        Returns
        -------
        ~pandas.DataFrame
            weight dataframe with updated columns names.
        """

        dfw_weight_names = lambda w_name: w_name[: w_name.find(self.w_field)]
        if self.wgt_flag and self.exo_w_postfix is not None:
            self.w_field = self.exo_w_postfix
        else:
            self.w_field = self.ret

        cols = list(map(dfw_weight_names, dfw_wgs.columns))
        dfw_wgs.columns = cols
        dfw_wgs.columns.name = "ticker"

        return dfw_wgs

[docs]    def make_basket(
        self,
        weight_meth: str = "equal",
        weights: List[float] = None,
        lback_meth: str = "xma",
        lback_periods: int = 21,
        ewgt: str = None,
        max_weight: float = 1.0,
        remove_zeros: bool = True,
        basket_name: str = "GLB_ALL",
    ):
        """
        Calculates all basket performance categories.

        Parameters
        ----------
        weight_meth : str
            method used for weighting constituent returns and carry. The parameter can
            receive either a single weight method or multiple weighting methods. The options
            are as follows:

            - [1] "equal": all constituents with non-NA returns have the same
            weight. This is the default.

            - [2] "fixed": weights are proportionate to a single
            list of values provided which are passed to argument `weights` (each value
            corresponds to a single contract).

            - [3] "invsd": weights based on inverse to
            standard deviations of recent returns.

            - [4] "values": weights proportionate to a
            panel of values of exogenous weight category.

            - [5] "inv_values": weights are
            inversely proportionate to of values of exogenous weight category.

        weights : List[float]
            single list of weights corresponding to the base tickers in `contracts`
            argument. This is only relevant for the fixed weight method.
        lback_meth : str
            look-back method for "invsd" weighting method. Default is Exponential MA,
            "ema". The alternative is simple moving average, "ma".
        lback_periods : int
            look-back periods for "invsd" weighting method. Default is 21.  Half-time
            for "xma" and full lookback period for "ma".
        ewgt : str
            Exogenous weight postfix that defines the weight value panel. Only needed
            for the 'values' or 'inv_values' method.
        max_weight : float
            maximum weight of a single contract. Default is 1, i.e zero restrictions.
            The purpose of the restriction is to limit concentration within the basket.
        remove_zeros : bool
            removes the zeros. Default is set to True.
        basket_name : str
            name of basket base ticker (analogous to contract name) to be used for
            return and (possibly) carry are calculated. Default is "GLB_ALL".
        """

        assert isinstance(weight_meth, str), "`weight_meth` must be string"

        self.exo_w_postfix: str = ewgt
        dfw_wgs = self.make_weights(
            weight_meth=weight_meth,
            weights=weights,
            lback_meth=lback_meth,
            lback_periods=lback_periods,
            ewgt=ewgt,
            max_weight=max_weight,
            remove_zeros=remove_zeros,
        )
        select = ["ticker", "real_date", "value"]

        dfw_wgs_copy = self.column_manager(df_cat=self.dfw_ret, dfw_wgs=dfw_wgs)

        dfw_bret = self.dfw_ret.multiply(dfw_wgs_copy).sum(axis=1)
        basket_ret = basket_name + "_" + self.ret
        store = [dfw_bret.to_frame(basket_ret)]
        if not hasattr(self, "cry_flag"):
            raise ValueError("Please initialise the class to use this method. ")
        self.cry_flag: bool
        if self.cry_flag:
            cry_list = []
            for cr in self.cry:
                dfw_wgs_copy = self.column_manager(
                    df_cat=self.dfws_cry[cr], dfw_wgs=dfw_wgs
                )
                self.dfws_cry: Dict[str, pd.DataFrame]
                dfw_bcry: pd.Series = (
                    self.dfws_cry[cr].multiply(dfw_wgs_copy).sum(axis=1)
                )
                tkr = basket_name + "_" + cr
                cry_list.append(dfw_bcry.to_frame(tkr))

            store += cry_list

        df_retcry = pd.concat(store, axis=1)
        self.dict_retcry[basket_name] = df_retcry

        self.dict_wgs[basket_name] = self.column_weights(dfw_wgs)

[docs]    def weight_visualiser(
        self,
        basket_name,
        start_date: str = None,
        end_date: str = None,
        subplots: bool = True,
        facet_grid: bool = False,
        scatter: bool = False,
        all_tickers: bool = True,
        single_ticker: str = None,
        percentage_change: bool = False,
        size: Tuple[int, int] = (7, 7),
    ):
        """
        Method used to visualise the weights associated with each contract in the
        basket.

        Parameters
        ----------
        basket_name : str
            name of basket whose weights are visualized
        start_date : str
            start date of he visualisation period.
        end_date : str
            end date of the visualization period.
        subplots : bool
            contract weights are displayed on different plots (True) or on a single plot
            (False).
        facet_grid : bool
            parameter used to break up the plot into multiple cartesian coordinate
            systems. If the basket consists of a high number of contracts, using the Facet
            Grid is recommended.
        scatter : bool
            if the facet_grid parameter is set to True there are two options: i) scatter
            plot if there a lot of blacklist periods; ii) line plot for continuous series.
        all_tickers : bool
            if True (default) all weights are displayed. If set to False `single-ticker`
            must be specified.
        single_ticker : str
            individual ticker for further, more detailed, analysis.
        percentage_change : bool
            graphical display used to further assimilate the fluctuations in the
            contract's weight. The graphical display is limited to a single contract.
            Therefore, pass the ticker into the parameter "single_ticker".
        size : Tuple[int, int]
            size of the plot. Default is (7, 7).
        """

        date_conv = lambda d: pd.Timestamp(d).strftime("%Y-%m-%d %X")
        try:
            dfw_wgs = self.dict_wgs[basket_name]
        except KeyError as e:
            print(f"Basket not found - call make_basket() method first: {e}.")
        else:
            if isinstance(start_date, str) and isinstance(end_date, str):
                self.date_check(start_date)
                self.date_check(end_date)
                start_date = date_conv(start_date)
                end_date = date_conv(end_date)
            elif isinstance(start_date, str):
                self.date_check(start_date)
                start_date = date_conv(start_date)
                end_date = dfw_wgs.index[-1]
            elif isinstance(end_date, str):
                self.date_check(end_date)
                start_date = dfw_wgs.index[0]
                end_date = date_conv(end_date)
            else:
                start_date = dfw_wgs.index[0]
                end_date = dfw_wgs.index[-1]

            error_1 = f"{start_date} unavailable in weight dataframe."
            c = dfw_wgs.index
            assert start_date in c, error_1
            error_2 = f"{end_date} unavailable in weight dataframe."
            assert end_date in c, error_2

            dfw_wgs = dfw_wgs.truncate(before=start_date, after=end_date)
            if not all_tickers:
                error_3 = "The parameter, 'single_ticker', must be a <str>."
                assert isinstance(single_ticker, str), error_3
                error_4 = (
                    f"Ticker not present in the weight dataframe. Available "
                    f"tickers: {list(dfw_wgs.columns)}."
                )
                assert single_ticker in dfw_wgs.columns, error_4
                dfw_wgs = dfw_wgs[[single_ticker]]

            if facet_grid:
                dfw_wgs.columns.name = "ticker"
                df_stack = dfw_wgs.stack().to_frame("value").reset_index()
                df_stack = df_stack.sort_values(["ticker", "real_date"])
                no_contracts = dfw_wgs.shape[1]
                facet_cols = 4 if no_contracts >= 8 else 3
                sns.set(rc={"figure.figsize": size})
                fg = sns.FacetGrid(
                    df_stack, col="ticker", col_wrap=facet_cols, sharey=True
                )

                scatter_error = (
                    f"Boolean object expected - instead received " f"{type(scatter)}."
                )
                assert isinstance(scatter, bool), scatter_error
                if scatter:
                    fg.map_dataframe(sns.scatterplot, x="real_date", y="value")
                else:
                    # Seaborn will linearly interpolate NaN values which is visually
                    # misleading. Therefore, aim to negate the operation.
                    fg.map_dataframe(
                        sns.lineplot,
                        x="real_date",
                        y="value",
                        hue=df_stack["value"].isna().cumsum(),
                        palette=["blue"] * df_stack["value"].isna().cumsum().nunique(),
                        estimator=None,
                        markers=True,
                    )

                equal_value = 1 / no_contracts
                fg.map(plt.axhline, y=equal_value, linestyle="--", color="gray", lw=0.5)
                # Set axes labels of individual charts.
                fg.set_axis_labels("", "")
                fg.set_titles(col_template="{col_name}")
                fg.fig.suptitle("Contract weights in basket", y=1.02)
            else:
                plt.rcParams["figure.figsize"] = size
                dfw_wgs.plot(
                    subplots=subplots, title="Weight Values Timestamp", legend=True
                )
                plt.xlabel("real_date, years")

            date_func = lambda d: pd.Timestamp(d).strftime("%Y-%m-%d")
            if percentage_change:
                error_5 = (
                    "Percentage change display is applied to a single ticker. Set "
                    "the parameter 'all_tickers' to False."
                )
                assert dfw_wgs.shape[1] == 1, error_5

                plt.rcParams["figure.figsize"] = size
                fig, ax = plt.subplots()
                dfw_pct = dfw_wgs.pct_change(periods=1) * 100
                n_index = np.array(list(map(date_func, dfw_pct.index)))
                dfw_pct = dfw_pct.set_index(keys=n_index)
                dfw_pct.plot(kind="bar", color="coral", ax=ax)
                ax.xaxis.set_major_locator(mdates.MonthLocator())
                plt.xticks(rotation=0)
                ax.set_ylabel("Percentage Change in weight.")
                ax.legend()

            plt.show()

[docs]    @staticmethod
    def column_split(df: pd.DataFrame):
        """
        Receives a dataframe with the columns 'ticker', 'real_date' and 'value' and
        returns a standardised dataframe with the columns 'cid', 'xcat', 'real_date' and
        'value. The 'ticker' column is broken up to produce the two new columns.

        Parameters
        ----------
        df : ~pandas.DataFrame

        Returns
        -------
        ~pandas.DataFrame
            standardised dataframe.
        """

        select = ["cid", "xcat", "real_date", "value"]

        cid_func = lambda t: t.split("_")[0]
        xcat_func = lambda t: "_".join(t.split("_")[1:])

        cids_w_df = list(map(cid_func, df["ticker"]))
        df["cid"] = np.array(cids_w_df)

        df = df.rename(columns={"ticker": "xcat"})
        df["xcat"] = np.array(list(map(xcat_func, df["xcat"])))

        df = df[select]
        return df

[docs]    def return_basket(self, basket_names: Union[str, List[str]] = None):
        """
        Return standardized dataframe of basket performance data based on one or more
        weighting methods.

        Parameters
        ----------
        basket_names : str or List[str]
            single basket name or list for which performance data are to be returned. If
            none is given all baskets added to the instance are selected.

        Returns
        -------
        QuantamentalDataFrame
            standardized DataFrame with the basket return and (possibly) carry data in
            standard form, i.e. columns 'cid', 'xcat', 'real_date' and 'value'.
        """

        if basket_names is None:
            basket_names = list(self.dict_retcry.keys())

        basket_error = "String or List of basket names expected."
        assert isinstance(basket_error, (list, str)), basket_error
        if isinstance(basket_names, str):
            basket_names = [basket_names]

        ret_baskets = []
        for b in basket_names:
            try:
                dfw_retcry = self.dict_retcry[b]
            except KeyError as e:
                print(f"Basket not found - call make_basket() method first: {e}.")
            else:
                qdf = QuantamentalDataFrame.from_wide(dfw_retcry)
                ret_baskets.append(qdf)

        return_df = QuantamentalDataFrame.from_qdf_list(ret_baskets)
        return QuantamentalDataFrame(
            df=return_df, _initialized_as_categorical=self._as_categorical
        ).to_original_dtypes()

[docs]    def return_weights(self, basket_names: Union[str, List[str]] = None):
        """
        Return the standardised dataframe containing the corresponding weights used to
        compute the basket.

        Parameters
        ----------
        basket_names : str or List[str]
            single basket name or list for which performance data are to be returned. If
            none is given all baskets added to the instance are selected.

        Returns
        -------
        QuantamentalDataFrame
            standardized DataFrame with basket weights.
        """

        if basket_names is None:
            basket_names = list(self.dict_wgs.keys())

        basket_error = "String or List of basket names expected."
        assert isinstance(basket_error, (list, str)), basket_error
        if isinstance(basket_names, str):
            basket_names = [basket_names]

        weight_baskets = []
        select = ["cid", "xcat", "real_date", "value"]

        for b in basket_names:
            try:
                dfw_wgs: pd.DataFrame = self.dict_wgs[b]
            except KeyError as e:
                print(f"Basket not found - call make_basket() method first: {e}.")
            else:
                pfx = f"_{b}_WGT"
                qdf = QuantamentalDataFrame.from_wide(dfw_wgs).rename_xcats(postfix=pfx)
                weight_baskets.append(qdf)

        return QuantamentalDataFrame(
            df=QuantamentalDataFrame.from_qdf_list(weight_baskets),
            _initialized_as_categorical=self._as_categorical,
        ).to_original_dtypes()


if __name__ == "__main__":
    cids = ["AUD", "GBP", "NZD", "USD"]
    xcats = [
        "FXXR_NSA",
        "FXCRY_NSA",
        "FXCRR_NSA",
        "EQXR_NSA",
        "EQCRY_NSA",
        "EQCRR_NSA",
        "FXWBASE_NSA",
        "EQWBASE_NSA",
    ]

    df_cids = pd.DataFrame(
        index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
    )

    df_cids.loc["AUD"] = ["2000-01-01", "2022-03-14", 0, 1]
    df_cids.loc["GBP"] = ["2001-01-01", "2022-03-14", 0, 2]
    df_cids.loc["NZD"] = ["2002-01-01", "2022-03-14", 0, 3]
    df_cids.loc["USD"] = ["2000-01-01", "2022-03-14", 0, 4]

    df_xcats = pd.DataFrame(
        index=xcats,
        columns=["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"],
    )
    df_xcats.loc["FXXR_NSA"] = ["2010-01-01", "2022-03-14", 0, 1, 0, 0.2]
    df_xcats.loc["FXCRY_NSA"] = ["2010-01-01", "2022-03-14", 1, 1, 0.9, 0.2]
    df_xcats.loc["FXCRR_NSA"] = ["2010-01-01", "2022-03-14", 0.5, 0.8, 0.9, 0.2]
    df_xcats.loc["EQXR_NSA"] = ["2010-01-01", "2022-03-14", 0.5, 2, 0, 0.2]
    df_xcats.loc["EQCRY_NSA"] = ["2010-01-01", "2022-03-14", 2, 1.5, 0.9, 0.5]
    df_xcats.loc["EQCRR_NSA"] = ["2010-01-01", "2022-03-14", 1.5, 1.5, 0.9, 0.5]
    df_xcats.loc["FXWBASE_NSA"] = ["2010-01-01", "2022-02-01", 1, 1.5, 0.8, 0.5]
    df_xcats.loc["EQWBASE_NSA"] = ["2010-01-01", "2022-02-01", 1, 1.5, 0.9, 0.5]

    random.seed(2)
    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    black = {"AUD": ["2010-01-01", "2013-12-31"], "GBP": ["2010-01-01", "2013-12-31"]}
    contracts = ["AUD_FX", "AUD_EQ", "NZD_FX", "GBP_EQ", "USD_EQ"]
    gdp_figures = [17.0, 17.0, 41.0, 9.0, 250.0]

    contracts_1 = ["AUD_FX", "GBP_FX", "NZD_FX", "USD_EQ"]

    # First test. Multiple carries. Equal weight method.
    # The main aspect to check in the code is that basket performance has been applied to
    # both the return category and the multiple carry categories.
    dfd["grading"] = np.ones(dfd.shape[0])

    basket_1 = Basket(
        df=dfd,
        contracts=contracts_1,
        ret="XR_NSA",
        cry=["CRY_NSA", "CRR_NSA"],
        blacklist=black,
    )
    basket_1.make_basket(weight_meth="equal", max_weight=0.55, basket_name="GLB_EQUAL")

    basket_1.make_basket(
        weight_meth="fixed",
        max_weight=0.55,
        weights=[1 / 6, 1 / 6, 1 / 6, 1 / 2],
        basket_name="GLB_FIXED",
    )

    # show the weights of the GLB_FIXED basket

    basket_1.weight_visualiser(basket_name="GLB_FIXED", subplots=False, size=(10, 5))