Source code for macrosynergy.pnl.contract_signals

"""
Module for calculating contract signals based on cross-section-specific signals, and
hedging them with a basket of contracts. Main function is `contract_signals`.
"""

import pandas as pd
import warnings
from numbers import Number
from typing import List, Union, Tuple, Optional, Set, Any, Dict

from macrosynergy.management.types import NoneType, QuantamentalDataFrame
from macrosynergy.panel import make_relative_value
from macrosynergy.management.utils import (
    is_valid_iso_date,
    update_df,
    reduce_df,
    estimate_release_frequency,
)
import logging

logger = logging.getLogger(__name__)


def _check_scaling_args(
    ctypes: List[str],
    cscales: Optional[List[Union[Number, str]]] = None,
    csigns: Optional[List[int]] = None,
    hbasket: Optional[List[str]] = None,
    hscales: Optional[List[Union[Number, str]]] = None,
    hratios: Optional[str] = None,
) -> Tuple[Any, Any, Any, Any, Any]:

    ## Check cscales and csigns
    if cscales is not None:
        # check that the number of scales is the same as the number of ctypes
        if len(cscales) != len(ctypes):
            raise ValueError("`cscales` must be of the same length as `ctypes`")
        if not all([isinstance(x, (str, Number)) for x in cscales]):
            raise TypeError("`cscales` must be a List of strings or numerical values")
    else:
        cscales: List[Number] = [1.0] * len(ctypes)

    if csigns is not None:
        # check that the number of signs is the same as the number of ctypes
        if len(csigns) != len(ctypes):
            raise ValueError("`csigns` must be of the same length as `ctypes`")
        if not all([isinstance(x, int) for x in csigns]):
            raise TypeError("`csigns` must be a List of integers")
        if not all([x in [-1, 1] for x in csigns]):
            warnings.warn(
                "`csigns` must be a List of -1 or 1, coercing to -1 or 1",
                UserWarning,
            )
            csigns: List[int] = [x / abs(x) for x in csigns]
    else:
        csigns: List[int] = [1] * len(ctypes)

    ## Check hbasket and hscales
    if hbasket is not None:
        if not (bool(hratios) and bool(hscales)):
            raise ValueError(
                "`hratios` and `hscales` must be provided if `hbasket` is provided"
            )
        else:
            # check that the number of scales is the same as the number of hbasket
            if len(hscales) != len(hbasket):
                raise ValueError("`hscales` must be of the same length as `hbasket`")
            if not all([isinstance(x, (str, Number)) for x in hscales]):
                raise TypeError(
                    "`hscales` must be a List of strings or numerical values"
                )

    return cscales, csigns, hbasket, hscales, hratios


def _check_estimation_frequency(df_wide: pd.DataFrame, rebal_freq: str) -> pd.DataFrame:
    """
    Check the timeseries to see if the estimated frequency matches the actual frequency.
    Raises a warning if the estimated frequency does not match the rebalancing frequency.

    Parameters
    ----------
    df_wide : pd.DataFrame
        dataframe in wide format with the contract signals.
    est_freq : str
        the estimated frequency of the contract signals.

    Returns
    -------
    pd.DataFrame
        dataframe with the estimated frequency.
    """

    estimated_freq: pd.Series = estimate_release_frequency(df_wide=df_wide)

    # for each series in the dataframe, check if the estimated frequency matches the rebal_freq
    for _col in df_wide.columns:
        if estimated_freq[_col] is None:
            warnings.warn(
                f"Unable to estimate frequency for `{_col}`",
            )
        elif estimated_freq[_col] != rebal_freq:
            warnings.warn(
                f"Estimated frequency for `{_col}` does not match "
                f"the rebalancing frequency`{rebal_freq}`",
            )

    return df_wide


def _make_relative_value(
    df: pd.DataFrame,
    sig: str,
    *args,
    **kwargs,
) -> pd.DataFrame:
    assert isinstance(df, QuantamentalDataFrame), "`df` must be a QuantamentalDataFrame"
    return make_relative_value(
        df=df[df["xcat"] == sig], xcats=[sig], postfix="", *args, **kwargs
    )


def _gen_contract_signals(
    df_wide: pd.DataFrame,
    cids: List[str],
    sig: str,
    ctypes: List[str],
    cscales: List[Union[Number, str]],
    csigns: List[int],
) -> pd.DataFrame:
    """
    Generate contract signals from cross-section-specific signals.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe in quantamental format with the contract signals and potentially
        categories required for translation into contract signals.
    cids : List[str]
        list of cross-sections whose signals are to be translated into contract signals.
    sig : str
        the category ticker of the cross-section-specific signal that is translated into
        contract signals.
    ctypes : List[str]
        list of identifiers for the contract types that are to be traded. They typically
        correspond to the contract type acronyms that are used in JPMaQS for generic
        returns, carry and volatility, such as "FX" for FX forwards or "EQ" for equity index
        futures. N.B. Overall a contract is identified by the combination of its cross-
        section and its contract type "<cid>_<ctype>".
    cscales : List[Union[Number, str]]
        list of scaling factors for the contract signals. These can be either a list of
        floats or a list of category tickers that serve as basis of translation. The former
        are fixed across time, the latter variable.
    csigns : List[int]
        list of signs for the contract signals. These must be either 1 for long position
        or -1 for short position.

    Returns
    -------
    pd.DataFrame
        dataframe with scaling applied.
    """

    expected_contract_signals: List[str] = [f"{cx}_{sig}" for cx in cids]

    # Check that all the contract signals are in the dataframe
    if not set(expected_contract_signals).issubset(set(df_wide.columns)):
        raise ValueError(
            "Some `cids` are missing the `sig` in the provided dataframe."
            f"\nMissing: {set(expected_contract_signals) - set(df_wide.columns)}"
        )

    # Multiply each cid_ctype by the corresponding scale and sign to produce the contract signal
    new_conts: List[str] = []
    for _cid in cids:
        sig_col: str = _cid + "_" + sig
        for ix, ctx in enumerate(ctypes):
            scale_var: Union[Number, pd.Series]
            # If the scale is a string, it must be a category ticker
            # Otherwise it is a fixed numeric value
            if isinstance(cscales[ix], str):
                scale_var: pd.Series = df_wide[_cid + "_" + cscales[ix]]
            else:
                scale_var: Number = cscales[ix]

            new_cont_name: str = _cid + "_" + ctx + "_CSIG"
            df_wide[new_cont_name] = df_wide[sig_col] * csigns[ix] * scale_var
            # For convenience, keep track of the new contract signals
            new_conts.append(new_cont_name)

    # Only return the new contract signals
    df_wide = df_wide.loc[:, new_conts]

    return df_wide


def _apply_hedge_ratios(
    df_wide: pd.DataFrame,
    cids: List[str],
    sig: str,
    hbasket: List[str],
    hscales: List[Union[Number, str]],
    hratios: str,
) -> pd.DataFrame:

    # check if the CID_SIG is in the dataframe
    expc_cid_sigs: List[str] = [f"{cx}_{sig}" for cx in cids]
    expc_cid_hr: List[str] = [f"{cx}_{hratios}" for cx in cids]
    err_str: str = (
        "Some `cids` are missing the `{sig_type}` in the provided dataframe."
        "\nMissing: {missing_items}"
    )
    for sig_type, expc_sigs in zip(["sig", "hratio"], [expc_cid_sigs, expc_cid_hr]):
        if not set(expc_sigs).issubset(set(df_wide.columns)):
            raise ValueError(
                err_str.format(
                    sig_type=sig_type,
                    missing_items=set(expc_sigs) - set(df_wide.columns),
                )
            )
    hedged_assets_list: List[str] = []
    for hb_ix, _hb in enumerate(hbasket):
        basket_pos: str = _hb + "_CSIG"
        df_wide[basket_pos] = 0.0  # initialise the basket position to zero
        for _cid in cids:
            # HBASKETx_CSIG = CIDx_SIG * CIDx_HRATIO * HBASKETx_SCALE
            # e.g.:
            # USD_EQ_CSIG = AUD_SIG * AUD_HRATIO * USD_EQ_HSCALE

            cid_sig: str = _cid + "_" + sig
            cid_hr: str = _cid + "_" + hratios

            hb_hratio: Union[Number, pd.Series]
            # If the scale is a string, it must be a category ticker
            # Otherwise it is a fixed numeric value
            if isinstance(hscales[hb_ix], str):
                hb_hratio: pd.Series = df_wide[_cid + "_" + hscales[hb_ix]]
            else:
                hb_hratio: Number = hscales[hb_ix]

            # Add the basket position to the exisitng basket_pos column in the df
            _posx: pd.Series = df_wide[cid_sig] * df_wide[cid_hr] * hb_hratio
            # set nans to zero, to avoid nans in the final result
            _posx = _posx.fillna(0.0)
            df_wide[basket_pos] += _posx

            hedged_assets_list.append(basket_pos)

    # List(Set(hedged_assets_list)) to remove duplicates
    hedged_assets_list: List[str] = list(set(hedged_assets_list))

    df_wide = df_wide.loc[:, hedged_assets_list]

    return df_wide


def _add_hedged_signals(
    df_wide_cs: pd.DataFrame,
    df_wide_hs: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    if df_wide_hs is None:
        return df_wide_cs

    for _col in set(df_wide_hs.columns):
        if _col not in df_wide_cs.columns:
            df_wide_cs[_col] = 0.0
        df_wide_cs[_col] += df_wide_hs[_col]

    return df_wide_cs


[docs]def contract_signals( df: pd.DataFrame, sig: str, cids: List[str], ctypes: List[str], cscales: Optional[List[Union[Number, str]]] = None, csigns: Optional[List[int]] = None, hbasket: Optional[List[str]] = None, hscales: Optional[List[Union[Number, str]]] = None, hratios: Optional[str] = None, relative_value: bool = False, start: Optional[str] = None, end: Optional[str] = None, rebal_freq: str = "M", blacklist: Optional[dict] = None, sname: str = "STRAT", *args, **kwargs, ) -> QuantamentalDataFrame: """ Calculate contract-specific signals based on cross section-specific signals. Each contract signal a product of the cross-section-specific signal, a scaling factor or category, and a sign (+-1). Optionally, the contract signals can be hedged with a basket of contracts. The hedge ratios are determined by a cross-section-specific category. When hedging, the hedged signals are added to the contract signals; thus yielding a single contract signal per financial contract. Parameters ---------- df : pd.DataFrame standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat', 'real_date' and 'value'. This dataframe must contain the cross-section-specific signals and possibly [1] categories for changing scale factors of the main contracts, [2] contracts of a hedging basket, and [3] cross-section specific hedge ratios sig : str the cross section-specific signal that serves as the basis of contract signals. cids : List[str] list of cross-sections whose signal is to be used. ctypes : List[str] list of identifiers for the contract types that are to be traded. They typically correspond to the contract type acronyms that are used in JPMaQS for generic returns, carry and volatility, such as "FX" for FX forwards or "EQ" for equity index futures. If n contracts are traded per cross sections `ctypes` must contain n arguments. N.B. Overall a contract is identified by the combination of its cross- section and its contract type "<cid>_<ctype>". cscales : List[str|float] list of scaling factors for the contract signals. These can be either a list of floats or a list of category tickers that serve as basis of translation. The former are fixed across time, the latter variable. The list `cscales` must be of the same length as the list `ctypes`. csigns : List[int] list of signs that determine the direction of the contract signals. Contract signal is sign x cross section signal. The signs must be either 1 or -1. The list `csigns` must be of the same length as `ctypes` and `cscales`. hbasket : List[str] list of contract identifiers in the format "<cid>_<ctype>" that serve as constituents of a hedging basket, if one is used. hscales : List[str|float] list of scaling factors (weights) for the basket. These can be either a list of floats or a list of category tickers that serve as basis of translation. The former are fixed across time, the latter variable. hratios : str category name for cross-section-specific hedge ratios. The values of this category determine direction and size of the hedge basket per unit of the cross section-specific signal. relative_value : bool If False (default), no relative value is calculated. If True boolean, relative value is calculated for all cids in the strategy. # TODO split above `relative_value` argument into two: `relative_value` and `relative_value_cids`? start : str earliest date in ISO format. Default is None and earliest date in df is used. end : str latest date in ISO format. Default is None and latest date in df is used. blacklist : dict cross-sections with date ranges that should be excluded from the calculation of contract signals. sname : str name of the strategy. Default is "STRAT". Returns ------- pd.DataFrame with the contract signals for all traded contracts and the specified strategy. It has the standard JPMaQS DataFrame. The contract signals have the following format "<cid>_<ctype>_<sname>_CSIG". """ ## basic type and value checks for varx, namex, typex in [ (sig, "sig", str), (cids, "cids", list), (ctypes, "ctypes", list), (cscales, "cscales", (list, NoneType)), (csigns, "csigns", (list, NoneType)), (hbasket, "hbasket", (list, NoneType)), (hscales, "hscales", (list, NoneType)), (hratios, "hratios", (str, NoneType)), (relative_value, "relative_value", bool), (start, "start", (str, NoneType)), (end, "end", (str, NoneType)), (blacklist, "blacklist", (dict, NoneType)), (sname, "sname", str), ]: if not isinstance(varx, typex): raise TypeError(f"`{namex}` must be <{typex}> not <{type(varx)}>") if typex in [list, str, dict] and len(varx) == 0: raise ValueError(f"`{namex}` must not be an empty {str(typex)}") if not isinstance(df, QuantamentalDataFrame): raise TypeError("`df` must be a standardised quantamental dataframe") ## Standardise and copy the dataframe df: pd.DataFrame = QuantamentalDataFrame(df) _initialized_as_categorical: bool = df.InitializedAsCategorical ## Check the dates if start is None: start: str = pd.Timestamp(df["real_date"].min()).strftime("%Y-%m-%d") if end is None: end: str = pd.Timestamp(df["real_date"].max()).strftime("%Y-%m-%d") for dx, nx in [(start, "start"), (end, "end")]: if not is_valid_iso_date(dx): raise ValueError(f"`{nx}` must be a valid ISO-8601 date string") ## Reduce the dataframe df: pd.DataFrame = reduce_df(df=df, start=start, end=end, blacklist=blacklist) ## Check that all cid_ctype are in the dataframe expected_base_signals: Set[str] = set([f"{cx}_{sig}" for cx in cids]) found_base_signals: Set[str] = set(QuantamentalDataFrame(df).list_tickers()) if not (expected_base_signals).issubset(found_base_signals): raise ValueError( "Some `cids` are missing the `sig` in the provided dataframe." f"\nMissing: {expected_base_signals - found_base_signals}" ) ## Check the scaling and hedging arguments cscales, csigns, hbasket, hscales, hratios = _check_scaling_args( ctypes=ctypes, cscales=cscales, csigns=csigns, hbasket=hbasket, hscales=hscales, hratios=hratios, ) # Actual calculation ## Calculate relative value if requested if relative_value: rel_signals = _make_relative_value( df=df, sig=sig, blacklist=blacklist, cids=cids, start=start, end=end, *args, **kwargs, ) df = update_df(df=df, df_add=rel_signals) ## Cast the dataframe to wide format df_wide: pd.DataFrame = QuantamentalDataFrame(df=df).to_wide() ## Check rebal_freq or downsample the dataframe # df_wide: pd.DataFrame = _check_estimation_frequency( # df_wide=df_wide, rebal_freq=rebal_freq # ) ## Generate primary contract signals df_contract_signals: pd.DataFrame = _gen_contract_signals( df_wide=df_wide, cids=cids, sig=sig, ctypes=ctypes, cscales=cscales, csigns=csigns, ) ## Generate hedge contract signals df_hedge_signals: Optional[pd.DataFrame] = None if hbasket is not None: df_hedge_signals: pd.DataFrame = _apply_hedge_ratios( df_wide=df_wide, cids=cids, sig=sig, hbasket=hbasket, hscales=hscales, hratios=hratios, ) # Add the hedge signals to the contract signals df_out: pd.DataFrame = _add_hedged_signals( df_wide_cs=df_contract_signals, df_wide_hs=df_hedge_signals, ) ## Wide to quantamental df_out: pd.DataFrame = QuantamentalDataFrame.from_wide(df=df_out) # Append the strategy name to all the xcats df_out = df_out.rename_xcats(postfix=f"_{sname}") assert isinstance(df_out, QuantamentalDataFrame) return QuantamentalDataFrame( df_out, _initialized_as_categorical=_initialized_as_categorical ).to_original_dtypes()
[docs]def multi_signal_contract_signals( df: QuantamentalDataFrame, contract_dicts: Dict[str, Dict[str, Any]], common_args: Dict[str, Any], ): """ Calculate contract signals for multiple signal specifications. Each signal specification is a dictionary with arguments for a call to `contract_signals()`. The common arguments are passed to all calls. The dataframe is passed to all calls, and should not be included in the dictionaries, or common arguments. Please see the documentation for `contract_signals()` for more information on the arguments. Note: when two (or more) of the arguments produce a series with the same name, each new series will overwrite the previous one. Parameters ---------- df : pd.DataFrame standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat', 'real_date' and 'value'. contract_dicts : dict[str, dict[str, Any]] dictionary of dictionaries with arguments for each call to `contract_signals()`. The keys are the names of the signal specifications. common_args : dict[str, Any] dictionary with common arguments for all calls to `contract_signals()`. These are passed to all calls, and should not be included in the dictionaries in `contract_dicts`. Returns ------- QuantamentalDataFrame dataframe with the contract signals for all traded contracts and the specified strategies. It has the standard JPMaQS DataFrame. The contract signals have the following format "<cid>_<ctype>_<sname>_CSIG". """ # Check that the types are correct types_dict: dict = { "sig": str, "cids": list, "ctypes": list, "cscales": (list, NoneType), "csigns": (list, NoneType), "hbasket": (list, NoneType), "hscales": (list, NoneType), "hratios": (str, NoneType), "relative_value": bool, "start": (str, NoneType), "end": (str, NoneType), "blacklist": (dict, NoneType), "sname": str, } df = QuantamentalDataFrame(df) _initialized_as_categorical: bool = df.InitializedAsCategorical type_error_msg: str = "{} must be <{}> not <{}>" value_error_msg: str = "{} must not be an empty {}" for k, v in common_args.items(): if not isinstance(v, types_dict[k]): raise TypeError(type_error_msg.format(k, types_dict[k], type(v))) if types_dict[k] in [list, str, dict] and len(v) == 0: raise ValueError(value_error_msg.format(k, str(types_dict[k]))) type_error_msg = type_error_msg + " ; in arguments passed for {}" value_error_msg = value_error_msg + " ; in arguments passed for {}" for dict_name, dict_args in contract_dicts.items(): for k, v in dict_args.items(): if not isinstance(v, types_dict[k]): raise TypeError( type_error_msg.format(k, types_dict[k], type(v), dict_name) ) if types_dict[k] in [list, str, dict] and len(v) == 0: raise ValueError( value_error_msg.format(k, str(types_dict[k]), dict_name) ) # Calculate the contract signals for each signal specification # Keep updating the dataframe with the new signals to allow signals to be used to # generate other signals # the awkward syntax **{**a, **b} is used to update left dict values with right dict values # to avoid conflicts in the keys of the two dictionaries for dict_args in contract_dicts.values(): # TODO: use update_df? df = QuantamentalDataFrame.from_qdf_list( [df, contract_signals(df=df, **{**common_args, **dict_args})] ) return QuantamentalDataFrame( df, _initialized_as_categorical=_initialized_as_categorical ).to_original_dtypes()
if __name__ == "__main__": from macrosynergy.management.simulate import make_test_df cids: List[str] = ["USD", "EUR", "GBP", "AUD", "CAD"] xcats: List[str] = ["SIG", "HR"] start: str = "2000-01-01" end: str = "2020-12-31" df: pd.DataFrame = make_test_df( cids=cids, xcats=xcats, start=start, end=end, ) df.loc[(df["cid"] == "USD") & (df["xcat"] == "SIG"), "value"] = 1.0 ctypes = ["FX", "IRS", "CDS"] cscales = [1.0, 0.5, 0.1] csigns = [1, -1, 1] hbasket = ["USD_EQ", "EUR_EQ"] hscales = [0.7, 0.3] df_cs: pd.DataFrame = contract_signals( df=df, sig="SIG", cids=cids, ctypes=ctypes, cscales=cscales, csigns=csigns, hbasket=hbasket, hscales=hscales, hratios="HR", ) df_cs["xcat"].unique()