Source code for macrosynergy.pnl.historic_portfolio_volatility

"""
Estimation of Historic Portfolio Volatility.
"""

import logging
import warnings

import functools
from typing import Dict, List, Optional
from typing import Callable, Tuple, Any, Union
from numbers import Number

import numpy as np
import pandas as pd
from macrosynergy.panel.historic_vol import expo_weights
from macrosynergy.management.types import NoneType, QuantamentalDataFrame
from macrosynergy.management.constants import FFILL_LIMITS, ANNUALIZATION_FACTORS
from macrosynergy.management.utils import (
    _map_to_business_day_frequency,
    get_sops,
    is_valid_iso_date,
    reduce_df,
    # standardise_dataframe,
    # ticker_df_to_qdf,
)

# Suffix appended to the strategy name to build the name of the portfolio
# volatility series (used in `_hist_vol` as f"{sname}{RETURN_SERIES_XCAT}").
RETURN_SERIES_XCAT = "_PNL_USD1S_ASD"


logger = logging.getLogger(__name__)

# Unbounded memoisation decorator; applied to the weight-array generators below
# so that identical (lookback, half-life) requests reuse the same array.
cache = functools.lru_cache(maxsize=None)


@cache
def flat_weights_arr(lback_periods: int, *args, **kwargs) -> np.ndarray:
    """
    Flat (equal) weights for the look-back period.

    Parameters
    ----------
    lback_periods : int
        number of observations in the look-back window.
    *args, **kwargs
        accepted and ignored, so that the function can be called with the same
        arguments as `expo_weights_arr` (e.g. `half_life`). They do take part in
        the cache key.

    Returns
    -------
    np.ndarray
        array of length `lback_periods` whose entries are all
        `1 / lback_periods`.

    Notes
    -----
    Results are memoised through the module-level `cache` decorator, so the
    returned array object is shared between calls with the same arguments and
    must not be modified in place by callers.
    """
    return np.ones(lback_periods) / lback_periods
@cache
def expo_weights_arr(
    lback_periods: int, half_life: int, *args, **kwargs
) -> np.ndarray:
    """
    Exponential weights for the look-back period.

    Thin memoised wrapper around `expo_weights`.

    Parameters
    ----------
    lback_periods : int
        number of observations in the look-back window.
    half_life : int
        half-life, in periods, passed through to `expo_weights`.
    *args, **kwargs
        accepted and ignored; they only take part in the cache key.

    Returns
    -------
    np.ndarray
        the weights returned by `expo_weights` for the given arguments.

    Notes
    -----
    Results are memoised through the module-level `cache` decorator, so the
    returned array object is shared between calls with the same arguments and
    must not be modified in place by callers.
    """
    return expo_weights(lback_periods=lback_periods, half_life=half_life)
def _weighted_covariance(
    x: np.ndarray,
    y: np.ndarray,
    weights_func: Callable[[int, int], np.ndarray],
    lback_periods: int,
    half_life: int,
    min_obs: int = 1,
) -> float:
    """
    Estimate the covariance between two series after applying weights.

    Observations where either series is NaN are dropped pairwise, then only the
    most recent `lback_periods` remaining observations are used.

    Parameters
    ----------
    x, y : np.ndarray
        the two series, each a 1D array or a column vector, of equal length.
    weights_func : Callable
        called as `weights_func(lback_periods=<n>, half_life=half_life)` and
        expected to return `n` weights that sum to 1.
    lback_periods : int
        maximum number of observations to use; -1 means "use everything".
    half_life : int
        half-life forwarded to `weights_func`; must be positive.
    min_obs : int
        minimum number of usable observations; with fewer the result is NaN.

    Returns
    -------
    float
        the weighted covariance, or NaN if there are no (or too few) usable
        observations.

    Raises
    ------
    AssertionError
        if the arguments violate the constraints above or the weights do not
        sum to 1.
    """
    assert half_life > 0, "half_life must be greater than 0"
    assert lback_periods > 0 or lback_periods == -1, "lback_periods must be >0"
    assert x.ndim == 1 or x.shape[1] == 1, "`x` must be a 1D array or a column vector"
    assert y.ndim == 1 or y.shape[1] == 1, "`y` must be a 1D array or a column vector"
    assert x.shape[0] == y.shape[0], "`x` and `y` must have same length"

    # Flatten first: combining a column vector with a 1D array would otherwise
    # broadcast the NaN mask to an (n, n) matrix.
    x, y = np.ravel(x), np.ravel(y)

    valid = ~(np.isnan(x) | np.isnan(y))
    n_valid = int(np.count_nonzero(valid))
    weightslen = min(n_valid, lback_periods if lback_periods > 0 else len(x))

    # Covers the all-NaN case (no valid pairs) as well as too-short windows.
    if weightslen == 0 or weightslen < min_obs:
        return np.nan

    # drop NaNs and only consider the most recent `weightslen` observations
    x, y = x[valid][-weightslen:], y[valid][-weightslen:]

    w: np.ndarray = weights_func(lback_periods=weightslen, half_life=half_life)
    func_name = getattr(weights_func, "__name__", repr(weights_func))
    assert np.isclose(w.sum(), 1), f"weights produced by {func_name} do not sum to 1"

    x_mean, y_mean = (w * x).sum(), (w * y).sum()
    array_of_products = (x - x_mean) * (y - y_mean)
    return w.T.dot(array_of_products)
def estimate_variance_covariance(
    piv_ret: pd.DataFrame,
    remove_zeros: bool,
    weights_func: Callable[[int, int], np.ndarray],
    lback_periods: int,
    half_life: int,
    lback_min_obs: int = 1,
) -> pd.DataFrame:
    """
    Estimate the variance-covariance matrix of the columns of `piv_ret`.

    Every pair of columns is passed to `_weighted_covariance`; the result is
    written to both symmetric positions of the matrix.

    Parameters
    ----------
    piv_ret : pd.DataFrame
        wide DataFrame of returns, one column per contract.
    remove_zeros : bool
        if True, exact zeros are replaced by NaN before estimation so that they
        are excluded from the look-back window.
    weights_func : Callable
        weight generator forwarded to `_weighted_covariance` (flat or
        exponential weights).
    lback_periods : int
        look-back length forwarded to `_weighted_covariance`.
    half_life : int
        half-life forwarded to `_weighted_covariance`.
    lback_min_obs : int
        minimum number of observations forwarded as `min_obs`.

    Returns
    -------
    pd.DataFrame
        square DataFrame indexed and labelled by the columns of `piv_ret`.
        Entries are NaN where a covariance could not be estimated.
    """
    columns = piv_ret.columns
    n_cols = len(columns)
    cov_mat = np.zeros((n_cols, n_cols))

    logger.info("Estimating variance-covariance matrix for %s", columns)

    if remove_zeros:
        piv_ret = piv_ret.replace(0, np.nan)

    # Extract each column's values once instead of once per pair.
    col_values = [piv_ret[col].values for col in columns]

    for i_b, c_b in enumerate(columns):
        # lower triangle including the diagonal; mirrored below
        for i_a, c_a in enumerate(columns[: i_b + 1]):
            logger.debug("Estimating covariance between %s and %s", c_a, c_b)
            est_vol = _weighted_covariance(
                x=col_values[i_a],
                y=col_values[i_b],
                weights_func=weights_func,
                lback_periods=lback_periods,
                half_life=half_life,
                min_obs=lback_min_obs,
            )
            cov_mat[i_a, i_b] = cov_mat[i_b, i_a] = est_vol

    # every entry is either symmetric or NaN (NaN != NaN, hence the XOR)
    assert np.all((cov_mat.T == cov_mat) ^ np.isnan(cov_mat))

    return pd.DataFrame(cov_mat, index=columns, columns=columns)
def _downsample_returns(
    piv_df: pd.DataFrame,
    freq: str = "m",
) -> pd.DataFrame:
    """
    Compound percentage returns to a lower frequency.

    Parameters
    ----------
    piv_df : pd.DataFrame
        wide DataFrame of returns in percent; it is resampled, so the index is
        expected to be datetime-like.
    freq : str
        target frequency; mapped with `_map_to_business_day_frequency` before
        resampling.

    Returns
    -------
    pd.DataFrame
        returns in percent, compounded within each resampling period as
        `(prod(1 + r / 100) - 1) * 100`.
    """
    # TODO create as a general convert_frequency function
    # TODO current aggregator is `art` (check definition of name in R code)
    # TODO test [1] input data is daily and [2] daily gives daily output
    freq = _map_to_business_day_frequency(freq)

    # TODO we should fix why we get the warnings...
    # `catch_warnings` restores the previous filter state on exit, so the
    # "ignore" filter only applies to the resampling below. No explicit
    # `warnings.resetwarnings()` is needed; it would also discard filters
    # configured elsewhere in the process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        piv_new_freq: pd.DataFrame = (
            (1 + piv_df / 100).resample(freq).prod() - 1
        ) * 100

    return piv_new_freq
def get_max_lookback(lb: int, nt: float) -> int:
    """
    Calculate the maximum lookback period for a given lookback period and nan
    tolerance.

    Parameters
    ----------
    lb : int
        the lookback period.
    nt : float
        the nan tolerance.

    Returns
    -------
    int
        `ceil(lb * (1 + nt))` for a positive `lb`, otherwise 0. Callers in this
        module treat 0 as "no fixed window" (e.g. for `lb == -1`).
    """
    return int(np.ceil(lb * (1 + nt))) if lb > 0 else 0
def _calculate_multi_frequency_vcv_for_period(
    pivot_returns: pd.DataFrame,
    pivot_signals: pd.DataFrame,
    rebal_date: pd.Timestamp,
    est_freqs: List[str],
    est_weights: List[float],
    weights_func: Callable[[int, int], np.ndarray],
    lback_periods: List[int],
    half_life: List[int],
    nan_tolerance: float,
    remove_zeros: bool,
    lback_min_obs: List[int],
) -> pd.DataFrame:
    """
    Blend variance-covariance matrices estimated at several frequencies.

    For every estimation frequency the returns up to and including
    `rebal_date` are down-sampled, truncated to the maximum look-back window
    and passed to `estimate_variance_covariance`. The matrices are then
    combined as `sum(weight * ANNUALIZATION_FACTORS[freq] * vcv)`.

    Parameters
    ----------
    pivot_returns : pd.DataFrame
        wide DataFrame of returns indexed by date.
    pivot_signals : pd.DataFrame
        not used by this function; kept for signature compatibility.
    rebal_date : pd.Timestamp
        only returns dated on or before this date are used.
    est_freqs, est_weights, lback_periods, half_life, lback_min_obs : list
        per-frequency settings; element `i` of each list belongs together.
    weights_func : Callable
        weight generator forwarded to `estimate_variance_covariance`.
    nan_tolerance : float
        used to widen the look-back window, see `get_max_lookback`.
    remove_zeros : bool
        forwarded to `estimate_variance_covariance`.

    Returns
    -------
    pd.DataFrame
        the blended, annualized variance-covariance matrix. A NaN in any
        per-frequency matrix makes the corresponding blended entry NaN.
    """
    window_df = pivot_returns.loc[pivot_returns.index <= rebal_date]

    # Kept as a list aligned with `est_freqs` rather than a dict keyed by
    # frequency, so that a frequency listed twice (with different look-backs)
    # keeps both estimates.
    vcv_per_freq: List[pd.DataFrame] = []
    for freq, lb, hl, min_obs in zip(
        est_freqs, lback_periods, half_life, lback_min_obs
    ):
        # A maximum look-back of 0 gives `.iloc[0:]`, i.e. the whole window.
        piv_ret = _downsample_returns(window_df, freq=freq).iloc[
            -get_max_lookback(lb=lb, nt=nan_tolerance) :
        ]
        vcv_per_freq.append(
            estimate_variance_covariance(
                piv_ret=piv_ret,
                lback_periods=lb,
                remove_zeros=remove_zeros,
                weights_func=weights_func,
                half_life=hl,
                lback_min_obs=min_obs,
            )
        )

    # NOTE: in this case Float+NA = Na
    vcv_df: pd.DataFrame = sum(
        [
            weight * ANNUALIZATION_FACTORS[freq] * vcv
            for weight, freq, vcv in zip(est_weights, est_freqs, vcv_per_freq)
        ]
    )
    return vcv_df


def _calc_vol_tuple(
    vcv_df: pd.DataFrame,
    signals: pd.DataFrame,
    date: pd.Timestamp,
    available_cids: List[str],
) -> Tuple[pd.Timestamp, float]:
    """
    Compute the portfolio volatility `sqrt(s' V s)` for a single date.

    Parameters
    ----------
    vcv_df : pd.DataFrame
        variance-covariance matrix; not modified.
    signals : pd.DataFrame
        wide DataFrame of signals indexed by date.
    date : pd.Timestamp
        the row of `signals` to use.
    available_cids : List[str]
        labels to select from both the signal row and the matrix.

    Returns
    -------
    Tuple[pd.Timestamp, float]
        `date` and the portfolio volatility.

    Raises
    ------
    ValueError
        if the labels of signals and matrix differ, or if the matrix still
        contains NaNs for contracts with a non-zero signal.
    """
    s = signals.loc[date, :].copy()
    s = s.loc[available_cids]
    # explicit copy: the rows/columns of inactive contracts are zeroed below
    vcv_df = vcv_df.loc[available_cids, available_cids].copy()

    if set(s.index) != set(vcv_df.columns):
        raise ValueError(
            "Signals and variance-covariance matrix do not have the same columns."
            f"\nSignals: {s.index.tolist()}"
            f"\nVariance-Covariance: {vcv_df.columns.tolist()}"
        )

    # Contracts with a missing or (near-)zero signal carry no risk; zeroing
    # their rows and columns also removes any NaN covariances they have.
    idx_mask = s.isna() | (s.abs() < 1e-6)
    s.loc[idx_mask] = 0
    vcv_df.loc[idx_mask, :] = 0
    vcv_df.loc[:, idx_mask] = 0

    if vcv_df.isna().any().any():
        raise ValueError("N/A values in variance-covariance matrix")

    pvol: float = np.sqrt(s.T.dot(vcv_df).dot(s))
    return date, pvol
def stack_covariances(
    vcv_df: pd.DataFrame,
    real_date: pd.Timestamp,
) -> pd.DataFrame:
    """
    Stack the covariance matrix DataFrame into long format.

    Parameters
    ----------
    vcv_df : pd.DataFrame
        square variance-covariance matrix.
    real_date : pd.Timestamp
        the date the matrix refers to; written to every output row.

    Returns
    -------
    pd.DataFrame
        long DataFrame with the columns `fid1` (row label), `fid2` (column
        label), `value` and `real_date`.
    """
    return (
        vcv_df.rename_axis("fid1", axis=0)
        .rename_axis("fid2", axis=1)
        .stack()
        .to_frame("value")
        .reset_index()
        .assign(real_date=real_date)
    )
def _get_first_usable_date(
    pivot_returns: pd.DataFrame,
    pivot_signals: pd.DataFrame,
    rebal_dates: pd.Series,
    est_freqs: List[str],
    lback_periods: List[int],
    nan_tolerance: float,
) -> pd.Series:
    """
    Find, per contract, the first rebalancing date with enough history.

    The required history is the largest maximum look-back over all estimation
    frequencies (see `get_max_lookback`); where that is 0 the value of
    `FFILL_LIMITS` for the frequency is used instead. It is applied as a number
    of business days after the first valid observation of the returns and of
    the signals.

    Returns
    -------
    pd.Series
        named "real_date", indexed by contract, holding the first usable
        rebalancing date. The value is NaT when no such date exists for the
        returns.
    """
    max_lb = 0
    # for each frequency and lookback
    for lb, est_freq in zip(lback_periods, est_freqs):
        _max_lb = get_max_lookback(lb, nan_tolerance)
        if _max_lb == 0:
            _max_lb = FFILL_LIMITS[_map_to_business_day_frequency(est_freq)]
        max_lb = max(max_lb, _max_lb)

    assert set(pivot_returns.columns.tolist()) == set(pivot_signals.columns.tolist())

    offset = pd.offsets.BDay(max_lb)

    def _first_rebal_date(series: pd.Series, label: str):
        # `first_valid_index()` is None for an all-NaN column; such a column
        # never becomes usable instead of failing on `None + BDay`.
        first_valid = series.first_valid_index()
        if first_valid is None:
            logger.warning("No valid observations in %s; it is never usable.", label)
            return pd.NaT
        # 'full' start date - where the maximum lookback period is available
        return rebal_dates[rebal_dates >= first_valid + offset].min()

    first_usable = {}
    for col in pivot_returns.columns.tolist():
        ret_start = _first_rebal_date(pivot_returns[col], f"returns of {col}")
        sig_start = _first_rebal_date(pivot_signals[col], f"signals of {col}")
        # get the later of the two start dates
        # NOTE(review): comparisons with NaT are False, so `max` yields
        # `ret_start` whenever either value is NaT - confirm this is intended.
        first_usable[col] = max(ret_start, sig_start)

    return pd.Series(first_usable, name="real_date")


def _calculate_portfolio_volatility(
    pivot_returns: pd.DataFrame,
    pivot_signals: pd.DataFrame,
    rebal_freq: str,
    est_freqs: List[str],
    est_weights: List[float],
    weights_func: Callable[[int, int], np.ndarray],
    lback_periods: List[int],
    half_life: List[int],
    nan_tolerance: float,
    remove_zeros: bool,
    lback_min_obs: List[int],
    portfolio_return_name: str,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Estimate portfolio volatility and covariances on every rebalancing date.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        - volatilities, indexed by "real_date", in a single column named
          `portfolio_return_name`;
        - the stacked covariances (`fid1`, `fid2`, `value`, `real_date`) with
          one row per unordered contract pair and date.

    Raises
    ------
    ValueError
        if no rebalancing date has any contract with sufficient history.
    """
    logger.info(
        f"Calculating portfolio volatility "
        f"for FIDS={pivot_returns.columns.tolist()} "
        f"from {min(pivot_returns.index.min(), pivot_signals.index.min())} "
        f"to {max(pivot_returns.index.max(), pivot_signals.index.max())}, with "
        f"lback_periods={lback_periods}, nan_tolerance={nan_tolerance}, "
        f"remove_zeros={remove_zeros}, rebal_freq={rebal_freq}, est_freqs={est_freqs}, "
        f"est_weights={est_weights} "
    )

    # rebalancing dates derived from the signal index, see `get_sops`
    rebal_dates = get_sops(dates=pivot_signals.index, freq=rebal_freq)

    logger.info(
        "Rebalance portfolio from %s to %s (%s times)",
        rebal_dates.min(),
        rebal_dates.max(),
        rebal_dates.shape[0],
    )

    # TODO convert frequencies
    list_vcv: List[pd.DataFrame] = []
    list_pvol: List[Tuple[pd.Timestamp, np.float64]] = []

    first_starts = _get_first_usable_date(
        pivot_returns=pivot_returns,
        pivot_signals=pivot_signals,
        rebal_dates=rebal_dates,
        est_freqs=est_freqs,
        lback_periods=lback_periods,
        nan_tolerance=nan_tolerance,
    )

    for td in rebal_dates:
        # contracts whose first usable date has been reached
        avails = first_starts[first_starts <= td].index.tolist()
        if len(avails) == 0:
            logger.warning(
                f"No data available for {td} with lookback period of {max(lback_periods)} days."
            )
            continue

        vcv_df = _calculate_multi_frequency_vcv_for_period(
            pivot_returns=pivot_returns[avails],
            pivot_signals=pivot_signals[avails],
            rebal_date=td,
            est_freqs=est_freqs,
            est_weights=est_weights,
            weights_func=weights_func,
            lback_periods=lback_periods,
            half_life=half_life,
            nan_tolerance=nan_tolerance,
            remove_zeros=remove_zeros,
            lback_min_obs=lback_min_obs,
        )
        list_vcv.append(stack_covariances(vcv_df=vcv_df, real_date=td))

        vol_tuple = _calc_vol_tuple(
            vcv_df=vcv_df,
            signals=pivot_signals,
            date=td,
            available_cids=avails,
        )
        list_pvol.append(vol_tuple)

    if not list_vcv:
        raise ValueError(
            "No rebalancing date has sufficient history to estimate the "
            "portfolio volatility."
        )

    pvol = pd.DataFrame(
        list_pvol,
        columns=["real_date", portfolio_return_name],
    ).set_index("real_date")

    vcv_df_long = pd.concat(list_vcv, axis=0, ignore_index=True)

    # The matrix is symmetric: keep the first row of every unordered
    # (fid1, fid2) pair per date. Comparing the sorted pair column-wise avoids
    # a slow row-wise `apply` and a joined string key that could collide when
    # identifiers contain the separator.
    sorted_pairs = np.sort(vcv_df_long[["fid1", "fid2"]].to_numpy(dtype=str), axis=1)
    is_duplicate = (
        pd.DataFrame(
            {
                "first": sorted_pairs[:, 0],
                "second": sorted_pairs[:, 1],
                "real_date": vcv_df_long["real_date"].to_numpy(),
            }
        )
        .duplicated()
        .to_numpy()
    )
    vcv_df_long = vcv_df_long.loc[~is_duplicate].reset_index(drop=True)

    return pvol, vcv_df_long


def _hist_vol(
    pivot_signals: pd.DataFrame,
    pivot_returns: pd.DataFrame,
    sname: str,
    rebal_freq: str,
    lback_meth: str,
    # TODO allow for different method at different frequencies
    lback_periods: List[int],  # default all for all
    half_life: List[int],
    lback_min_obs: List[int],
    est_freqs: List[str],
    est_weights: List[float],
    nan_tolerance: float,
    remove_zeros: bool,
    return_variance_covariance: bool,
) -> List[pd.DataFrame]:
    """
    Calculates historic volatility for a given strategy. It assumes that the
    dataframe is composed solely of the relevant signals and returns for the
    strategy.

    Parameters
    ----------
    pivot_signals : pd.DataFrame
        the pivot table of the contract signals.
    pivot_returns : pd.DataFrame
        the pivot table of the contract returns.
    sname : str
        the strategy name; the output column is named
        `f"{sname}{RETURN_SERIES_XCAT}"`.
    rebal_freq : str
        the frequency of rebalancing, i.e. of the volatility estimation.
    lback_meth : str
        the weighting method: 'ma' for flat weights or 'xma' for exponential
        weights (case-insensitive).
    lback_periods : List[int]
        the number of periods in the lookback window, one entry per estimation
        frequency.
    half_life : List[int]
        the half-life passed to the weight function, one entry per estimation
        frequency.
    lback_min_obs : List[int]
        minimum required observations in each lookback window. If fewer
        observations are available the variance-covariance estimate for that
        period is set to NaN.
    est_freqs : List[str]
        the frequencies at which covariances are estimated.
    est_weights : List[float]
        the weight of each estimation frequency in the blended matrix.
    nan_tolerance : float
        tolerance for NaNs used to widen the lookback window, see
        `get_max_lookback`.
    remove_zeros : bool
        removes zeroes as invalid entries and shortens the effective window.
    return_variance_covariance : bool
        whether the stacked covariances are returned as a second element.

    Returns
    -------
    List[pd.DataFrame]
        `[pvol_df]` or `[pvol_df, vcv_df]`; `pvol_df` is indexed by
        "real_date" and rows with a NaN volatility are dropped.

    Raises
    ------
    NotImplementedError
        if `lback_meth` is neither 'ma' nor 'xma'.
    """
    lback_meth = lback_meth.lower()
    if lback_meth not in ["ma", "xma"]:
        raise NotImplementedError(
            f"`lback_meth` must be 'ma' or 'xma'; got {lback_meth}"
        )

    # TODO get the correct rebalance dates
    weights_func = flat_weights_arr if lback_meth == "ma" else expo_weights_arr
    logger.info(
        "Found lback_meth=%s, using weights_func=%s", lback_meth, weights_func.__name__
    )

    portfolio_return_name = f"{sname}{RETURN_SERIES_XCAT}"

    pvol_df: pd.DataFrame
    vcv_df: pd.DataFrame
    pvol_df, vcv_df = _calculate_portfolio_volatility(
        pivot_returns=pivot_returns,
        pivot_signals=pivot_signals,
        rebal_freq=rebal_freq,
        weights_func=weights_func,
        portfolio_return_name=portfolio_return_name,
        lback_periods=lback_periods,
        remove_zeros=remove_zeros,
        nan_tolerance=nan_tolerance,
        half_life=half_life,
        lback_min_obs=lback_min_obs,
        est_freqs=est_freqs,
        est_weights=est_weights,
    )

    # assert portfolio_return_name the only column
    pvol_df = pvol_df.reset_index()
    assert set(pvol_df.columns.tolist()) == set([portfolio_return_name, "real_date"])

    nan_dates = pvol_df[pvol_df[portfolio_return_name].isna()]["real_date"].copy()
    if len(nan_dates) > 0:
        logger.warning(
            f"Found NaNs in {portfolio_return_name} at: {nan_dates.tolist()}, dropping all NaNs."
        )
        pvol_df = pvol_df[~pvol_df["real_date"].isin(nan_dates)].copy()

    pvol_df = pvol_df.set_index("real_date")

    if return_variance_covariance:
        return [pvol_df, vcv_df]
    return [pvol_df]
def unstack_covariances(
    vcv_df: pd.DataFrame,
    fillna: bool = False,
) -> Dict[str, pd.DataFrame]:
    """
    Unstack the covariance matrix DataFrame.

    Parameters
    ----------
    vcv_df : pd.DataFrame
        long DataFrame with the columns `fid1`, `fid2`, `value` and
        `real_date`, as produced by `stack_covariances`.
    fillna : bool
        if True, missing entries are filled from the transposed matrix, which
        turns a triangular matrix into a full symmetric one. Default is False.

    Returns
    -------
    Dict[str, pd.DataFrame]
        one square matrix per date, keyed by the date formatted as
        "YYYY-MM-DD", with `fid2` on the index and `fid1` on the columns.

    Raises
    ------
    ValueError
        if a matrix holds two different non-missing values for the same pair
        of contracts.
    """
    vcvs: Dict[str, pd.DataFrame] = {}
    for dt, df in vcv_df.groupby("real_date"):
        vcv = df.pivot(index="fid2", columns="fid1", values="value")

        # A triangular input with missing entries can pivot to a non-square
        # frame; square it up so it can be compared with its transpose.
        if not vcv.index.equals(vcv.columns):
            labels = vcv.index.union(vcv.columns)
            vcv = vcv.reindex(index=labels, columns=labels).rename_axis(
                index="fid2", columns="fid1"
            )

        if fillna:
            vcv = vcv.fillna(vcv.T)

        # Symmetry check that ignores positions missing on either side.
        # (`all(vcv == vcv.T)` would iterate the column labels and never fail.)
        transposed = vcv.T
        conflicts = vcv.notna() & transposed.notna() & vcv.ne(transposed)
        if conflicts.to_numpy().any():
            raise ValueError(
                f"Covariance matrix for {pd.Timestamp(dt).strftime('%Y-%m-%d')} "
                "is not symmetric."
            )

        vcvs[pd.Timestamp(dt).strftime("%Y-%m-%d")] = vcv
    return vcvs
def _check_input_arguments(
    arguments: List[Tuple[Any, str, Union[type, Tuple[type, type]]]],
):
    """
    Validate the types of a list of arguments.

    Parameters
    ----------
    arguments : List[Tuple[Any, str, type or tuple of types]]
        `(value, name, expected_type)` triples.

    Raises
    ------
    TypeError
        if a value is not an instance of its expected type.
    ValueError
        if the expected type is exactly `str`, `list` or `dict` and the value
        is empty.
    """
    # TODO move to general utils
    for varx, namex, typex in arguments:
        if not isinstance(varx, typex):
            raise TypeError(f"`{namex}` must be {typex}.")
        if typex in [str, list, dict] and len(varx) == 0:
            raise ValueError(f"`{namex}` must not be an empty {str(typex)}.")


def _check_frequency(freq: str, freq_type: str):
    """
    Check that `freq` is accepted by `_map_to_business_day_frequency`.

    Raises
    ------
    ValueError
        if the mapping raises a ValueError; `freq_type` names the offending
        argument in the message.
    """
    # TODO move to general utils
    try:
        _map_to_business_day_frequency(freq)
    except ValueError as e:
        raise ValueError(
            f"`{freq_type:s}` ({freq:s}) must be a valid frequency string: {e}"
        ) from e


def _check_missing_data(
    df: pd.DataFrame, sname: str, fids: List[str], rstring: str
) -> None:
    """
    Check that `df` holds the signals and returns required for the strategy.

    Raises
    ------
    ValueError
        if there is no ticker ending in `_CSIG_<sname>` at all, if a contract
        in `fids` has no such signal ticker, or if a return ticker
        `<fid><rstring>` is missing.
    """
    signal_suffix = f"_CSIG_{sname}"

    ## Check that there is at least one contract signal for the strategy
    if not any(df["ticker"].str.endswith(signal_suffix)):
        raise ValueError(f"No contract signals for strategy `{sname}`.")

    u_tickers: List[str] = list(df["ticker"].unique())
    for contx in fids:
        if not any(
            tx.startswith(contx) and tx.endswith(signal_suffix) for tx in u_tickers
        ):
            raise ValueError(f"Contract identifier `{contx}` not in dataframe.")

    missing_tickers = [
        f"{contx}{rstring}" for contx in fids if f"{contx}{rstring}" not in u_tickers
    ]
    if missing_tickers:
        raise ValueError(
            f"The dataframe is missing the following return series: {missing_tickers}"
        )


def _check_est_args(
    est_freqs: List[str],
    est_weights: List[Number],
    lback_periods: List[int],
    half_life: List[int],
    lback_min_obs: List[int],
) -> Tuple[List[str], List[float], List[int], List[int], List[int]]:
    """
    Validate and align the per-frequency estimation arguments.

    Lists of length 1 are repeated to the length of the longest list; the
    weights are normalized to sum to 1.

    Returns
    -------
    Tuple
        `(est_freqs, est_weights, lback_periods, half_life, lback_min_obs)`,
        all of equal length.

    Raises
    ------
    ValueError
        if list lengths are incompatible, a frequency is invalid, a weight is
        negative or not a number, a lookback period is not a positive integer
        or -1, a half-life or minimum number of observations is not a positive
        integer, or the weights sum to zero.
    """
    # Calculate the maximum length of the provided lists
    max_len = max(
        len(est_freqs),
        len(est_weights),
        len(lback_periods),
        len(half_life),
        len(lback_min_obs),
    )

    def expand_list(lst, name):
        if len(lst) == 1:
            return lst * max_len
        elif len(lst) != max_len:
            raise ValueError(
                "All lists must have length 1 or the same length as the longest "
                f"list ({max_len}). '{name}' has length {len(lst)}."
            )
        return lst

    # Expand lists to match the maximum length
    est_freqs = expand_list(est_freqs, "est_freqs")
    est_weights = expand_list(est_weights, "est_weights")
    lback_periods = expand_list(lback_periods, "lback_periods")
    half_life = expand_list(half_life, "half_life")
    lback_min_obs = expand_list(lback_min_obs, "lback_min_obs")

    # `!r` rather than `:d`: the offending value may not be an integer at all,
    # in which case `:d` would fail with an unrelated formatting error.
    inv_weights_msg = "Invalid weights in `est_weights` at index {ix:d}"
    inv_lback_msg = "Invalid lookback period in `lback_periods` at index {ix:d}: {lb!r}"
    inv_hl_msg = "Invalid half-life in `half_life` at index {ix:d}: {hl!r}"

    for ix, (freq, weight, lback, hl, min_obs) in enumerate(
        zip(est_freqs, est_weights, lback_periods, half_life, lback_min_obs)
    ):
        _check_frequency(freq=freq, freq_type=f"est_freq[{ix:d}]")

        if not isinstance(weight, Number) or weight < 0:
            raise ValueError(inv_weights_msg.format(ix=ix))

        # Only positive values and the sentinel -1 are usable:
        # `_weighted_covariance` asserts `lback_periods > 0 or == -1`.
        if not isinstance(lback, int) or (lback <= 0 and lback != -1):
            raise ValueError(inv_lback_msg.format(ix=ix, lb=lback))

        # `_weighted_covariance` asserts `half_life > 0`, so 0 is rejected here.
        if not isinstance(hl, int) or hl <= 0:
            raise ValueError(inv_hl_msg.format(ix=ix, hl=hl))

        if not isinstance(min_obs, int) or min_obs < 1:
            raise ValueError(
                f"Invalid minimum observations in `lback_min_obs` at index {ix:d}: {min_obs}"
            )

    # normalize est_weights
    total_weight = np.sum(est_weights)
    if np.isclose(total_weight, 0):
        # dividing by zero would silently turn every weight into NaN
        raise ValueError("`est_weights` must not sum to zero.")
    if not np.isclose(total_weight, 1):
        est_weights = list(np.array(est_weights) / total_weight)

    return est_freqs, est_weights, lback_periods, half_life, lback_min_obs
def add_fid_column(
    df: "QuantamentalDataFrame", rstring: str
) -> "QuantamentalDataFrame":
    """
    Add financial identifier (fid) to DataFrame.

    The identifier is `<cid>_<ctype>`, where `<ctype>` is the part of `xcat`
    before its first underscore, with the leading token of `rstring` (the part
    before its first underscore) removed from the end if present. For example,
    with `rstring="XR"` both `EQXR` and `EQ_CSIG_STRAT` give the contract type
    `EQ`.

    Parameters
    ----------
    df : QuantamentalDataFrame
        DataFrame with the columns `cid` and `xcat`. It is modified in place.
    rstring : str
        the string identifying return categories.

    Returns
    -------
    QuantamentalDataFrame
        the same object, with the additional column `fid`.
    """
    # Computed once instead of once per row.
    return_token = rstring.split("_")[0]

    def _contract_type(xcat_parts: List[str]) -> str:
        head = xcat_parts[0]
        # An empty token must not strip anything: `head[:-0]` is `head[:0]`,
        # which would erase the contract type altogether.
        if return_token and head.endswith(return_token):
            return head[: -len(return_token)]
        return head

    df["fid"] = (
        df["cid"].astype(str) + "_" + df["xcat"].str.split("_").map(_contract_type)
    )
    return df
def historic_portfolio_vol(
    df: pd.DataFrame,
    sname: str,
    fids: List[str],
    rstring: str = "XR",
    rebal_freq: str = "m",
    lback_meth: str = "ma",
    est_freqs: Union[str, List[str]] = ["D", "W", "M"],  # "m", "w", "d", "q"
    est_weights: Union[Number, List[Number]] = [1, 1, 1],  # default equal weights
    lback_periods: Union[int, List[int]] = [-1, -1, -1],  # default all for all
    half_life: Union[int, List[int]] = [11, 5, 6],
    lback_min_obs: Union[int, List[int]] = 1,
    start: Optional[str] = None,
    end: Optional[str] = None,
    blacklist: Optional[dict] = None,
    nan_tolerance: float = 0.25,
    remove_zeros: bool = True,
    return_variance_covariance: bool = True,
) -> Union[QuantamentalDataFrame, Tuple[QuantamentalDataFrame, pd.DataFrame]]:
    """
    Historical portfolio volatility. Estimates annualized standard deviations of
    a portfolio, based on historic variances and co-variances.

    Parameters
    ----------
    df : QuantamentalDataFrame
        JPMaQS standard DataFrame containing contract-specific signals and
        return series.
    sname : str
        the name of the strategy. It must correspond to contract signals in the
        dataframe, which have the format "<cid>_<ctype>_CSIG_<sname>", and which
        are typically calculated by the function contract_signals().
    fids : List[str]
        list of financial contract identifiers in the format "<cid>_<ctype>".
        It must correspond to contract signals in the dataframe.
    rstring : str
        a general string of the return category. This identifies the contract
        returns that are required for the volatility-targeting method, based on
        the category identifier format <cid>_<ctype><rstring> in accordance
        with JPMaQS conventions. Default is 'XR'.
    rebal_freq : str
        the frequency of rebalancing and volatility estimation. Default is 'm'
        for monthly. Alternatives are 'w' for business weekly, 'd' for daily,
        and 'q' for quarterly.
    lback_meth : str
        the method to use for the lookback period of the volatility-targeting
        method. Default is "ma" for moving average. Alternative is "xma", for
        exponential moving average.
    est_freqs : str or List[str]
        the list of frequencies for which the covariances are estimated. The
        estimate for a given date is the weighted sum of the annualized
        estimates for each frequency. Default is ["D", "W", "M"].
    est_weights : Number or List[Number]
        the list of weights for each frequency in `est_freqs`. Weights are
        normalized to sum to one before they are applied. Default is
        [1, 1, 1], i.e. equal weights.
    lback_periods : int or List[int]
        the number of periods to use for the lookback period. Each element
        corresponds to the same index in `est_freqs`; a single element is
        applied to all frequencies. Default is [-1, -1, -1], where -1 means
        that the full available history is used.
    half_life : int or List[int]
        number of periods in the half-life of the exponential moving average.
        Each element corresponds to the same index in `est_freqs`. Default is
        [11, 5, 6].
    lback_min_obs : int or List[int]
        minimum number of observations required in a lookback window; with
        fewer observations the covariance estimate is NaN. Default is 1.
    start : str
        the start date of the data. Default is None, which means that the start
        date is taken from the dataframe.
    end : str
        the end date of the data. Default is None, which means that the end
        date is taken from the dataframe.
    blacklist : dict
        a dictionary of contract identifiers to exclude from the calculation.
        Default is None, which means that no contracts are excluded.
    nan_tolerance : float
        tolerance for NaN values in a lookback window; it widens the window
        from which observations are taken (see `get_max_lookback`). Integers
        are accepted and converted to float. Default is 0.25.
    remove_zeros : bool
        if True (default) any returns that are exact zeros will not be included
        in the lookback window and prior non-zero values are added to the
        window instead.
    return_variance_covariance : bool
        if True (default) the stacked variance-covariance estimates are
        returned as well.

    Returns
    -------
    QuantamentalDataFrame or Tuple[QuantamentalDataFrame, pd.DataFrame]
        JPMaQS dataframe of annualized standard deviation of estimated strategy
        PnL, with category name <sname>_PNL_USD1S_ASD, holding one value per
        rebalancing date. If `return_variance_covariance` is True, a tuple of
        that dataframe and a long DataFrame of covariances with the columns
        `fid1`, `fid2`, `value` and `real_date`.

    Raises
    ------
    TypeError
        if an argument has the wrong type.
    ValueError
        if an argument has an invalid value or required series are missing
        from `df`.

    Notes
    -----
    A contract only enters the estimation from the first rebalancing date at
    which sufficient history of both its returns and its signals is available.
    Dates for which the resulting volatility is NaN are dropped and reported in
    a logged warning.
    """
    # The list defaults above are never modified in place: every step below
    # rebinds the names to new objects.
    if isinstance(lback_periods, Number):
        lback_periods = [lback_periods]
    if isinstance(half_life, Number):
        half_life = [half_life]
    if isinstance(est_weights, Number):
        est_weights = [est_weights]
    if isinstance(est_freqs, str):
        est_freqs = [est_freqs]
    if isinstance(lback_min_obs, Number):
        lback_min_obs = [lback_min_obs]

    # Accept whole numbers such as 0 or 1 instead of failing the float check.
    if isinstance(nan_tolerance, int) and not isinstance(nan_tolerance, bool):
        nan_tolerance = float(nan_tolerance)

    ## Check inputs
    # TODO create function for this? Also, do we want to create the set of
    # failures (not just first one)?
    _check_input_arguments(
        arguments=[
            (sname, "sname", str),
            (fids, "fids", list),
            (rstring, "rstring", str),
            (rebal_freq, "rebal_freq", str),
            (lback_meth, "lback_meth", str),
            (lback_periods, "lback_periods", list),
            (half_life, "half_life", list),
            (est_freqs, "est_freqs", list),
            (est_weights, "est_weights", list),
            (start, "start", (str, NoneType)),
            (end, "end", (str, NoneType)),
            (blacklist, "blacklist", (dict, NoneType)),
            (nan_tolerance, "nan_tolerance", float),
            (remove_zeros, "remove_zeros", bool),
            (lback_min_obs, "lback_min_obs", list),
            (return_variance_covariance, "return_variance_covariance", bool),
        ]
    )

    # Check the frequency arguments
    _check_frequency(freq=rebal_freq, freq_type="rebal_freq")
    for ix, freq in enumerate(est_freqs):
        _check_frequency(freq=freq, freq_type=f"est_freq[{ix:d}]")

    ## Check estimation frequency weights
    est_freqs, est_weights, lback_periods, half_life, lback_min_obs = _check_est_args(
        est_freqs=est_freqs,
        est_weights=est_weights,
        lback_periods=lback_periods,
        half_life=half_life,
        lback_min_obs=lback_min_obs,
    )

    ## Standardize and copy DF
    df = QuantamentalDataFrame(df)
    rebal_freq = _map_to_business_day_frequency(rebal_freq)
    est_freqs = [_map_to_business_day_frequency(freq) for freq in est_freqs]

    ## Check the dates
    if start is None:
        start = pd.Timestamp(df["real_date"].min()).strftime("%Y-%m-%d")
    if end is None:
        end = pd.Timestamp(df["real_date"].max()).strftime("%Y-%m-%d")

    for dx, nx in [(start, "start"), (end, "end")]:
        if not is_valid_iso_date(dx):
            raise ValueError(f"`{nx}` must be a valid ISO-8601 date string")

    ## Reduce the dataframe
    df = reduce_df(df=df, start=start, end=end, blacklist=blacklist)
    df = QuantamentalDataFrame(df).add_ticker_column()
    u_tickers: List[str] = df.list_tickers()

    ## Check for missing data
    _check_missing_data(df=df, sname=sname, fids=fids, rstring=rstring)

    # Add financial identifier (fid) to DataFrame
    df = add_fid_column(df=df, rstring=rstring)

    ## Filter out data-frame and select contract signals (CSIG) and returns (XR)
    filt_csigs: List[str] = [tx for tx in u_tickers if tx.endswith(f"_CSIG_{sname}")]
    filt_xrs: List[str] = [tx for tx in u_tickers if tx.endswith(rstring)]

    # TODO check if all exists
    pivot_signals: pd.DataFrame = df.loc[df["ticker"].isin(filt_csigs)].pivot(
        index="real_date", columns="fid", values="value"
    )
    pivot_returns: pd.DataFrame = df.loc[df["ticker"].isin(filt_xrs)].pivot(
        index="real_date", columns="fid", values="value"
    )
    assert set(pivot_signals.columns) == set(pivot_returns.columns)

    result: List[pd.DataFrame] = _hist_vol(
        pivot_returns=pivot_returns,
        pivot_signals=pivot_signals,
        sname=sname,
        rebal_freq=rebal_freq,
        est_freqs=est_freqs,
        est_weights=est_weights,
        lback_periods=lback_periods,
        lback_meth=lback_meth,
        half_life=half_life,
        lback_min_obs=lback_min_obs,
        nan_tolerance=nan_tolerance,
        remove_zeros=remove_zeros,
        return_variance_covariance=return_variance_covariance,
    )

    assert len(result) == 1 + int(return_variance_covariance)

    result[0] = QuantamentalDataFrame.from_wide(df=result[0])

    if return_variance_covariance:
        return result[0], result[1]
    return result[0]
if __name__ == "__main__":
    # Demonstration on simulated data: estimate the portfolio volatility once
    # with NaNs injected into the data and once on the clean data.
    from macrosynergy.management.simulate import simulate_returns_and_signals

    np.random.seed(42)  # Fix numpy seed to 42 for reproducibility

    # Signals: FXCRY_NSA, EQCRY_NSA (rename to FX_CSIG_STRAT, EQ_CSIG_STRAT)
    # Returns: FXXR_NSA, EQXR_NSA (renamed to FXXR, EQXR)
    cids: List[str] = ["EUR", "GBP", "AUD", "CAD"]
    xcats: List[str] = ["EQ"]
    ctypes = xcats.copy()
    start: str = "2000-01-01"

    xr_tickers = [f"{cid}_{xcat}XR" for cid in cids for xcat in xcats]
    cs_tickers = [f"{cid}_{xcat}_CSIG_STRAT" for cid in cids for xcat in xcats]
    fids: List[str] = [f"{cid}_{ctype}" for cid in cids for ctype in ctypes]

    df = simulate_returns_and_signals(
        cids=cids,
        xcat=xcats[0],
        return_suffix="XR",
        signal_suffix="CSIG_STRAT",
        start=start,
        years=20,
    )
    # TODO simulate_returns_and_signals are risk-signals, not contract signals.
    # We need to adjust for volatility and common (observed) factor.
    end = df["real_date"].max().strftime("%Y-%m-%d")

    # Keep an untouched copy: the values of `df` are overwritten with randomly
    # injected NaNs below, while the second run uses the clean data.
    df_copy = df.copy()

    N_p_nans = 0.01
    df["value"] = df["value"].apply(
        lambda x: x if np.random.rand() > N_p_nans else np.nan
    )

    df_vol, vcv_df = historic_portfolio_vol(
        df=df,
        sname="STRAT",
        fids=fids,
        rebal_freq="m",
        est_freqs=["D", "W", "M"],
        est_weights=[0.1, 0.2, 0.7],
        lback_periods=[30, 20, -1],
        half_life=[10, 5, 2],
        lback_meth="xma",
        rstring="XR",
        start=start,
        end=end,
        return_variance_covariance=True,
    )

    vcvs_dict = unstack_covariances(vcv_df)
    dates = [
        dt.strftime("%Y-%m-%d")
        for dt in sorted(pd.to_datetime(list(vcvs_dict.keys())))[-9:]
    ]
    # with sns.axes_style("whitegrid"):
    #     fig, ax = plt.subplots(3, 3, figsize=(15, 15))
    #     for ix, dt in enumerate(dates):
    #         sns.heatmap(vcvs_dict[dt], ax=ax[ix // 3, ix % 3])
    #         ax[ix // 3, ix % 3].set_title(dt)
    #     plt.tight_layout()
    #     plt.show()

    df_copy_vol: pd.DataFrame = historic_portfolio_vol(
        df=df_copy,
        sname="STRAT",
        fids=fids,
        rebal_freq="m",
        lback_periods=15,
        lback_meth="ma",
        half_life=11,
        rstring="XR",
        start=start,
        end=end,
        return_variance_covariance=False,
    )

    # print(df_copy_vol.head(10))
    # print(df_copy_vol.tail(10))