# Source code for macrosynergy.panel.view_correlations

"""
Functions used to visualize correlations across categories or cross-sections of panels.
"""

import pandas as pd
import numpy as np
from typing import Dict, List, Union, Tuple, Optional, Any
from macrosynergy.management.simulate import make_qdf

import macrosynergy.visuals as msv


def correl_matrix(
    df: pd.DataFrame,
    xcats: Optional[Union[str, List[str]]] = None,
    cids: Optional[List[str]] = None,
    tickers: Optional[List[str]] = None,
    xcats_secondary: Optional[Union[str, List[str]]] = None,
    cids_secondary: Optional[List[str]] = None,
    start: Optional[str] = None,
    end: Optional[str] = None,
    val: str = "value",
    freq: Optional[str] = None,
    cluster: bool = False,
    lags: Optional[dict] = None,
    lags_secondary: Optional[dict] = None,
    title: Optional[str] = None,
    title_fontsize: Optional[int] = None,
    size: Tuple[float, float] = (14, 8),
    max_color: Optional[float] = None,
    show: bool = True,
    xcat_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    xcat_secondary_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    cid_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    cid_secondary_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    ticker_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    footnote: Optional[str] = None,
    footnote_fontsize: int = 9,
    **kwargs: Any,
) -> None:
    """
    Visualize correlation across categories or cross-sections of panels.

    This function forwards every argument unchanged to
    `macrosynergy.visuals.view_correlation` and returns nothing itself.

    Parameters
    ----------
    df : ~pandas.DataFrame
        standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat',
        'real_date' and at least one column with values of interest.
    xcats : Union[str, List[str]], optional
        extended categories to be correlated. Default is all in the DataFrame. If
        `xcats` contains only one category the correlation coefficients across cross
        sections are displayed. If `xcats` contains more than one category, the
        correlation coefficients across categories are displayed. Additionally, the
        order of the `xcats` received will be mirrored in the correlation matrix.
    cids : List[str], optional
        cross sections to be correlated. Default is all in the DataFrame.
    tickers : List[str], optional
        specific tickers to correlate (format: "CID_XCAT", e.g. "USD_FXXR_NSA"). If
        provided, correlations will be calculated between the full ticker
        combinations. Cannot be used together with xcats/cids or
        xcats_secondary/cids_secondary.
    xcats_secondary : Union[str, List[str]], optional
        an optional second set of extended categories. If `xcats_secondary` is
        provided, correlations will be calculated between the categories in `xcats`
        and `xcats_secondary`.
    cids_secondary : List[str], optional
        an optional second list of cross sections. If `cids_secondary` is provided
        correlations will be calculated and visualized between these two sets.
    start : str, optional
        earliest date in ISO format. Default is None and earliest date in df is
        used.
    end : str, optional
        latest date in ISO format. Default is None and latest date in df is used.
    val : str
        name of column that contains the values of interest. Default is 'value'.
    freq : str, optional
        frequency option. Per default the correlations are calculated based on the
        native frequency of the datetimes in 'real_date', which is business daily.
        Down-sampling options include weekly ('W'), monthly ('M'), or quarterly
        ('Q') mean.
    cluster : bool
        if True the series in the correlation matrix are reordered by hierarchical
        clustering. Default is False.
    lags : dict, optional
        optional dictionary of lags applied to respective categories. The key will
        be the category and the value is the lag or lags. If a category has multiple
        lags applied, pass in a list of lag values. The lag factor will be appended
        to the category name in the correlation matrix. If `xcats_secondary` is not
        None, this parameter will specify lags for the categories in `xcats`.
    lags_secondary : dict, optional
        optional dictionary of lags applied to the second set of categories if
        `xcats_secondary` is provided.
    title : str, optional
        chart heading. If none is given, a default title is used.
    title_fontsize : int, optional
        font size of the title. Default is None.
    size : Tuple[float, float]
        two-element tuple setting width/height of figure. Default is (14, 8).
    max_color : float, optional
        maximum values of positive/negative correlation coefficients for color
        scale. Default is None. If a value is given it applies symmetrically to
        positive and negative values.
    show : bool
        if True the figure will be displayed. Default is True.
    xcat_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for the categories specified in
        `xcats`. A list should be in the same order as `xcats`, a dictionary should
        map from each category to its label (e.g. {'XR': 'Excess returns', 'CRY':
        'Carry'}).
    xcat_secondary_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for `xcats_secondary`.
    cid_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for cids. A list should be in the same
        order as cids, a dictionary should map from each cid to its label.
    cid_secondary_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for cids_secondary.
    ticker_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for tickers. A list should be in the
        same order as tickers, a dictionary should map from each ticker to its
        label.
    footnote : str, optional
        Optional text shown at the bottom-left of the figure canvas.
    footnote_fontsize : int
        Font size of the footnote. Default is 9.
    **kwargs : Dict
        Arbitrary keyword arguments that are passed to seaborn.heatmap.


    .. note::
        Lags can include a 0 if the original should also be correlated.

    .. note::
        The function displays the heatmap of a correlation matrix across categories
        or cross-sections (depending on which parameter has received multiple
        elements).
    """
    # Pure pass-through: all validation and plotting happens in the visuals module.
    msv.view_correlation(
        df=df,
        xcats=xcats,
        cids=cids,
        tickers=tickers,
        xcats_secondary=xcats_secondary,
        cids_secondary=cids_secondary,
        start=start,
        end=end,
        val=val,
        freq=freq,
        cluster=cluster,
        lags=lags,
        lags_secondary=lags_secondary,
        title=title,
        title_fontsize=title_fontsize,
        size=size,
        max_color=max_color,
        show=show,
        xcat_labels=xcat_labels,
        xcat_secondary_labels=xcat_secondary_labels,
        cid_labels=cid_labels,
        cid_secondary_labels=cid_secondary_labels,
        ticker_labels=ticker_labels,
        footnote=footnote,
        footnote_fontsize=footnote_fontsize,
        **kwargs,
    )


if __name__ == "__main__":
    # Seed NumPy's global random state ahead of the data simulation below.
    np.random.seed(0)

    # Cross sections: a base list extended by the two additional groups.
    cids_dmsc = ["CHF", "NOK", "SEK"]
    cids_dmec = ["DEM", "ESP", "FRF", "ITL", "NLG"]
    cids = ["AUD", "CAD", "GBP", "USD", "NZD", "EUR"] + cids_dmec + cids_dmsc
    xcats = ["XR", "CRY"]

    # Per-cross-section simulation settings, one row per cid.
    cid_specs = {
        "AUD": ["2010-01-01", "2020-12-31", 0.5, 2],
        "CAD": ["2011-01-01", "2020-11-30", 0, 1],
        "GBP": ["2012-01-01", "2020-11-30", -0.2, 0.5],
        "USD": ["2010-01-01", "2020-12-30", -0.2, 0.5],
        "NZD": ["2002-01-01", "2020-09-30", -0.1, 2],
        "EUR": ["2002-01-01", "2020-09-30", -0.2, 2],
        "DEM": ["2003-01-01", "2020-09-30", -0.3, 2],
        "ESP": ["2003-01-01", "2020-09-30", -0.1, 2],
        "FRF": ["2003-01-01", "2020-09-30", -0.2, 2],
        "ITL": ["2004-01-01", "2020-09-30", -0.2, 0.5],
        "NLG": ["2003-01-01", "2020-12-30", -0.1, 0.5],
        "CHF": ["2003-01-01", "2020-12-30", -0.3, 2.5],
        "NOK": ["2010-01-01", "2020-12-30", -0.1, 0.5],
        "SEK": ["2010-01-01", "2020-09-30", -0.1, 0.5],
    }
    df_cids = pd.DataFrame(
        index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
    )
    for cid, spec in cid_specs.items():
        df_cids.loc[cid] = spec

    # Per-category simulation settings, one row per xcat.
    xcat_specs = {
        "XR": ["2010-01-01", "2020-12-31", 0.1, 1, 0, 0.3],
        "CRY": ["2010-01-01", "2020-10-30", 1, 2, 0.95, 0.5],
    }
    df_xcats = pd.DataFrame(
        index=xcats,
        columns=["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"],
    )
    for xcat, spec in xcat_specs.items():
        df_xcats.loc[xcat] = spec

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    start = "2012-01-01"
    end = "2020-09-30"
    # NOTE: `lag_dict` is defined here but not passed on; the call below uses
    # lags=None.
    lag_dict = {"XR": [0, 2, 5]}

    # Clustered correlation matrix across the two categories (cluster=True).
    # `annot` and `fmt` are not named parameters of correl_matrix, so they
    # travel through its **kwargs.
    heatmap_options = {"annot": True, "fmt": ".2f"}
    correl_matrix(
        df=dfd,
        xcats=["XR", "CRY"],
        cids=cids,
        xcats_secondary=None,
        cids_secondary=None,
        start=start,
        end=end,
        val="value",
        freq=None,
        cluster=True,
        lags=None,
        lags_secondary=None,
        title="Correlation Matrix",
        size=(14, 8),
        max_color=None,
        footnote="JPMaQS data",
        footnote_fontsize=10,
        **heatmap_options,
    )