"""
Functions used to visualize correlations across categories or cross-sections of panels.
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Union, Tuple, Optional, Any
from macrosynergy.management.simulate import make_qdf
import macrosynergy.visuals as msv
def correl_matrix(
    df: pd.DataFrame,
    xcats: Optional[Union[str, List[str]]] = None,
    cids: Optional[List[str]] = None,
    tickers: Optional[List[str]] = None,
    xcats_secondary: Optional[Union[str, List[str]]] = None,
    cids_secondary: Optional[List[str]] = None,
    start: Optional[str] = None,
    end: Optional[str] = None,
    val: str = "value",
    freq: Optional[str] = None,
    cluster: bool = False,
    lags: Optional[dict] = None,
    lags_secondary: Optional[dict] = None,
    title: Optional[str] = None,
    title_fontsize: Optional[int] = None,
    size: Tuple[float, float] = (14, 8),
    max_color: Optional[float] = None,
    show: bool = True,
    xcat_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    xcat_secondary_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    cid_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    cid_secondary_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    ticker_labels: Optional[Union[List[str], Dict[str, str]]] = None,
    footnote: Optional[str] = None,
    footnote_fontsize: int = 9,
    **kwargs: Any,
) -> None:
    """
    Visualize correlation across categories or cross-sections of panels.

    This function forwards every argument unchanged to
    ``macrosynergy.visuals.view_correlation`` and does not return anything.

    Parameters
    ----------
    df : ~pandas.DataFrame
        standardized JPMaQS DataFrame with the necessary columns: 'cid', 'xcat',
        'real_date' and at least one column with values of interest.
    xcats : Union[str, List[str]]
        extended categories to be correlated. Default is all in the DataFrame. If
        `xcats` contains only one category the correlation coefficients across cross
        sections are displayed. If `xcats` contains more than one category, the
        correlation coefficients across categories are displayed. Additionally, the
        order of the `xcats` received will be mirrored in the correlation matrix.
    cids : List[str]
        cross sections to be correlated. Default is all in the DataFrame.
    tickers : List[str], optional
        specific tickers to correlate (format: "CID_XCAT", e.g. "USD_FXXR_NSA").
        If provided, correlations will be calculated between the full ticker
        combinations. Cannot be used together with xcats/cids or
        xcats_secondary/cids_secondary.
    xcats_secondary : Union[str, List[str]]
        an optional second set of extended categories. If `xcats_secondary` is
        provided, correlations will be calculated between the categories in `xcats`
        and `xcats_secondary`.
    cids_secondary : List[str]
        an optional second list of cross sections. If `cids_secondary` is provided
        correlations will be calculated and visualized between these two sets.
    start : str
        earliest date in ISO format. Default is None and earliest date in df is used.
    end : str
        latest date in ISO format. Default is None and latest date in df is used.
    val : str
        name of column that contains the values of interest. Default is 'value'.
    freq : str
        frequency option. Per default the correlations are calculated based on the
        native frequency of the datetimes in 'real_date', which is business daily.
        Down-sampling options include weekly ('W'), monthly ('M'), or quarterly ('Q')
        mean.
    cluster : bool
        if True the series in the correlation matrix are reordered by hierarchical
        clustering. Default is False.
    lags : dict
        optional dictionary of lags applied to respective categories. The key will be
        the category and the value is the lag or lags. If a category has multiple lags
        applied, pass in a list of lag values. The lag factor will be appended to the
        category name in the correlation matrix. If `xcats_secondary` is not None,
        this parameter will specify lags for the categories in `xcats`.
    lags_secondary : dict
        optional dictionary of lags applied to the second set of categories if
        `xcats_secondary` is provided.
    title : str
        chart heading. If none is given, a default title is used.
    title_fontsize : int
        font size of the title. Default is None.
    size : Tuple[float, float]
        two-element tuple setting width/height of figure. Default is (14, 8).
    max_color : float
        maximum values of positive/negative correlation coefficients for color scale.
        Default is None. If a value is given it applies symmetrically to positive and
        negative values.
    show : bool
        if True the figure will be displayed. Default is True.
    xcat_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for the categories specified in `xcats`.
        A list should be in the same order as `xcats`, a dictionary should map from
        each category to its label (e.g. {'XR': 'Excess returns', 'CRY': 'Carry'}).
    xcat_secondary_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for `xcats_secondary`.
    cid_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for cids. A list should be in the same
        order as cids, a dictionary should map from each cid to its label.
    cid_secondary_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for cids_secondary.
    ticker_labels : Optional[Union[List[str], Dict[str, str]]]
        optional list or dictionary of labels for tickers. A list should be in the
        same order as tickers, a dictionary should map from each ticker to its label.
    footnote : str
        Optional text shown at the bottom-left of the figure canvas.
    footnote_fontsize : int
        Font size of the footnote. Default is 9.
    **kwargs : Dict
        Arbitrary keyword arguments that are passed to seaborn.heatmap.

    Returns
    -------
    None
        The result of the underlying plotting call is not propagated.


    .. note::
        Lags can include a 0 if the original should also be correlated.

    .. note::
        The function displays the heatmap of a correlation matrix across categories
        or cross-sections (depending on which parameter has received multiple
        elements).
    """
    # Thin wrapper: all validation, computation and plotting happen in
    # macrosynergy.visuals.view_correlation.
    msv.view_correlation(
        df=df,
        xcats=xcats,
        cids=cids,
        tickers=tickers,
        xcats_secondary=xcats_secondary,
        cids_secondary=cids_secondary,
        start=start,
        end=end,
        val=val,
        freq=freq,
        cluster=cluster,
        lags=lags,
        lags_secondary=lags_secondary,
        title=title,
        title_fontsize=title_fontsize,
        size=size,
        max_color=max_color,
        show=show,
        xcat_labels=xcat_labels,
        xcat_secondary_labels=xcat_secondary_labels,
        cid_labels=cid_labels,
        cid_secondary_labels=cid_secondary_labels,
        ticker_labels=ticker_labels,
        footnote=footnote,
        footnote_fontsize=footnote_fontsize,
        **kwargs,
    )
if __name__ == "__main__":
    # Demo: build a simulated panel with make_qdf and plot a clustered
    # correlation heatmap across the two categories.

    # Fix the seed before any data is simulated so the example is reproducible.
    np.random.seed(0)

    # Cross-section universe: six base currencies, then the two extra groups.
    cids_dmsc = ["CHF", "NOK", "SEK"]
    cids_dmec = ["DEM", "ESP", "FRF", "ITL", "NLG"]
    cids = ["AUD", "CAD", "GBP", "USD", "NZD", "EUR", *cids_dmec, *cids_dmsc]
    xcats = ["XR", "CRY"]

    # Per-cross-section parameters handed to make_qdf, one row per cid.
    cid_columns = ["earliest", "latest", "mean_add", "sd_mult"]
    cid_rows = {
        "AUD": ["2010-01-01", "2020-12-31", 0.5, 2],
        "CAD": ["2011-01-01", "2020-11-30", 0, 1],
        "GBP": ["2012-01-01", "2020-11-30", -0.2, 0.5],
        "USD": ["2010-01-01", "2020-12-30", -0.2, 0.5],
        "NZD": ["2002-01-01", "2020-09-30", -0.1, 2],
        "EUR": ["2002-01-01", "2020-09-30", -0.2, 2],
        "DEM": ["2003-01-01", "2020-09-30", -0.3, 2],
        "ESP": ["2003-01-01", "2020-09-30", -0.1, 2],
        "FRF": ["2003-01-01", "2020-09-30", -0.2, 2],
        "ITL": ["2004-01-01", "2020-09-30", -0.2, 0.5],
        "NLG": ["2003-01-01", "2020-12-30", -0.1, 0.5],
        "CHF": ["2003-01-01", "2020-12-30", -0.3, 2.5],
        "NOK": ["2010-01-01", "2020-12-30", -0.1, 0.5],
        "SEK": ["2010-01-01", "2020-09-30", -0.1, 0.5],
    }
    df_cids = pd.DataFrame(index=cids, columns=cid_columns)
    for cid_name, cid_row in cid_rows.items():
        df_cids.loc[cid_name] = cid_row

    # Per-category parameters handed to make_qdf, one row per xcat.
    xcat_columns = [
        "earliest",
        "latest",
        "mean_add",
        "sd_mult",
        "ar_coef",
        "back_coef",
    ]
    xcat_rows = {
        "XR": ["2010-01-01", "2020-12-31", 0.1, 1, 0, 0.3],
        "CRY": ["2010-01-01", "2020-10-30", 1, 2, 0.95, 0.5],
    }
    df_xcats = pd.DataFrame(index=xcats, columns=xcat_columns)
    for xcat_name, xcat_row in xcat_rows.items():
        df_xcats.loc[xcat_name] = xcat_row

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    start = "2012-01-01"
    end = "2020-09-30"

    # Example lag specification; defined here but not passed to the call below
    # (which uses lags=None).
    lag_dict = {"XR": [0, 2, 5]}

    # Clustered correlation matrix: exercises the hierarchical clustering path.
    correl_matrix(
        df=dfd,
        # Panel selection.
        xcats=xcats,
        cids=cids,
        xcats_secondary=None,
        cids_secondary=None,
        # Sample window and data column.
        start=start,
        end=end,
        val="value",
        freq=None,
        # Lags and ordering.
        lags=None,
        lags_secondary=None,
        cluster=True,
        # Figure appearance.
        title="Correlation Matrix",
        size=(14, 8),
        max_color=None,
        footnote="JPMaQS data",
        footnote_fontsize=10,
        # Extra keyword arguments forwarded via **kwargs.
        annot=True,
        fmt=".2f",
    )