# Source code for macrosynergy.visuals.timelines

"""
Function for visualising a facet grid of time line charts of one or more categories.

.. code-block:: python

    import macrosynergy.visuals as msv
    ...
    msv.view.timelines(df, xcats=["FXXR","EQXR", "IR"], cids=["USD", "EUR", "GBP"] )
    ...
    msv.FacetPlot(df).lineplot(cid_grid=True)

"""

from typing import Dict, List, Optional, Tuple, Union

import pandas as pd

from macrosynergy.management.utils import reduce_df
from macrosynergy.visuals import FacetPlot, LinePlot
from numbers import Number

# Identifier columns of the standardized DataFrame ('cid', 'xcat', 'real_date').
# `timelines` uses this list to infer the value column when the requested `val`
# column is missing and exactly one non-identifier column is present.
IDX_COLS: List[str] = ["cid", "xcat", "real_date"]


def timelines(
    df: pd.DataFrame,
    xcats: Optional[List[str]] = None,
    cids: Optional[List[str]] = None,
    intersect: bool = False,
    val: str = "value",
    cumsum: bool = False,
    start: Optional[str] = None,
    end: Optional[str] = None,
    ncol: int = 3,
    square_grid: bool = False,
    legend_ncol: int = 1,
    same_y: bool = True,
    y_centre_to_zero: bool = False,
    all_xticks: bool = False,  # ~(same_x) basically
    xcat_grid: bool = False,
    xcat_labels: Union[Optional[List[str]], Dict] = None,
    cid_labels: Union[Optional[List[str]], Dict] = None,
    sort_cid_labels: bool = False,
    single_chart: bool = False,
    label_adj: float = 0.05,
    title: Optional[str] = None,
    title_adj: float = 0.95,
    title_xadj: float = 0.5,
    title_fontsize: int = 22,
    cs_mean: bool = False,
    size: Tuple[Number, Number] = (12, 7),
    aspect: Number = 1.7,
    height: Number = 3.0,
    legend_fontsize: int = 12,
    blacklist: Optional[Dict] = None,
    ax_hline: Optional[Union[float, Dict]] = None,
    footnote: Optional[str] = None,
    footnote_fontsize: int = 9,
    return_fig: bool = False,
):
    """
    Displays a facet grid of time line charts of one or more categories.

    Parameters
    ----------
    df : ~pandas.DataFrame
        standardized DataFrame with the necessary columns: 'cid', 'xcat', 'real_date'
        and at least one column with values of interest. A frame with fewer than four
        columns is copied and its index is reset into columns first.
    xcats : List[str]
        extended categories to plot. Default is all in DataFrame. A single string is
        treated as a one-element list.
    cids : List[str]
        cross sections to plot. Default is all in DataFrame. If this contains only one
        cross section a single line chart is created. A single string is treated as a
        one-element list.
    intersect : bool
        if True only retains cids that are available for all xcats. Default is False.
    val : str
        name of column that contains the values of interest. Default is 'value'. If
        the column is missing and the frame has exactly one column besides the
        identifier columns, that column is used instead.
    cumsum : bool
        plot cumulative sum of the values over time. Default is False.
    start : str
        earliest date in ISO format. Default is earliest date available.
    end : str
        latest date in ISO format. Default is latest date available.
    ncol : int
        number of columns in facet grid. Default is 3. In the cross-section grid it is
        capped at the number of cross sections.
    square_grid : bool
        forwarded as `attempt_square` to `FacetPlot.lineplot`; presumably asks for a
        grid layout that is as square as possible. Default is False.
    legend_ncol : int
        number of columns in legend. Default is 1.
    same_y : bool
        if True (default) all plots in facet grid share same y axis.
    y_centre_to_zero : bool
        if True, adjusts y-axis limits to be symmetric around zero. Default is False.
        Only forwarded when `xcat_grid` is in effect.
    all_xticks : bool
        if True x-axis tick labels are added to all plots in grid. Default is False,
        i.e only the lowest row displays the labels.
    xcat_grid : bool
        if True, shows a facet grid of line charts for each xcat for given cross
        sections. Default is False. With a single category this falls back to a
        single chart.
    xcat_labels : Union[Optional[List[str]], Dict]
        labels to be used for xcats. If not defined, the labels will be identical to
        extended categories. A list passed here is never modified.
    cid_labels : Union[Optional[List[str]], Dict]
        labels to be used for cids. If not defined, the labels will be identical to
        cross-sections.
    sort_cid_labels : bool
        if True, sorts the cross-sectional labels in the grid alphabetically.
        Otherwise, the order of `cids` is preserved. Default is False.
    single_chart : bool
        if True, all lines are plotted in a single chart.
    label_adj : float
        parameter that sets bottom of figure to fit the label. Default is 0.05.
        Currently not read by this function; kept for interface compatibility.
    title : str
        chart heading. Default is no title.
    title_adj : float
        parameter that sets top of figure to accommodate title. Default is 0.95.
    title_xadj : float
        parameter that sets x position of title. Default is 0.5.
    title_fontsize : int
        font size of title. Default is 22.
    cs_mean : bool
        if True this adds a line of cross-sectional averages to the line charts. This
        is only allowed for function calls with a single category and more than one
        cross section, and not together with `xcat_grid`. Default is False.
    size : Tuple[Number, Number]
        two-element tuple setting width/height of single cross section plot. Default
        is (12, 7). This is irrelevant for facet grid.
    aspect : Number
        width-height ratio for plots in facet. Default is 1.7.
    height : Number
        height of plots in facet. Default is 3.
    legend_fontsize : int
        font size of legend. Default is 12.
    blacklist : dict
        cross-sections with date ranges that should be excluded from the dataframe.
        Keys must be strings and values lists or tuples.
    ax_hline : Union[float, Dict]
        forwarded unchanged as `ax_hline` to the underlying plot call; presumably the
        y-position(s) of horizontal reference line(s). Default is None.
    footnote : str
        Optional text shown at the bottom-left of the figure canvas.
    footnote_fontsize : int
        Font size of the footnote. Default is 9.
    return_fig : bool
        if True the figure is returned and not shown; otherwise it is shown and
        nothing is returned. Default is False.

    Returns
    -------
    The object returned by the underlying plot call if `return_fig` is True,
    otherwise None.

    Raises
    ------
    TypeError
        if `df` is not a DataFrame, `single_chart`/`xcat_grid` are not booleans,
        `blacklist` is malformed, or `cid_labels` is neither list nor dict.
    ValueError
        if the value column cannot be resolved, mutually exclusive options are
        combined, or label containers do not match `xcats`/`cids`.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("`df` must be a pandas DataFrame.")

    # Fewer than four columns means the identifiers presumably live in the index.
    if len(df.columns) < 4:
        df = df.copy().reset_index()

    if val not in df.columns:
        if len(df.columns) == len(IDX_COLS) + 1:
            # Exactly one non-identifier column: fall back to it.
            val = list(set(df.columns) - set(IDX_COLS))[0]
            if not pd.api.types.is_numeric_dtype(df[val]):
                # NOTE(review): the message calls the `val` argument `metric`.
                raise ValueError(
                    f"Column '{val}' (passed as `metric`) is not numeric, and there are "
                    "no other numeric columns in the DataFrame."
                )
        else:
            raise ValueError(
                f"Column '{val}' (passed as `metric`) does not exist, and there are "
                "none/many other numeric columns in the DataFrame."
            )

    if start is None:
        start = pd.Timestamp(df["real_date"].min()).strftime("%Y-%m-%d")
    if end is None:
        end = pd.Timestamp(df["real_date"].max()).strftime("%Y-%m-%d")

    if isinstance(xcats, str):
        xcats = [xcats]
    if isinstance(cids, str):
        cids = [cids]

    for varx, namex in zip([single_chart, xcat_grid], ["single_chart", "xcat_grid"]):
        if not isinstance(varx, bool):
            raise TypeError(f"`{namex}` must be a boolean.")

    if xcat_grid and single_chart:
        raise ValueError(
            "`xcat_grid` and `single_chart` cannot be True simultaneously."
        )

    if cs_mean and xcat_grid:
        raise ValueError("`cs_mean` requires `xcat_grid` to be False.")

    if blacklist:
        if not isinstance(blacklist, dict):
            raise TypeError("`blacklist` must be a dictionary.")
        for key, value in blacklist.items():
            if not isinstance(key, str):
                raise TypeError("Keys in `blacklist` must be strings.")
            if not isinstance(value, (list, tuple)):
                raise TypeError("Values in `blacklist` must be lists.")

    if xcats is None:
        if xcat_labels:
            raise ValueError("`xcat_labels` requires `xcats` to be defined.")
        xcats = df["xcat"].unique().tolist()

    if cids is None:
        cids = df["cid"].unique().tolist()

    df = reduce_df(
        df, xcats=xcats, cids=cids, start=start, end=end, blacklist=blacklist
    )

    if cumsum:
        # Accumulate per (cid, xcat) in date order; assignment realigns on the index.
        df[val] = (
            df.sort_values(["cid", "xcat", "real_date"])[["cid", "xcat", val]]
            .groupby(["cid", "xcat"])
            .cumsum()
        )

    # Ticker of the synthetic cross-sectional mean series ("mean" acts as the cid).
    cross_mean_series: Optional[str] = f"mean_{xcats[0]}" if cs_mean else None
    if cs_mean:
        if len(xcats) > 1:
            raise ValueError("`cs_mean` cannot be True for multiple categories.")
        if len(cids) == 1:
            raise ValueError("`cs_mean` cannot be True for a single cross section.")

        df_mean = (
            df.groupby(["real_date", "xcat"])[val].mean(numeric_only=True).reset_index()
        )
        df_mean["cid"] = "mean"
        df = pd.concat([df, df_mean], axis=0)
        # Drop to save memory
        del df_mean

    if xcat_labels:
        # when `cs_mean` is True, `xcat_labels` may have one extra label
        if isinstance(xcat_labels, list):
            if len(xcat_labels) not in (len(xcats), len(xcats) + int(cs_mean)):
                raise ValueError(
                    "`xcat_labels` must have same length as `xcats` "
                    "(or one extra label if `cs_mean` is True)."
                )
        elif isinstance(xcat_labels, dict):
            if not all(x in xcat_labels for x in xcats):
                raise ValueError("Keys in `xcat_labels` must be a subset of `xcats`.")
            xcat_labels = [xcat_labels[x] for x in xcats]

    if cid_labels:
        if isinstance(cid_labels, list):
            if len(cid_labels) != len(cids):
                raise ValueError("`cid_labels` must have same length as `cids`.")
        elif isinstance(cid_labels, dict):
            if not all(cid in cid_labels for cid in cids):
                raise ValueError("Keys in `cid_labels` must be a subset of `cids`.")
        else:
            raise TypeError("`cid_labels` must be a list or a dictionary.")

    if cs_mean:
        if xcat_labels is None:
            xcat_labels = [xcats[0]]
        if len(xcat_labels) == 1:
            # Build a new list instead of appending in place, so a list supplied by
            # the caller is never mutated (a reused list would otherwise grow and
            # fail the length check on the next call).
            xcat_labels = [*xcat_labels, "Cross-Sectional Mean"]

    facet_size: Optional[Tuple[float, float]] = (
        (aspect * height, height)
        if (aspect is not None and height is not None)
        else None
    )

    # A grid over a single category is just one chart.
    if xcat_grid and (len(xcats) == 1):
        xcat_grid = False
        single_chart = True

    compare_series = cross_mean_series if cs_mean else None
    extra_tickers = [cross_mean_series] if cs_mean else None

    fig = None
    if xcat_grid:
        with FacetPlot(
            df=df,
            xcats=xcats,
            cids=cids,
            intersect=intersect,
            metrics=[val],
            tickers=extra_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as fp:
            fig = fp.lineplot(
                share_y=same_y,
                share_x=not all_xticks,
                y_centre_to_zero=y_centre_to_zero,
                figsize=size,
                xcat_grid=True,
                facet_titles=xcat_labels or None,
                title=title,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                title_fontsize=title_fontsize,
                ncols=ncol,
                attempt_square=square_grid,
                facet_size=facet_size,
                legend_ncol=legend_ncol,
                legend_fontsize=legend_fontsize,
                interpolate=cumsum,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )

    elif single_chart or (len(cids) == 1):
        with LinePlot(
            df=df,
            cids=cids,
            xcats=xcats,
            intersect=intersect,
            metrics=[val],
            tickers=extra_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as lp:
            fig = lp.plot(
                metric=val,
                figsize=size,
                title=title,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                title_fontsize=title_fontsize,
                legend_ncol=legend_ncol,
                legend_fontsize=legend_fontsize,
                legend_labels=xcat_labels or None,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )

    else:
        if cid_labels and sort_cid_labels:
            # Sorting needs a cid -> label mapping; a list is paired up positionally.
            if isinstance(cid_labels, list):
                cid_labels = {cid: label for cid, label in zip(cids, cid_labels)}
            cids = sorted(cids, key=lambda x: cid_labels.get(x, x))

        with FacetPlot(
            df=df,
            xcats=xcats,
            cids=cids,
            intersect=intersect,
            metrics=[val],
            tickers=extra_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as fp:
            show_legend = bool(cross_mean_series) or (len(xcats) > 1)
            ncol = min(ncol, len(cids))

            fig = fp.lineplot(
                figsize=size,
                share_y=same_y,
                share_x=not all_xticks,
                title=title,
                cid_grid=True,
                facet_titles=cid_labels or None,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                facet_size=facet_size,
                title_fontsize=title_fontsize,
                ncols=ncol,
                attempt_square=square_grid,
                legend=show_legend,
                legend_ncol=legend_ncol,
                legend_labels=xcat_labels,
                legend_fontsize=legend_fontsize,
                interpolate=cumsum,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )

    if return_fig:
        return fig
if __name__ == "__main__":
    # Demo: build a simulated panel and render two example `timelines` figures.
    from macrosynergy.management.simulate import make_test_df
    import numpy as np

    np.random.seed(42)

    cids: List[str] = [
        "USD",
        "EUR",
        "GBP",
        "AUD",
        "CAD",
        "JPY",
        "CHF",
        "NZD",
        "SEK",
        "NOK",
        "DKK",
        "INR",
    ]
    xcats: List[str] = [
        "FXXR",
        "EQXR",
        "RIR",
        "IR",
        "REER",
        "CPI",
        "PPI",
        "M2",
        "M1",
        "M0",
        "FXVOL",
        "FX",
    ]
    sel_cids: List[str] = ["GBP", "USD", "EUR"]
    sel_xcats: List[str] = ["FXXR", "EQXR", "RIR", "IR"]
    r_styles: List[str] = [
        "linear",
        "decreasing-linear",
        "sharp-hill",
        "sine",
        "four-bit-sine",
    ]

    # Keep the list order of `cids`: iterating a set difference of strings depends
    # on per-process hash randomisation, which would make the frame differ between
    # runs despite the fixed NumPy seed above.
    other_cids: List[str] = [cid for cid in cids if cid not in sel_cids]
    df: pd.DataFrame = make_test_df(
        cids=other_cids,
        xcats=xcats,
        start="2000-01-01",
    )

    # One styled frame per selected category; `zip` stops at the shorter list, so
    # the last entry of `r_styles` is not used.
    for rstyle, xcatx in zip(r_styles, sel_xcats):
        dfB: pd.DataFrame = make_test_df(
            cids=sel_cids,
            xcats=[xcatx],
            start="2000-01-01",
            style=rstyle,
        )
        df = pd.concat([df, dfB], axis=0)

    # NOTE(review): the right-hand sides below carry a freshly reset index, and
    # `.loc` assignment of a Series aligns on index labels. Presumably a plain
    # per-group scaling was intended; confirm before relying on the exact values.
    for ix, cidx in enumerate(sel_cids):
        df.loc[df["cid"] == cidx, "value"] = (
            ((df[df["cid"] == cidx]["value"]) * (ix + 1)).reset_index(drop=True).copy()
        )

    for ix, xcatx in enumerate(sel_xcats):
        df.loc[df["xcat"] == xcatx, "value"] = (
            ((df[df["xcat"] == xcatx]["value"]) * (ix * 10 + 1))
            .reset_index(drop=True)
            .copy()
        )

    black = {
        "EUR": tuple(["2012-01-01", "2018-01-01"]),
        "GBP": ["2004-01-01", "2007-01-01"],
        "USD": ["2015-01-01", "2018-01-01"],
    }

    # Cross-section grid with blacklisted periods. `return_fig=True` means the
    # figure is returned (and discarded here) instead of being shown.
    timelines(
        df=df,
        xcats=sel_xcats,
        xcat_grid=False,
        square_grid=True,
        cids=sel_cids,
        blacklist=black,
        same_y=True,
        xcat_labels={
            "FXXR": "FX Returns",
            "EQXR": "Equity Returns",
            "RIR": "Real Interest Rate",
            "IR": "Interest Rate",
        },
        return_fig=True,
    )

    # Alternative example (disabled): single chart with a cross-sectional mean.
    # timelines(
    #     df=df,
    #     xcats=sel_xcats[0],
    #     cids=sel_cids,
    #     cs_mean=True,
    #     # xcat_grid=False,
    #     single_chart=True,
    #     blacklist=black,
    # )

    # Single category, cumulative values, independent y-axes and a mean line.
    timelines(
        df=df,
        same_y=False,
        xcats=sel_xcats[0],
        cids=sel_cids,
        title=(
            "Plotting multiple cross sections for a single category \n with different "
            "y-axis!"
        ),
        # blacklist=black,
        cumsum=True,
        cid_labels={"USD": "Label 1", "EUR": "Label 2", "GBP": "Label 3"},
        xcat_labels={"FXXR": "Xcat Label"},
        cs_mean=True,
    )