"""
Function for visualising a facet grid of time line charts of one or more categories.

.. code-block:: python

    import macrosynergy.visuals as msv
    ...
    msv.view.timelines(df, xcats=["FXXR", "EQXR", "IR"], cids=["USD", "EUR", "GBP"])
    ...
    msv.FacetPlot(df).lineplot(cid_grid=True)
"""
from typing import Dict, List, Optional, Tuple, Union
import pandas as pd
from macrosynergy.management.utils import reduce_df
from macrosynergy.visuals import FacetPlot, LinePlot
from numbers import Number
# Identifier columns of the input DataFrame. `timelines` uses this list to
# locate the value column when the requested one is missing: if the frame has
# exactly one column more than these, that extra column is used.
IDX_COLS: List[str] = ["cid", "xcat", "real_date"]
def timelines(
    df: pd.DataFrame,
    xcats: Optional[List[str]] = None,
    cids: Optional[List[str]] = None,
    intersect: bool = False,
    val: str = "value",
    cumsum: bool = False,
    start: Optional[str] = None,
    end: Optional[str] = None,
    ncol: int = 3,
    square_grid: bool = False,
    legend_ncol: int = 1,
    same_y: bool = True,
    y_centre_to_zero: bool = False,
    all_xticks: bool = False,  # ~(same_x) basically
    xcat_grid: bool = False,
    xcat_labels: Union[Optional[List[str]], Dict] = None,
    cid_labels: Union[Optional[List[str]], Dict] = None,
    sort_cid_labels: bool = False,
    single_chart: bool = False,
    label_adj: float = 0.05,
    title: Optional[str] = None,
    title_adj: float = 0.95,
    title_xadj: float = 0.5,
    title_fontsize: int = 22,
    cs_mean: bool = False,
    size: Tuple[Number, Number] = (12, 7),
    aspect: Number = 1.7,
    height: Number = 3.0,
    legend_fontsize: int = 12,
    blacklist: Optional[Dict] = None,
    ax_hline: Optional[Union[float, Dict]] = None,
    footnote: Optional[str] = None,
    footnote_fontsize: int = 9,
    return_fig: bool = False,
):
    """
    Displays a facet grid of time line charts of one or more categories.

    Depending on the arguments, one of three layouts is drawn:

    * ``xcat_grid=True`` (with more than one category): one facet per category.
    * ``single_chart=True``, a single cross section, or ``xcat_grid=True`` with
      exactly one category: a single line chart.
    * otherwise: one facet per cross section.

    Parameters
    ----------
    df : ~pandas.DataFrame
        standardized DataFrame with the necessary columns: 'cid', 'xcat', 'real_date'
        and at least one column with values of interest. A frame with fewer than
        four columns is copied and its index is reset into columns first.
    xcats : List[str]
        extended categories to plot. Default is all in DataFrame. A single string is
        accepted and treated as a one-element list.
    cids : List[str]
        cross sections to plot. Default is all in DataFrame. If this contains only one
        cross section a single line chart is created. A single string is accepted
        and treated as a one-element list.
    intersect : bool
        if True only retains cids that are available for all xcats. Default is False.
    val : str
        name of column that contains the values of interest. Default is 'value'. If
        the column does not exist and the frame has exactly one column besides the
        identifier columns, that column is used instead (it must be numeric).
    cumsum : bool
        plot cumulative sum of the values over time. Default is False.
    start : str
        earliest date in ISO format. Default is earliest date available.
    end : str
        latest date in ISO format. Default is latest date available.
    ncol : int
        number of columns in facet grid. Default is 3. In the cross-section grid it
        is capped at the number of cross sections.
    square_grid : bool
        forwarded to the facet plot as `attempt_square`; presumably requests a grid
        layout that is as close to square as possible. Default is False. Not used
        for a single chart.
    legend_ncol : int
        number of columns in legend. Default is 1.
    same_y : bool
        if True (default) all plots in facet grid share same y axis.
    y_centre_to_zero : bool
        if True, adjusts y-axis limits to be symmetric around zero. Default is False.
    all_xticks : bool
        if True x-axis tick labels are added to all plots in grid. Default is False, i.e
        only the lowest row displays the labels.
    xcat_grid : bool
        if True, shows a facet grid of line charts for each xcat for given cross
        sections. Default is False.
    xcat_labels : Union[Optional[List[str]], Dict]
        labels to be used for xcats. If not defined, the labels will be identical to
        extended categories. A list must match `xcats` in length (one extra label
        for the mean line is allowed when `cs_mean` is True); a dictionary must
        have a key for every category. Requires `xcats` to be passed explicitly.
    cid_labels : Union[Optional[List[str]], Dict]
        labels to be used for cids. If not defined, the labels will be identical to
        cross-sections. A list must match `cids` in length; a dictionary must have
        a key for every cross section.
    sort_cid_labels : bool
        if True, sorts the cross-sectional labels in the grid alphabetically. Otherwise,
        the order of `cids` is preserved. Default is False.
    single_chart : bool
        if True, all lines are plotted in a single chart.
    label_adj : float
        parameter that sets bottom of figure to fit the label. Default is 0.05.
    title : str
        chart heading. Default is no title.
    title_adj : float
        parameter that sets top of figure to accommodate title. Default is 0.95.
    title_xadj : float
        parameter that sets x position of title. Default is 0.5.
    title_fontsize : int
        font size of title. Default is 22.
    cs_mean : bool
        if True this adds a line of cross-sectional averages to the line charts. This is
        only allowed for function calls with a single category and more than one
        cross section, and not together with `xcat_grid`. Default is False.
    size : Tuple[Number, Number]
        two-element tuple setting width/height of single cross section plot. Default is
        (12, 7). This is irrelevant for facet grid.
    aspect : Number
        width-height ratio for plots in facet. Default is 1.7.
    height : Number
        height of plots in facet. Default is 3.
    legend_fontsize : int
        font size of legend. Default is 12.
    blacklist : dict
        cross-sections with date ranges that should be excluded from the dataframe.
        Keys must be strings and values lists or tuples.
    ax_hline : Union[float, Dict]
        forwarded unchanged to the underlying plot call; presumably the y-value(s)
        at which horizontal reference lines are drawn. Default is None.
    footnote : str
        Optional text shown at the bottom-left of the figure canvas.
    footnote_fontsize : int
        Font size of the footnote. Default is 9.
    return_fig : bool
        if True, the figure is not shown and is returned instead. Default is False.

    Returns
    -------
    The object returned by the underlying plot call if `return_fig` is True,
    otherwise None.

    Raises
    ------
    TypeError
        if `df` is not a DataFrame, `single_chart`/`xcat_grid` are not booleans,
        `blacklist` is malformed, or `cid_labels` is neither a list nor a dict.
    ValueError
        if the value column cannot be determined, mutually exclusive options are
        combined, or the label arguments do not match `xcats`/`cids`.
    """
    if not isinstance(df, pd.DataFrame):
        raise TypeError("`df` must be a pandas DataFrame.")

    # With fewer than four columns the identifier columns cannot all be regular
    # columns; presumably they live in the index, so move the index into columns.
    if len(df.columns) < 4:
        df = df.copy().reset_index()

    if val not in df.columns:
        requested_val = val
        if len(df.columns) == len(IDX_COLS) + 1:
            # Exactly one non-identifier column: fall back to it.
            val = list(set(df.columns) - set(IDX_COLS))[0]
            if not pd.api.types.is_numeric_dtype(df[val]):
                raise ValueError(
                    f"Column '{requested_val}' (passed as `val`) does not exist, "
                    f"and the only other column '{val}' is not numeric."
                )
        else:
            raise ValueError(
                f"Column '{requested_val}' (passed as `val`) does not exist, and "
                "there are none/many other numeric columns in the DataFrame."
            )

    if start is None:
        start = pd.Timestamp(df["real_date"].min()).strftime("%Y-%m-%d")
    if end is None:
        end = pd.Timestamp(df["real_date"].max()).strftime("%Y-%m-%d")

    if isinstance(xcats, str):
        xcats = [xcats]
    if isinstance(cids, str):
        cids = [cids]

    for flag, flag_name in (
        (single_chart, "single_chart"),
        (xcat_grid, "xcat_grid"),
    ):
        if not isinstance(flag, bool):
            raise TypeError(f"`{flag_name}` must be a boolean.")

    if xcat_grid and single_chart:
        raise ValueError(
            "`xcat_grid` and `single_chart` cannot be True simultaneously."
        )
    if cs_mean and xcat_grid:
        raise ValueError("`cs_mean` requires `xcat_grid` to be False.")

    if blacklist:
        if not isinstance(blacklist, dict):
            raise TypeError("`blacklist` must be a dictionary.")
        for key, value in blacklist.items():
            if not isinstance(key, str):
                raise TypeError("Keys in `blacklist` must be strings.")
            if not isinstance(value, (list, tuple)):
                raise TypeError("Values in `blacklist` must be lists.")

    if xcats is None:
        if xcat_labels:
            raise ValueError("`xcat_labels` requires `xcats` to be defined.")
        xcats = df["xcat"].unique().tolist()
    if cids is None:
        cids = df["cid"].unique().tolist()

    # Validate the `cs_mean` request before any data is filtered or transformed,
    # so that an invalid call fails fast.
    if cs_mean:
        if len(xcats) > 1:
            raise ValueError("`cs_mean` cannot be True for multiple categories.")
        if len(cids) == 1:
            raise ValueError("`cs_mean` cannot be True for a single cross section.")

    df = reduce_df(
        df, xcats=xcats, cids=cids, start=start, end=end, blacklist=blacklist
    )

    if cumsum:
        # Cumulate within each (cid, xcat) series in date order; the result is
        # index-aligned back onto `df`.
        df[val] = (
            df.sort_values(["cid", "xcat", "real_date"])[["cid", "xcat", val]]
            .groupby(["cid", "xcat"])
            .cumsum()
        )

    # Ticker name of the synthetic cross-sectional mean series ("mean" acts as cid).
    cross_mean_series: Optional[str] = f"mean_{xcats[0]}" if cs_mean else None
    if cs_mean:
        df_mean = (
            df.groupby(["real_date", "xcat"])[val].mean(numeric_only=True).reset_index()
        )
        df_mean["cid"] = "mean"
        df = pd.concat([df, df_mean], axis=0)
        del df_mean  # no longer needed; release the intermediate frame

    if xcat_labels:
        # when `cs_mean` is True, `xcat_labels` may have one extra label
        if isinstance(xcat_labels, list):
            if len(xcat_labels) not in (len(xcats), len(xcats) + int(cs_mean)):
                raise ValueError(
                    "`xcat_labels` must have same length as `xcats` "
                    "(or one extra label if `cs_mean` is True)."
                )
        elif isinstance(xcat_labels, dict):
            # Every requested category needs a label (message kept as-is).
            if not all(x in xcat_labels for x in xcats):
                raise ValueError("Keys in `xcat_labels` must be a subset of `xcats`.")
            xcat_labels = [xcat_labels[x] for x in xcats]

    if cid_labels:
        if isinstance(cid_labels, list):
            if len(cid_labels) != len(cids):
                raise ValueError("`cid_labels` must have same length as `cids`.")
        elif isinstance(cid_labels, dict):
            # Every requested cross section needs a label (message kept as-is).
            if not all(cid in cid_labels for cid in cids):
                raise ValueError("Keys in `cid_labels` must be a subset of `cids`.")
        else:
            raise TypeError("`cid_labels` must be a list or a dictionary.")

    if cs_mean:
        if xcat_labels is None:
            xcat_labels = [xcats[0]]
        if len(xcat_labels) == 1:
            # Build a new list rather than appending in place, so a list passed
            # in by the caller is never mutated.
            xcat_labels = [*xcat_labels, "Cross-Sectional Mean"]

    facet_size: Optional[Tuple[float, float]] = (
        (aspect * height, height)
        if (aspect is not None and height is not None)
        else None
    )

    # A category grid with a single category is just a single chart.
    if xcat_grid and (len(xcats) == 1):
        xcat_grid = False
        single_chart = True

    mean_tickers: Optional[List[str]] = [cross_mean_series] if cs_mean else None
    compare_series: Optional[str] = cross_mean_series if cs_mean else None

    fig = None
    if xcat_grid:
        with FacetPlot(
            df=df,
            xcats=xcats,
            cids=cids,
            intersect=intersect,
            metrics=[val],
            tickers=mean_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as fp:
            fig = fp.lineplot(
                share_y=same_y,
                share_x=not all_xticks,
                y_centre_to_zero=y_centre_to_zero,
                figsize=size,
                xcat_grid=True,
                facet_titles=xcat_labels or None,
                title=title,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                title_fontsize=title_fontsize,
                ncols=ncol,
                attempt_square=square_grid,
                facet_size=facet_size,
                legend_ncol=legend_ncol,
                legend_fontsize=legend_fontsize,
                interpolate=cumsum,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )
    elif single_chart or (len(cids) == 1):
        with LinePlot(
            df=df,
            cids=cids,
            xcats=xcats,
            intersect=intersect,
            metrics=[val],
            tickers=mean_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as lp:
            fig = lp.plot(
                metric=val,
                figsize=size,
                title=title,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                title_fontsize=title_fontsize,
                legend_ncol=legend_ncol,
                legend_fontsize=legend_fontsize,
                legend_labels=xcat_labels or None,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )
    else:
        if cid_labels and sort_cid_labels:
            if isinstance(cid_labels, list):
                cid_labels = dict(zip(cids, cid_labels))
            # Order the facets by their display label, not by the raw cid.
            cids = sorted(cids, key=lambda x: cid_labels.get(x, x))

        with FacetPlot(
            df=df,
            xcats=xcats,
            cids=cids,
            intersect=intersect,
            metrics=[val],
            tickers=mean_tickers,
            start=start,
            end=end,
            blacklist=blacklist,
        ) as fp:
            # A legend is only informative if a facet holds more than one line.
            show_legend = bool(cross_mean_series) or (len(xcats) > 1)
            if ncol > len(cids):
                ncol = len(cids)

            fig = fp.lineplot(
                figsize=size,
                share_y=same_y,
                share_x=not all_xticks,
                title=title,
                cid_grid=True,
                facet_titles=cid_labels or None,
                title_yadjust=title_adj,
                title_xadjust=title_xadj,
                compare_series=compare_series,
                facet_size=facet_size,
                title_fontsize=title_fontsize,
                ncols=ncol,
                attempt_square=square_grid,
                legend=show_legend,
                legend_ncol=legend_ncol,
                legend_labels=xcat_labels,
                legend_fontsize=legend_fontsize,
                interpolate=cumsum,
                ax_hline=ax_hline,
                footnote=footnote,
                footnote_fontsize=footnote_fontsize,
                show=not return_fig,
                return_figure=return_fig,
            )

    if return_fig:
        return fig
if __name__ == "__main__":
    # Manual demo: builds a synthetic panel and draws two example figures.
    from macrosynergy.management.simulate import make_test_df
    import numpy as np

    np.random.seed(42)

    cids: List[str] = [
        "USD",
        "EUR",
        "GBP",
        "AUD",
        "CAD",
        "JPY",
        "CHF",
        "NZD",
        "SEK",
        "NOK",
        "DKK",
        "INR",
    ]
    xcats: List[str] = [
        "FXXR",
        "EQXR",
        "RIR",
        "IR",
        "REER",
        "CPI",
        "PPI",
        "M2",
        "M1",
        "M0",
        "FXVOL",
        "FX",
    ]
    sel_cids: List[str] = ["GBP", "USD", "EUR"]
    sel_xcats: List[str] = ["FXXR", "EQXR", "RIR", "IR"]
    # One style per selected category; `zip` below stops at the shorter list, so
    # the fifth style is unused.
    r_styles: List[str] = [
        "linear",
        "decreasing-linear",
        "sharp-hill",
        "sine",
        "four-bit-sine",
    ]

    # Data for the non-selected cross sections, all categories.
    df: pd.DataFrame = make_test_df(
        cids=list(set(cids) - set(sel_cids)),
        xcats=xcats,
        start="2000-01-01",
    )

    # Selected cross sections get a distinct style per selected category.
    for rstyle, xcatx in zip(r_styles, sel_xcats):
        dfB: pd.DataFrame = make_test_df(
            cids=sel_cids,
            xcats=[xcatx],
            start="2000-01-01",
            style=rstyle,
        )
        df = pd.concat([df, dfB], axis=0)

    # Scale the series so lines are visually separable. The scaling is applied
    # positionally (via `.to_numpy()`): the concatenated frame carries duplicate
    # index labels, so a label-aligned assignment would put values on the wrong
    # rows.
    for ix, cidx in enumerate(sel_cids):
        cid_mask = df["cid"] == cidx
        df.loc[cid_mask, "value"] = df.loc[cid_mask, "value"].to_numpy() * (ix + 1)

    for ix, xcatx in enumerate(sel_xcats):
        xcat_mask = df["xcat"] == xcatx
        df.loc[xcat_mask, "value"] = df.loc[xcat_mask, "value"].to_numpy() * (
            ix * 10 + 1
        )

    # Blacklist values may be given as tuples or lists.
    black = {
        "EUR": ("2012-01-01", "2018-01-01"),
        "GBP": ["2004-01-01", "2007-01-01"],
        "USD": ["2015-01-01", "2018-01-01"],
    }

    # Grid of cross sections with several categories per facet. With
    # `return_fig=True` the figure is built but not shown.
    timelines(
        df=df,
        xcats=sel_xcats,
        xcat_grid=False,
        square_grid=True,
        cids=sel_cids,
        blacklist=black,
        same_y=True,
        xcat_labels={
            "FXXR": "FX Returns",
            "EQXR": "Equity Returns",
            "RIR": "Real Interest Rate",
            "IR": "Interest Rate",
        },
        return_fig=True,
    )

    # Alternative example (single chart with cross-sectional mean):
    # timelines(
    #     df=df,
    #     xcats=sel_xcats[0],
    #     cids=sel_cids,
    #     cs_mean=True,
    #     # xcat_grid=False,
    #     single_chart=True,
    #     blacklist=black,
    # )

    # One category across several cross sections, cumulated, with the
    # cross-sectional mean added to every facet.
    timelines(
        df=df,
        same_y=False,
        xcats=sel_xcats[0],
        cids=sel_cids,
        title=(
            "Plotting multiple cross sections for a single category \n with different "
            "y-axis!"
        ),
        # blacklist=black,
        cumsum=True,
        cid_labels={"USD": "Label 1", "EUR": "Label 2", "GBP": "Label 3"},
        xcat_labels={"FXXR": "Xcat Label"},
        cs_mean=True,
    )