Source code for macrosynergy.panel.view_grades

"""
Functions for visualizing data grading and blacklisted periods from a quantamental DataFrame.
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Tuple

from macrosynergy.management.simulate import make_qdf
from macrosynergy.management.utils import reduce_df


[docs]def heatmap_grades(
    df: pd.DataFrame,
    xcats: List[str],
    cids: List[str] = None,
    start: str = None, 
    end: str = None,
    grade: str = "grading",
    title: str = None,
    size: Tuple[float] = None,
):
    """
    Displays a heatmap of the grading for a given set of cross sections and extended

    Parameters
    ----------
    df : ~pandas.Dataframe
        standardized DataFrame with the necessary columns: 'cid', 'xcat', 'real_date'
        and 'grading'.
    xcats : List[str]
        extended categorys to be checked on.
    cids : List[str]
        cross sections to visualize. Default is all in  DataFrame.
    start : str
        earliest date in ISO format. Default is earliest available.
    end : str
        latest date in ISO format. Default is latest available.
    grade : str
        name of column that contains the grades. Default is 'grading'.
    title : str
        string of chart title; defaults depend on type of range plot.
    size : Tuple[float]
        Tuple of width and height of graph. Default is None, meaning it is set in
        accordance with df.
    """

    df["real_date"] = pd.to_datetime(df["real_date"], format="%Y-%m-%d")

    df_cols = list(df.columns)
    grade_error = (
        "Column that contains the grade values must be present in the "
        f"DataFrame: {df_cols}."
    )
    assert grade in df.columns, grade_error

    df, xcats, cids = reduce_df(df, xcats, cids, start, end, out_all=True)
    df = df[["xcat", "cid", "real_date", grade]]
    df[grade] = df[grade].astype(float).round(2)

    df_ags = (
        df.groupby(["xcat", "cid"])
        .mean()
        .reset_index()
        .pivot(index="xcat", columns="cid", values=grade)
    )

    if size is None:
        size = (max(df_ags.shape[0] / 2, 15), max(1, df_ags.shape[1] / 2))
    if title is None:
        sdate = df["real_date"].min().strftime("%Y-%m-%d")
        title = f"Average grade of vintages since {sdate}"
    sns.set(rc={"figure.figsize": size})
    sns.heatmap(
        df_ags,
        cmap="YlOrBr",
        vmin=1,
        vmax=3,
        annot=True,
        fmt=".1f",
        linewidth=1,
        cbar=False,
    )
    plt.xlabel("")
    plt.ylabel("")
    plt.title(title, fontsize=18)
    plt.show()


if __name__ == "__main__":
    cids = ["NZD", "AUD", "CAD", "GBP"]
    xcats = ["XR", "CRY", "GROWTH", "INFL"]
    df_cids = pd.DataFrame(
        index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
    )
    df_cids.loc["AUD",] = ["2000-01-01", "2020-12-31", 0.1, 1]
    df_cids.loc["CAD",] = ["2001-01-01", "2020-11-30", 0, 1]
    df_cids.loc["GBP",] = ["2002-01-01", "2020-11-30", 0, 2]
    df_cids.loc["NZD",] = ["2002-01-01", "2020-09-30", -0.1, 2]

    df_xcats = pd.DataFrame(
        index=xcats,
        columns=["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"],
    )
    df_xcats.loc["XR",] = ["2000-01-01", "2020-12-31", 0.1, 1, 0, 0.3]
    df_xcats.loc["CRY",] = ["2000-01-01", "2020-10-30", 1, 2, 0.95, 1]
    df_xcats.loc["GROWTH",] = ["2001-01-01", "2020-10-30", 1, 2, 0.9, 1]
    df_xcats.loc["INFL",] = ["2001-01-01", "2020-10-30", 1, 2, 0.8, 0.5]

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    dfd["grading"] = "3"
    filter_date = dfd["real_date"] >= pd.to_datetime("2010-01-01")
    filter_cid = dfd["cid"].isin(["NZD", "AUD"])
    dfd.loc[filter_date & filter_cid, "grading"] = "1"
    filter_date = dfd["real_date"] >= pd.to_datetime("2013-01-01")
    filter_xcat = dfd["xcat"].isin(["CRY", "GROWTH"])
    dfd.loc[filter_date & filter_xcat, "grading"] = "2.1"
    filter_xcat = dfd["xcat"] == "XR"
    dfd.loc[filter_xcat, "grading"] = 1

    heatmap_grades(dfd, xcats=["CRY", "GROWTH", "INFL"], cids=cids)

    dfd.info()