Source code for macrosynergy.management.types.qdf.classes

"""
Module hosting custom types and meta-classes for use across the package.
"""

from typing import Optional, Mapping, Union, Sequence, List, Tuple, Dict
import pandas as pd
from macrosynergy.compat import PD_2_0_OR_LATER
from .methods import (
    get_col_sort_order,
    change_column_format,
    _get_tickers_series,
    reduce_df,
    reduce_df_by_ticker,
    update_df,
    apply_blacklist,
    qdf_to_wide_df,
    check_is_categorical,
    add_nan_series,
    drop_nan_series,
    rename_xcats,
    qdf_from_timeseries,
    create_empty_categorical_qdf,
    concat_qdfs,
)
from .base import QuantamentalDataFrameBase


[docs]class QuantamentalDataFrame(QuantamentalDataFrameBase): """ Type extension of `pd.DataFrame` for Quantamental DataFrames. Usage: >>> df: pd.DataFrame = load_data() >>> qdf = QuantamentalDataFrame(df) """ def __init__( self, df: Optional[pd.DataFrame] = None, categorical: bool = True, _initialized_as_categorical: Optional[bool] = None, ): if df is not None: df_err = "Input must be a standardised Quantamental DataFrame." if not isinstance(df, pd.DataFrame): raise TypeError(df_err) if not isinstance(df, QuantamentalDataFrame): if "real_date" in df.columns: df["real_date"] = pd.to_datetime(df["real_date"]) if not isinstance(df, QuantamentalDataFrame): raise ValueError(df_err) if type(df) is QuantamentalDataFrame: if _initialized_as_categorical is None: _initialized_as_categorical = df.InitializedAsCategorical else: if df.columns.tolist() != get_col_sort_order(df): df = df[get_col_sort_order(df)] if PD_2_0_OR_LATER: super().__init__(df) else: super().__init__(df.copy()) # pragma: no cover _check_cat = check_is_categorical(self) if _initialized_as_categorical is None: self.InitializedAsCategorical = _check_cat else: if not isinstance(_initialized_as_categorical, bool): raise TypeError("`_initialized_as_categorical` must be a boolean.") self.InitializedAsCategorical = _initialized_as_categorical if categorical: if not _check_cat: self.to_categorical() else: self.to_string_type()
[docs] def is_categorical(self) -> bool: """ Returns True if the QuantamentalDataFrame is categorical. Returns ------- bool True if the QuantamentalDataFrame is categorical """ return check_is_categorical(self)
[docs] def to_categorical(self) -> "QuantamentalDataFrame": """ Converts the QuantamentalDataFrame to a categorical DataFrame. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with categorical columns. """ return change_column_format(self, cols=self._StrIndexCols, dtype="category")
[docs] def to_string_type(self) -> "QuantamentalDataFrame": """ Converts the QuantamentalDataFrame to a string DataFrame. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with string columns. """ return change_column_format(self, cols=self._StrIndexCols, dtype="object")
[docs] def to_original_dtypes(self) -> "QuantamentalDataFrame": """ Converts the QuantamentalDataFrame to its original dtypes (using the `InitialisedAsCategorical` attribute). Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with its original dtypes. The dtype is determined by the `InitialisedAsCategorical` attribute. The output dtype will be either 'category' or 'object'. """ if self.InitializedAsCategorical: return self.to_categorical() return self.to_string_type()
[docs] def list_tickers(self) -> List[str]: """ List all tickers in the QuantamentalDataFrame. Returns ------- List[str] A list of all tickers in the QuantamentalDataFrame. """ ltickers: List[str] = sorted(_get_tickers_series(self).unique()) return ltickers
[docs] def add_ticker_column(self) -> "QuantamentalDataFrame": """ Add a ticker column to the QuantamentalDataFrame. `ticker` is a combination of `cid` and `xcat` columns. i.e. `ticker = cid_xcat`. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with a `ticker` column. """ ticker_col = _get_tickers_series(self) self["ticker"] = ticker_col return self
[docs] def drop_ticker_column(self) -> "QuantamentalDataFrame": """ Drop the ticker column from the QuantamentalDataFrame. Raises ------ ValueError If no `ticker` column is found in the DataFrame. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame without the `ticker` column. """ if "ticker" not in self.columns: raise ValueError("No `ticker` column found in the DataFrame.") return self.drop(columns=["ticker"])
[docs] def reduce_df( self, cids: Optional[Sequence[str]] = None, xcats: Optional[Sequence[str]] = None, start: Optional[str] = None, end: Optional[str] = None, blacklist: Dict[str, Sequence[Union[str, pd.Timestamp]]] = None, out_all: bool = False, intersect: bool = False, ) -> Union[ "QuantamentalDataFrame", Tuple["QuantamentalDataFrame", List[str], List[str]] ]: """ Filter DataFrame by `cids`, `xcats`, and `start` & `end` dates. Parameters ---------- cids : Optional[Sequence[str]], optional List of CIDs to filter by, by default None xcats : Optional[Sequence[str]], optional List of XCATs to filter by, by default None start : Optional[str], optional Start date to filter by, by default None end : Optional[str], optional End date to filter by, by default None blacklist : Dict[str, Sequence[Union[str, pd.Timestamp]]], optional Blacklist to apply to the DataFrame, by default None out_all : bool, optional If True, return the filtered DataFrame, the filtered XCATs, and the filtered CIDs, by default False Returns ------- QuantamentalDataFrame The filtered QuantamentalDataFrame. (if out_all=False, default) Tuple[QuantamentalDataFrame, List[str], List[str]] The filtered QuantamentalDataFrame, the filtered XCATs, and the filtered CIDs. (if out_all=True) """ result = reduce_df( df=self, cids=cids, xcats=xcats, start=start, end=end, blacklist=blacklist, out_all=out_all, intersect=intersect, ) if out_all: result, _xcats, _cids = result result = QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, ) if out_all: return result, _xcats, _cids return result
[docs] def reduce_df_by_ticker( self, tickers: Sequence[str], start: Optional[str] = None, end: Optional[str] = None, blacklist: Mapping[str, Sequence[Union[str, pd.Timestamp]]] = None, ) -> "QuantamentalDataFrame": """ Filter DataFrame by `ticker`, `start` & `end` dates. Parameters ---------- tickers : Sequence[str] List of tickers to filter by start : Optional[str], optional Start date to filter by, by default None end : Optional[str], optional End date to filter by, by default None blacklist : Mapping[str, Sequence[Union[str, pd.Timestamp]]], optional Blacklist to apply to the DataFrame, by default None Returns ------- QuantamentalDataFrame The filtered QuantamentalDataFrame. """ result = reduce_df_by_ticker( df=self, tickers=tickers, start=start, end=end, blacklist=blacklist, ) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def apply_blacklist( self, blacklist: Mapping[str, Sequence[Union[str, pd.Timestamp]]], ): """ Apply a blacklist to the QuantamentalDataFrame. """ result = apply_blacklist(df=self, blacklist=blacklist) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def update_df( self, df_add: pd.DataFrame, xcat_replace: bool = False, ) -> "QuantamentalDataFrame": """ Update the QuantamentalDataFrame with a new DataFrame. Parameters ---------- df_add : pd.DataFrame DataFrame to update the QuantamentalDataFrame with xcat_replace : bool, optional If True, replace the XCATs in the QuantamentalDataFrame with the XCATs in `df_add`, by default False Returns ------- QuantamentalDataFrame The updated QuantamentalDataFrame. """ result = update_df(df=self, df_add=df_add, xcat_replace=xcat_replace) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def add_nan_series( self, ticker: str, start: Optional[str] = None, end: Optional[str] = None, ) -> "QuantamentalDataFrame": """ Add a NaN series to the QuantamentalDataFrame. Parameters ---------- ticker : str Ticker to add the NaN series to start : Optional[str], optional Start date of the NaN series, by default None end : Optional[str], optional End date of the NaN series, by default None Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with the NaN series added. """ result = add_nan_series(df=self, ticker=ticker, start=start, end=end) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def drop_nan_series( self, column: str = "value", raise_warning: bool = True, ) -> "QuantamentalDataFrame": """ Drop NaN series from the QuantamentalDataFrame. Parameters ---------- column : str, optional Column to check for NaN series, by default "value" raise_warning : bool, optional If True, raise a warning if NaN series are dropped, by default True Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with NaN series dropped """ result = drop_nan_series(df=self, column=column, raise_warning=raise_warning) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def rename_xcats( self, xcat_map: Optional[Mapping[str, str]] = None, select_xcats: Optional[List[str]] = None, postfix: Optional[str] = None, prefix: Optional[str] = None, name_all: Optional[str] = None, fmt_string: Optional[str] = None, ) -> "QuantamentalDataFrame": """ Rename xcats in the QuantamentalDataFrame. Parameters ---------- xcat_map : Optional[Mapping[str, str]], optional Mapping of xcats to rename, by default None select_xcats : Optional[List[str]], optional List of xcats to rename, by default None postfix : Optional[str], optional Postfix to add to the xcats, by default None prefix : Optional[str], optional Prefix to add to the xcats, by default None name_all : Optional[str], optional Name to rename all xcats to, by default None fmt_string : Optional[str], optional Format string to rename xcats, by default None Returns ------- QuantamentalDataFrame The QuantamentalDataFrame with the xcats renamed. """ result = rename_xcats( df=self, xcat_map=xcat_map, select_xcats=select_xcats, postfix=postfix, prefix=prefix, name_all=name_all, fmt_string=fmt_string, ) return QuantamentalDataFrame( result, categorical=self.is_categorical(), _initialized_as_categorical=self.InitializedAsCategorical, )
[docs] def to_wide( self, value_column: str = "value", ) -> "QuantamentalDataFrame": """ Pivot the QuantamentalDataFrame. Parameters ---------- value_column : str, optional Column to pivot, by default "value" Returns ------- QuantamentalDataFrame The pivoted QuantamentalDataFrame, with each ticker as a column with the values of the `value_column` and the index as the `real_date`. """ result = qdf_to_wide_df(self, value_column=value_column) if self.InitializedAsCategorical: return result result.columns = result.columns.astype("object") return result
[docs] @classmethod def from_timeseries( cls, timeseries: pd.Series, ticker: str, metric: str = "value", ) -> "QuantamentalDataFrame": """ Convert a timeseries DataFrame to a QuantamentalDataFrame. Parameters ---------- timeseries : pd.Series Timeseries to convert to a QuantamentalDataFrame ticker : str Ticker to assign to the timeseries metric : str, optional Metric to assign to the timeseries, by default "value" Returns ------- QuantamentalDataFrame The QuantamentalDataFrame created from the timeseries. """ return QuantamentalDataFrame( qdf_from_timeseries( timeseries=timeseries, ticker=ticker, metric=metric, ) )
[docs] @classmethod def from_long_df( cls, df: pd.DataFrame, real_date_column: str = "real_date", value_column: str = "value", cid: Optional[str] = None, xcat: Optional[str] = None, ticker: Optional[str] = None, categorical: bool = True, ) -> "QuantamentalDataFrame": """ Convert a long DataFrame to a QuantamentalDataFrame. This is useful when the DataFrame may contain only a `cid` or `xcat` column, or in cases where the `cid` and `xcat` columns are not named as such. Parameters ---------- df : pd.DataFrame Long DataFrame to convert to a QuantamentalDataFrame real_date_column : str, optional Column name of the real date, by default "real_date" value_column : str, optional Column name of the value, by default "value" cid : Optional[str], optional `cid` to assign to the DataFrame, by default None. If not specified, the `cid` column must be present in the DataFrame. xcat : Optional[str], optional `xcat` to assign to the DataFrame, by default None ticker : Optional[str], optional Ticker to assign to the DataFrame, by default None categorical : bool, optional If True, convert the DataFrame to categorical, by default True Raises ------ ValueError If the `real_date_column` or `value_column` are not found in the DataFrame, or if `ticker` is specified with `cid` or `xcat`. ValueError If the input DataFrame is empty. ValueError If the `cid` or `xcat` columns are not found in the DataFrame, and have not been specified in the function call. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame created from the long DataFrame. """ if real_date_column not in df.columns: raise ValueError(f"No `{real_date_column}` column found in the DataFrame.") else: df = df.rename(columns={real_date_column: "real_date"}) real_date_column = "real_date" if value_column not in df.columns: raise ValueError(f"No `{value_column}` column found in the DataFrame.") if len(df) == 0: raise ValueError("Input DataFrame is empty.") if ticker is not None: if bool(cid) or bool(xcat): raise ValueError("Cannot specify `ticker` with `cid` or `xcat`.") cid, xcat = ticker.split("_", 1) errstr = "No `{}` column found in the DataFrame and `{}` not specified." for var_, name_ in zip([cid, xcat], ["cid", "xcat"]): if name_ not in df.columns and var_ is None: raise ValueError(errstr.format(name_, name_)) new_df = df[[real_date_column, value_column]].copy() # if the cid col is there in the df copy it over as a categorical type if "cid" in df.columns: new_df["cid"] = df["cid"].astype("category") # if the xcat col is there in the df copy it over as a categorical type if "xcat" in df.columns: new_df["xcat"] = df["xcat"].astype("category") # if the cid was specfied then overwrite and initialise as a single cid category dtype if cid is not None: new_df["cid"] = pd.Categorical.from_codes( codes=[0] * len(new_df), categories=[cid] ) # if the xcat was specfied then overwrite and initialise as a single xcat category dtype if xcat is not None: new_df["xcat"] = pd.Categorical.from_codes( codes=[0] * len(new_df), categories=[xcat] ) return QuantamentalDataFrame(new_df, categorical=categorical)
[docs] @classmethod def from_qdf_list( cls, qdf_list: List["QuantamentalDataFrame"], categorical: bool = True, ) -> "QuantamentalDataFrame": """ Concatenate a list of QuantamentalDataFrames into a single QuantamentalDataFrame. Parameters ---------- qdf_list : List[QuantamentalDataFrame] List of QuantamentalDataFrames to concatenate categorical : bool, optional If True, convert the DataFrame to categorical, by default True Raises ------ TypeError If any element in the list is not a QuantamentalDataFrame. ValueError If the input list is empty. Returns ------- QuantamentalDataFrame The concatenated QuantamentalDataFrame. """ if not all(isinstance(qdf, QuantamentalDataFrame) for qdf in qdf_list): raise TypeError("All elements in the list must be QuantamentalDataFrames.") if not qdf_list: raise ValueError("Input list is empty.") qdf_list = concat_qdfs(qdf_list) return QuantamentalDataFrame(qdf_list, categorical=categorical)
[docs] @classmethod def from_wide( cls, df: pd.DataFrame, value_column: str = "value", categorical: bool = True, ) -> "QuantamentalDataFrame": """ Convert a wide DataFrame to a QuantamentalDataFrame. Parameters ---------- df : pd.DataFrame Wide DataFrame to convert to a QuantamentalDataFrame value_column : str, optional Column to pivot, by default "value" categorical : bool, optional If True, convert the DataFrame to categorical, by default True Raises ------ TypeError If `df` is not a pandas DataFrame. ValueError If `df` does not have a datetime index. ValueError If all columns are not in the format 'cid_xcat'. Returns ------- QuantamentalDataFrame The QuantamentalDataFrame created from the wide DataFrame. """ if not isinstance(df, pd.DataFrame): raise TypeError("`df` must be a pandas DataFrame.") if not isinstance(df.index, pd.DatetimeIndex): raise ValueError("`df` must have a datetime index.") if not isinstance(value_column, str): raise TypeError("`value_column` must be a string.") if not all("_" in col for col in df.columns): raise ValueError("All columns must be in the format 'cid_xcat'.") df.index.name = "real_date" df.columns.name = None tickers = df.columns.tolist() qdfs_list: List[QuantamentalDataFrame] = [] for tkr in tickers: cid, xcat = tkr.split("_", 1) qdf = df[[tkr]].reset_index().rename(columns={tkr: value_column}) df = df.drop(columns=[tkr]) qdf = QuantamentalDataFrame.from_long_df( df=qdf, cid=cid, xcat=xcat, value_column=value_column ) qdfs_list.append(qdf) return QuantamentalDataFrame.from_qdf_list(qdfs_list, categorical=categorical)
[docs] @classmethod def create_empty_df( cls, cid: Optional[str] = None, xcat: Optional[str] = None, ticker: Optional[str] = None, metrics: List[str] = ["value"], date_range: Optional[pd.DatetimeIndex] = None, start: Optional[str] = None, end: Optional[str] = None, categorical: bool = True, ) -> "QuantamentalDataFrame": """ Create an empty QuantamentalDataFrame. Parameters ---------- cid : Optional[str], optional `cid` to assign to the DataFrame, by default None xcat : Optional[str], optional `xcat` to assign to the DataFrame, by default None ticker : Optional[str], optional Ticker to assign to the DataFrame, by default None. If specified, `cid` and `xcat` must not be specified. metrics : List[str], optional Metrics to assign to the DataFrame, by default ["value"] date_range : Optional[pd.DatetimeIndex], optional Date range to assign to the DataFrame, by default None. If not specified, `start` and `end` must be specified. start : Optional[str], optional Start date to assign to the DataFrame, by default None end : Optional[str], optional End date to assign to the DataFrame, by default None categorical : bool, optional If True, convert the DataFrame to categorical, by default True Returns ------- QuantamentalDataFrame The empty QuantamentalDataFrame. """ qdf = create_empty_categorical_qdf( cid=cid, xcat=xcat, ticker=ticker, metrics=metrics, date_range=date_range, start=start, end=end, ) return QuantamentalDataFrame(qdf, categorical=categorical)