"""
Basket class for calculating the returns and carries of baskets of financial contracts
using various weighting methods.
"""
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
import random
from typing import List, Union, Tuple, Dict
from macrosynergy.panel.historic_vol import expo_weights, expo_std, flat_std
from macrosynergy.management.utils import reduce_df_by_ticker
from macrosynergy.panel.converge_row import ConvergeRow
from macrosynergy.management.types import QuantamentalDataFrame
from macrosynergy.management.simulate import make_qdf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
[docs]class Basket(object):
"""
Calculates the returns and carries of baskets of financial contracts using various
weighting methods.
Parameters
----------
df : ~pandas.Dataframe
standardized DataFrame containing the columns: 'cid', 'xcat', 'real_date' and
'value'.
contracts : List[str]
base tickers (combinations of cross-sections and base categories) that define
the contracts that go into the basket.
ret : str
return category postfix to be appended to the contract base; default is
"XR_NSA".
cry : List[str] or str
carry category postfix; default is None. The field can either be a single carry
or multiple carries defined in a List.
start : str
earliest date in ISO 8601 format. Default is None.
end : str
latest date in ISO 8601 format. Default is None.
blacklist : dict
cross-sections with date ranges that should be excluded from the DataFrame. If
one cross-section has several blacklist periods append numbers to the cross-section
code.
ewgts : List[str]
one or more postfixes that may identify exogenous weight categories. Similar to
return postfixes they are appended to base tickers.
.. note::
Each instance of the class will update associated standardised DataFrames,
containing return and carry categories, and external weights.
"""
def __init__(
self,
df: pd.DataFrame,
contracts: List[str],
ret: str = "XR_NSA",
cry: Union[str, List[str]] = None,
start: str = None,
end: str = None,
blacklist: dict = None,
ewgts: List[str] = None,
):
df = QuantamentalDataFrame(df[["cid", "xcat", "real_date", "value"]])
self._as_categorical = df.InitializedAsCategorical
assert isinstance(contracts, list)
c_error = "Contracts must be a list of strings."
assert all(isinstance(c, str) for c in contracts), c_error
assert isinstance(ret, str), "`ret` must be a string"
if isinstance(ewgts, str):
ewgts = [ewgts]
df = reduce_df_by_ticker(
df, start=start, end=end, ticks=None, blacklist=blacklist
)
self.contracts = contracts
self.ret = ret
self.ticks_ret = [con + ret for con in contracts]
dfw_ret = self.pivot_dataframe(df, self.ticks_ret)
self.dfw_ret = dfw_ret.dropna(axis=0, how="all")
self.store_attributes(df, cry, "cry")
self.store_attributes(df, ewgts, "wgt")
self.tickers = self.ticks_ret + self.ticks_cry + self.ticks_wgt
self.start = self.date_check(start)
self.end = self.date_check(end)
self.dfx = reduce_df_by_ticker(df, ticks=self.tickers)
self.dict_retcry = {} # dictionary for collecting basket return/carry dfs.
self.dict_wgs = {} # dictionary for collecting basket return/carry dfs.
[docs] def store_attributes(self, df: pd.DataFrame, pfx: List[str], pf_name: str):
"""
Adds multiple attributes to class based on postfixes that denote carry or
external weight types.
Parameters
----------
df : ~pandas.DataFrame
original, standardised DataFrame.
pfx : List[str]
category postfixes involved in the basket calculation.
pf_name : str
associated name of the postfix "cry" or "wgt". Note: These are [1] flags of
existence of carry and weight strings in class, [2] lists of tickers related to
all postfixes, [3] a dictionary of wide time series panel dataframes for all
postfixes.
"""
pfx_flag = pfx is not None
self.__dict__[pf_name + "_flag"] = pfx_flag
self.__dict__["ticks_" + pf_name] = []
if pfx_flag:
error = f"'{pf_name}' must be a <str> or a <List[str]>."
assert isinstance(pfx, (list, str)), error
pfx = [pfx] if isinstance(pfx, str) else pfx
self.__dict__[pf_name] = pfx
dfws_pfx: Dict[str, pd.DataFrame] = {}
for cat in pfx:
ticks = [con + cat for con in self.contracts]
self.__dict__["ticks_" + pf_name] += ticks
dfws_pfx[cat] = self.pivot_dataframe(df, ticks)
if dfws_pfx[cat].empty:
raise ValueError(f"Empty dataframe for contract-type: {cat}")
missing = set(ticks) - set(dfws_pfx[cat].columns)
if missing:
raise ValueError(
f"Missing tickers in dataframe for contract-type: {cat}: {missing}"
)
else:
dfws_pfx = None
self.__dict__["dfws_" + pf_name] = dfws_pfx
[docs] @staticmethod
def pivot_dataframe(df: QuantamentalDataFrame, tick_list: List[str]):
"""
Reduces the standardised DataFrame to include a subset of the possible tickers
and, subsequently returns a wide dataframe: each column corresponds to a ticker.
Parameters
----------
tick_list : List[str]
list of the respective tickers.
df : ~pandas.DataFrame
standardised dataframe.
Returns
-------
~pandas.DataFrame
wide dataframe.
"""
return (
QuantamentalDataFrame(df=df)
.reduce_df_by_ticker(tickers=tick_list)
.to_wide()
)
[docs] @staticmethod
def date_check(date_string):
"""
Validates that the dates passed are valid timestamp expressions and will convert
to the required form '%Y-%m-%d'. Will raise an assertion if not in the expected
form.
Parameters
----------
date_string : str
valid date expression. For instance, "1st January, 2000."
"""
date_error = "Expected form of string: '%Y-%m-%d'."
if date_string is not None:
try:
pd.Timestamp(date_string).strftime("%Y-%m-%d")
except ValueError:
raise AssertionError(date_error)
else:
return pd.Timestamp(date_string).strftime("%Y-%m-%d")
[docs] @staticmethod
def check_weights(weight: pd.DataFrame):
"""
Checks if all rows in dataframe add up to roughly 1.
Parameters
----------
weight : ~pandas.DataFrame
weight dataframe.
"""
check = weight.sum(axis=1)
c = ~((abs(check - 1) < 1e-6) | (abs(check) < 1e-6))
assert not any(c), f"weights must sum to one (or zero), not: {check[c]}"
[docs] @staticmethod
def max_weight_func(weights: pd.DataFrame, max_weight: float):
"""
Enforces maximum weight caps or - if impossible applies equal weight.
Parameters
----------
weights : ~pandas.DataFrame
Corresponding weight matrix. Multidimensional.
max_weight : float
Upper-bound on the weight allowed for each cross-section.
Returns
-------
~pandas.DataFrame
Will return the modified weight DataFrame.
.. note::
If the maximum weight is
less than the equal weight weight, this replaces the computed weight with the
equal weight. For instance, [np.nan, 0.63, np.nan, np.nan, 0.27] becomes
[np.nan, 0.5, np.nan, np.nan, 0.5]. Otherwise, the function calls the
ConvergeRow Class to ensure all weights "converge" to a value within the upper-
bound. Allow for a margin of error set to 0.001.
"""
dfw_wgs = weights.to_numpy()
for i, row in enumerate(dfw_wgs):
row = ConvergeRow.application(row, max_weight)
weights.iloc[i, :] = row
return weights
[docs] def equal_weight(self, df_ret: pd.DataFrame) -> pd.DataFrame:
"""
Calculates dataframe of equal weights based on available return data.
Parameters
----------
df_ret : ~pandas.DataFrame
wide time-indexed data frame of returns.
Returns
-------
~pandas.DataFrame
dataframe of weights.
Notes
-----
The method determines the number of non-NA cross-sections per timestamp, and
subsequently distributes the weights evenly across non-NA cross-sections.
"""
act_cross = ~df_ret.isnull()
uniform = (1 / act_cross.sum(axis=1)).values
uniform = uniform[:, np.newaxis]
broadcast = np.repeat(uniform, df_ret.shape[1], axis=1)
weight = act_cross.multiply(broadcast)
cols = weight.columns
# Zeroes treated as NaNs.
weight[cols] = weight[cols].replace({0: np.nan})
self.check_weights(weight=weight)
return weight
[docs] def fixed_weight(self, df_ret: pd.DataFrame, weights: List[float]):
"""
Calculates fixed weights based on a single list of values and a corresponding
return panel dataframe.
Parameters
----------
df_ret : ~pandas.DataFrame
Return series matrix. Multidimensional.
weights : List[float]
List of floats determining weight allocation.
Returns
-------
~pandas.DataFrame
panel of weights
"""
act_cross = ~df_ret.isnull()
weights = np.array(weights, dtype=np.float32)
rows = act_cross.shape[0]
broadcast = np.tile(weights, (rows, 1)) # Constructs Array by row repetition.
# Replaces weight factors with zeroes if concurrent return unavailable.
weight = act_cross.multiply(broadcast)
cols = weight.columns
# Zeroes treated as NaNs.
weight[cols] = weight[cols].replace({0: np.nan})
weight_arr = weight.to_numpy() # convert df to np array.
weight[weight.columns] = weight_arr / np.sum(weight_arr, axis=1)[:, np.newaxis]
self.check_weights(weight)
return weight
[docs] def inverse_weight(
self,
dfw_ret: pd.DataFrame,
lback_meth: str = "xma",
lback_periods: int = 21,
remove_zeros: bool = True,
):
"""
Calculates weights inversely proportionate to recent return standard deviations.
Parameters
----------
dfw_ret : ~pandas.DataFrame
panel dataframe of returns.
lback_meth : str
Lookback method for "invsd" weighting method. Default is "xma".
lback_periods : int
Lookback periods. Default is 21. Half-time for "xma" and full lookback
period for "ma".
remove_zeros : Bool
Any returns that are exact zeros will not be included in the lookback window
and prior non-zero values are added to the window instead.
Returns
-------
~pandas.DataFrame
Dataframe of weights.
.. note::
The rolling standard deviation will be calculated either using the standard
moving average (ma) or the exponential moving average (xma). Both will require
returns before a first weight can be computed.
"""
if lback_meth == "ma":
dfwa = dfw_ret.rolling(window=lback_periods).agg(flat_std, remove_zeros)
dfwa *= np.sqrt(252)
else:
half_life = lback_periods
weights = expo_weights(lback_periods * 2, half_life)
dfwa = dfw_ret.rolling(window=lback_periods * 2).agg(
expo_std, w=weights, remove_zeros=remove_zeros
)
cols = dfwa.columns
# Zeroes treated as NaNs.
dfwa[cols] = dfwa[cols].replace({0: np.nan})
df_isd = 1 / dfwa
df_wgts = df_isd / df_isd.sum(axis=1).values[:, np.newaxis]
self.check_weights(df_wgts)
return df_wgts
[docs] def values_weight(
self, dfw_ret: pd.DataFrame, dfw_wgt: pd.DataFrame, weight_meth: str
):
"""
Returns weights based on an external weighting category.
Parameters
----------
dfw_ret : ~pandas.DataFrame
Standard wide dataframe of returns across time and contracts.
dfw_wgt : ~pandas.DataFrame
Standard wide dataframe of weight category values across time and contracts.
weight_meth : str
Returns
-------
~pandas.DataFrame
Dataframe of weights.
"""
negative_condition = np.any((dfw_wgt < 0).to_numpy())
if negative_condition:
dfw_wgt[dfw_wgt < 0] = 0.0
warnings.warn("Negative values in the weight matrix set to zero.")
exo_array = dfw_wgt.to_numpy()
df_bool = ~dfw_ret.isnull()
weights_df = df_bool.multiply(exo_array)
cols = weights_df.columns
# Zeroes treated as NaNs.
weights_df[cols] = weights_df[cols].replace({0: np.nan})
if weight_meth != "values":
weights_df = 1 / weights_df
weights = weights_df.divide(weights_df.sum(axis=1), axis=0)
self.check_weights(weights)
return weights
[docs] def make_weights(
self,
weight_meth: str = "equal",
weights: List[float] = None,
lback_meth: str = "xma",
lback_periods: int = 21,
ewgt: str = None,
max_weight: float = 1.0,
remove_zeros: bool = True,
):
"""
Returns wide dataframe of weights to be used for basket series.
Parameters
----------
weight_meth : str
method used for weighting constituent returns and carry. The parameter can
receive either a single weight method or multiple weighting methods. See
`make_basket` docstring.
weights : List[float]
single list of weights corresponding to the base tickers in `contracts`
argument. This is only relevant for the fixed weight method.
lback_meth : str
look-back method for "invsd" weighting method. Default is Exponential MA,
"ema". The alternative is simple moving average, "ma".
lback_periods : int
look-back periods for "invsd" weighting method. Default is 21. Half-time
for "xma" and full lookback period for "ma".
ewgt : str
Exogenous weight postfix that defines the weight value panel. Only needed
for the 'values' or 'inv_values' method.
max_weight : float
maximum weight of a single contract. Default is 1, i.e zero restrictions.
The purpose of the restriction is to limit concentration within the basket.
remove_zeros : bool
removes the zeros. Default is set to True.
Returns
-------
pd.DataFrame
wide dataframe of contract weights across time.
"""
assert 0.0 < max_weight <= 1.0
assert weight_meth in ["equal", "fixed", "values", "inv_values", "invsd"]
# Apply weight method.
if weight_meth == "equal":
dfw_wgs = self.equal_weight(df_ret=self.dfw_ret)
elif weight_meth == "fixed":
message = "Expects a list of weights."
message_2 = "List of weights must be equal to the number of contracts."
assert isinstance(weights, list), message
assert self.dfw_ret.shape[1] == len(weights), message_2
message_3 = "Expects a list of floating point values."
assert all(isinstance(w, (int, float)) for w in weights), message_3
dfw_wgs = self.fixed_weight(df_ret=self.dfw_ret, weights=weights)
elif weight_meth == "invsd":
error_message = "Lookback method method must be 'ma' or 'xma'."
assert lback_meth in ["xma", "ma"], error_message
assert isinstance(lback_periods, int), "Expects <int>."
dfw_wgs = self.inverse_weight(
dfw_ret=self.dfw_ret,
lback_meth=lback_meth,
lback_periods=lback_periods,
remove_zeros=remove_zeros,
)
elif weight_meth in ["values", "inv_values"]:
assert ewgt in self.wgt, f"{ewgt} is not defined on the instance."
# Lag by one day to be used as weights.
try:
dfw_wgt = self.dfws_wgt[ewgt].shift(1)
except KeyError as e:
print(f"Basket not found: {e}.")
else:
cols = sorted(dfw_wgt.columns)
dfw_ret = dfw_wgt.reindex(cols, axis=1)
dfw_wgt = dfw_wgt.reindex(cols, axis=1)
dfw_wgs = self.values_weight(dfw_ret, dfw_wgt, weight_meth)
else:
raise NotImplementedError(f"Weight method unknown {weight_meth}")
# Remove leading NA rows.
fvi = max(dfw_wgs.first_valid_index(), self.dfw_ret.first_valid_index())
dfw_wgs = dfw_wgs[fvi:]
# Impose cap on cross-section weights.
if max_weight < 1.0:
dfw_wgs = self.max_weight_func(weights=dfw_wgs, max_weight=max_weight)
return dfw_wgs
[docs] @staticmethod
def column_manager(df_cat: pd.DataFrame, dfw_wgs: pd.DataFrame):
"""
Will match the column names of the two dataframes involved in the computation:
either the return & weight dataframes or the carry & weight dataframes. The
pandas multiply operation requires the column names, of both dataframes involved
in the binary operation, to be identical.
Parameters
----------
df_cat : ~pandas.DataFrame
return or carry dataframe.
dfw_wgs : ~pandas.DataFrame
weight dataframe.
Returns
-------
~pandas.DataFrame
modified weight dataframe (column names will map to the other dataframe
received).
"""
df_cat = df_cat.reindex(sorted(df_cat.columns), axis=1)
dfw_wgs = dfw_wgs.reindex(sorted(dfw_wgs.columns), axis=1)
ret_cols = df_cat.columns
weight_cols = dfw_wgs.columns
if all(ret_cols != weight_cols):
dfw_wgs.columns = ret_cols
return dfw_wgs
[docs] def column_weights(self, dfw_wgs: pd.DataFrame):
"""
The weight dataframe is used to compute the basket performance for returns,
carries etc. Therefore, with their broad application, the column names of the
dataframe should correspond to the ticker postfix of each contract.
Parameters
----------
dfw_wgs : ~pandas.DataFrame
weight dataframe.
Returns
-------
~pandas.DataFrame
weight dataframe with updated columns names.
"""
dfw_weight_names = lambda w_name: w_name[: w_name.find(self.w_field)]
if self.wgt_flag and self.exo_w_postfix is not None:
self.w_field = self.exo_w_postfix
else:
self.w_field = self.ret
cols = list(map(dfw_weight_names, dfw_wgs.columns))
dfw_wgs.columns = cols
dfw_wgs.columns.name = "ticker"
return dfw_wgs
[docs] def make_basket(
self,
weight_meth: str = "equal",
weights: List[float] = None,
lback_meth: str = "xma",
lback_periods: int = 21,
ewgt: str = None,
max_weight: float = 1.0,
remove_zeros: bool = True,
basket_name: str = "GLB_ALL",
):
"""
Calculates all basket performance categories.
Parameters
----------
weight_meth : str
method used for weighting constituent returns and carry. The parameter can
receive either a single weight method or multiple weighting methods. The options
are as follows:
- [1] "equal": all constituents with non-NA returns have the same
weight. This is the default.
- [2] "fixed": weights are proportionate to a single
list of values provided which are passed to argument `weights` (each value
corresponds to a single contract).
- [3] "invsd": weights based on inverse to
standard deviations of recent returns.
- [4] "values": weights proportionate to a
panel of values of exogenous weight category.
- [5] "inv_values": weights are
inversely proportionate to of values of exogenous weight category.
weights : List[float]
single list of weights corresponding to the base tickers in `contracts`
argument. This is only relevant for the fixed weight method.
lback_meth : str
look-back method for "invsd" weighting method. Default is Exponential MA,
"ema". The alternative is simple moving average, "ma".
lback_periods : int
look-back periods for "invsd" weighting method. Default is 21. Half-time
for "xma" and full lookback period for "ma".
ewgt : str
Exogenous weight postfix that defines the weight value panel. Only needed
for the 'values' or 'inv_values' method.
max_weight : float
maximum weight of a single contract. Default is 1, i.e zero restrictions.
The purpose of the restriction is to limit concentration within the basket.
remove_zeros : bool
removes the zeros. Default is set to True.
basket_name : str
name of basket base ticker (analogous to contract name) to be used for
return and (possibly) carry are calculated. Default is "GLB_ALL".
"""
assert isinstance(weight_meth, str), "`weight_meth` must be string"
self.exo_w_postfix: str = ewgt
dfw_wgs = self.make_weights(
weight_meth=weight_meth,
weights=weights,
lback_meth=lback_meth,
lback_periods=lback_periods,
ewgt=ewgt,
max_weight=max_weight,
remove_zeros=remove_zeros,
)
select = ["ticker", "real_date", "value"]
dfw_wgs_copy = self.column_manager(df_cat=self.dfw_ret, dfw_wgs=dfw_wgs)
dfw_bret = self.dfw_ret.multiply(dfw_wgs_copy).sum(axis=1)
basket_ret = basket_name + "_" + self.ret
store = [dfw_bret.to_frame(basket_ret)]
if not hasattr(self, "cry_flag"):
raise ValueError("Please initialise the class to use this method. ")
self.cry_flag: bool
if self.cry_flag:
cry_list = []
for cr in self.cry:
dfw_wgs_copy = self.column_manager(
df_cat=self.dfws_cry[cr], dfw_wgs=dfw_wgs
)
self.dfws_cry: Dict[str, pd.DataFrame]
dfw_bcry: pd.Series = (
self.dfws_cry[cr].multiply(dfw_wgs_copy).sum(axis=1)
)
tkr = basket_name + "_" + cr
cry_list.append(dfw_bcry.to_frame(tkr))
store += cry_list
df_retcry = pd.concat(store, axis=1)
self.dict_retcry[basket_name] = df_retcry
self.dict_wgs[basket_name] = self.column_weights(dfw_wgs)
[docs] def weight_visualiser(
self,
basket_name,
start_date: str = None,
end_date: str = None,
subplots: bool = True,
facet_grid: bool = False,
scatter: bool = False,
all_tickers: bool = True,
single_ticker: str = None,
percentage_change: bool = False,
size: Tuple[int, int] = (7, 7),
):
"""
Method used to visualise the weights associated with each contract in the
basket.
Parameters
----------
basket_name : str
name of basket whose weights are visualized
start_date : str
start date of he visualisation period.
end_date : str
end date of the visualization period.
subplots : bool
contract weights are displayed on different plots (True) or on a single plot
(False).
facet_grid : bool
parameter used to break up the plot into multiple cartesian coordinate
systems. If the basket consists of a high number of contracts, using the Facet
Grid is recommended.
scatter : bool
if the facet_grid parameter is set to True there are two options: i) scatter
plot if there a lot of blacklist periods; ii) line plot for continuous series.
all_tickers : bool
if True (default) all weights are displayed. If set to False `single-ticker`
must be specified.
single_ticker : str
individual ticker for further, more detailed, analysis.
percentage_change : bool
graphical display used to further assimilate the fluctuations in the
contract's weight. The graphical display is limited to a single contract.
Therefore, pass the ticker into the parameter "single_ticker".
size : Tuple[int, int]
size of the plot. Default is (7, 7).
"""
date_conv = lambda d: pd.Timestamp(d).strftime("%Y-%m-%d %X")
try:
dfw_wgs = self.dict_wgs[basket_name]
except KeyError as e:
print(f"Basket not found - call make_basket() method first: {e}.")
else:
if isinstance(start_date, str) and isinstance(end_date, str):
self.date_check(start_date)
self.date_check(end_date)
start_date = date_conv(start_date)
end_date = date_conv(end_date)
elif isinstance(start_date, str):
self.date_check(start_date)
start_date = date_conv(start_date)
end_date = dfw_wgs.index[-1]
elif isinstance(end_date, str):
self.date_check(end_date)
start_date = dfw_wgs.index[0]
end_date = date_conv(end_date)
else:
start_date = dfw_wgs.index[0]
end_date = dfw_wgs.index[-1]
error_1 = f"{start_date} unavailable in weight dataframe."
c = dfw_wgs.index
assert start_date in c, error_1
error_2 = f"{end_date} unavailable in weight dataframe."
assert end_date in c, error_2
dfw_wgs = dfw_wgs.truncate(before=start_date, after=end_date)
if not all_tickers:
error_3 = "The parameter, 'single_ticker', must be a <str>."
assert isinstance(single_ticker, str), error_3
error_4 = (
f"Ticker not present in the weight dataframe. Available "
f"tickers: {list(dfw_wgs.columns)}."
)
assert single_ticker in dfw_wgs.columns, error_4
dfw_wgs = dfw_wgs[[single_ticker]]
if facet_grid:
dfw_wgs.columns.name = "ticker"
df_stack = dfw_wgs.stack().to_frame("value").reset_index()
df_stack = df_stack.sort_values(["ticker", "real_date"])
no_contracts = dfw_wgs.shape[1]
facet_cols = 4 if no_contracts >= 8 else 3
sns.set(rc={"figure.figsize": size})
fg = sns.FacetGrid(
df_stack, col="ticker", col_wrap=facet_cols, sharey=True
)
scatter_error = (
f"Boolean object expected - instead received " f"{type(scatter)}."
)
assert isinstance(scatter, bool), scatter_error
if scatter:
fg.map_dataframe(sns.scatterplot, x="real_date", y="value")
else:
# Seaborn will linearly interpolate NaN values which is visually
# misleading. Therefore, aim to negate the operation.
fg.map_dataframe(
sns.lineplot,
x="real_date",
y="value",
hue=df_stack["value"].isna().cumsum(),
palette=["blue"] * df_stack["value"].isna().cumsum().nunique(),
estimator=None,
markers=True,
)
equal_value = 1 / no_contracts
fg.map(plt.axhline, y=equal_value, linestyle="--", color="gray", lw=0.5)
# Set axes labels of individual charts.
fg.set_axis_labels("", "")
fg.set_titles(col_template="{col_name}")
fg.fig.suptitle("Contract weights in basket", y=1.02)
else:
plt.rcParams["figure.figsize"] = size
dfw_wgs.plot(
subplots=subplots, title="Weight Values Timestamp", legend=True
)
plt.xlabel("real_date, years")
date_func = lambda d: pd.Timestamp(d).strftime("%Y-%m-%d")
if percentage_change:
error_5 = (
"Percentage change display is applied to a single ticker. Set "
"the parameter 'all_tickers' to False."
)
assert dfw_wgs.shape[1] == 1, error_5
plt.rcParams["figure.figsize"] = size
fig, ax = plt.subplots()
dfw_pct = dfw_wgs.pct_change(periods=1) * 100
n_index = np.array(list(map(date_func, dfw_pct.index)))
dfw_pct = dfw_pct.set_index(keys=n_index)
dfw_pct.plot(kind="bar", color="coral", ax=ax)
ax.xaxis.set_major_locator(mdates.MonthLocator())
plt.xticks(rotation=0)
ax.set_ylabel("Percentage Change in weight.")
ax.legend()
plt.show()
[docs] @staticmethod
def column_split(df: pd.DataFrame):
"""
Receives a dataframe with the columns 'ticker', 'real_date' and 'value' and
returns a standardised dataframe with the columns 'cid', 'xcat', 'real_date' and
'value. The 'ticker' column is broken up to produce the two new columns.
Parameters
----------
df : ~pandas.DataFrame
Returns
-------
~pandas.DataFrame
standardised dataframe.
"""
select = ["cid", "xcat", "real_date", "value"]
cid_func = lambda t: t.split("_")[0]
xcat_func = lambda t: "_".join(t.split("_")[1:])
cids_w_df = list(map(cid_func, df["ticker"]))
df["cid"] = np.array(cids_w_df)
df = df.rename(columns={"ticker": "xcat"})
df["xcat"] = np.array(list(map(xcat_func, df["xcat"])))
df = df[select]
return df
[docs] def return_basket(self, basket_names: Union[str, List[str]] = None):
"""
Return standardized dataframe of basket performance data based on one or more
weighting methods.
Parameters
----------
basket_names : str or List[str]
single basket name or list for which performance data are to be returned. If
none is given all baskets added to the instance are selected.
Returns
-------
QuantamentalDataFrame
standardized DataFrame with the basket return and (possibly) carry data in
standard form, i.e. columns 'cid', 'xcat', 'real_date' and 'value'.
"""
if basket_names is None:
basket_names = list(self.dict_retcry.keys())
basket_error = "String or List of basket names expected."
assert isinstance(basket_error, (list, str)), basket_error
if isinstance(basket_names, str):
basket_names = [basket_names]
ret_baskets = []
for b in basket_names:
try:
dfw_retcry = self.dict_retcry[b]
except KeyError as e:
print(f"Basket not found - call make_basket() method first: {e}.")
else:
qdf = QuantamentalDataFrame.from_wide(dfw_retcry)
ret_baskets.append(qdf)
return_df = QuantamentalDataFrame.from_qdf_list(ret_baskets)
return QuantamentalDataFrame(
df=return_df, _initialized_as_categorical=self._as_categorical
).to_original_dtypes()
[docs] def return_weights(self, basket_names: Union[str, List[str]] = None):
"""
Return the standardised dataframe containing the corresponding weights used to
compute the basket.
Parameters
----------
basket_names : str or List[str]
single basket name or list for which performance data are to be returned. If
none is given all baskets added to the instance are selected.
Returns
-------
QuantamentalDataFrame
standardized DataFrame with basket weights.
"""
if basket_names is None:
basket_names = list(self.dict_wgs.keys())
basket_error = "String or List of basket names expected."
assert isinstance(basket_error, (list, str)), basket_error
if isinstance(basket_names, str):
basket_names = [basket_names]
weight_baskets = []
select = ["cid", "xcat", "real_date", "value"]
for b in basket_names:
try:
dfw_wgs: pd.DataFrame = self.dict_wgs[b]
except KeyError as e:
print(f"Basket not found - call make_basket() method first: {e}.")
else:
pfx = f"_{b}_WGT"
qdf = QuantamentalDataFrame.from_wide(dfw_wgs).rename_xcats(postfix=pfx)
weight_baskets.append(qdf)
return QuantamentalDataFrame(
df=QuantamentalDataFrame.from_qdf_list(weight_baskets),
_initialized_as_categorical=self._as_categorical,
).to_original_dtypes()
if __name__ == "__main__":
cids = ["AUD", "GBP", "NZD", "USD"]
xcats = [
"FXXR_NSA",
"FXCRY_NSA",
"FXCRR_NSA",
"EQXR_NSA",
"EQCRY_NSA",
"EQCRR_NSA",
"FXWBASE_NSA",
"EQWBASE_NSA",
]
df_cids = pd.DataFrame(
index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
)
df_cids.loc["AUD"] = ["2000-01-01", "2022-03-14", 0, 1]
df_cids.loc["GBP"] = ["2001-01-01", "2022-03-14", 0, 2]
df_cids.loc["NZD"] = ["2002-01-01", "2022-03-14", 0, 3]
df_cids.loc["USD"] = ["2000-01-01", "2022-03-14", 0, 4]
df_xcats = pd.DataFrame(
index=xcats,
columns=["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"],
)
df_xcats.loc["FXXR_NSA"] = ["2010-01-01", "2022-03-14", 0, 1, 0, 0.2]
df_xcats.loc["FXCRY_NSA"] = ["2010-01-01", "2022-03-14", 1, 1, 0.9, 0.2]
df_xcats.loc["FXCRR_NSA"] = ["2010-01-01", "2022-03-14", 0.5, 0.8, 0.9, 0.2]
df_xcats.loc["EQXR_NSA"] = ["2010-01-01", "2022-03-14", 0.5, 2, 0, 0.2]
df_xcats.loc["EQCRY_NSA"] = ["2010-01-01", "2022-03-14", 2, 1.5, 0.9, 0.5]
df_xcats.loc["EQCRR_NSA"] = ["2010-01-01", "2022-03-14", 1.5, 1.5, 0.9, 0.5]
df_xcats.loc["FXWBASE_NSA"] = ["2010-01-01", "2022-02-01", 1, 1.5, 0.8, 0.5]
df_xcats.loc["EQWBASE_NSA"] = ["2010-01-01", "2022-02-01", 1, 1.5, 0.9, 0.5]
random.seed(2)
dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)
black = {"AUD": ["2010-01-01", "2013-12-31"], "GBP": ["2010-01-01", "2013-12-31"]}
contracts = ["AUD_FX", "AUD_EQ", "NZD_FX", "GBP_EQ", "USD_EQ"]
gdp_figures = [17.0, 17.0, 41.0, 9.0, 250.0]
contracts_1 = ["AUD_FX", "GBP_FX", "NZD_FX", "USD_EQ"]
# First test. Multiple carries. Equal weight method.
# The main aspect to check in the code is that basket performance has been applied to
# both the return category and the multiple carry categories.
dfd["grading"] = np.ones(dfd.shape[0])
basket_1 = Basket(
df=dfd,
contracts=contracts_1,
ret="XR_NSA",
cry=["CRY_NSA", "CRR_NSA"],
blacklist=black,
)
basket_1.make_basket(weight_meth="equal", max_weight=0.55, basket_name="GLB_EQUAL")
basket_1.make_basket(
weight_meth="fixed",
max_weight=0.55,
weights=[1 / 6, 1 / 6, 1 / 6, 1 / 2],
basket_name="GLB_FIXED",
)
# show the weights of the GLB_FIXED basket
basket_1.weight_visualiser(basket_name="GLB_FIXED", subplots=False, size=(10, 5))