# Source code for macrosynergy.learning.forecasting.naive_predictors

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_is_fitted


class NaiveRegressor(BaseEstimator, RegressorMixin):
    """
    Equally weighted unbiased factor model.

    Notes
    -----
    Given a collection of factors that are theoretically positively correlated
    with a dependent variable, a plausible signal is a simple average of those
    factors. This is effectively a linear regression model with zero intercept
    and equal weights for all factors.

    This is a useful benchmark model which works well when the factors are as
    uncorrelated as possible with one another, because it offers a layer of
    diversification on the underlying return drivers. When the user has strong
    priors, this is often a competitive model that is difficult to beat.
    However, it is vital for the features to have been preprocessed to have a
    positive theoretical correlation with the target variable.
    """

    @staticmethod
    def _validated_n_features(X):
        """
        Validate the container type and dimensionality of `X`.

        Parameters
        ----------
        X : pd.DataFrame, pd.Series or np.ndarray
            The input feature matrix.

        Returns
        -------
        int
            Number of feature columns in `X`. One-dimensional inputs (a
            pandas Series or a 1-D numpy array) count as a single feature.

        Raises
        ------
        TypeError
            If `X` is not a pandas DataFrame, pandas Series or numpy array.
        ValueError
            If `X` is a numpy array with fewer than 1 or more than 2
            dimensions.
        """
        if not isinstance(X, (pd.DataFrame, pd.Series, np.ndarray)):
            raise TypeError(
                "X must be a pandas DataFrame, pandas Series or numpy array"
            )
        if isinstance(X, np.ndarray) and ((X.ndim > 2) or (X.ndim < 1)):
            raise ValueError(
                "When X is a numpy array, it must have either 1 or 2 dimensions."
            )

        # Series and 1-D arrays hold one feature; everything else that passed
        # the checks above is two-dimensional.
        if X.ndim == 1:
            return 1
        return X.shape[1]

    def fit(self, X, y=None):
        """
        Fit method.

        Parameters
        ----------
        X : pd.DataFrame, pd.Series or np.ndarray
            The input feature matrix.
        y : pd.DataFrame, pd.Series or np.ndarray
            The target variable. It is accepted for API compatibility and is
            not used.

        Returns
        -------
        self
            The fitted model.

        Raises
        ------
        TypeError
            If `X` is not a pandas DataFrame, pandas Series or numpy array.
        ValueError
            If `X` is a numpy array with fewer than 1 or more than 2
            dimensions.

        Notes
        -----
        This method involves fully trusting one's priors and thus requires no
        learning element. As a consequence, no training set information is
        needed. Only the shape and container type of `X` are recorded
        (`n_`, `p_` and `X_type_`) so that `predict` can check its input
        against them.
        """
        n_features = self._validated_n_features(X)

        # No learning needed since priors are fully trusted
        self.n_ = len(X)
        self.p_ = n_features
        self.X_type_ = type(X)

        return self

    def predict(self, X):
        """
        Predict method.

        Parameters
        ----------
        X : pd.DataFrame, pd.Series or np.ndarray
            The input feature matrix. It must be an instance of the type
            passed to `fit` and have the same number of columns.

        Returns
        -------
        array-like
            One prediction per row of `X`. For two-dimensional inputs this is
            the row-wise mean of the features. For a pandas Series the
            underlying values are returned, and a 1-D numpy array is returned
            as-is (the same object, not a copy).

        Raises
        ------
        TypeError
            If `X` is not a pandas DataFrame, pandas Series or numpy array.
        ValueError
            If `X` is a numpy array with an unsupported number of dimensions,
            is not of the type seen in `fit`, or has a different number of
            columns than the input to `fit`.
        sklearn.exceptions.NotFittedError
            If `fit` has not been called yet.

        Notes
        -----
        The predictions are simply the average of the features across columns
        of the input feature matrix.
        """
        n_features = self._validated_n_features(X)

        # Fail with the standard scikit-learn error rather than a bare
        # AttributeError when predict() is called before fit().
        check_is_fitted(self, ["p_", "X_type_"])

        if not isinstance(X, self.X_type_):
            raise ValueError(
                "X must be of the same type as the input matrix to the fit() method."
            )
        if n_features != self.p_:
            raise ValueError(
                "X must have the same number of columns as the input matrix to the "
                "fit() method."
            )

        # Return the naive signal
        if isinstance(X, pd.DataFrame):
            return np.mean(X.values, axis=1)
        if isinstance(X, pd.Series):
            return X.values
        if X.ndim == 1:
            # A single feature is its own average.
            return X
        return np.mean(X, axis=1)
if __name__ == "__main__":
    # Demo: simulate a small quantamental panel and print the naive signal.
    import macrosynergy.management as msm
    from macrosynergy.management.simulate import make_qdf

    cids = ["AUD", "CAD", "GBP", "USD"]
    xcats = ["XR", "CRY", "GROWTH", "INFL"]
    cols = ["earliest", "latest", "mean_add", "sd_mult", "ar_coef", "back_coef"]

    # Every cross-section shares the same simulation settings.
    df_cids = pd.DataFrame(
        index=cids, columns=["earliest", "latest", "mean_add", "sd_mult"]
    )
    for cid in cids:
        df_cids.loc[cid] = ["2012-01-01", "2020-12-31", 0, 1]

    # Per-category simulation settings, in the column order given by `cols`.
    xcat_settings = {
        "XR": ["2012-01-01", "2020-12-31", 0.1, 1, 0, 0.3],
        "CRY": ["2012-01-01", "2020-12-31", 1, 2, 0.95, 1],
        "GROWTH": ["2012-01-01", "2020-12-31", 1, 2, 0.9, 1],
        "INFL": ["2012-01-01", "2020-12-31", -0.1, 2, 0.8, 0.3],
    }
    df_xcats = pd.DataFrame(index=xcats, columns=cols)
    for xcat, settings in xcat_settings.items():
        df_xcats.loc[xcat] = settings

    dfd = make_qdf(df_cids, df_xcats, back_ar=0.75)

    # Build the panel and drop incomplete rows.
    panel = msm.categories_df(
        df=dfd,
        xcats=xcats,
        cids=cids,
        freq="M",
        lag=1,
        xcat_aggs=["last", "sum"],
    )
    Xy = panel.dropna()

    # Last column is the target; everything before it is a feature.
    X = Xy.iloc[:, :-1]
    y = Xy.iloc[:, -1]

    naive = NaiveRegressor()
    naive.fit(X, y)
    print(naive.predict(X))