Source code for macrosynergy.learning.forecasting.torch.models.mlps

import torch
import torch.nn as nn

import numbers

class MultiLayerPerceptron(nn.Module):
    r"""
    Multi-layer perceptron models in PyTorch.

    Parameters
    ----------
    n_inputs : int
        Number of input features. Must be at least 1.
    n_latent : Union[int, list[int]]
        Number of latent features in a single hidden layer or list specifying
        the size of each hidden layer. When a list is given it must contain
        more than one element, each at least 1.
    n_outputs : int
        Number of output variables. Must be at least 1.
    encoder_activation : str, optional
        Activation function for the encoder layers. Default is "tanh".
        Other options include "relu" and "sigmoid".
    head_activation : str, optional
        Activation function for the head layers. Default is "identity" for no
        activation. Other options include "tanh", "relu" and "sigmoid".
    fit_encoder_intercept : bool, optional
        Whether to fit intercepts in the encoder layers. Default is False.
    fit_head_intercept : bool, optional
        Whether to fit intercepts in the output head. Default is True.
    dropout_p : float, optional
        Dropout probability for regularization. Default is 0 (no dropout).
        Must satisfy ``0 <= dropout_p < 0.5``. The same probability is applied
        after the activation of every encoder layer.

    Notes
    -----
    A multi-layer perceptron is a feed-forward neural network that learns a
    (hopefully) optimal representation of the feature set for a prediction
    task, or for a collection of tasks. The initial set is transformed into a
    new, "learnt", collection of features. This is the "first hidden layer" of
    the network. Each learnt feature is the composition of the linear
    combination of initial features and a non-linear activation function. The
    choice of activation is currently "relu" (:math:`f(x) = \max(0, x)`),
    "tanh" (:math:`f(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}`), or "sigmoid"
    (:math:`f(x) = \frac{1}{1 + e^{-x}}`).

    This new feature set can be further transformed in the same manner by
    creating a second hidden layer, and so on. The part of the network that
    describes how the initial features are transformed into the final features
    (before mapping to the outputs) is called the "encoder".

    The component that maps the final learnt features to the outputs is called
    the "projection head". When multiple outputs are being modelled, this is
    usually referred to as having a "multi-head" architecture.

    What's the advantage of a feedforward neural network over other models on
    tabular datasets? Structure and customizability. 32 neurons in a hidden
    layer means that 32 features are being learnt. I can shrink these features
    towards priors, if I have any beliefs. I can regularize network outputs to
    encourage smoothness (temporal regularization) and consistency with known
    relationships (spatial regularization). I can customize loss functions to
    optimize economically informed losses rather than generic distance
    metrics. I can penalize correlation against existing strategies, if so
    desired. People often refer to neural network flexibility in the context
    of learning an arbitrarily complex function. While this is true, I would
    use the word "flexibility" to refer to the ability to customize
    architectures and loss functions to suit a particular problem.

    The model allows for dropout regularization, which regularizes a neural
    network by randomly "dropping out" (setting to zero) a fraction of the
    neurons during training. This prevents over-reliance on specific neurons
    and encourages the network to become robust to the design of the neural
    network architecture.

    Future work
    -----------
    - Support for skip connections.
    """

    def __init__(
        self,
        n_inputs,
        n_latent,
        n_outputs,
        encoder_activation="tanh",
        head_activation="identity",
        fit_encoder_intercept=False,
        fit_head_intercept=True,
        dropout_p=0,
    ):
        super().__init__()

        # Checks
        self._check_init_params(
            n_inputs,
            n_latent,
            n_outputs,
            encoder_activation,
            head_activation,
            fit_encoder_intercept,
            fit_head_intercept,
            dropout_p,
        )

        # Attributes
        self.n_inputs = n_inputs
        if isinstance(n_latent, numbers.Integral):
            self.n_latent = [n_latent]
        else:
            # Copy so that later mutation of the caller's list cannot change
            # the architecture recorded on the model.
            self.n_latent = list(n_latent)
        self.n_outputs = n_outputs
        self.encoder_activation = encoder_activation
        self.head_activation = head_activation
        self.fit_encoder_intercept = fit_encoder_intercept
        self.fit_head_intercept = fit_head_intercept
        self.dropout_p = dropout_p

        # Zero-argument factories, one per supported activation name. These
        # are importable callables (not lambdas defined in this method) so
        # that the module instance remains picklable.
        self.activation_map = {
            "tanh": nn.Tanh,
            "relu": MultiLayerPerceptron._inplace_relu,
            "sigmoid": nn.Sigmoid,
            "identity": nn.Identity,
        }

        # Encoder
        self.encoder = self._build_encoder(
            self.n_inputs,
            self.n_latent,
            self.encoder_activation,
            self.fit_encoder_intercept,
            self.dropout_p,
        )

        # Projection head
        self.head = self._build_head(
            self.n_latent[-1],
            self.n_outputs,
            self.head_activation,
            self.fit_head_intercept,
        )

    def forward(self, x):
        """
        Forward pass through the network.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape (batch_size, n_inputs).

        Returns
        -------
        torch.Tensor
            Output tensor of shape (batch_size, n_outputs).
        """
        latent = self.encoder(x)
        output = self.head(latent)
        return output

    @staticmethod
    def _inplace_relu():
        """Return a new in-place ReLU module."""
        return nn.ReLU(inplace=True)

    def _build_encoder(
        self,
        n_inputs,
        n_latent,
        encoder_activation,
        fit_encoder_intercept,
        dropout_p,
    ):
        """
        Build the encoder as a sequence of (linear, activation[, dropout])
        groups, one group per entry in `n_latent`.

        Parameters
        ----------
        n_inputs : int
            Number of input features of the first linear layer.
        n_latent : list[int]
            Width of each hidden layer, in order.
        encoder_activation : str
            Key into `self.activation_map`.
        fit_encoder_intercept : bool
            Whether the linear layers carry a bias term.
        dropout_p : float
            Dropout probability; a dropout layer is added after each
            activation only when this is strictly positive.

        Returns
        -------
        torch.nn.Sequential
            The encoder network.
        """
        # Identify encoder activation
        activation_func = self.activation_map[encoder_activation]

        # Build encoder: every hidden layer uses the same dropout probability.
        layer_sizes = [n_inputs, *n_latent]
        encoder_modules = []
        for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
            encoder_modules.append(
                nn.Linear(in_features, out_features, bias=fit_encoder_intercept)
            )
            encoder_modules.append(activation_func())
            if dropout_p > 0:
                encoder_modules.append(nn.Dropout(p=dropout_p))

        return nn.Sequential(*encoder_modules)

    def _build_head(self, n_latent, n_outputs, head_activation, fit_head_intercept):
        """
        Build the projection head: one linear layer followed by the head
        activation.

        Parameters
        ----------
        n_latent : int
            Number of features produced by the encoder.
        n_outputs : int
            Number of output variables.
        head_activation : str
            Key into `self.activation_map`.
        fit_head_intercept : bool
            Whether the linear layer carries a bias term.

        Returns
        -------
        torch.nn.Sequential
            The projection head.
        """
        head = nn.Sequential(
            nn.Linear(n_latent, n_outputs, bias=fit_head_intercept),
            self.activation_map[head_activation](),
        )
        return head

    def _check_init_params(
        self,
        n_inputs,
        n_latent,
        n_outputs,
        encoder_activation,
        head_activation,
        fit_encoder_intercept,
        fit_head_intercept,
        dropout_p,
    ):
        """
        Validate the constructor arguments.

        Raises
        ------
        TypeError
            If an argument has the wrong type.
        ValueError
            If an argument has the right type but an unsupported value.
        """
        # n_inputs
        if not isinstance(n_inputs, numbers.Integral):
            raise TypeError("n_inputs must be an integer.")
        if n_inputs < 1:
            raise ValueError("n_inputs must be at least 1.")

        # n_latent
        if not isinstance(n_latent, numbers.Integral):
            if not isinstance(n_latent, list):
                raise TypeError(
                    "n_latent must be either an integer or a list of integers."
                )
            if not all(isinstance(x, numbers.Integral) for x in n_latent):
                raise TypeError(
                    "When n_latent is a list, all elements must be integers."
                )
            if len(n_latent) <= 1:
                raise ValueError(
                    "When n_latent is a list, it must contain more than one element."
                )
            if not all(x >= 1 for x in n_latent):
                raise ValueError(
                    "When n_latent is a list, all elements must be at least 1."
                )
        else:
            if n_latent < 1:
                raise ValueError(
                    "When n_latent is an integer, it must be at least 1."
                )

        # n_outputs
        if not isinstance(n_outputs, numbers.Integral):
            raise TypeError("n_outputs must be an integer.")
        if n_outputs < 1:
            raise ValueError("n_outputs must be at least 1.")

        # encoder_activation
        if not isinstance(encoder_activation, str):
            raise TypeError("encoder_activation must be a string.")
        if encoder_activation not in {"tanh", "relu", "sigmoid"}:
            raise ValueError(
                "encoder_activation must be one of 'tanh', 'relu', or 'sigmoid'."
            )

        # head_activation
        if not isinstance(head_activation, str):
            raise TypeError("head_activation must be a string.")
        if head_activation not in {"tanh", "relu", "sigmoid", "identity"}:
            raise ValueError(
                "head_activation must be one of 'tanh', 'relu', 'sigmoid', or 'identity'."
            )

        # fit_encoder_intercept
        if not isinstance(fit_encoder_intercept, bool):
            raise TypeError("fit_encoder_intercept must be a boolean.")

        # fit_head_intercept
        if not isinstance(fit_head_intercept, bool):
            raise TypeError("fit_head_intercept must be a boolean.")

        # dropout_p
        if not isinstance(dropout_p, numbers.Real):
            raise TypeError("dropout_p must be a real number.")
        if not (0 <= dropout_p < 0.5):
            raise ValueError("dropout_p must be between 0 and 0.5.")
if __name__ == "__main__":
    # Demo script: build two example networks and print their structure.
    separator = "========================================"

    # Each entry pairs the banner printed before the model with the
    # constructor keyword arguments used to build it.
    demos = [
        (
            "MLP: 5-32-1 structure, tanh activation",
            dict(
                n_inputs=5,
                n_latent=32,
                n_outputs=1,
                dropout_p=0.1,
            ),
        ),
        (
            "MLP: 10-[64,32,16]-3 structure, relu activation, sigmoid head, encoder intercept, no head intercept",
            dict(
                n_inputs=10,
                n_latent=[64, 32, 16],
                n_outputs=3,
                encoder_activation="relu",
                head_activation="sigmoid",
                fit_encoder_intercept=True,
                fit_head_intercept=False,
                dropout_p=0.1,
            ),
        ),
    ]

    for banner, config in demos:
        print(separator)
        print(banner)
        model = MultiLayerPerceptron(**config)
        print(model)

    print(separator)