from abc import abstractmethod

import numpy as np

from neural_net.transform_layer import Layer


class ActivationLayer(Layer):
    def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
        super().__init__('ActivationLayer', index, input_dim, output_dim)
        self.type = 'ActivationLayer'
        self.subtype = ''
        self.inputs = np.array([])
        self.output = np.array([])
        self.z = np.array([])
        self.gradient_clip = 1.0

        # Initialize weights and biases, using the provided values if given
        if weights is not None:
            self.weights = weights
        else:
            self.initialize_weights()
        if biases is not None:
            self.biases = biases
        else:
            self.initialize_biases()

    def describe(self):
        return f"{self.type} ({self.input_dim}x{self.output_dim} neurons, {self.subtype} activation)"

    @abstractmethod
    def initialize_weights(self):
        pass

    @abstractmethod
    def initialize_biases(self):
        pass

    def forward(self, inputs: np.ndarray):
        self.inputs = inputs
        self.z = np.dot(self.inputs, self.weights) + self.biases
        # Calls the subclass's activation function (e.g. Sigmoid)
        self.output = self.activation(self.z)
        return self.output

    def backward(self, dL_dout, learning_rate):
        """
        Backpropagate the error and update the weights and biases.

        :param dL_dout: Gradient of the loss with respect to the layer outputs
        :param learning_rate: Learning rate for the parameter updates
        :return: Tuple (dL_dinputs, dL_dweights, dL_dbias, weights, biases),
                 where dL_dinputs is the gradient passed to the previous layer
        """
        # Activation derivative dout/dz:
        # how much the activation output changes with respect to the
        # pre-activation value z. Note that self.output (the already-activated
        # values) is passed in, so e.g. the Sigmoid derivative
        # sigma(z) * (1 - sigma(z)) is computed as output * (1 - output).
        dout_dz = self.activation_derivative(self.output)

        # Gradient of the loss with respect to the weights (dL/dweights):
        # how much the loss changes when the weights change.
        # Formula: dL/dweights = inputs.T @ (dL/dout * dout/dz)
        # The result is clipped elementwise to +/- self.gradient_clip to keep
        # updates bounded; the bias gradient is left unclipped.
        dL_dweights = np.clip(np.dot(self.inputs.T, dL_dout * dout_dz),
                              -self.gradient_clip, self.gradient_clip)
        dL_dbias = np.sum(dL_dout * dout_dz, axis=0)

        # Gradient of the loss with respect to the inputs (dL/dinputs),
        # needed to backpropagate further into the previous layer.
        # Formula: dL/dinputs = (dL/dout * dout/dz) @ weights.T
        dL_dinputs = np.dot(dL_dout * dout_dz, self.weights.T)

        # Gradient-descent step on the parameters
        self.weights -= learning_rate * dL_dweights
        self.biases -= learning_rate * dL_dbias

        return dL_dinputs, dL_dweights, dL_dbias, self.weights, self.biases

    def reset(self):
        self.initialize_weights()
        self.initialize_biases()

    @abstractmethod
    def activation(self, raw_outputs: np.ndarray):
        """Apply the activation function (Sigmoid, ReLU, etc.)."""
        pass

    @abstractmethod
    def activation_derivative(self, outputs: np.ndarray):
        """Compute the derivative of the activation function."""
        pass
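

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of this module's API): a minimal Sigmoid
# subclass showing how the abstract hooks above fit together, plus a smoke
# test of one forward/backward pass. The class name, the Xavier-style weight
# initialization, and the test values are assumptions for demonstration only.
# It also assumes the Layer base class sets self.input_dim and
# self.output_dim, as describe() implies.
# ---------------------------------------------------------------------------
class _ExampleSigmoidLayer(ActivationLayer):

    def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
        super().__init__(index, input_dim, output_dim, weights, biases)
        self.subtype = 'Sigmoid'

    def initialize_weights(self):
        # Xavier-style initialization (an assumed scheme; the real
        # subclasses may use a different one)
        self.weights = np.random.randn(self.input_dim, self.output_dim) / np.sqrt(self.input_dim)

    def initialize_biases(self):
        self.biases = np.zeros(self.output_dim)

    def activation(self, raw_outputs: np.ndarray):
        return 1.0 / (1.0 + np.exp(-raw_outputs))

    def activation_derivative(self, outputs: np.ndarray):
        # backward() passes self.output, so the derivative is written in
        # terms of the activated value: sigma'(z) = output * (1 - output)
        return outputs * (1.0 - outputs)


if __name__ == '__main__':
    # Smoke test: one forward and backward pass on a random mini-batch
    rng = np.random.default_rng(seed=0)
    layer = _ExampleSigmoidLayer(index=0, input_dim=4, output_dim=3)
    x = rng.standard_normal((8, 4))        # batch of 8 samples
    out = layer.forward(x)                 # shape (8, 3)
    dL_dout = out - rng.random((8, 3))     # stand-in upstream gradient
    dL_dinputs, *_ = layer.backward(dL_dout, learning_rate=0.1)
    print(layer.describe(), out.shape, dL_dinputs.shape)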