- from abc import abstractmethod
-
- import numpy as np
-
- from neural_net.transform_layer import Layer
-
- class ActivationLayer(Layer):
-     """
-     Abstract base class for a fully connected layer followed by an activation
-     function. Subclasses supply the weight/bias initializers and the activation
-     function together with its derivative (e.g. Sigmoid, ReLU).
-     """
-
-     def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
-         super().__init__('ActivationLayer', index, input_dim, output_dim)
-         self.type = 'ActivationLayer'
-         self.subtype = ''
-
-         self.inputs = np.array([])
-         self.output = np.array([])
-         self.z = np.array([])
-         self.gradient_clip = 1.0
-
-         # Use the provided weights and biases, otherwise defer to the subclass initializers
-         if weights is not None:
-             self.weights = weights
-         else:
-             self.initialize_weights()
-         if biases is not None:
-             self.biases = biases
-         else:
-             self.initialize_biases()
-
-     def describe(self):
-         return f"{self.type} ({self.input_dim}x{self.output_dim} neurons, {self.subtype} activation)"
-
-     @abstractmethod
-     def initialize_weights(self):
-         pass
-
-     @abstractmethod
-     def initialize_biases(self):
-         pass
-
-     def forward(self, inputs: np.ndarray):
-         self.inputs = inputs
-         self.z = np.dot(self.inputs, self.weights) + self.biases
-         self.output = self.activation(self.z)  # calls the subclass's activation function (e.g. Sigmoid)
-         return self.output
-
-     def backward(self, dL_dout, learning_rate):
-         """
-         Backpropagate the error and update the weights and biases.
-         (The chain-rule expressions implemented here are written out after the class.)
-         :param dL_dout: Gradient of the loss with respect to the layer outputs
-         :param learning_rate: Learning rate for the weight and bias updates
-         :return: Tuple (dL_dinputs, dL_dweights, dL_dbias, weights, biases), where
-                  dL_dinputs is the gradient passed back to the previous layer
-         """
-         # Activation derivative dout/dz: how much the activation output changes with
-         # respect to the pre-activation value z. It is computed from the stored
-         # post-activation output (for Sigmoid: sigma'(z) = output * (1 - output)).
-         dout_dz = self.activation_derivative(self.output)
-
-         # Gradient of the loss with respect to the weights:
-         # dL/dweights = inputs^T . (dL/dout * dout/dz),
-         # clipped elementwise to +/- gradient_clip to keep the update bounded.
-         dL_dweights = np.clip(np.dot(self.inputs.T, dL_dout * dout_dz), -self.gradient_clip, self.gradient_clip)
-
-         # Gradient of the loss with respect to the biases, summed over the batch
-         dL_dbias = np.sum(dL_dout * dout_dz, axis=0)
-
-         # Gradient of the loss with respect to the inputs, needed to keep
-         # backpropagating into the previous layer:
-         # dL/dinputs = (dL/dout * dout/dz) . weights^T
-         dL_dinputs = np.dot(dL_dout * dout_dz, self.weights.T)
-
-         # Update weights and biases with plain gradient descent
-         self.weights -= learning_rate * dL_dweights
-         self.biases -= learning_rate * dL_dbias
-
-         return dL_dinputs, dL_dweights, dL_dbias, self.weights, self.biases
-
-     def reset(self):
-         self.initialize_weights()
-         self.initialize_biases()
-
-     @abstractmethod
-     def activation(self, raw_outputs: np.ndarray):
-         """
-         Apply the activation function (Sigmoid, ReLU, etc.) to the pre-activation values.
-         """
-         pass
-
-     @abstractmethod
-     def activation_derivative(self, outputs: np.ndarray):
-         """
-         Compute the derivative of the activation function from the post-activation outputs.
-         """
-         pass
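
For reference, these are the chain-rule expressions that `backward` implements, with `x` the batch of inputs (shape `batch × input_dim`), `W` the weights, `b` the biases, `a = σ(z)` the layer output, and `⊙` the elementwise product:

```latex
\begin{aligned}
z &= xW + b, \qquad a = \sigma(z) \\
\frac{\partial L}{\partial W} &= x^{\top}\left(\frac{\partial L}{\partial a} \odot \sigma'(z)\right) \\
\frac{\partial L}{\partial b} &= \sum_{\text{batch}} \frac{\partial L}{\partial a} \odot \sigma'(z) \\
\frac{\partial L}{\partial x} &= \left(\frac{\partial L}{\partial a} \odot \sigma'(z)\right) W^{\top}
\end{aligned}
```

The only deviation from the plain chain rule is that `dL_dweights` is clipped elementwise to `±gradient_clip` before the update.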
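Since `ActivationLayer` is abstract, here is a minimal sketch of what a concrete subclass and a single training step could look like. The `SigmoidLayer` name, the Xavier-style initialization, and the `neural_net.activation_layer` import path are illustrative assumptions; only the constructor signature and the four abstract methods come from the class above, and the sketch assumes the `Layer` base class stores `input_dim`/`output_dim` as attributes (as `describe()` implies).

```python
import numpy as np

from neural_net.activation_layer import ActivationLayer  # assumed module path


class SigmoidLayer(ActivationLayer):
    """Hypothetical concrete subclass using the logistic sigmoid."""

    def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
        super().__init__(index, input_dim, output_dim, weights, biases)
        self.subtype = 'Sigmoid'

    def initialize_weights(self):
        # Xavier/Glorot-style uniform initialization, a common choice for sigmoid units
        limit = np.sqrt(6.0 / (self.input_dim + self.output_dim))
        self.weights = np.random.uniform(-limit, limit, (self.input_dim, self.output_dim))

    def initialize_biases(self):
        self.biases = np.zeros(self.output_dim)

    def activation(self, raw_outputs: np.ndarray):
        return 1.0 / (1.0 + np.exp(-raw_outputs))

    def activation_derivative(self, outputs: np.ndarray):
        # backward() passes the stored post-activation outputs, so sigma'(z) = a * (1 - a)
        return outputs * (1.0 - outputs)


# Usage sketch: one forward/backward step on a random batch
layer = SigmoidLayer(index=0, input_dim=4, output_dim=3)
x = np.random.randn(8, 4)
out = layer.forward(x)                                         # shape (8, 3)
dL_dinputs, *_ = layer.backward(np.ones_like(out), learning_rate=0.01)
```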