from abc import abstractmethod
import numpy as np
from neural_net.transform_layer import Layer


class ActivationLayer(Layer):
    def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
        super().__init__('ActivationLayer', index, input_dim, output_dim)
        self.type = 'ActivationLayer'
        self.subtype = ''
        self.inputs = np.array([])
        self.output = np.array([])
        self.z = np.array([])
        self.gradient_clip = 1.0

        # Initialize weights and biases, unless they are supplied explicitly
        if weights is not None:
            self.weights = weights
        else:
            self.initialize_weights()
        if biases is not None:
            self.biases = biases
        else:
            self.initialize_biases()

    def describe(self):
        return f"{self.type} ({self.input_dim}x{self.output_dim} neurons, {self.subtype} activation)"

    @abstractmethod
    def initialize_weights(self):
        pass

    @abstractmethod
    def initialize_biases(self):
        pass

    def forward(self, inputs: np.ndarray):
        self.inputs = inputs
        self.z = np.dot(self.inputs, self.weights) + self.biases
        self.output = self.activation(self.z)  # Calls the subclass's activation function (e.g. Sigmoid)
        return self.output
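
    # Worked example of the forward pass (illustrative assumption: a 2-input,
    # 1-output sigmoid subclass with weights [[0.5], [-0.25]] and bias [0.1]):
    #   inputs = [[1.0, 2.0]]
    #   z      = 1.0*0.5 + 2.0*(-0.25) + 0.1 = 0.1
    #   output = sigmoid(0.1) ~= 0.525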

    def backward(self, dL_dout, learning_rate):
        """
        Backpropagate the error and update weights and biases.

        :param dL_dout: Gradient of the loss with respect to the layer outputs
        :param learning_rate: Learning rate for the weight and bias updates
        :return: Tuple of (dL/dinputs, dL/dweights, dL/dbias, updated weights, updated biases);
                 dL/dinputs is the gradient to pass to the previous layer
        """
        # Activation derivative dout/dz: how much the activation output changes
        # with respect to the pre-activation value z.
        # For Sigmoid this is sigma(z) * (1 - sigma(z)), computed here from the stored output.
        dout_dz = self.activation_derivative(self.output)

        # Gradient of the loss with respect to the weights (dL/dweights):
        # how much the loss changes when the weights change.
        # Formula: dL/dweights = inputs^T . (dL/dout * dout/dz),
        # clipped to +/- self.gradient_clip to prevent overly large updates.
        dL_dweights = np.clip(np.dot(self.inputs.T, dL_dout * dout_dz), -self.gradient_clip, self.gradient_clip)
        dL_dbias = np.sum(dL_dout * dout_dz, axis=0)

        # Gradient of the loss with respect to the inputs (dL/dinputs),
        # needed to backpropagate further into the previous layer.
        # Formula: dL/dinputs = (dL/dout * dout/dz) . weights^T
        dL_dinputs = np.dot(dL_dout * dout_dz, self.weights.T)

        # Adjust weights and biases by gradient descent
        self.weights -= learning_rate * dL_dweights
        self.biases -= learning_rate * dL_dbias

        return dL_dinputs, dL_dweights, dL_dbias, self.weights, self.biases
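
    # A minimal gradient-check sketch (an assumption, not part of this class): the
    # dL_dinputs returned above can be verified numerically before calling backward()
    # (backward() mutates the weights), using the loss whose gradient was passed in
    # as dL_dout (loss_fn below is hypothetical):
    #   eps = 1e-5
    #   base = loss_fn(self.forward(x))
    #   x[i, j] += eps
    #   numeric = (loss_fn(self.forward(x)) - base) / eps   # ~= dL_dinputs[i, j]
    #   x[i, j] -= eps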

    def reset(self):
        self.initialize_weights()
        self.initialize_biases()

    @abstractmethod
    def activation(self, raw_outputs: np.ndarray):
        """
        Apply the activation function (Sigmoid, ReLU, etc.)
        """
        pass

    @abstractmethod
    def activation_derivative(self, outputs: np.ndarray):
        """
        Compute the derivative of the activation function.
        """
        pass
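

# A minimal concrete-subclass sketch, given as an illustration only: the class name
# SigmoidLayer, the scaled-normal weight init, and the usage snippet below are
# assumptions, not code from this repository. It shows how the abstract methods of
# ActivationLayer above could be filled in for a sigmoid activation.
class SigmoidLayer(ActivationLayer):
    def __init__(self, index, input_dim, output_dim, weights=None, biases=None):
        super().__init__(index, input_dim, output_dim, weights, biases)
        self.subtype = 'Sigmoid'

    def initialize_weights(self):
        # Small random weights, scaled by 1/sqrt(input_dim) to keep pre-activations moderate
        self.weights = np.random.randn(self.input_dim, self.output_dim) / np.sqrt(self.input_dim)

    def initialize_biases(self):
        self.biases = np.zeros(self.output_dim)

    def activation(self, raw_outputs: np.ndarray):
        return 1.0 / (1.0 + np.exp(-raw_outputs))

    def activation_derivative(self, outputs: np.ndarray):
        # Sigmoid derivative written in terms of the activation output: sigma(z) * (1 - sigma(z))
        return outputs * (1.0 - outputs)


# Hypothetical usage: one forward/backward step on a small random batch.
if __name__ == '__main__':
    layer = SigmoidLayer(index=0, input_dim=3, output_dim=2)
    x = np.random.randn(4, 3)                   # batch of 4 samples, 3 features each
    out = layer.forward(x)                      # shape (4, 2)
    dL_dout = out - np.ones_like(out)           # gradient of a made-up loss
    dL_dinputs, *_ = layer.backward(dL_dout, learning_rate=0.1)
    print(layer.describe(), dL_dinputs.shape)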