from abc import ABC, abstractmethod

import numpy as np


class Layer(ABC):
    """Base class for all network layers."""

    def __init__(self, type, index, input_dim, output_dim):
        self.type = type
        self.index = index
        self.input_dim = input_dim
        self.output_dim = output_dim

    @abstractmethod
    def forward(self, inputs):
        raise NotImplementedError("This should be overridden by subclasses")

    @abstractmethod
    def backward(self, dL_dout, learning_rate):
        raise NotImplementedError("This should be overridden by subclasses")

    @abstractmethod
    def reset(self):
        raise NotImplementedError("This should be overridden by subclasses")


class TransformLayer(Layer):
    """A parameter-free layer whose output has the same shape as its input."""

    def __init__(self, index, size):
        super().__init__('TransformLayer', index, size, size)

    def describe(self):
        return self.type

    def forward(self, inputs):
        raise NotImplementedError("This should be overridden by subclasses")

    def backward(self, dL_dout, learning_rate):
        # The first element is the gradient to propagate to the previous layer;
        # the remaining slots are placeholders for parameter gradients, which a
        # transform layer does not have.
        return dL_dout, None, None, None, None

    def reset(self):
        pass


class NormalizeLayer(TransformLayer):
    def __init__(self, index, size):
        super().__init__(index, size)
        self.type = 'NormalizeLayer'

    def forward(self, inputs):
        """
        Normalizes the input vector so that its elements sum to 1.
        [1, 5, 5, 3, 6] => [0.05, 0.25, 0.25, 0.15, 0.3]

        :param inputs: np.array(float)
        :return: np.array(float)
        """
        return inputs / inputs.sum()


class SoftMaxLayer(TransformLayer):
    def __init__(self, index, size):
        super().__init__(index, size)
        self.type = 'SoftMaxLayer'

    def forward(self, inputs):
        """
        Normalizes the input vector, but "pushes" higher values to dominate
        the probability distribution.
        [1, 5, 5, 3, 6] => [0.004, 0.205, 0.205, 0.028, 0.558]

        :param inputs: np.array(float)
        :return: np.array(float)
        """
        # Subtract the per-row max for numerical stability; the largest
        # exponent then becomes exp(0) = 1, so each row sum is at least 1.
        input_ex = np.exp(inputs - inputs.max(axis=-1, keepdims=True))
        s = np.sum(input_ex, axis=-1, keepdims=True)
        # Defensive guard: if a sum were ever zero, fall back to a uniform
        # distribution instead of dividing by zero.
        if np.any(s == 0):
            return np.ones_like(input_ex) / input_ex.shape[-1]
        return input_ex / s
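

if __name__ == "__main__":
    # Minimal usage sketch (an addition, not part of the original module):
    # exercises both transform layers on the docstring example vector to
    # confirm the numbers quoted above. The (index, size) constructor
    # arguments follow the class definitions in this file.
    x = np.array([1.0, 5.0, 5.0, 3.0, 6.0])

    norm = NormalizeLayer(index=0, size=5)
    print(norm.forward(x))       # [0.05 0.25 0.25 0.15 0.3 ]

    softmax = SoftMaxLayer(index=1, size=5)
    print(softmax.forward(x))    # approx. [0.004 0.205 0.205 0.028 0.558]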