(doc/fluxion/activations) add/convert docstrings to mkdocstrings format

Laurent 2024-02-01 22:07:12 +00:00 committed by Laureηt
parent 0fc3264fae
commit a7c048f5fb


@@ -1,34 +1,102 @@
-from torch import Tensor, sigmoid
+from abc import ABC
+from enum import Enum
+
+from torch import Tensor
 from torch.nn.functional import (
-    gelu,  # type: ignore
+    gelu,
+    relu,
+    sigmoid,
     silu,
 )
 
 from refiners.fluxion.layers.module import Module
 
 
-class Activation(Module):
+class Activation(Module, ABC):
+    """Base class for activation layers.
+
+    Activation layers are layers that apply a (non-linear) function to their input.
+
+    Receives:
+        x (Tensor):
+
+    Returns:
+        (Tensor):
+    """
+
     def __init__(self) -> None:
         super().__init__()
 
 
 class SiLU(Activation):
+    """Sigmoid Linear Unit activation function.
+
+    See [[arXiv:1702.03118] Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning](https://arxiv.org/abs/1702.03118) for more details.
+    """
+
     def __init__(self) -> None:
         super().__init__()
 
     def forward(self, x: Tensor) -> Tensor:
-        return silu(x)  # type: ignore
+        return silu(x)
 
 
 class ReLU(Activation):
+    """Rectified Linear Unit activation function.
+
+    See [Rectified Linear Units Improve Restricted Boltzmann Machines](https://www.cs.toronto.edu/%7Efritz/absps/reluICML.pdf)
+    and [Cognitron: A self-organizing multilayered neural network](https://link.springer.com/article/10.1007/BF00342633) for more details.
+
+    Example:
+        ```py
+        relu = fl.ReLU()
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = relu(tensor)
+        expected_output = torch.tensor([[0.0, 0.0, 1.0]])
+        assert torch.allclose(output, expected_output)
+        ```
+    """
+
    def __init__(self) -> None:
         super().__init__()
 
     def forward(self, x: Tensor) -> Tensor:
-        return x.relu()
+        return relu(x)
 
 
+class GeLUApproximation(Enum):
+    """Approximation methods for the Gaussian Error Linear Unit activation function.
+
+    Attributes:
+        NONE: No approximation, use the original formula.
+        TANH: Use the tanh approximation.
+        SIGMOID: Use the sigmoid approximation.
+    """
+
+    NONE = "none"
+    TANH = "tanh"
+    SIGMOID = "sigmoid"
+
+
 class GeLU(Activation):
+    """Gaussian Error Linear Unit activation function.
+
+    This activation can be quite expensive to compute; a few approximations are available,
+    see [`GeLUApproximation`][refiners.fluxion.layers.activations.GeLUApproximation].
+
+    See [[arXiv:1606.08415] Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415) for more details.
+
+    Example:
+        ```py
+        gelu = fl.GeLU()
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = gelu(tensor)
+        ```
+    """
+
     def __init__(self) -> None:
         super().__init__()
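The `GeLUApproximation` enum added above only names the three variants; how `GeLU` dispatches on it lies outside the lines shown in this hunk. The sketch below is a hypothetical, standalone illustration (the `gelu_with_approximation` helper is not part of the library) of how each member could map onto a concrete computation, using PyTorch's `gelu(x, approximate=...)` for the exact and tanh forms and the common `x * sigmoid(1.702 * x)` shortcut for the sigmoid form:

```py
from enum import Enum

from torch import Tensor, sigmoid
from torch.nn.functional import gelu


class GeLUApproximation(Enum):
    NONE = "none"
    TANH = "tanh"
    SIGMOID = "sigmoid"


def gelu_with_approximation(x: Tensor, approximation: GeLUApproximation) -> Tensor:
    # Hypothetical helper: pick a concrete GELU computation for each enum member.
    match approximation:
        case GeLUApproximation.NONE:
            return gelu(x, approximate="none")  # exact, erf-based formula
        case GeLUApproximation.TANH:
            return gelu(x, approximate="tanh")  # tanh approximation
        case GeLUApproximation.SIGMOID:
            return x * sigmoid(1.702 * x)  # sigmoid approximation
```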
@@ -50,18 +118,36 @@ class ApproximateGeLU(Activation):
 class Sigmoid(Activation):
+    """Sigmoid activation function.
+
+    Example:
+        ```py
+        sigmoid = fl.Sigmoid()
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = sigmoid(tensor)
+        ```
+    """
+
     def __init__(self) -> None:
         super().__init__()
 
     def forward(self, x: Tensor) -> Tensor:
-        return x.sigmoid()
+        return sigmoid(x)
 
 
 class GLU(Activation):
-    """
-    Gated Linear Unit activation layer.
+    """Gated Linear Unit activation function.
 
-    See https://arxiv.org/abs/2002.05202v1 for details.
+    See [[arXiv:2002.05202] GLU Variants Improve Transformer](https://arxiv.org/abs/2002.05202) for more details.
+
+    Example:
+        ```py
+        glu = fl.GLU(fl.ReLU())
+        tensor = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
+        output = glu(tensor)
+        ```
"""
def __init__(self, activation: Activation) -> None:
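The `__init__` signature above shows that `GLU` wraps another `Activation` and uses it as the gate. Below is a minimal standalone sketch of the gating computation (`glu_forward` is hypothetical, not the library's code), assuming the usual GLU formulation in which the input is split in half along its last dimension and one half is multiplied by the activated other half:

```py
from collections.abc import Callable

import torch
from torch import Tensor


def glu_forward(x: Tensor, activation: Callable[[Tensor], Tensor]) -> Tensor:
    # The last dimension must be even so it can be split into a value half and a gate half.
    assert x.shape[-1] % 2 == 0, "GLU expects an even-sized last dimension"
    value, gate = x.chunk(2, dim=-1)
    return value * activation(gate)


# With ReLU as the gate: values [[1, 2]] are multiplied by relu([[3, 4]]) = [[3, 4]].
x = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
print(glu_forward(x, torch.relu))  # tensor([[3., 8.]])
```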