(doc/fluxion/activations) add/convert docstrings to mkdocstrings format

2024-11-21 21:58:47 +00:00 · 2024-02-01 22:07:12 +00:00 · 2024-02-01 22:07:12 +00:00 · a7c048f5fb
parent 0fc3264fae
commit a7c048f5fb
1 changed files with 95 additions and 9 deletions
--- a/src/refiners/fluxion/layers/activations.py
+++ b/src/refiners/fluxion/layers/activations.py
@ -1,34 +1,102 @@
-from torch import Tensor, sigmoid
+from abc import ABC
+from enum import Enum
+
+from torch import Tensor
 from torch.nn.functional import (
-    gelu,  # type: ignore
+    gelu,
+    relu,
+    sigmoid,
    silu,
 )

 from refiners.fluxion.layers.module import Module


-class Activation(Module):
+class Activation(Module, ABC):
+    """Base class for activation layers.
+
+    Activation layers are layers that apply a (non-linear) function to their input.
+
+    Receives:
+        x (Tensor): The input tensor.
+
+    Returns:
+        (Tensor): The result of applying the activation function to the input.
+    """
+
    def __init__(self) -> None:
        super().__init__()


 class SiLU(Activation):
+    """Sigmoid Linear Unit activation function.
+
+    See [[arXiv:1702.03118] Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning](https://arxiv.org/abs/1702.03118) for more details.
+    """
+
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x: Tensor) -> Tensor:
-        return silu(x)  # type: ignore
+        return silu(x)


 class ReLU(Activation):
+    """Rectified Linear Unit activation function.
+
+    See [Rectified Linear Units Improve Restricted Boltzmann Machines](https://www.cs.toronto.edu/%7Efritz/absps/reluICML.pdf)
+    and [Cognitron: A self-organizing multilayered neural network](https://link.springer.com/article/10.1007/BF00342633) for more details.
+
+    Example:
+        ```py
+        relu = fl.ReLU()
+
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = relu(tensor)
+
+        expected_output = torch.tensor([[0.0, 0.0, 1.0]])
+        assert torch.allclose(output, expected_output)
+        ```
+    """
+
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x: Tensor) -> Tensor:
-        return x.relu()
+        return relu(x)
+
+
+class GeLUApproximation(Enum):
+    """Approximation methods for the Gaussian Error Linear Unit activation function.
+
+    Attributes:
+        NONE: No approximation, use the original formula.
+        TANH: Use the tanh approximation.
+        SIGMOID: Use the sigmoid approximation.
+    """
+
+    NONE = "none"
+    TANH = "tanh"
+    SIGMOID = "sigmoid"


 class GeLU(Activation):
+    """Gaussian Error Linear Unit activation function.
+
+    This activation can be quite expensive to compute; a few approximations are available,
+    see [`GeLUApproximation`][refiners.fluxion.layers.activations.GeLUApproximation].
+
+    See [[arXiv:1606.08415] Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415) for more details.
+
+    Example:
+        ```py
+        gelu = fl.GeLU()
+
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = gelu(tensor)
+        ```
+    """
+
    def __init__(self) -> None:
        super().__init__()

@ -50,18 +118,36 @@ class ApproximateGeLU(Activation):


 class Sigmoid(Activation):
+    """Sigmoid activation function.
+
+    Example:
+        ```py
+        sigmoid = fl.Sigmoid()
+
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = sigmoid(tensor)
+        ```
+    """
+
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x: Tensor) -> Tensor:
-        return x.sigmoid()
+        return sigmoid(x)


 class GLU(Activation):
-    """
-    Gated Linear Unit activation layer.
+    """Gated Linear Unit activation function.

-    See https://arxiv.org/abs/2002.05202v1 for details.
+    See [[arXiv:2002.05202] GLU Variants Improve Transformer](https://arxiv.org/abs/2002.05202) for more details.
+
+    Example:
+        ```py
+        glu = fl.GLU(fl.ReLU())
+
+        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
+        output = glu(tensor)
+        ```
    """

    def __init__(self, activation: Activation) -> None: