(doc/fluxion/activations) add/convert docstrings to mkdocstrings format

2024-11-24 15:18:46 +00:00 · 2024-02-01 22:07:12 +00:00 · 2024-02-01 22:07:12 +00:00 · a7c048f5fb
parent 0fc3264fae
commit a7c048f5fb
1 changed files with 95 additions and 9 deletions
--- a/src/refiners/fluxion/layers/activations.py
+++ b/src/refiners/fluxion/layers/activations.py
@ -1,34 +1,102 @@
-from torch import Tensor, sigmoid
+from abc import ABC
 from enum import Enum
 from torch import Tensor
 from torch.nn.functional import (
-    gelu,  # type: ignore
+    gelu,
    relu,
    sigmoid,
    silu,
 )
 from refiners.fluxion.layers.module import Module
-class Activation(Module):
+class Activation(Module, ABC):
    """Base class for activation layers.
    Activation layers are layers that apply a (non-linear) function to their input.
    Receives:
        x (Tensor):
    Returns:
        (Tensor):
    """
    def __init__(self) -> None:
        super().__init__()
 class SiLU(Activation):
    """Sigmoid Linear Unit activation function.
    See [[arXiv:1702.03118] Sigmoid-Weighted Linear Units for Neural Network Function Approximation in Reinforcement Learning](https://arxiv.org/abs/1702.03118) for more details.
    """
    def __init__(self) -> None:
        super().__init__()
    def forward(self, x: Tensor) -> Tensor:
-        return silu(x)  # type: ignore
+        return silu(x)
 class ReLU(Activation):
    """Rectified Linear Unit activation function.
    See [Rectified Linear Units Improve Restricted Boltzmann Machines](https://www.cs.toronto.edu/%7Efritz/absps/reluICML.pdf)
    and [Cognitron: A self-organizing multilayered neural network](https://link.springer.com/article/10.1007/BF00342633)
    Example:
        ```py
        relu = fl.ReLU()
        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
        output = relu(tensor)
        expected_output = torch.tensor([[0.0, 0.0, 1.0]])
        assert torch.allclose(output, expected_output)
        ```
    """
    def __init__(self) -> None:
        super().__init__()
    def forward(self, x: Tensor) -> Tensor:
-        return x.relu()
+        return relu(x)
 class GeLUApproximation(Enum):
    """Approximation methods for the Gaussian Error Linear Unit activation function.
    Attributes:
        NONE: No approximation, use the original formula.
        TANH: Use the tanh approximation.
        SIGMOID: Use the sigmoid approximation.
    """
    NONE = "none"
    TANH = "tanh"
    SIGMOID = "sigmoid"
 class GeLU(Activation):
    """Gaussian Error Linear Unit activation function.
    This activation can be quite expensive to compute, a few approximations are available,
    see [`GeLUApproximation`][refiners.fluxion.layers.activations.GeLUApproximation].
    See [[arXiv:1606.08415] Gaussian Error Linear Units](https://arxiv.org/abs/1606.08415) for more details.
    Example:
        ```py
        gelu = fl.GeLU()
        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
        output = gelu(tensor)
        ```
    """
    def __init__(self) -> None:
        super().__init__()
@ -50,18 +118,36 @@ class ApproximateGeLU(Activation):
 class Sigmoid(Activation):
    """Sigmoid activation function.
    Example:
        ```py
        sigmoid = fl.Sigmoid()
        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
        output = sigmoid(tensor)
        ```
    """
    def __init__(self) -> None:
        super().__init__()
    def forward(self, x: Tensor) -> Tensor:
-        return x.sigmoid()
+        return sigmoid(x)
 class GLU(Activation):
-    """
+    """Gated Linear Unit activation function.
    Gated Linear Unit activation layer.
-    See https://arxiv.org/abs/2002.05202v1 for details.
+    See [[arXiv:2002.05202] GLU Variants Improve Transformer](https://arxiv.org/abs/2002.05202) for more details.
    Example:
        ```py
        glu = fl.GLU()
        tensor = torch.tensor([[-1.0, 0.0, 1.0]])
        output = glu(tensor)
        ```
    """
    def __init__(self, activation: Activation) -> None: