From 7307a3686e232f5df10f5163d63680ac15146b20 Mon Sep 17 00:00:00 2001
From: Laurent
Date: Fri, 2 Feb 2024 09:50:15 +0000
Subject: [PATCH] (docstrings) apply @deltheil suggestions

---
 pyproject.toml                             |  3 ++-
 src/refiners/fluxion/adapters/lora.py      | 18 ++++++++++++------
 src/refiners/fluxion/context.py            |  4 ++--
 src/refiners/fluxion/layers/activations.py |  2 +-
 src/refiners/fluxion/layers/attentions.py  | 10 ++++++----
 src/refiners/fluxion/layers/basics.py      |  6 +++---
 6 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2efff74..c3c2adf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,7 +43,8 @@ conversion = [
     "tqdm>=4.62.3",
 ]
 doc = [
-    "black>=24.1.1", # required by mkdocs to format the signatures
+    # required by mkdocs to format the signatures
+    "black>=24.1.1",
     "mkdocs-material>=9.5.6",
     "mkdocstrings[python]>=0.24.0",
     "mkdocs-literate-nav>=0.6.1",
diff --git a/src/refiners/fluxion/adapters/lora.py b/src/refiners/fluxion/adapters/lora.py
index 38f1bc3..007ecce 100644
--- a/src/refiners/fluxion/adapters/lora.py
+++ b/src/refiners/fluxion/adapters/lora.py
@@ -9,7 +9,7 @@ from refiners.fluxion.adapters.adapter import Adapter


 class Lora(fl.Chain, ABC):
-    """Low-rank approximation (LoRA) layer.
+    """Low-Rank Adaptation (LoRA) layer.

     This layer is composed of two [`WeightedModule`][refiners.fluxion.layers.WeightedModule]:

@@ -156,7 +156,7 @@ class Lora(fl.Chain, ABC):
         return LoraAdapter(layer, self), parent

     def load_weights(self, down_weight: Tensor, up_weight: Tensor) -> None:
-        """Load the weights of the LoRA.
+        """Load the (pre-trained) weights of the LoRA.

         Args:
             down_weight: The down weight.
@@ -169,7 +169,7 @@ class Lora(fl.Chain, ABC):


 class LinearLora(Lora):
-    """Low-rank approximation (LoRA) layer for linear layers.
+    """Low-Rank Adaptation (LoRA) layer for linear layers.

     This layer uses two [`Linear`][refiners.fluxion.layers.Linear] layers as its down and up layers.
     """
@@ -255,7 +255,7 @@ class LinearLora(Lora):


 class Conv2dLora(Lora):
-    """Low-rank approximation (LoRA) layer for 2D convolutional layers.
+    """Low-Rank Adaptation (LoRA) layer for 2D convolutional layers.

     This layer uses two [`Conv2d`][refiners.fluxion.layers.Conv2d] layers as its down and up layers.
     """
@@ -391,12 +391,12 @@ class LoraAdapter(fl.Sum, Adapter[fl.WeightedModule]):

     @property
     def loras(self) -> dict[str, Lora]:
-        """The LoRA layers."""
+        """The LoRA layers, indexed by name."""
         return {lora.name: lora for lora in self.layers(Lora)}

     @property
     def scales(self) -> dict[str, float]:
-        """The scales of the LoRA layers."""
+        """The scales of the LoRA layers, indexed by name."""
         return {lora.name: lora.scale for lora in self.layers(Lora)}

     @scales.setter
@@ -407,6 +407,9 @@ class LoraAdapter(fl.Sum, Adapter[fl.WeightedModule]):
     def add_lora(self, lora: Lora, /) -> None:
         """Add a LoRA layer to the adapter.

+        Raises:
+            AssertionError: If the adapter already contains a LoRA layer with the same name.
+
         Args:
             lora: The LoRA layer to add.
         """
@@ -416,6 +419,9 @@ class LoraAdapter(fl.Sum, Adapter[fl.WeightedModule]):
     def remove_lora(self, name: str, /) -> Lora | None:
         """Remove a LoRA layer from the adapter.

+        Note:
+            If the adapter doesn't contain a LoRA layer with the given name, nothing happens and `None` is returned.
+
         Args:
             name: The name of the LoRA layer to remove.
""" diff --git a/src/refiners/fluxion/context.py b/src/refiners/fluxion/context.py index 8730d8b..b30d31a 100644 --- a/src/refiners/fluxion/context.py +++ b/src/refiners/fluxion/context.py @@ -23,7 +23,7 @@ class ContextProvider: self.contexts[key] = value def get_context(self, key: str) -> Any: - """Retreive a value from the context. + """Retrieve a value from the context. Args: key: The key of the context. @@ -34,7 +34,7 @@ class ContextProvider: return self.contexts.get(key) def update_contexts(self, new_contexts: Contexts) -> None: - """Update the contexts with new contexts. + """Update or set the contexts with new contexts. Args: new_contexts: The new contexts. diff --git a/src/refiners/fluxion/layers/activations.py b/src/refiners/fluxion/layers/activations.py index eded3ef..2bb3ab5 100644 --- a/src/refiners/fluxion/layers/activations.py +++ b/src/refiners/fluxion/layers/activations.py @@ -55,7 +55,7 @@ class ReLU(Activation): output = relu(tensor) expected_output = torch.tensor([[0.0, 0.0, 1.0]]) - assert torch.allclose(output, expected_output) + assert torch.equal(output, expected_output) ``` """ diff --git a/src/refiners/fluxion/layers/attentions.py b/src/refiners/fluxion/layers/attentions.py index 7c7801a..6cfe32a 100644 --- a/src/refiners/fluxion/layers/attentions.py +++ b/src/refiners/fluxion/layers/attentions.py @@ -20,7 +20,9 @@ def scaled_dot_product_attention( ) -> Float[Tensor, "batch source_sequence_length dim"]: """Scaled Dot Product Attention. - Optimization depends on which pytorch backend is used. + Note: + Optimization depends on which PyTorch backend is used. + See [[arXiv:1706.03762] Attention Is All You Need (Equation 1)](https://arxiv.org/abs/1706.03762) for more details. See also [torch.nn.functional.scaled_dot_product_attention](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html). """ @@ -213,7 +215,7 @@ class Attention(Chain): which transforms the 3 inputs into Query, Key and Value - a [`ScaledDotProductAttention`][refiners.fluxion.layers.attentions.ScaledDotProductAttention] layer - a [`Linear`][refiners.fluxion.layers.linear.Linear] layer, - which further transforms the output of the + which projects the output of the [`ScaledDotProductAttention`][refiners.fluxion.layers.attentions.ScaledDotProductAttention] layer Receives: @@ -461,7 +463,7 @@ class SelfAttention2d(SelfAttention): ) -> Float[Tensor, "batch height*width channels"]: """Transform a 2D Tensor into a sequence. - The height and width of the input Tensor are stored in the context, + The height and width of the input Tensor are stored in a `"reshape"` context, so that the output Tensor can be transformed back into a 2D Tensor in the `sequence_to_tensor_2d` method. """ height, width = x.shape[-2:] @@ -480,7 +482,7 @@ class SelfAttention2d(SelfAttention): ) -> Float[Tensor, "batch channels height width"]: """Transform a sequence into a 2D Tensor. - The height and width of the output Tensor are retrieved from the context, + The height and width of the output Tensor are retrieved from the `"reshape"` context, which was set in the `tensor_2d_to_sequence` method. 
""" height, width = self.use_context("reshape").values() diff --git a/src/refiners/fluxion/layers/basics.py b/src/refiners/fluxion/layers/basics.py index 70526df..93ec638 100644 --- a/src/refiners/fluxion/layers/basics.py +++ b/src/refiners/fluxion/layers/basics.py @@ -17,7 +17,7 @@ class Identity(Module): tensor = torch.randn(10, 10) output = identity(tensor) - assert torch.allclose(tensor, output) + assert torch.equal(tensor, output) ``` """ @@ -51,9 +51,9 @@ class GetArg(Module): torch.randn(20, 20), torch.randn(30, 30), ) - output = get_arg(inputs) + output = get_arg(*inputs) - assert torch.allclose(tensor[1], output) + assert id(inputs[1]) == id(output) ``` """