diff --git a/src/refiners/fluxion/layers/__init__.py b/src/refiners/fluxion/layers/__init__.py
index bb1889f..1c1a03f 100644
--- a/src/refiners/fluxion/layers/__init__.py
+++ b/src/refiners/fluxion/layers/__init__.py
@@ -1,7 +1,6 @@
 from refiners.fluxion.layers.activations import GLU, ApproximateGeLU, GeLU, ReLU, Sigmoid, SiLU
 from refiners.fluxion.layers.attentions import Attention, SelfAttention, SelfAttention2d
 from refiners.fluxion.layers.basics import (
-    Buffer,
     Cos,
     Flatten,
     GetArg,
@@ -75,7 +74,6 @@ __all__ = [
     "Cos",
     "Multiply",
     "Matmul",
-    "Buffer",
     "Lambda",
     "Return",
     "Sum",
diff --git a/src/refiners/fluxion/layers/basics.py b/src/refiners/fluxion/layers/basics.py
index e6b6c94..3b8e595 100644
--- a/src/refiners/fluxion/layers/basics.py
+++ b/src/refiners/fluxion/layers/basics.py
@@ -162,27 +162,3 @@ class Parameter(WeightedModule):
 
     def forward(self, x: Tensor) -> Tensor:
         return self.weight.expand(x.shape[0], *self.dims)
-
-
-class Buffer(WeightedModule):
-    """
-    A layer that wraps a tensor as a buffer. This is useful to create a buffer that is not a weight or a bias.
-
-    Buffers are not trainable.
-    """
-
-    def __init__(self, *dims: int, device: Device | str | None = None, dtype: DType | None = None) -> None:
-        super().__init__()
-        self.dims = dims
-        self.register_buffer("buffer", randn(*dims, device=device, dtype=dtype))
-
-    @property
-    def device(self) -> Device:
-        return self.buffer.device
-
-    @property
-    def dtype(self) -> DType:
-        return self.buffer.dtype
-
-    def forward(self, _: Tensor) -> Tensor:
-        return self.buffer
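
Note: for downstream code that still relies on the removed Buffer layer, the sketch below shows how the same behavior maps onto a plain PyTorch module with a registered, non-trainable buffer. The class name ConstantBuffer is hypothetical and not part of refiners; this is an illustration under that assumption, not the library's recommended migration path.

# Hypothetical stand-in for the removed Buffer layer, built on plain PyTorch.
# Tensors added via register_buffer() are stored in the state dict and follow
# .to()/.cuda(), but receive no gradients, matching the "not trainable" intent
# of the removed class.
import torch
from torch import nn


class ConstantBuffer(nn.Module):  # illustrative name, not a refiners API
    def __init__(
        self,
        *dims: int,
        device: torch.device | str | None = None,
        dtype: torch.dtype | None = None,
    ) -> None:
        super().__init__()
        self.dims = dims
        # Random initialization mirrors the removed implementation.
        self.register_buffer("buffer", torch.randn(*dims, device=device, dtype=dtype))

    def forward(self, _: torch.Tensor) -> torch.Tensor:
        # The input is ignored; the stored buffer is returned as-is.
        return self.buffer


# Usage sketch: the buffer is returned regardless of input and exposes no
# trainable parameters to an optimizer.
layer = ConstantBuffer(4, 8, dtype=torch.float32)
out = layer(torch.zeros(1))
assert out.shape == (4, 8)
assert len(list(layer.parameters())) == 0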