No.18 Improve the usability of paddle.nn.GLU (PaddlePaddle#58632)
* support paddle.nn.GLU as a layer

* add extra_repr ut

* add test for type

* update doc

* update test, merge two test files

* delete atol
YibinLiu666 authored Nov 9, 2023
1 parent 52a86d9 commit 18e39bd
Showing 3 changed files with 93 additions and 1 deletion.
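Before this commit, GLU was available only through the functional API; the change also exposes it as a composable Layer. A minimal usage sketch of the two forms (the tensor shape and axis value here are illustrative, not taken from the commit):

import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 8])

# Pre-existing functional form.
y_func = F.glu(x, axis=-1)

# New layer form added by this commit; usable inside nn.Sequential, etc.
glu = paddle.nn.GLU(axis=-1)
y_layer = glu(x)

assert paddle.allclose(y_func, y_layer)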
2 changes: 2 additions & 0 deletions python/paddle/nn/__init__.py
@@ -22,6 +22,7 @@
    CELU,
    ELU,
    GELU,
    GLU,
    SELU,
    Hardshrink,
    Hardsigmoid,
@@ -240,6 +241,7 @@
    'TransformerDecoderLayer',
    'CrossEntropyLoss',
    'GELU',
    'GLU',
    'SELU',
    'Silu',
    'Conv2DTranspose',
49 changes: 49 additions & 0 deletions python/paddle/nn/layer/activation.py
@@ -117,6 +117,55 @@ def extra_repr(self):
        return f'alpha={self._alpha}{name_str}'


class GLU(Layer):
    r"""
    GLU Activation.

    .. math::

        GLU(a, b) = a \otimes \sigma(b)

    where :math:`a` is the first half of the input matrices and :math:`b` is
    the second half.

    Parameters:
        axis (int, optional): The axis along which to split the input tensor. It
            should be in range [-D, D), where D is the dimensions of ``x`` .
            If ``axis`` < 0, it works the same way as :math:`axis + D` .
            Default is -1.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Shape:
        - input: Tensor whose size along the given axis is even.
        - output: Tensor whose size along the given axis is halved.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> x = paddle.to_tensor(
            ...     [[-0.22014759, -1.76358426, 0.80566144, 0.04241343],
            ...      [-1.94900405, -1.89956081, 0.17134808, -1.11280477]]
            ... )
            >>> m = paddle.nn.GLU()
            >>> out = m(x)
            >>> print(out)
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
                   [[-0.15216254, -0.90048921],
                    [-1.05778778, -0.46985325]])
    """

    def __init__(self, axis=-1, name=None):
        super().__init__()
        self._axis = axis
        self._name = name

    def forward(self, x):
        return F.glu(x, self._axis, self._name)

    def extra_repr(self):
        name_str = f', name={self._name}' if self._name else ''
        return f'axis={self._axis}{name_str}'


class GELU(Layer):
r"""
GELU Activation.
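For reference, the layer's output matches the gating formula in the docstring: the input is split in two along axis, and the first half is gated by the sigmoid of the second. A quick equivalence sketch (the tensor shape is arbitrary):

import paddle

x = paddle.randn([4, 6])
a, b = paddle.split(x, num_or_sections=2, axis=-1)
expected = a * paddle.nn.functional.sigmoid(b)

out = paddle.nn.GLU(axis=-1)(x)
assert paddle.allclose(out, expected)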
43 changes: 42 additions & 1 deletion test/legacy_test/test_glu.py
@@ -18,7 +18,7 @@

import paddle
import paddle.base.dygraph as dg
-from paddle import base
+from paddle import base, nn
from paddle.nn import functional as F


@@ -62,5 +62,46 @@ def test_errors(self):
        self.assertRaises(ValueError, self.glu_axis_size)


class TestnnGLU(unittest.TestCase):
    def setUp(self):
        self.x = np.random.randn(6, 20)
        self.dim = [-1, 0, 1]

    def check_identity(self, place):
        with dg.guard(place):
            x_var = paddle.to_tensor(self.x)
            for dim in self.dim:
                act = nn.GLU(dim)
                y_var = act(x_var)
                y_np = y_var.numpy()
                out = glu(self.x, dim)
                np.testing.assert_allclose(y_np, out)

    def test_case(self):
        self.check_identity(base.CPUPlace())
        if base.is_compiled_with_cuda():
            self.check_identity(base.CUDAPlace(0))
        act = nn.GLU(axis=0, name="test")
        self.assertTrue(act.extra_repr() == 'axis=0, name=test')


class TestnnGLUerror(unittest.TestCase):
    def glu_axis_size(self):
        paddle.enable_static()
        x = paddle.static.data(name='x', shape=[1, 2, 3], dtype='float32')
        act = nn.GLU(256)
        act(x)

    def test_errors(self):
        self.assertRaises(ValueError, self.glu_axis_size)
        act = nn.GLU(256)
        self.assertRaises(TypeError, act, 1)
        # The input dtype must be float16, float32, float64.
        x_int32 = paddle.static.data(
            name='x_int32', shape=[10, 18], dtype='int32'
        )
        self.assertRaises(TypeError, act, x_int32)


if __name__ == '__main__':
    unittest.main()
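check_identity compares the layer against a NumPy reference named glu defined earlier in test_glu.py, outside the hunk shown above. That helper is not part of this diff; a plausible reconstruction, for context only:

import numpy as np

def glu(x, axis=-1):
    # Hypothetical reconstruction of the test's NumPy reference
    # (the actual helper lives above the shown hunk in test_glu.py).
    a, b = np.split(x, 2, axis=axis)
    return a * (1.0 / (1.0 + np.exp(-b)))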
