diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 54d84d5b74931e..f96b5e9b8c42bb 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -22,6 +22,7 @@
     CELU,
     ELU,
     GELU,
+    GLU,
     SELU,
     Hardshrink,
     Hardsigmoid,
@@ -240,6 +241,7 @@
     'TransformerDecoderLayer',
     'CrossEntropyLoss',
     'GELU',
+    'GLU',
     'SELU',
     'Silu',
     'Conv2DTranspose',
diff --git a/python/paddle/nn/layer/activation.py b/python/paddle/nn/layer/activation.py
index d8a4d0e6fedd03..60e3a95a20d18c 100644
--- a/python/paddle/nn/layer/activation.py
+++ b/python/paddle/nn/layer/activation.py
@@ -117,6 +117,55 @@ def extra_repr(self):
         return f'alpha={self._alpha}{name_str}'
 
 
+class GLU(Layer):
+    r"""
+    GLU Activation.
+
+    .. math::
+
+        GLU(a, b) = a \otimes \sigma(b) where :math:`a` is the first half of the input matrices and :math:`b` is the second half.
+
+    Parameters:
+        axis (int, optional): The axis along which to split the input tensor. It
+            should be in range [-D, D), where D is the number of dimensions of ``x``.
+            If ``axis`` < 0, it works the same way as :math:`axis + D`.
+            Default is -1.
+        name (str, optional): Name for the operation (optional, default is None).
+            For more information, please refer to :ref:`api_guide_Name`.
+
+    Shape:
+        - input: Tensor whose size along the given axis is even.
+        - output: Tensor whose size along the given axis is halved.
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+            >>> x = paddle.to_tensor(
+            ...     [[-0.22014759, -1.76358426, 0.80566144, 0.04241343],
+            ...      [-1.94900405, -1.89956081, 0.17134808, -1.11280477]]
+            ... )
+            >>> m = paddle.nn.GLU()
+            >>> out = m(x)
+            >>> print(out)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[-0.15216254, -0.90048921],
+            [-1.05778778, -0.46985325]])
+    """
+
+    def __init__(self, axis=-1, name=None):
+        super().__init__()
+        self._axis = axis
+        self._name = name
+
+    def forward(self, x):
+        return F.glu(x, self._axis, self._name)
+
+    def extra_repr(self):
+        name_str = f', name={self._name}' if self._name else ''
+        return f'axis={self._axis}{name_str}'
+
+
 class GELU(Layer):
     r"""
     GELU Activation.
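For review context: the new layer is a thin wrapper over the existing functional API. `F.glu` splits the input into two equal halves along `axis` and gates the first half with the sigmoid of the second, which is what the docstring's GLU(a, b) = a ⊗ σ(b) expresses. A minimal sketch of that equivalence, illustrative only and assuming the documented behavior of `paddle.split` and `paddle.nn.functional.sigmoid`:

import paddle
import paddle.nn.functional as F

x = paddle.randn([6, 20])

# What nn.GLU(axis=-1) computes, spelled out step by step.
a, b = paddle.split(x, num_or_sections=2, axis=-1)  # two halves of width 10
manual = a * F.sigmoid(b)                            # gate the first half with sigmoid of the second

layer = paddle.nn.GLU(axis=-1)
print(paddle.allclose(layer(x), manual))             # expected: True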
diff --git a/test/legacy_test/test_glu.py b/test/legacy_test/test_glu.py
index 9ffbc5a7061810..1baa3295b9a4a3 100644
--- a/test/legacy_test/test_glu.py
+++ b/test/legacy_test/test_glu.py
@@ -18,7 +18,7 @@
 
 import paddle
 import paddle.base.dygraph as dg
-from paddle import base
+from paddle import base, nn
 from paddle.nn import functional as F
 
 
@@ -62,5 +62,46 @@ def test_errors(self):
         self.assertRaises(ValueError, self.glu_axis_size)
 
 
+class TestnnGLU(unittest.TestCase):
+    def setUp(self):
+        self.x = np.random.randn(6, 20)
+        self.dim = [-1, 0, 1]
+
+    def check_identity(self, place):
+        with dg.guard(place):
+            x_var = paddle.to_tensor(self.x)
+            for dim in self.dim:
+                act = nn.GLU(dim)
+                y_var = act(x_var)
+                y_np = y_var.numpy()
+                out = glu(self.x, dim)
+                np.testing.assert_allclose(y_np, out)
+
+    def test_case(self):
+        self.check_identity(base.CPUPlace())
+        if base.is_compiled_with_cuda():
+            self.check_identity(base.CUDAPlace(0))
+        act = nn.GLU(axis=0, name="test")
+        self.assertTrue(act.extra_repr() == 'axis=0, name=test')
+
+
+class TestnnGLUerror(unittest.TestCase):
+    def glu_axis_size(self):
+        paddle.enable_static()
+        x = paddle.static.data(name='x', shape=[1, 2, 3], dtype='float32')
+        act = nn.GLU(256)
+        act(x)
+
+    def test_errors(self):
+        self.assertRaises(ValueError, self.glu_axis_size)
+        act = nn.GLU(256)
+        self.assertRaises(TypeError, act, 1)
+        # The input dtype must be float16, float32, float64.
+        x_int32 = paddle.static.data(
+            name='x_int32', shape=[10, 18], dtype='int32'
+        )
+        self.assertRaises(TypeError, act, x_int32)
+
+
 if __name__ == '__main__':
     unittest.main()
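The `glu` reference used by `check_identity` is the numpy helper already defined near the top of test_glu.py, outside this hunk. For orientation, a reference of that shape could look like the sketch below; the helper's exact implementation is not shown in the diff, so treat this as an assumption rather than the file's actual code:

import numpy as np

def glu(x, dim=-1):
    # Numpy reference: split into two equal halves along `dim` and
    # gate the first half with the sigmoid of the second half.
    a, b = np.split(x, 2, axis=dim)
    return a * (1.0 / (1.0 + np.exp(-b)))

With that reference in place, the new cases run alongside the existing ones, e.g. `python -m pytest test/legacy_test/test_glu.py`.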