Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【Hackathon 5th No.116】paddle.quantile/nanquantile 功能增强 #56461

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
efc2c65
[quantile/nanquantile]易用性提升 No.20
Asthestarsfalll Aug 19, 2023
914b845
update
Asthestarsfalll Aug 19, 2023
3319772
update
Asthestarsfalll Aug 21, 2023
62f7f64
support zero-dim q and add backward test
Asthestarsfalll Aug 22, 2023
4bbaf6f
remove useless code
Asthestarsfalll Aug 22, 2023
8ae9f5f
fix unittest
Asthestarsfalll Aug 31, 2023
e8cdcd4
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Aug 31, 2023
1ce4044
fix ci coverage
Asthestarsfalll Sep 1, 2023
84e2aff
fix
Asthestarsfalll Sep 6, 2023
f95ef77
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Sep 6, 2023
728a95b
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Sep 6, 2023
8eb64da
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Sep 7, 2023
b5ffb9c
update docs
Asthestarsfalll Sep 9, 2023
03e36e6
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Sep 9, 2023
2d68ad9
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Asthestarsfalll Nov 8, 2023
75327bd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Asthestarsfalll Nov 8, 2023
74b7143
Merge branch 'quantile' of https://github.com/Asthestarsfalll/Paddle …
Asthestarsfalll Nov 8, 2023
edba2f1
update
Asthestarsfalll Nov 8, 2023
6f47fb7
fix codestyle
Asthestarsfalll Dec 6, 2023
d0d42b2
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Dec 6, 2023
b4a2591
support interpolation
Asthestarsfalll Dec 6, 2023
34241d1
fix unittest
Asthestarsfalll Dec 6, 2023
6eca5ce
update unittest
Asthestarsfalll Dec 7, 2023
221f09d
add unittest
Asthestarsfalll Dec 7, 2023
38b7cbb
fix
Asthestarsfalll Dec 7, 2023
245fdae
fix
Asthestarsfalll Dec 10, 2023
da6bc4a
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Dec 10, 2023
ed0e6d2
Merge branch 'develop' into quantile
Asthestarsfalll Dec 18, 2023
6c6ad3a
Merge branch 'PaddlePaddle:develop' into quantile
Asthestarsfalll Jan 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 87 additions & 27 deletions python/paddle/tensor/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,14 +471,16 @@ def median(x, axis=None, keepdim=False, name=None):
return out_tensor


def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
def _compute_quantile(
x, q, axis=None, keepdim=False, interpolation="linear", ignore_nan=False
):
"""
Compute the quantile of the input along the specified axis.

Args:
x (Tensor): The input Tensor, its data type can be float32, float64, int32, int64.
q (int|float|list): The q for calculate quantile, which should be in range [0, 1]. If q is a list,
each q will be calculated and the first dimension of output is same to the number of ``q`` .
q (int|float|list|Tensor): The q used to calculate the quantile, which should be in range [0, 1]. If q is a list,
a 1-D Tensor or a 0-D Tensor, each q will be calculated and the first dimension of the output is the same as the number of ``q`` .
axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
Expand All @@ -489,6 +491,8 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
the output Tensor is the same as ``x`` except in the reduced
dimensions(it is of size 1 in this case). Otherwise, the shape of
the output Tensor is squeezed in ``axis`` . Default is False.
interpolation (str, optional): The interpolation method to use
when the desired quantile falls between two data points. Must be one of
``'linear'``, ``'lower'``, ``'higher'``, ``'nearest'`` or ``'midpoint'``. Default is ``'linear'``.
ignore_nan (bool, optional): Whether to ignore NaN of input Tensor.
If ``ignore_nan`` is True, it will calculate nanquantile.
Otherwise it will calculate quantile. Default is False.
Expand All @@ -507,9 +511,33 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
elif isinstance(q, (list, tuple)):
if len(q) <= 0:
raise ValueError("q should not be empty")
elif isinstance(q, Variable):
if len(q.shape) > 1:
raise ValueError("q should be a 0-D tensor or a 1-D tensor")
if len(q.shape) == 0:
q = [q]
else:
raise TypeError("Type of q should be int, float, list or tuple.")
raise TypeError(
"Type of q should be int, float, list or tuple, or tensor"
)
for q_num in q:
if not in_dynamic_mode() and isinstance(q_num, Variable):
break
if q_num < 0 or q_num > 1:
raise ValueError("q should be in range [0, 1]")

if interpolation not in [
'linear',
'lower',
'higher',
'nearest',
'midpoint',
]:
raise ValueError(
"interpolation must be one of 'linear', 'lower', 'higher', 'nearest' or 'midpoint', but got {}".format(
interpolation
)
)
# Validate axis
dims = len(x.shape)
out_shape = list(x.shape)
Expand Down Expand Up @@ -557,8 +585,6 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):
indices = []

for q_num in q:
if q_num < 0 or q_num > 1:
raise ValueError("q should be in range [0, 1]")
if in_dynamic_or_pir_mode():
q_num = paddle.to_tensor(q_num, dtype='float64')
if ignore_nan:
Expand All @@ -573,47 +599,63 @@ def _compute_quantile(x, q, axis=None, keepdim=False, ignore_nan=False):

sorted_tensor = paddle.sort(x, axis)

outputs = []
def _compute_index(index):
if interpolation == "nearest":
idx = paddle.round(index).astype(paddle.int32)
return paddle.take_along_axis(sorted_tensor, idx, axis=axis)

# TODO(chenjianye): replace the for-loop to directly take elements.
for index in indices:
indices_below = paddle.floor(index).astype('int32')
indices_upper = paddle.ceil(index).astype('int32')
tensor_upper = paddle.take_along_axis(
sorted_tensor, indices_upper, axis=axis
)
indices_below = paddle.floor(index).astype(paddle.int32)
tensor_below = paddle.take_along_axis(
sorted_tensor, indices_below, axis=axis
)
weights = index - indices_below.astype('float64')
out = paddle.lerp(
tensor_below.astype('float64'),
tensor_upper.astype('float64'),
if interpolation == "lower":
return tensor_below

indices_upper = paddle.ceil(index).astype(paddle.int32)
tensor_upper = paddle.take_along_axis(
sorted_tensor, indices_upper, axis=axis
)
if interpolation == "higher":
return tensor_upper

if interpolation == "midpoint":
return (tensor_upper + tensor_below) / 2

weights = (index - indices_below).astype(paddle.float64)
return paddle.lerp(
tensor_below.astype(paddle.float64),
tensor_upper.astype(paddle.float64),
weights,
)

outputs = []

# TODO(chenjianye): replace the for-loop to directly take elements.
for index in indices:
out = _compute_index(index)
if not keepdim:
out = paddle.squeeze(out, axis=axis)
else:
out = out.reshape(out_shape)
outputs.append(out)

if len(q) > 1:
if len(outputs) > 1:
outputs = paddle.stack(outputs, 0)
else:
outputs = outputs[0]

return outputs


def quantile(x, q, axis=None, keepdim=False):
def quantile(x, q, axis=None, keepdim=False, interpolation="linear"):
"""
Compute the quantile of the input along the specified axis.
If any values in a reduced row are NaN, then the quantiles for that reduction will be NaN.

Args:
x (Tensor): The input Tensor, its data type can be float32, float64, int32, int64.
q (int|float|list): The q for calculate quantile, which should be in range [0, 1]. If q is a list,
each q will be calculated and the first dimension of output is same to the number of ``q`` .
q (int|float|list|Tensor): The q used to calculate the quantile, which should be in range [0, 1]. If q is a list,
Asthestarsfalll marked this conversation as resolved.
Show resolved Hide resolved
a 1-D Tensor or a 0-D Tensor, each q will be calculated and the first dimension of the output is the same as the number of ``q`` .
axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
Expand All @@ -624,6 +666,8 @@ def quantile(x, q, axis=None, keepdim=False):
the output Tensor is the same as ``x`` except in the reduced
dimensions(it is of size 1 in this case). Otherwise, the shape of
the output Tensor is squeezed in ``axis`` . Default is False.
interpolation (str, optional): The interpolation method to use
when the desired quantile falls between two data points. Must be one of
``'linear'``, ``'lower'``, ``'higher'``, ``'nearest'`` or ``'midpoint'``. Default is ``'linear'``.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Expand Down Expand Up @@ -670,18 +714,25 @@ def quantile(x, q, axis=None, keepdim=False):
[6.80000000]])

"""
return _compute_quantile(x, q, axis=axis, keepdim=keepdim, ignore_nan=False)
return _compute_quantile(
x,
q,
axis=axis,
keepdim=keepdim,
interpolation=interpolation,
ignore_nan=False,
)


def nanquantile(x, q, axis=None, keepdim=False):
def nanquantile(x, q, axis=None, keepdim=False, interpolation="linear"):
"""
Compute the quantile of the input as if NaN values in input did not exist.
If all values in a reduced row are NaN, then the quantiles for that reduction will be NaN.

Args:
x (Tensor): The input Tensor, its data type can be float32, float64, int32, int64.
q (int|float|list): The q for calculate quantile, which should be in range [0, 1]. If q is a list,
each q will be calculated and the first dimension of output is same to the number of ``q`` .
q (int|float|list|Tensor): The q used to calculate the quantile, which should be in range [0, 1]. If q is a list or
a 1-D Tensor, each q will be calculated and the first dimension of the output is the same as the number of ``q`` .
axis (int|list, optional): The axis along which to calculate quantile. ``axis`` should be int or list of int.
``axis`` should be in range [-D, D), where D is the dimensions of ``x`` .
If ``axis`` is less than 0, it works the same way as :math:`axis + D`.
Expand All @@ -692,6 +743,8 @@ def nanquantile(x, q, axis=None, keepdim=False):
the output Tensor is the same as ``x`` except in the reduced
dimensions(it is of size 1 in this case). Otherwise, the shape of
the output Tensor is squeezed in ``axis`` . Default is False.
interpolation (str, optional): The interpolation method to use
when the desired quantile falls between two data points. Must be one of
``'linear'``, ``'lower'``, ``'higher'``, ``'nearest'`` or ``'midpoint'``. Default is ``'linear'``.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

Expand Down Expand Up @@ -740,4 +793,11 @@ def nanquantile(x, q, axis=None, keepdim=False):
[nan]])

"""
return _compute_quantile(x, q, axis=axis, keepdim=keepdim, ignore_nan=True)
return _compute_quantile(
x,
q,
axis=axis,
keepdim=keepdim,
interpolation=interpolation,
ignore_nan=True,
)
Loading