[BUG] fix paddle.to_tensor/Tensor.item BF16 bug #53567

Merged (1 commit) on May 8, 2023
4 changes: 1 addition & 3 deletions paddle/fluid/pybind/eager_method.cc
@@ -1066,13 +1066,11 @@ static PyObject* tensor__getitem_from_offset(TensorObject* self,
     T b = paddle::pybind::TensorGetElement<T>(tensor, offset);   \
     Py_intptr_t py_dims[paddle::framework::DDim::kMaxRank];      \
     Py_intptr_t py_strides[paddle::framework::DDim::kMaxRank];   \
-    py_dims[0] = 1;                                               \
-    py_strides[0] = 1;                                            \
     auto& api = pybind11::detail::npy_api::get();                 \
     PyObject* array = api.PyArray_NewFromDescr_(                  \
         api.PyArray_Type_,                                        \
         api.PyArray_DescrFromType_(numpy_dtype),                  \
-        1,                                                        \
+        0,                                                        \
         py_dims,                                                  \
         py_strides,                                               \
         nullptr,                                                  \
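
With the ndim argument changed from 1 to 0, the macro now builds a 0-d (scalar) NumPy array rather than a 1-element vector. A minimal NumPy-only sketch (not Paddle code) of what that difference looks like to the Python side, which can still read the dtype and call .item():

```python
import numpy as np

zero_d = np.array(0, dtype=np.uint16)   # shape (), like the new ndim=0 result
one_d = np.array([0], dtype=np.uint16)  # shape (1,), the old ndim=1 result

print(zero_d.shape, one_d.shape)   # () (1,)
print(zero_d.dtype == np.uint16)   # True: the dtype stays visible for the bf16 check
print(zero_d.item())               # 0, a plain Python scalar
```
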
23 changes: 14 additions & 9 deletions python/paddle/fluid/data_feeder.py
@@ -46,27 +46,32 @@
 }


-def copy_bits_from_float_to_uint16(f):
-    return struct.unpack('<I', struct.pack('<f', f))[0] >> 16
-
-
 def convert_float_to_uint16(data, data_format="NCHW"):
     if data.size == 0:
         return data.view(np.uint16)

     if data_format == "NHWC":
         data = np.transpose(data, [0, 3, 1, 2])

-    new_data = []
-    for x in np.nditer(data):
-        new_data.append(np.uint16(copy_bits_from_float_to_uint16(x)))
-    new_data = np.reshape(new_data, data.shape).view(np.uint16)
+    new_data = np.vectorize(
+        lambda x: struct.unpack('<I', struct.pack('<f', x))[0] >> 16,
+        otypes=[np.uint16],
+    )(data.flat)
+    new_data = np.reshape(new_data, data.shape)

     if data_format == "NHWC":
-        new_data = np.transpose(new_output, [0, 2, 3, 1])
+        new_data = np.transpose(new_data, [0, 2, 3, 1])
     return new_data


+def convert_uint16_to_float(data):
+    new_data = np.vectorize(
+        lambda x: struct.unpack('<f', struct.pack('<I', x << 16))[0],
+        otypes=[np.float32],
+    )(data.flat)
+    return np.reshape(new_data, data.shape)
+
+
 def convert_dtype(dtype):
     if isinstance(dtype, core.VarDesc.VarType):
         if dtype in _PADDLE_DTYPE_2_NUMPY_DTYPE:
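
Both helpers rely on the same bit-level relationship: a bfloat16 value is just the upper 16 bits of the float32 encoding. A standalone sketch of that round trip (the helper names here are illustrative, not part of the patch):

```python
import struct

def f32_to_bf16_bits(f):
    # Keep the high 16 bits of the IEEE-754 float32 encoding (bfloat16 by truncation).
    return struct.unpack('<I', struct.pack('<f', f))[0] >> 16

def bf16_bits_to_f32(u):
    # Shift the 16-bit pattern back into the high half of a float32.
    return struct.unpack('<f', struct.pack('<I', u << 16))[0]

bits = f32_to_bf16_bits(-1e6)
print(bits)                    # the uint16 bit pattern stored for the bf16 value
print(bf16_bits_to_f32(bits))  # -999424.0, the value the updated tests assert
```
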
10 changes: 8 additions & 2 deletions python/paddle/fluid/dygraph/tensor_patch_methods.py
@@ -33,7 +33,10 @@
 )
 from .base import switch_to_static_graph
 from .math_op_patch import monkey_patch_math_tensor
-from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
+from paddle.fluid.data_feeder import (
+    convert_uint16_to_float,
+    _PADDLE_DTYPE_2_NUMPY_DTYPE,
+)
 import paddle.utils.deprecated as deprecated
 import paddle.profiler as profiler
 from paddle.profiler.utils import in_profiler_mode
@@ -614,7 +617,10 @@ def item(self, *args):
                print(x.item(0, 2)) #3.3

        """
-        return self._getitem_from_offset(*args).item()
+        scalar = self._getitem_from_offset(*args)
+        if scalar.dtype == np.uint16:
+            return convert_uint16_to_float(scalar).item()
+        return scalar.item()

    @property
    def inplace_version(self):
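
With this change, Tensor.item() on a bfloat16 tensor decodes the stored uint16 bit pattern instead of returning the raw integer. A usage sketch consistent with the new tests below:

```python
import paddle

x = paddle.to_tensor(-1e6, dtype='bfloat16')
v = x.item()
print(v)                     # -999424.0, the closest bfloat16 to -1e6
print(isinstance(v, float))  # True
```
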
26 changes: 25 additions & 1 deletion python/paddle/fluid/tests/unittests/test_var_base.py
@@ -246,10 +246,12 @@ def check_with_place(place):
            np.testing.assert_array_equal(x.numpy(), numpy_array)
            self.assertEqual(x.type, core.VarDesc.VarType.LOD_TENSOR)

-            # test dtype bfloat16
+            # test dtype=bfloat16
            x = paddle.to_tensor(-1e6, dtype=paddle.bfloat16)
            self.assertEqual(x.dtype, core.VarDesc.VarType.BF16)
            self.assertTrue(x == -999424.0)
+            self.assertTrue(x.item() == -999424.0)
+            self.assertTrue(isinstance(x.item(), float))

            x = paddle.to_tensor([-1e6, -1e6, -1e6], dtype='bfloat16')
            self.assertEqual(x.dtype, core.VarDesc.VarType.BF16)
@@ -266,6 +268,28 @@ def check_with_place(place):
            y.backward()
            self.assertTrue(x.grad == -999424.0 * 2)

+            # test default_type=bfloat16
+            paddle.set_default_dtype('bfloat16')
+            x = paddle.to_tensor(-1e6)
+            self.assertEqual(x.dtype, core.VarDesc.VarType.BF16)
+            self.assertTrue(x == -999424.0)
+            self.assertTrue(x.item() == -999424.0)
+            self.assertTrue(isinstance(x.item(), float))
+
+            x = paddle.to_tensor([-1e6, -1e6, -1e6])
+            self.assertEqual(x.dtype, core.VarDesc.VarType.BF16)
+            self.assertTrue(x[0] == -999424.0)
+            self.assertTrue(x[1] == -999424.0)
+            self.assertTrue(x[2] == -999424.0)
+
+            x = paddle.to_tensor(-1e6, stop_gradient=False)
+            self.assertEqual(x.dtype, core.VarDesc.VarType.BF16)
+            self.assertTrue(x == -999424.0)
+            y = x * x
+            y.backward()
+            self.assertTrue(x.grad == -999424.0 * 2)
+            paddle.set_default_dtype('float32')
+
            with self.assertRaises(ValueError):
                paddle.randn([3, 2, 2]).item()
            with self.assertRaises(ValueError):
4 changes: 2 additions & 2 deletions python/paddle/framework/framework.py
@@ -40,7 +40,7 @@ def set_default_dtype(d):

    """
    if isinstance(d, type):
-        # This branch is for NumPy scalar types
+        # This branch is for np.dtype
        if d in [np.float16, np.float32, np.float64]:
            d = d.__name__
        else:
@@ -49,7 +49,7 @@ def set_default_dtype(d):
                ", but received %s" % d.__name__
            )
    else:
-        # This branch is for np.dtype and str
+        # This branch is for str
        if d in ['float16', 'float32', 'float64', 'bfloat16']:
            # NOTE(SigureMo): Since the np.dtype object is not an instance of
            # type, so it will not be handled by the previous branch. We need
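
The comment tweak is about which Python objects reach each branch. A quick check of that distinction, plus the string form used by the new bfloat16 tests (a sketch, not part of the patch):

```python
import numpy as np
import paddle

print(isinstance(np.float32, type))           # True  -> handled by the first branch
print(isinstance(np.dtype('float32'), type))  # False -> falls through to the else branch

# bfloat16 is passed as a plain string, as the new tests above do:
paddle.set_default_dtype('bfloat16')
paddle.set_default_dtype('float32')  # restore the usual default
```
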
2 changes: 0 additions & 2 deletions python/paddle/hapi/progressbar.py
@@ -83,8 +83,6 @@ def convert_uint16_to_float(in_list):
                if k == "loss":
                    if isinstance(val, list):
                        scalar_val = val[0]
-                    elif isinstance(val, np.ndarray):
-                        scalar_val = val.item()
                    else:
                        scalar_val = val
                    if isinstance(scalar_val, np.uint16):
41 changes: 23 additions & 18 deletions python/paddle/tensor/creation.py
@@ -542,18 +542,28 @@ def logspace(start, stop, num, base=10.0, dtype=None, name=None):


 def _to_tensor_non_static(data, dtype=None, place=None, stop_gradient=True):
+    def _handle_tensor_dtype(tensor, dtype):
+        if dtype:
+            if convert_dtype(dtype) != convert_dtype(tensor.dtype):
+                return tensor.astype(convert_dtype(dtype))
+        return tensor
+
+    def _handle_np_dtype(ndarray, dtype):
+        if dtype:
+            if convert_dtype(dtype) != convert_dtype(ndarray.dtype):
+                # should not ndarray.astype('uint16') directly, data bits is wrong
+                if convert_dtype(dtype) in ['uint16']:
+                    return convert_float_to_uint16(ndarray.astype('float32'))
+                else:
+                    return ndarray.astype(convert_dtype(dtype))
+
+        return ndarray
+
     if isinstance(data, np.number):  # Special case for numpy scalars
         data = np.array(data)

     if not isinstance(data, np.ndarray):

-        def _handle_dtype(data, dtype):
-            if dtype:
-                if convert_dtype(dtype) != convert_dtype(data.dtype):
-                    return data.astype(convert_dtype(dtype))
-            return data
-
         if np.isscalar(data) and not isinstance(data, str):
             data = np.array(data)
         elif isinstance(data, (list, tuple)):
@@ ... @@
             )
         elif isinstance(data, paddle.Tensor) and not in_dygraph_mode():
             data = data._copy_to(place, False)
-            data = _handle_dtype(data, dtype)
+            data = _handle_tensor_dtype(data, dtype)
             data.stop_gradient = stop_gradient
             return data
         elif isinstance(data, core.eager.Tensor) and in_dygraph_mode():
             data = data._copy_to(place, False)
-            data = _handle_dtype(data, dtype)
+            data = _handle_tensor_dtype(data, dtype)
             data.stop_gradient = stop_gradient
             return data
         elif isinstance(data, (core.LoDTensor, core.Tensor)):
@@ ... @@
             data = paddle.Tensor(data)
             if not data.place._equals(place):
                 data = data._copy_to(place, False)
-            data = _handle_dtype(data, dtype)
+            data = _handle_tensor_dtype(data, dtype)
             data.stop_gradient = stop_gradient
             return data
         else:
@@ ... @@
                         if default_type in ['float16', 'float32']
                         else 'complex128'
                     )
-                data = data.astype(default_type)
+                data = _handle_np_dtype(data, default_type)
             # Windows default type is 'int32', while Linux/Mac is 'int64'. Unify they.
             if data.dtype in ['int32']:
-                default_type = "int64"
-                data = data.astype(default_type)
+                data = data.astype("int64")

-    if dtype and convert_dtype(dtype) != data.dtype:
-        if convert_dtype(dtype) in ['uint16']:
-            # should not ndarray.astype('uint16') directly, data bits is wrong
-            data = convert_float_to_uint16(data.astype('float32'))
-        else:
-            data = data.astype(convert_dtype(dtype))
+    if dtype:
+        data = _handle_np_dtype(data, dtype)

     if _in_eager_without_dygraph_check() and isinstance(data, np.ndarray):
         return core.eager.Tensor(
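
The helper split also documents why the uint16 path cannot use a plain cast: astype('uint16') converts the numeric value, while bfloat16 needs the bit pattern. A small sketch (assuming the module path shown in this diff):

```python
import numpy as np
from paddle.fluid.data_feeder import convert_float_to_uint16

a = np.array([1.5], dtype='float32')
print(a.astype('uint16'))          # [1]     -> numeric cast, the bf16 bits are lost
print(convert_float_to_uint16(a))  # [16320] -> 0x3FC0, the actual bfloat16 bit pattern
```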