diff --git a/src/qcodes/dataset/data_set.py b/src/qcodes/dataset/data_set.py
index a7805911772..aebc528d83a 100644
--- a/src/qcodes/dataset/data_set.py
+++ b/src/qcodes/dataset/data_set.py
@@ -3,7 +3,6 @@
 import importlib
 import json
 import logging
-import sys
 import tempfile
 import time
 import uuid
@@ -1467,7 +1466,7 @@ def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
             log.info(
                 "Dataset is expected to be larger that threshold. Using distributed export.",
                 extra={
-                    "file_name": file_path,
+                    "file_name": str(file_path),
                     "qcodes_guid": self.guid,
                     "ds_name": self.name,
                     "exp_name": self.exp_name,
@@ -1483,7 +1482,7 @@ def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
                 log.info(
                     "Writing individual files to temp dir.",
                     extra={
-                        "file_name": file_path,
+                        "file_name": str(file_path),
                         "qcodes_guid": self.guid,
                         "ds_name": self.name,
                         "exp_name": self.exp_name,
@@ -1504,7 +1503,7 @@ def _export_as_netcdf(self, path: Path, file_name: str) -> Path:
                 log.info(
                     "Combining temp files into one file.",
                     extra={
-                        "file_name": file_path,
+                        "file_name": str(file_path),
                         "qcodes_guid": self.guid,
                         "ds_name": self.name,
                         "exp_name": self.exp_name,
@@ -1530,15 +1529,15 @@ def _estimate_ds_size(self) -> float:
         Give an estimated size of the dataset as the size of a single row
         times the len of the dataset. Result is returned in Mega Bytes.
 
-        Note that this does not take overhead into account so it is more accurate
-        if the row size is "large"
+        Note that this does not take overhead from storing the array into account
+        so it is assumed that the total array is large compared to the overhead.
         """
         sample_data = self.get_parameter_data(start=1, end=1)
         row_size = 0.0
 
         for param_data in sample_data.values():
             for array in param_data.values():
-                row_size += sys.getsizeof(array)
+                row_size += array.size * array.dtype.itemsize
         return row_size * len(self) / 1024 / 1024
 
 
diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py
index 24774762a77..307c3cc49b9 100644
--- a/tests/dataset/test_dataset_export.py
+++ b/tests/dataset/test_dataset_export.py
@@ -758,6 +758,39 @@ def test_export_dataset_small_no_delated(
     assert "Writing netcdf file directly" in caplog.records[0].msg
 
 
+def test_export_dataset_delayed_numeric(
+    tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet, caplog
+) -> None:
+    tmp_path = tmp_path_factory.mktemp("export_netcdf")
+    mock_dataset_grid._export_limit = 0
+    with caplog.at_level(logging.INFO):
+        mock_dataset_grid.export(export_type="netcdf", path=tmp_path, prefix="qcodes_")
+
+    assert (
+        "Dataset is expected to be larger that threshold. Using distributed export."
+        in caplog.records[0].msg
+    )
+    assert "Writing individual files to temp dir" in caplog.records[1].msg
+    assert "Combining temp files into one file" in caplog.records[2].msg
+    assert "Writing netcdf file using Dask delayed writer" in caplog.records[3].msg
+
+    loaded_ds = xr.load_dataset(mock_dataset_grid.export_info.export_paths["nc"])
+    assert loaded_ds.x.shape == (10,)
+    assert_allclose(loaded_ds.x, np.arange(10))
+    assert loaded_ds.y.shape == (5,)
+    assert_allclose(loaded_ds.y, np.arange(20, 25, 1))
+
+    arrays = []
+    for i in range(10):
+        arrays.append(np.arange(20 + i, 25 + i))
+    expected_z = np.array(arrays)
+
+    assert loaded_ds.z.shape == (10, 5)
+    assert_allclose(loaded_ds.z, expected_z)
+
+    _assert_xarray_metadata_is_as_expected(loaded_ds, mock_dataset_grid)
+
+
 def test_export_dataset_delayed(
     tmp_path_factory: TempPathFactory, mock_dataset_numpy: DataSet, caplog
 ) -> None:
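
A note on the _estimate_ds_size change, with a minimal sketch (not part of this diff) of why the per-array estimate was swapped. sys.getsizeof reports the size of the ndarray Python object: it includes the underlying data buffer only when the array owns it, returns just the object header for a view, and for the tiny one-row sample used here the header overhead dominates. array.size * array.dtype.itemsize measures the raw data alone (equivalent to array.nbytes), which is what the row-size extrapolation needs. Exact byte counts below vary by platform and NumPy version.

    # Compare the old and new per-array size estimates on an owned array and a view.
    import sys

    import numpy as np

    owned = np.zeros((1, 5), dtype=np.float64)  # owns its 40-byte data buffer
    view = owned[:, :]                          # basic slicing returns a view

    # Old estimate: object header, plus the buffer only when the array owns it.
    print(sys.getsizeof(owned))  # header + 40 bytes of data
    print(sys.getsizeof(view))   # header only; the shared buffer is not counted

    # New estimate: elements times bytes per element, i.e. the raw data size.
    print(owned.size * owned.dtype.itemsize)  # 40
    print(view.size * view.dtype.itemsize)    # 40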