Skip to content

Commit

Permalink
Merge pull request #5641 from jenshnielsen/jenshnielsen/fix_object_un…
Browse files Browse the repository at this point in the history
…stacking

More robust converting to pandas / xarray
  • Loading branch information
jenshnielsen authored Jan 12, 2024
2 parents 4d8879c + dcdb126 commit fce620c
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/changes/newsfragments/5641.improved
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed an issue where datasets with categorical setpoints could fail to export correctly to pandas DataFrames or xarray datasets.
21 changes: 17 additions & 4 deletions src/qcodes/dataset/exporters/export_to_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,23 @@ def _generate_pandas_index(
elif len(data) == 2:
index = pd.Index(data[keys[1]].ravel(), name=keys[1])
else:
index_data = tuple(np.concatenate(data[key])
if data[key].dtype == np.dtype('O')
else data[key].ravel()
for key in keys[1:])
index_data = []
for key in keys[1:]:
if data[key].dtype == np.dtype("O"):
# if we have a numpy array of dtype object,
# it could either be a variable length array
# in which case we concatenate it, or it could
# be a numpy array of scalar objects.
# In the latter case concatenate will fail
# with a value error but ravel will produce the
# correct result
try:
index_data.append(np.concatenate(data[key]))
except ValueError:
index_data.append(data[key].ravel())
else:
index_data.append(data[key].ravel())

index = pd.MultiIndex.from_arrays(
index_data,
names=keys[1:])
Expand Down
40 changes: 40 additions & 0 deletions tests/dataset/test_dataset_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
import xarray as xr
from numpy.testing import assert_allclose
Expand All @@ -26,6 +27,7 @@
from qcodes.dataset.descriptions.param_spec import ParamSpecBase
from qcodes.dataset.descriptions.versioning import serialization as serial
from qcodes.dataset.export_config import DataExportType
from qcodes.dataset.exporters.export_to_pandas import _generate_pandas_index
from qcodes.dataset.exporters.export_to_xarray import _calculate_index_shape
from qcodes.dataset.linked_datasets.links import links_to_str

Expand Down Expand Up @@ -1322,3 +1324,41 @@ def test_multi_index_wrong_option(mock_dataset_non_grid) -> None:

with pytest.raises(ValueError, match="Invalid value for use_multi_index"):
mock_dataset_non_grid.to_xarray_dataset(use_multi_index="perhaps")


def test_geneate_pandas_index():
    """Exercise ``_generate_pandas_index`` on several setpoint layouts.

    Every case passes a three-entry mapping and expects a ``pd.MultiIndex``
    back. The cases differ in how the ``x`` entry (and, in the last case,
    every entry) is stored:

    * plain integer grids,
    * a grid of strings with numpy's native string dtype,
    * the same grid of strings stored with ``dtype=object``,
    * rows of unequal length stored as object arrays.
    """
    # Each entry pairs the input mapping with the expected index length.
    # Arrays are built fresh per case, as in a stanza-by-stanza test.
    cases = (
        # All-numeric 2x3 grids -> 6 index entries.
        (
            {
                "z": np.array([[7, 8, 9], [10, 11, 12]]),
                "x": np.array([[1, 2, 3], [1, 2, 3]]),
                "y": np.array([[5, 5, 5], [6, 6, 6]]),
            },
            6,
        ),
        # String-valued "x" with numpy's native string dtype.
        (
            {
                "z": np.array([[7, 8, 9], [10, 11, 12]]),
                "x": np.array([["a", "b", "c"], ["a", "b", "c"]]),
                "y": np.array([[5, 5, 5], [6, 6, 6]]),
            },
            6,
        ),
        # String-valued "x" stored as a regular 2D array of dtype object.
        (
            {
                "z": np.array([[7, 8, 9], [10, 11, 12]]),
                "x": np.array([["a", "b", "c"], ["a", "b", "c"]], dtype=np.object_),
                "y": np.array([[5, 5, 5], [6, 6, 6]]),
            },
            6,
        ),
        # Rows of different lengths (1 + 2 elements) held in object arrays.
        (
            {
                "z": np.array([[7], [8, 9]], dtype=np.object_),
                "x": np.array([["a"], ["a", "b"]], dtype=np.object_),
                "y": np.array([[5], [6, 6]], dtype=np.object_),
            },
            3,
        ),
    )

    for indexes, expected_length in cases:
        pdi = _generate_pandas_index(indexes)
        assert isinstance(pdi, pd.MultiIndex)
        assert len(pdi) == expected_length

0 comments on commit fce620c

Please sign in to comment.