Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: remove internal usage of integer_array() #38289

Merged
merged 3 commits into from
Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pandas.core.arrays.categorical import Categorical
from pandas.core.arrays.datetimes import DatetimeArray
from pandas.core.arrays.floating import FloatingArray
from pandas.core.arrays.integer import IntegerArray, integer_array
from pandas.core.arrays.integer import IntegerArray
from pandas.core.arrays.interval import IntervalArray
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
Expand All @@ -26,7 +26,6 @@
"DatetimeArray",
"FloatingArray",
"IntegerArray",
"integer_array",
"IntervalArray",
"PandasArray",
"PandasDtype",
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/integer/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas.core.arrays import integer_array
import pandas as pd
from pandas.core.arrays.integer import (
Int8Dtype,
Int16Dtype,
Expand Down Expand Up @@ -32,15 +32,15 @@ def dtype(request):

@pytest.fixture
def data(dtype):
return integer_array(
return pd.array(
list(range(8)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100],
dtype=dtype,
)


@pytest.fixture
def data_missing(dtype):
return integer_array([np.nan, 1], dtype=dtype)
return pd.array([np.nan, 1], dtype=dtype)


@pytest.fixture(params=["data", "data_missing"])
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray, integer_array
from pandas.core.arrays import FloatingArray
import pandas.core.ops as ops

# Basic test for the arithmetic array ops
Expand Down Expand Up @@ -131,10 +131,10 @@ def test_pow_scalar():


def test_pow_array():
a = integer_array([0, 0, 0, 1, 1, 1, None, None, None])
b = integer_array([0, 1, None, 0, 1, None, 0, 1, None])
a = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
b = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
result = a ** b
expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None])
expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
tm.assert_extension_array_equal(result, expected)


Expand All @@ -149,7 +149,7 @@ def test_rpow_one_to_na():

@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
arr = integer_array([1, None, 2])
arr = pd.array([1, None, 2])
result = arr + np.array(other)
expected = arr + other
tm.assert_equal(result, expected)
Expand Down Expand Up @@ -265,7 +265,7 @@ def test_reduce_to_float(op):
{
"A": ["a", "b", "b"],
"B": [1, None, 3],
"C": integer_array([1, None, 3], dtype="Int64"),
"C": pd.array([1, None, 3], dtype="Int64"),
}
)

Expand All @@ -277,7 +277,7 @@ def test_reduce_to_float(op):
result = getattr(df.groupby("A"), op)()

expected = pd.DataFrame(
{"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")},
{"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
index=pd.Index(["a", "b"], name="A"),
)
tm.assert_frame_equal(result, expected)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/integer/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import pandas as pd
import pandas._testing as tm
from pandas.api.types import is_integer
from pandas.core.arrays import IntegerArray, integer_array
from pandas.core.arrays.integer import Int8Dtype, Int32Dtype, Int64Dtype
from pandas.core.arrays import IntegerArray
from pandas.core.arrays.integer import Int8Dtype, Int32Dtype, Int64Dtype, integer_array
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be removed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be removed?

see the top post: this is the only file where I kept integer_array for now (it's used a lot, and the file specifically tests the construction behaviour it implements). I just changed the import, because I removed it from pandas.core.arrays.
Now, as said above, I can remove it from this file as well (either here, or later in another PR).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would completely remove it in this PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I removed any remaining usage of integer_array in this file, so could also remove the actual function.

I replaced it with a mixture of pd.array and IntegerArray._from_sequence (and sometimes a parametrized fixture to test both), since they are not exactly equivalent. For example, with _from_sequence you can also pass a float array without specifying a dtype, something you can't replicate by only testing pd.array(...).



def test_uses_pandas_na():
Expand Down
13 changes: 6 additions & 7 deletions pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array
from pandas.core.arrays.integer import Int8Dtype, UInt32Dtype


Expand All @@ -28,7 +27,7 @@ def test_preserve_dtypes(op):
{
"A": ["a", "b", "b"],
"B": [1, None, 3],
"C": integer_array([1, None, 3], dtype="Int64"),
"C": pd.array([1, None, 3], dtype="Int64"),
}
)

Expand All @@ -43,15 +42,15 @@ def test_preserve_dtypes(op):
result = getattr(df.groupby("A"), op)()

expected = pd.DataFrame(
{"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")},
{"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Int64")},
index=pd.Index(["a", "b"], name="A"),
)
tm.assert_frame_equal(result, expected)


def test_astype_nansafe():
# see gh-22343
arr = integer_array([np.nan, 1, 2], dtype="Int8")
arr = pd.array([np.nan, 1, 2], dtype="Int8")
msg = "cannot convert to 'uint32'-dtype NumPy array with missing values."

with pytest.raises(ValueError, match=msg):
Expand All @@ -69,7 +68,7 @@ def test_construct_index(all_data, dropna):
else:
other = all_data

result = pd.Index(integer_array(other, dtype=all_data.dtype))
result = pd.Index(pd.array(other, dtype=all_data.dtype))
expected = pd.Index(other, dtype=object)

tm.assert_index_equal(result, expected)
Expand Down Expand Up @@ -229,14 +228,14 @@ def test_construct_cast_invalid(dtype):
msg = "cannot safely"
arr = [1.2, 2.3, 3.7]
with pytest.raises(TypeError, match=msg):
integer_array(arr, dtype=dtype)
pd.array(arr, dtype=dtype)

with pytest.raises(TypeError, match=msg):
pd.Series(arr).astype(dtype)

arr = [1.2, 2.3, 3.7, np.nan]
with pytest.raises(TypeError, match=msg):
integer_array(arr, dtype=dtype)
pd.array(arr, dtype=dtype)

with pytest.raises(TypeError, match=msg):
pd.Series(arr).astype(dtype)
Expand Down
27 changes: 13 additions & 14 deletions pandas/tests/arrays/integer/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,26 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array


@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign")
def test_ufuncs_single_int(ufunc):
a = integer_array([1, 2, -3, np.nan])
a = pd.array([1, 2, -3, np.nan])
result = ufunc(a)
expected = integer_array(ufunc(a.astype(float)))
expected = pd.array(ufunc(a.astype(float)), dtype="Int64")
tm.assert_extension_array_equal(result, expected)

s = pd.Series(a)
result = ufunc(s)
expected = pd.Series(integer_array(ufunc(a.astype(float))))
expected = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64"))
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
a = integer_array([1, 2, -3, np.nan])
a = pd.array([1, 2, -3, np.nan])
with np.errstate(invalid="ignore"):
result = ufunc(a)
expected = ufunc(a.astype(float))
Expand All @@ -39,33 +38,33 @@ def test_ufuncs_single_float(ufunc):
@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
# two IntegerArrays
a = integer_array([1, 2, -3, np.nan])
a = pd.array([1, 2, -3, np.nan])
result = ufunc(a, a)
expected = integer_array(ufunc(a.astype(float), a.astype(float)))
expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64")
tm.assert_extension_array_equal(result, expected)

# IntegerArray with numpy array
arr = np.array([1, 2, 3, 4])
result = ufunc(a, arr)
expected = integer_array(ufunc(a.astype(float), arr))
expected = pd.array(ufunc(a.astype(float), arr), dtype="Int64")
tm.assert_extension_array_equal(result, expected)

result = ufunc(arr, a)
expected = integer_array(ufunc(arr, a.astype(float)))
expected = pd.array(ufunc(arr, a.astype(float)), dtype="Int64")
tm.assert_extension_array_equal(result, expected)

# IntegerArray with scalar
result = ufunc(a, 1)
expected = integer_array(ufunc(a.astype(float), 1))
expected = pd.array(ufunc(a.astype(float), 1), dtype="Int64")
tm.assert_extension_array_equal(result, expected)

result = ufunc(1, a)
expected = integer_array(ufunc(1, a.astype(float)))
expected = pd.array(ufunc(1, a.astype(float)), dtype="Int64")
tm.assert_extension_array_equal(result, expected)


def test_ufunc_binary_output():
a = integer_array([1, 2, np.nan])
a = pd.array([1, 2, np.nan])
result = np.modf(a)
expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))

Expand All @@ -74,13 +73,13 @@ def test_ufunc_binary_output():

for x, y in zip(result, expected):
# TODO(FloatArray): This will return an extension array.
# y = integer_array(y)
# y = pd.array(y)
tm.assert_numpy_array_equal(x, y)


@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
a = integer_array(values)
a = pd.array(values)
msg = r"The 'reduce' method is not supported."
with pytest.raises(NotImplementedError, match=msg):
np.add.reduce(a)
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/arrays/integer/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest

import pandas as pd
from pandas.core.arrays import integer_array
from pandas.core.arrays.integer import (
Int8Dtype,
Int16Dtype,
Expand Down Expand Up @@ -43,13 +42,13 @@ def test_repr_dtype(dtype, expected):


def test_repr_array():
result = repr(integer_array([1, None, 3]))
result = repr(pd.array([1, None, 3]))
expected = "<IntegerArray>\n[1, <NA>, 3]\nLength: 3, dtype: Int64"
assert result == expected


def test_repr_array_long():
data = integer_array([1, 2, None] * 1000)
data = pd.array([1, 2, None] * 1000)
expected = (
"<IntegerArray>\n"
"[ 1, 2, <NA>, 1, 2, <NA>, 1, 2, <NA>, 1,\n"
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
StringArray,
TimedeltaArray,
)
from pandas.core.arrays import PandasArray, integer_array, period_array
from pandas.core.arrays import PandasArray, period_array
from pandas.tests.extension.decimal import DecimalArray, DecimalDtype, to_decimal


Expand Down Expand Up @@ -122,7 +122,7 @@
# Sparse
([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
# IntegerNA
([1, None], "Int16", integer_array([1, None], dtype="Int16")),
([1, None], "Int16", pd.array([1, None], dtype="Int16")),
(pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
# String
(["a", None], "string", StringArray._from_sequence(["a", None])),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def test_numpy_array_all_dtypes(any_numpy_dtype):
[
(pd.Categorical(["a", "b"]), "_codes"),
(pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"),
(pd.core.arrays.integer_array([0, np.nan]), "_data"),
(pd.array([0, np.nan], dtype="Int64"), "_data"),
(IntervalArray.from_breaks([0, 1]), "_left"),
(SparseArray([0, 1]), "_sparse_values"),
(DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"),
Expand Down Expand Up @@ -285,7 +285,7 @@ def test_array_multiindex_raises():
pd.core.arrays.period_array(["2000", "2001"], freq="D"),
np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]),
),
(pd.core.arrays.integer_array([0, np.nan]), np.array([0, pd.NA], dtype=object)),
(pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)),
(
IntervalArray.from_breaks([0, 1, 2]),
np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object),
Expand Down
13 changes: 6 additions & 7 deletions pandas/tests/extension/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array
from pandas.core.arrays.integer import (
Int8Dtype,
Int16Dtype,
Expand Down Expand Up @@ -56,27 +55,27 @@ def dtype(request):

@pytest.fixture
def data(dtype):
return integer_array(make_data(), dtype=dtype)
return pd.array(make_data(), dtype=dtype)


@pytest.fixture
def data_for_twos(dtype):
return integer_array(np.ones(100) * 2, dtype=dtype)
return pd.array(np.ones(100) * 2, dtype=dtype)


@pytest.fixture
def data_missing(dtype):
return integer_array([pd.NA, 1], dtype=dtype)
return pd.array([pd.NA, 1], dtype=dtype)


@pytest.fixture
def data_for_sorting(dtype):
return integer_array([1, 2, 0], dtype=dtype)
return pd.array([1, 2, 0], dtype=dtype)


@pytest.fixture
def data_missing_for_sorting(dtype):
return integer_array([1, pd.NA, 0], dtype=dtype)
return pd.array([1, pd.NA, 0], dtype=dtype)


@pytest.fixture
Expand All @@ -96,7 +95,7 @@ def data_for_grouping(dtype):
a = 0
c = 2
na = pd.NA
return integer_array([b, b, na, na, a, a, b, c], dtype=dtype)
return pd.array([b, b, na, na, a, a, b, c], dtype=dtype)


class TestDtype(base.BaseDtypeTests):
Expand Down
Loading