Skip to content

Commit

Permalink
BUG: Make .iloc and .loc indexing consistent on empty dataframes
Browse files Browse the repository at this point in the history
Tests

Fix

Test reorder

Doc update

Tests fix

Tests fix

SQL tests fix

Testing update

Fixes

Testing fix

Test fix
  • Loading branch information
Artemy Kolchinsky committed Apr 29, 2015
1 parent 76571d0 commit 69fb279
Show file tree
Hide file tree
Showing 10 changed files with 64 additions and 21 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.16.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ Bug Fixes

- Bug where ``GroupBy.size`` doesn't attach the index name properly if grouped by ``TimeGrouper`` (:issue:`9925`)


- Bug where ``.iloc`` and ``.loc`` behavior is not consistent on empty DataFrames (:issue:`9964`)



Expand Down
8 changes: 5 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1741,17 +1741,19 @@ def _ixs(self, i, axis=0):
lab_slice = slice(label[0], label[-1])
return self.ix[:, lab_slice]
else:
label = self.columns[i]
if isinstance(label, Index):
return self.take(i, axis=1, convert=True)

index_len = len(self.index)

# if the values returned are not the same length
# as the index (i.e. the value was not found), iget returns
# a 0-len ndarray. This check effectively catches
# a numpy error (as numpy should really raise)
values = self._data.iget(i)
if not len(values):
values = np.array([np.nan] * len(self.index), dtype=object)

if index_len and not len(values):
values = np.array([np.nan] * index_len, dtype=object)
result = self._constructor_sliced.from_array(
values, index=self.index,
name=label, fastpath=True)
Expand Down
6 changes: 4 additions & 2 deletions pandas/io/tests/test_json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,14 @@ def test_frame_to_json_except(self):
def test_frame_empty(self):
df = DataFrame(columns=['jim', 'joe'])
self.assertFalse(df._is_mixed_type)
assert_frame_equal(read_json(df.to_json()), df)
assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df)

def test_frame_empty_mixedtype(self):
# mixed type
df = DataFrame(columns=['jim', 'joe'])
df['joe'] = df['joe'].astype('i8')
self.assertTrue(df._is_mixed_type)
assert_frame_equal(read_json(df.to_json()), df)
assert_frame_equal(read_json(df.to_json(), dtype=dict(df.dtypes)), df)

def test_v12_compat(self):
df = DataFrame(
Expand Down
10 changes: 7 additions & 3 deletions pandas/io/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,10 +1256,14 @@ def test_transactions(self):
self._transaction_test()

def test_get_schema_create_table(self):
self._load_test2_data()
# Use a dataframe without a bool column, since MySQL converts bool to
# TINYINT (which read_sql_table returns as an int and causes a dtype
# mismatch)

self._load_test3_data()
tbl = 'test_get_schema_create_table'
create_sql = sql.get_schema(self.test_frame2, tbl, con=self.conn)
blank_test_df = self.test_frame2.iloc[:0]
create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn)
blank_test_df = self.test_frame3.iloc[:0]

self.drop_table(tbl)
self.conn.execute(create_sql)
Expand Down
3 changes: 2 additions & 1 deletion pandas/stats/tests/test_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def _non_null_values(x):
if mock_mean:
# check that mean equals mock_mean
expected = mock_mean(x)
assert_equal(mean_x, expected)
assert_equal(mean_x, expected.astype('float64'))

# check that correlation of a series with itself is either 1 or NaN
corr_x_x = corr(x, x)
Expand Down Expand Up @@ -1549,6 +1549,7 @@ def test_moment_functions_zero_length(self):
df1_expected = df1
df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns)
df2 = DataFrame(columns=['a'])
df2['a'] = df2['a'].astype('float64')
df2_expected = df2
df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns)

Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11808,12 +11808,10 @@ def test_mode(self):
"E": [8, 8, 1, 1, 3, 3]})
assert_frame_equal(df[["A"]].mode(),
pd.DataFrame({"A": [12]}))
assert_frame_equal(df[["D"]].mode(),
pd.DataFrame(pd.Series([], dtype="int64"),
columns=["D"]))
assert_frame_equal(df[["E"]].mode(),
pd.DataFrame(pd.Series([1, 3, 8], dtype="int64"),
columns=["E"]))
expected = pd.Series([], dtype='int64', name='D').to_frame()
assert_frame_equal(df[["D"]].mode(), expected)
expected = pd.Series([1, 3, 8], dtype='int64', name='E').to_frame()
assert_frame_equal(df[["E"]].mode(), expected)
assert_frame_equal(df[["A", "B"]].mode(),
pd.DataFrame({"A": [12], "B": [10.]}))
assert_frame_equal(df.mode(),
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1720,6 +1720,8 @@ def test_groupby_head_tail(self):
assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1))

empty_not_as = DataFrame(columns=df.columns)
empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype)
empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype)
assert_frame_equal(empty_not_as, g_not_as.head(0))
assert_frame_equal(empty_not_as, g_not_as.tail(0))
assert_frame_equal(empty_not_as, g_not_as.head(-1))
Expand All @@ -1735,6 +1737,8 @@ def test_groupby_head_tail(self):
assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1))

empty_as = DataFrame(index=df_as.index[:0], columns=df.columns)
empty_as['A'] = empty_not_as['A'].astype(df.A.dtype)
empty_as['B'] = empty_not_as['B'].astype(df.B.dtype)
assert_frame_equal(empty_as, g_as.head(0))
assert_frame_equal(empty_as, g_as.tail(0))
assert_frame_equal(empty_as, g_as.head(-1))
Expand Down
33 changes: 28 additions & 5 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ def test_loc_setitem_consistency(self):

# empty (essentially noops)
expected = DataFrame(columns=['x', 'y'])
expected['x'] = expected['x'].astype(np.int64)
df = DataFrame(columns=['x', 'y'])
df.loc[:, 'x'] = 1
assert_frame_equal(df,expected)
Expand Down Expand Up @@ -3369,7 +3370,7 @@ def f():
expected = DataFrame(columns=['foo'])
def f():
df = DataFrame()
df['foo'] = Series([])
df['foo'] = Series([], dtype='object')
return df
assert_frame_equal(f(), expected)
def f():
Expand All @@ -3379,17 +3380,20 @@ def f():
assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df['foo'] = Series(range(len(df)))
df['foo'] = df.index
return df
assert_frame_equal(f(), expected)

expected = DataFrame(columns=['foo'])
expected['foo'] = expected['foo'].astype('float64')
def f():
df = DataFrame()
df['foo'] = []
return df
assert_frame_equal(f(), expected)
def f():
df = DataFrame()
df['foo'] = df.index
df['foo'] = Series(range(len(df)))
return df
assert_frame_equal(f(), expected)
def f():
Expand Down Expand Up @@ -3422,21 +3426,31 @@ def f():

# GH5720, GH5744
# don't create rows when empty
expected = DataFrame(columns=['A','B','New'])
expected['A'] = expected['A'].astype('int64')
expected['B'] = expected['B'].astype('float64')
expected['New'] = expected['New'].astype('float64')
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
y['New'] = np.nan
assert_frame_equal(y,DataFrame(columns=['A','B','New']))
assert_frame_equal(y,expected)
#assert_frame_equal(y,expected)

expected = DataFrame(columns=['a','b','c c','d'])
expected['d'] = expected['d'].astype('int64')
df = DataFrame(columns=['a', 'b', 'c c'])
df['d'] = 3
assert_frame_equal(df,DataFrame(columns=['a','b','c c','d']))
assert_frame_equal(df,expected)
assert_series_equal(df['c c'],Series(name='c c',dtype=object))

# reindex columns is ok
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
y = df[df.A > 5]
result = y.reindex(columns=['A','B','C'])
expected = DataFrame(columns=['A','B','C'])
expected['A'] = expected['A'].astype('int64')
expected['B'] = expected['B'].astype('float64')
expected['C'] = expected['C'].astype('float64')
assert_frame_equal(result,expected)

# GH 5756
Expand Down Expand Up @@ -4422,6 +4436,15 @@ def test_indexing_assignment_dict_already_exists(self):
expected.loc[5] = [9, 99]
tm.assert_frame_equal(df, expected)

def test_indexing_dtypes_on_empty(self):
# Check that .loc, .iloc and .ix return the same, correct dtypes on an empty frame GH9983
df = DataFrame({'a':[1,2,3],'b':['b','b2','b3']})
df2 = df.ix[[],:]

self.assertEqual(df2.loc[:,'a'].dtype, int)
assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0])
assert_series_equal(df2.loc[:,'a'], df2.ix[:,0])



class TestCategoricalIndex(tm.TestCase):
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,14 @@ def test_multiindex_dtype(self):
{'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b'])
self._assert_not_equal(df1, df2, check_index_type=True)

def test_empty_dtypes(self):
df1=pd.DataFrame(columns=["col1","col2"])
df1["col1"] = df1["col1"].astype('int64')
df2=pd.DataFrame(columns=["col1","col2"])
self._assert_equal(df1, df2, check_dtype=False)
self._assert_not_equal(df1, df2, check_dtype=True)


class TestRNGContext(unittest.TestCase):

def test_RNGContext(self):
Expand Down
1 change: 1 addition & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -2118,6 +2118,7 @@ def test_range_slice_outofbounds(self):
for idx in [didx, pidx]:
df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx)
empty = DataFrame(index=idx.__class__([], freq='D'), columns=['units'])
empty['units'] = empty['units'].astype('int64')

tm.assert_frame_equal(df['2013/09/01':'2013/09/30'], empty)
tm.assert_frame_equal(df['2013/09/30':'2013/10/02'], df.iloc[:2])
Expand Down

0 comments on commit 69fb279

Please sign in to comment.