Skip to content

Commit

Permalink
Closes #3326 DataFrame.assign (#3327)
Browse files Browse the repository at this point in the history
Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
  • Loading branch information
ajpotts and ajpotts authored Jul 1, 2024
1 parent 22aacdd commit c13f387
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 0 deletions.
27 changes: 27 additions & 0 deletions PROTO_tests/tests/dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,33 @@ def get_tail_values(col):
df.to_pandas(retain_index=True).groupby("a").tail(n=2),
)

def test_assign(self):
    """Compare ``DataFrame.assign`` against pandas for three ways of supplying columns."""
    ak_df = ak.DataFrame(
        {"temp_c": ak.array([17.0, 25.0])},
        index=ak.array(["Portland", "Berkeley"]),
    )
    pd_df = ak_df.to_pandas()

    # Case 1: a callable that reads the source column via attribute access.
    def fahrenheit_via_attr(frame):
        return frame.temp_c * 9 / 5 + 32

    actual = ak_df.assign(temp_f=fahrenheit_via_attr).to_pandas()
    expected = pd_df.assign(temp_f=fahrenheit_via_attr)
    assert_frame_equal(actual, expected)

    # Case 2: a value computed up front from each frame's own column.
    actual = ak_df.assign(temp_f=ak_df["temp_c"] * 9 / 5 + 32).to_pandas()
    expected = pd_df.assign(temp_f=pd_df["temp_c"] * 9 / 5 + 32)
    assert_frame_equal(actual, expected)

    # Case 3: two callables where the second reads the column created by
    # the first, so the keyword order (temp_f before temp_k) matters.
    def fahrenheit_via_key(frame):
        return frame["temp_c"] * 9 / 5 + 32

    def kelvin_from_fahrenheit(frame):
        return (frame["temp_f"] + 459.67) * 5 / 9

    chained = {"temp_f": fahrenheit_via_key, "temp_k": kelvin_from_fahrenheit}
    actual = ak_df.assign(**chained).to_pandas()
    expected = pd_df.assign(**chained)
    assert_frame_equal(actual, expected)


def pda_to_str_helper(pda):
    """Return ``ak.array`` of ``"str <value>"`` labels, one per item of ``pda.to_list()``."""
    labels = []
    for item in pda.to_list():
        labels.append(f"str {item}")
    return ak.array(labels)
88 changes: 88 additions & 0 deletions arkouda/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@
]


def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable`` on ``obj`` if it is callable; otherwise pass it through.

    Parameters
    ----------
    maybe_callable : object
        A value that may or may not be callable.
    obj : object
        Passed as the single positional argument when ``maybe_callable``
        is called.
    **kwargs
        Forwarded as keyword arguments when ``maybe_callable`` is called.

    Returns
    -------
    object
        ``maybe_callable(obj, **kwargs)`` when ``maybe_callable`` is callable,
        otherwise ``maybe_callable`` itself, unchanged.
    """
    # Guard clause: non-callables are returned untouched.
    if not callable(maybe_callable):
        return maybe_callable

    return maybe_callable(obj, **kwargs)


def groupby_operators(cls):
for name in GROUPBY_REDUCTION_TYPES:
setattr(cls, name, cls._make_aggop(name))
Expand Down Expand Up @@ -1073,6 +1090,9 @@ def __setitem__(self, key, value):

# Set a single column in the dataframe using an arkouda array
elif isinstance(key, str):
if isinstance(value, Series):
value = value.values

if not isinstance(value, self._COLUMN_CLASSES):
raise ValueError(f"Column must be one of {self._COLUMN_CLASSES}.")
elif self._nrows is not None and self._nrows != value.size:
Expand Down Expand Up @@ -5495,6 +5515,74 @@ def from_return_msg(cls, rep_msg):

return cls(columns, idx)

def assign(self, **kwargs) -> DataFrame:
    r"""
    Return a copy of this DataFrame with columns added or replaced.

    The original columns are kept; each keyword adds a column of that
    name, overwriting an existing column with the same name.

    Parameters
    ----------
    **kwargs : dict of {str: callable or Series}
        Keyword names are the column names. A callable value is invoked
        with the working copy of the DataFrame and its result becomes the
        column. The callable must not modify the DataFrame it receives
        (this is not checked). Any non-callable value (e.g. a Series or
        an array) is assigned as given.

    Returns
    -------
    DataFrame
        A new DataFrame holding the existing columns plus the new ones.

    Notes
    -----
    Several columns can be assigned in one call. Keywords are processed
    in the order given and each result is stored before the next keyword
    is evaluated, so a later callable may read a column created or
    replaced by an earlier keyword.

    Examples
    --------
    >>> df = ak.DataFrame({'temp_c': [17.0, 25.0]},
    ...                   index=['Portland', 'Berkeley'])
    >>> df
              temp_c
    Portland    17.0
    Berkeley    25.0

    Where the value is a callable, evaluated on `df`:

    >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32)
              temp_c  temp_f
    Portland    17.0    62.6
    Berkeley    25.0    77.0

    Alternatively, the same behavior can be achieved by directly
    referencing an existing Series or sequence:

    >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32)
              temp_c  temp_f
    Portland    17.0    62.6
    Berkeley    25.0    77.0

    You can create multiple columns within the same assign where one
    of the columns depends on another one defined within the same assign:

    >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
    ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9)
              temp_c  temp_f  temp_k
    Portland    17.0    62.6  290.15
    Berkeley    25.0    77.0  298.15
    """
    # Work on a copy so that ``self`` is not the object being assigned into.
    result = self.copy(deep=None)

    for column_name, column_value in kwargs.items():
        # The callable receives ``result`` (not ``self``), which is what
        # lets it see columns set earlier in this same loop.
        result[column_name] = apply_if_callable(column_value, result)

    return result


def intx(a, b):
"""
Expand Down

0 comments on commit c13f387

Please sign in to comment.