From c13f38717572597a57dde29f73738790e0dc9c79 Mon Sep 17 00:00:00 2001
From: ajpotts <amanda.j.potts@gmail.com>
Date: Mon, 1 Jul 2024 16:44:20 -0400
Subject: [PATCH] Closes #3326 DataFrame.assign (#3327)

Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
---
 PROTO_tests/tests/dataframe_test.py | 27 +++++++++
 arkouda/dataframe.py                | 88 +++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/PROTO_tests/tests/dataframe_test.py b/PROTO_tests/tests/dataframe_test.py
index 5333213452..2a04a5e62d 100644
--- a/PROTO_tests/tests/dataframe_test.py
+++ b/PROTO_tests/tests/dataframe_test.py
@@ -1381,6 +1381,33 @@ def get_tail_values(col):
             df.to_pandas(retain_index=True).groupby("a").tail(n=2),
         )
 
+    def test_assign(self):
+        ak_df = ak.DataFrame(
+            {"temp_c": ak.array([17.0, 25.0])}, index=ak.array(["Portland", "Berkeley"])
+        )
+        pd_df = ak_df.to_pandas()
+        # Callable value: evaluated against the frame, result must match pandas.
+        assert_frame_equal(
+            ak_df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32).to_pandas(),
+            pd_df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32),
+        )
+        # Non-callable value: a precomputed column expression is assigned directly.
+        assert_frame_equal(
+            ak_df.assign(temp_f=ak_df["temp_c"] * 9 / 5 + 32).to_pandas(),
+            pd_df.assign(temp_f=pd_df["temp_c"] * 9 / 5 + 32),
+        )
+        # Chained keywords: temp_k reads the temp_f column created earlier in the same call.
+        assert_frame_equal(
+            ak_df.assign(
+                temp_f=lambda x: x["temp_c"] * 9 / 5 + 32,
+                temp_k=lambda x: (x["temp_f"] + 459.67) * 5 / 9,
+            ).to_pandas(),
+            pd_df.assign(
+                temp_f=lambda x: x["temp_c"] * 9 / 5 + 32,
+                temp_k=lambda x: (x["temp_f"] + 459.67) * 5 / 9,
+            ),
+        )
+
 
 def pda_to_str_helper(pda):
     return ak.array([f"str {i}" for i in pda.to_list()])
diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py
index 6d5a451165..5539d037b1 100644
--- a/arkouda/dataframe.py
+++ b/arkouda/dataframe.py
@@ -54,6 +54,23 @@
 ]
 
 
+def apply_if_callable(maybe_callable, obj, **kwargs):
+    """
+    Call ``maybe_callable(obj, **kwargs)`` when it is callable; otherwise
+    return ``maybe_callable`` itself, unchanged.
+
+    Parameters
+    ----------
+    maybe_callable : callable or any other object
+    obj : object (the DataFrame being built when called from ``DataFrame.assign``)
+    **kwargs : extra keyword arguments forwarded to the callable
+    """
+    if callable(maybe_callable):
+        return maybe_callable(obj, **kwargs)
+    # Not callable: hand the value back untouched.
+    return maybe_callable
+
+
 def groupby_operators(cls):
     for name in GROUPBY_REDUCTION_TYPES:
         setattr(cls, name, cls._make_aggop(name))
@@ -1073,6 +1090,9 @@ def __setitem__(self, key, value):
 
         # Set a single column in the dataframe using a an arkouda array
         elif isinstance(key, str):
+            if isinstance(value, Series):
+                value = value.values
+
             if not isinstance(value, self._COLUMN_CLASSES):
                 raise ValueError(f"Column must be one of {self._COLUMN_CLASSES}.")
             elif self._nrows is not None and self._nrows != value.size:
@@ -5495,6 +5515,74 @@ def from_return_msg(cls, rep_msg):
 
         return cls(columns, idx)
 
+    def assign(self, **kwargs) -> DataFrame:
+        r"""
+        Assign new columns to a DataFrame.
+
+        Returns a new object with all original columns in addition to new ones.
+        Existing columns that are re-assigned will be overwritten.
+
+        Parameters
+        ----------
+        **kwargs : dict of {str: callable or Series}
+            The column names are keywords. If the values are
+            callable, they are computed on the DataFrame and
+            assigned to the new columns. The callable must not
+            change input DataFrame (this is not checked).
+            If the values are not callable (e.g. a Series or an array),
+            they are simply assigned.
+
+        Returns
+        -------
+        DataFrame
+            A new DataFrame with the new columns in addition to
+            all the existing columns.
+
+        Notes
+        -----
+        Assigning multiple columns within the same ``assign`` is possible.
+        Later items in '\*\*kwargs' may refer to newly created or modified
+        columns in 'df'; items are computed and assigned into 'df' in order.
+
+        Examples
+        --------
+        >>> df = ak.DataFrame({'temp_c': [17.0, 25.0]},
+        ...                   index=['Portland', 'Berkeley'])
+        >>> df
+                  temp_c
+        Portland    17.0
+        Berkeley    25.0
+
+        Where the value is a callable, evaluated on `df`:
+
+        >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32)
+                  temp_c  temp_f
+        Portland    17.0    62.6
+        Berkeley    25.0    77.0
+
+        Alternatively, the same behavior can be achieved by directly
+        referencing an existing Series or sequence:
+
+        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32)
+                  temp_c  temp_f
+        Portland    17.0    62.6
+        Berkeley    25.0    77.0
+
+        You can create multiple columns within the same assign where one
+        of the columns depends on another one defined within the same assign:
+
+        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
+        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9)
+                  temp_c  temp_f  temp_k
+        Portland    17.0    62.6  290.15
+        Berkeley    25.0    77.0  298.15
+        """
+        data = self.copy(deep=None)  # NOTE(review): verify deep=None does not alias self
+        # Apply in keyword order so later entries can see columns set by earlier ones.
+        for k, v in kwargs.items():
+            data[k] = apply_if_callable(v, data)
+        return data
+
 
 def intx(a, b):
     """