From 13924936f10b6108edd795e0752da21734508457 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Tue, 6 Sep 2022 09:18:19 -0500 Subject: [PATCH 01/77] FEAT-#4931: Create a query compiler that can connect to a service Signed-off-by: Devin Petersohn --- modin/config/envvars.py | 22 +- modin/core/execution/client/io.py | 46 ++ modin/core/execution/client/query_compiler.py | 686 ++++++++++++++++++ .../dispatching/factories/factories.py | 10 + modin/pandas/__init__.py | 8 +- modin/utils.py | 2 +- 6 files changed, 769 insertions(+), 5 deletions(-) create mode 100644 modin/core/execution/client/io.py create mode 100644 modin/core/execution/client/query_compiler.py diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 72c74258d94..69752d294d1 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -21,7 +21,7 @@ import secrets from .pubsub import Parameter, _TYPE_PARAMS, ExactStr, ValueSource -from typing import Optional +from typing import Optional, Any class EnvironmentVariable(Parameter, type=str, abstract=True): @@ -75,7 +75,23 @@ class Engine(EnvironmentVariable, type=str): """Distribution engine to run queries by.""" varname = "MODIN_ENGINE" - choices = ("Ray", "Dask", "Python", "Native") + choices = ("Ray", "Dask", "Python", "Native", "Client") + + @classmethod + def put(cls, value: Any) -> None: + """ + Set config value. + + Parameters + ---------- + value : Any + Config value to set. + """ + if cls._value_source == ValueSource.SET_BY_USER: + cls._check_callbacks(cls._put_nocallback(value)) + else: + cls._value = value + cls._value_source = ValueSource.SET_BY_USER @classmethod def _get_default(cls) -> str: @@ -141,7 +157,7 @@ class StorageFormat(EnvironmentVariable, type=str): varname = "MODIN_STORAGE_FORMAT" default = "Pandas" - choices = ("Pandas", "Hdk", "Pyarrow", "Cudf") + choices = ("Pandas", "OmniSci", "Pyarrow", "Cudf", "") class IsExperimental(EnvironmentVariable, type=bool): diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py new file mode 100644 index 00000000000..9e6163a596f --- /dev/null +++ b/modin/core/execution/client/io.py @@ -0,0 +1,46 @@ +from modin.core.io.io import BaseIO +import os +from .query_compiler import ClientQueryCompiler + + +class ClientIO(BaseIO): + _server_conn = None + _data_conn = None + + @classmethod + def set_server_connection(cls, conn): + cls._server_conn = conn + + @classmethod + def set_data_connection(cls, conn): + cls._data_conn = conn + + @classmethod + def read_csv(cls, filepath_or_buffer, **kwargs): + if isinstance(filepath_or_buffer, str): + filepath_or_buffer = os.path.abspath(filepath_or_buffer) + else: + raise NotImplementedError("Only filepaths are supported for read_csv") + if cls._server_conn is None: + raise ConnectionError( + "Missing server connection, did you initialize the connection?" + ) + return ClientQueryCompiler( + cls._server_conn.read_csv(cls._data_conn, filepath_or_buffer, **kwargs) + ) + + @classmethod + def read_sql(cls, sql, con, **kwargs): + if isinstance(con, str) and con.lower() == "auto" and cls._data_conn is None: + raise ConnectionError( + "Cannot connect with parameter 'auto' because connection is not set. Did you initialize it?" + ) + if cls._data_conn is None: + cls._data_conn = con + if cls._server_conn is None: + raise ConnectionError( + "Missing server connection, did you initialize the connection?" 
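+                # ``cls._server_conn`` is populated by ``ClientIO.set_server_connection``;
+                # without it there is no service to forward the query to.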
+ ) + return ClientQueryCompiler( + cls._server_conn.read_sql(sql, cls._data_conn, **kwargs) + ) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py new file mode 100644 index 00000000000..6e35a083746 --- /dev/null +++ b/modin/core/execution/client/query_compiler.py @@ -0,0 +1,686 @@ +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +import numpy as np +import pickle +import inspect +from pandas.api.types import is_list_like +from pandas.core.computation.parsing import tokenize_string + + +class ClientQueryCompiler(BaseQueryCompiler): + @classmethod + def set_server_connection(cls, conn): + cls._service = conn + + @classmethod + def create_table(cls, table_name): + return cls(cls._service.create_query_compiler(table_name)) + + def __init__(self, id): + assert ( + id is not None + ), "Make sure the client is properly connected and returns and ID" + self._id = id + + def _set_columns(self, new_columns): + self._id = self._service.rename(self._id, new_col_labels=new_columns) + + def _get_columns(self): + return self._service.columns(self._id) + + def _set_index(self, new_index): + self._id = self._service.rename(self._id, new_row_labels=new_index) + + def _get_index(self): + return self._service.index(self._id) + + columns = property(_get_columns, _set_columns) + index = property(_get_index, _set_index) + _dtypes_cache = None + + @property + def dtypes(self): + if self._dtypes_cache is None: + ref = self._service.dtypes(self._id) + self._dtypes_cache = pickle.loads(pickle.dumps(ref)) + return self._dtypes_cache + + @classmethod + def from_pandas(cls, df, data_cls): + raise NotImplementedError + + def to_pandas(self): + remote_obj = self._service.to_pandas(self._id) + return pickle.loads(pickle.dumps(remote_obj)) + + def default_to_pandas(self, pandas_op, *args, **kwargs): + raise NotImplementedError + + def columnarize(self): + return self.__constructor__(self._service.columnarize(self._id)) + + def transpose(self): + return self.__constructor__(self._service.transpose(self._id)) + + def copy(self): + return self.__constructor__(self._id) + + def insert(self, loc, column, value): + if isinstance(value, ClientQueryCompiler): + value = value._id + is_qc = True + else: + is_qc = False + return self.__constructor__( + self._service.insert(self._id, loc, column, value, is_qc) + ) + + def setitem(self, axis, key, value): + if isinstance(value, ClientQueryCompiler): + value = value._id + is_qc = True + else: + is_qc = False + return self.__constructor__( + self._service.setitem(self._id, axis, key, value, is_qc) + ) + + def getitem_array(self, key): + if isinstance(key, ClientQueryCompiler): + key = key._id + is_qc = True + else: + is_qc = False + return self.__constructor__(self._service.getitem_array(self._id, key, is_qc)) + + def getitem_column_array(self, key, numeric=False): + return self.__constructor__( + self._service.getitem_column_array(self._id, key, numeric) + ) + + def getitem_row_labels_array(self, labels): + return self.__constructor__( + self._service.getitem_row_labels_array(self._id, labels) + ) + + def getitem_row_array(self, key): + return self.__constructor__(self._service.getitem_row_array(self._id, key)) + + def pivot(self, index, columns, values): + return self.__constructor__( + self._service.pivot(self._id, index, columns, values) + ) + + def get_dummies(self, columns, **kwargs): + return self.__constructor__( + self._service.get_dummies(self._id, columns, **kwargs) + ) + + def view(self, 
index=None, columns=None): + return self.__constructor__(self._service.view(self._id, index, columns)) + + take_2d = view + + def drop(self, index=None, columns=None): + return self.__constructor__(self._service.drop(self._id, index, columns)) + + def isna(self): + return self.__constructor__(self._service.isna(self._id)) + + def notna(self): + return self.__constructor__(self._service.notna(self._id)) + + def fillna( + self, + squeeze_self, + squeeze_value, + value=None, + method=None, + axis=None, + inplace=False, + limit=None, + downcast=None, + ): + if isinstance(value, ClientQueryCompiler): + is_qc = True + else: + is_qc = False + return self.__constructor__( + self._service.fillna( + self._id, + squeeze_self, + squeeze_value, + value, + method, + axis, + inplace, + limit, + downcast, + is_qc, + ) + ) + + def dropna(self, **kwargs): + return self.__constructor__(self._service.dropna(self._id, **kwargs)) + + def sum(self, **kwargs): + return self.__constructor__(self._service.sum(self._id, **kwargs)) + + def prod(self, **kwargs): + return self.__constructor__(self._service.prod(self._id, **kwargs)) + + def count(self, **kwargs): + return self.__constructor__(self._service.count(self._id, **kwargs)) + + def mean(self, **kwargs): + return self.__constructor__(self._service.mean(self._id, **kwargs)) + + def median(self, **kwargs): + return self.__constructor__(self._service.median(self._id, **kwargs)) + + def std(self, **kwargs): + return self.__constructor__(self._service.std(self._id, **kwargs)) + + def min(self, **kwargs): + return self.__constructor__(self._service.min(self._id, **kwargs)) + + def max(self, **kwargs): + return self.__constructor__(self._service.max(self._id, **kwargs)) + + def any(self, **kwargs): + return self.__constructor__(self._service.any(self._id, **kwargs)) + + def all(self, **kwargs): + return self.__constructor__(self._service.all(self._id, **kwargs)) + + def quantile_for_single_value(self, **kwargs): + return self.__constructor__( + self._service.quantile_for_single_value(self._id, **kwargs) + ) + + def quantile_for_list_of_values(self, **kwargs): + return self.__constructor__( + self._service.quantile_for_list_of_values(self._id, **kwargs) + ) + + def describe(self, **kwargs): + return self.__constructor__(self._service.describe(self._id, **kwargs)) + + def set_index_from_columns(self, keys, drop: bool = True, append: bool = False): + return self.__constructor__( + self._service.set_index_from_columns(self._id, keys, drop, append) + ) + + def reset_index(self, **kwargs): + return self.__constructor__(self._service.reset_index(self._id, **kwargs)) + + def concat(self, axis, other, **kwargs): + if is_list_like(other): + other = [o._id for o in other] + else: + other = [other._id] + return self.__constructor__( + self._service.concat(self._id, axis, other, **kwargs) + ) + + def eq(self, other, **kwargs): + if isinstance(other, ClientQueryCompiler): + other = other._id + is_qc = True + else: + is_qc = False + return self.__constructor__(self._service.eq(self._id, other, is_qc, **kwargs)) + + def lt(self, other, **kwargs): + if isinstance(other, ClientQueryCompiler): + other = other._id + is_qc = True + else: + is_qc = False + return self.__constructor__(self._service.lt(self._id, other, is_qc, **kwargs)) + + def le(self, other, **kwargs): + if isinstance(other, ClientQueryCompiler): + other = other._id + is_qc = True + else: + is_qc = False + return self.__constructor__(self._service.le(self._id, other, is_qc, **kwargs)) + + def gt(self, other, **kwargs): + if 
isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.gt(self._id, other, is_qc, **kwargs))
+
+    def ge(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.ge(self._id, other, is_qc, **kwargs))
+
+    def ne(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.ne(self._id, other, is_qc, **kwargs))
+
+    def __and__(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.__and__(self._id, other, is_qc, **kwargs)
+        )
+
+    def __or__(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.__or__(self._id, other, is_qc, **kwargs)
+        )
+
+    def add(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.add(self._id, other, is_qc, **kwargs))
+
+    def radd(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.radd(self._id, other, is_qc, **kwargs)
+        )
+
+    def truediv(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.truediv(self._id, other, is_qc, **kwargs)
+        )
+
+    def mod(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.mod(self._id, other, is_qc, **kwargs))
+
+    def rmod(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.rmod(self._id, other, is_qc, **kwargs)
+        )
+
+    def sub(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.sub(self._id, other, is_qc, **kwargs)
+        )
+
+    def rsub(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.rsub(self._id, other, is_qc, **kwargs)
+        )
+
+    def mul(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(self._service.mul(self._id, other, is_qc, **kwargs))
+
+    def rmul(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.rmul(self._id, other, is_qc, **kwargs)
+        )
+
+    def floordiv(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return self.__constructor__(
+            self._service.floordiv(self._id, other, is_qc, **kwargs)
+        )
+
+    def rfloordiv(self, other, **kwargs):
+        if isinstance(other, ClientQueryCompiler):
+            other = other._id
+            is_qc = True
+        else:
+            is_qc = False
+        return
self.__constructor__( + self._service.rfloordiv(self._id, other, is_qc, **kwargs) + ) + + def sort_rows_by_column_values(self, columns, ascending=True, **kwargs): + return self.__constructor__( + self._service.sort_rows_by_column_values( + self._id, columns, ascending=ascending, **kwargs + ) + ) + + def sort_index(self, **kwargs): + return self.__constructor__(self._service.sort_index(self._id, **kwargs)) + + def str_capitalize(self): + return self.__constructor__(self._service.str_capitalize(self._id)) + + def str_isalnum(self): + return self.__constructor__(self._service.str_isalnum(self._id)) + + def str_isalpha(self): + return self.__constructor__(self._service.str_isalpha(self._id)) + + def str_isdecimal(self): + return self.__constructor__(self._service.str_isdecimal(self._id)) + + def str_isdigit(self): + return self.__constructor__(self._service.str_isdigit(self._id)) + + def str_islower(self): + return self.__constructor__(self._service.str_islower(self._id)) + + def str_isnumeric(self): + return self.__constructor__(self._service.str_isnumeric(self._id)) + + def str_isspace(self): + return self.__constructor__(self._service.str_isspace(self._id)) + + def str_istitle(self): + return self.__constructor__(self._service.str_istitle(self._id)) + + def str_isupper(self): + return self.__constructor__(self._service.str_isupper(self._id)) + + def str_len(self): + return self.__constructor__(self._service.str_len(self._id)) + + def str_lower(self): + return self.__constructor__(self._service.str_lower(self._id)) + + def str_title(self): + return self.__constructor__(self._service.str_title(self._id)) + + def str_upper(self): + return self.__constructor__(self._service.str_upper(self._id)) + + def str_center(self, width, fillchar=" "): + return self.__constructor__(self._service.str_center(self._id, width, fillchar)) + + def str_contains(self, pat, case=True, flags=0, na=np.nan, regex=True): + return self.__constructor__( + self._service.str_contains(self._id, pat, case, flags, na, regex) + ) + + def str_count(self, pat, flags=0, **kwargs): + return self.__constructor__( + self._service.str_count(self._id, pat, flags, **kwargs) + ) + + def str_endswith(self, pat, na=np.nan): + return self.__constructor__(self._service.str_endswith(self._id, pat, na)) + + def str_find(self, sub, start=0, end=None): + return self.__constructor__(self._service.str_find(self._id, sub, start, end)) + + def str_findall(self, pat, flags=0, **kwargs): + return self.__constructor__( + self._service.str_findall(self._id, pat, flags, **kwargs) + ) + + def str_get(self, i): + return self.__constructor__(self._service.str_get(self._id, i)) + + str_index = str_find + + def str_join(self, sep): + return self.__constructor__(self._service.str_join(self._id, sep)) + + def str_lstrip(self, to_strip=None): + return self.__constructor__(self._service.str_lstrip(self._id, to_strip)) + + def str_ljust(self, width, fillchar=" "): + return self.__constructor__(self._service.str_ljust(self._id, width, fillchar)) + + def str_match(self, pat, case=True, flags=0, na=np.nan): + return self.__constructor__( + self._service.str_match(self._id, pat, case, flags, na) + ) + + def str_pad(self, width, side="left", fillchar=" "): + return self.__constructor__( + self._service.str_pad(self._id, width, side, fillchar) + ) + + def str_repeat(self, repeats): + return self.__constructor__(self._service.str_repeat(self._id, repeats)) + + def str_rsplit(self, pat=None, n=-1, expand=False): + return 
self.__constructor__(self._service.str_rsplit(self._id, pat, n, expand)) + + def str_rstrip(self, to_strip=None): + return self.__constructor__(self._service.str_rstrip(self._id, to_strip)) + + def str_slice(self, start=None, stop=None, step=None): + return self.__constructor__( + self._service.str_slice(self._id, start, stop, step) + ) + + def str_slice_replace(self, start=None, stop=None, repl=None): + return self.__constructor__( + self._service.str_slice_replace(self._id, start, stop, repl) + ) + + def str_startswith(self, pat, na=np.nan): + return self.__constructor__(self._service.str_startswith(self._id, pat, na)) + + def str_strip(self, to_strip=None): + return self.__constructor__(self._service.str_strip(self._id, to_strip)) + + def str_zfill(self, width): + return self.__constructor__(self._service.str_zfill(self._id, width)) + + def merge(self, right, **kwargs): + return self.__constructor__(self._service.merge(self._id, right._id, **kwargs)) + + def groupby_mean( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.__constructor__( + self._service.groupby_mean( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def groupby_count( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.__constructor__( + self._service.groupby_count( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def groupby_max( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.__constructor__( + self._service.groupby_max( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def groupby_min( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.__constructor__( + self._service.groupby_min( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def groupby_sum( + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + ): + return self.__constructor__( + self._service.groupby_sum( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def cummax(self, fold_axis, skipna, *args, **kwargs): + return self.__constructor__( + self._service.cummax(self._id, fold_axis, skipna, *args, **kwargs) + ) + + def cummin(self, fold_axis, skipna, *args, **kwargs): + return self.__constructor__( + self._service.cummin(self._id, fold_axis, skipna, *args, **kwargs) + ) + + def cumsum(self, fold_axis, skipna, *args, **kwargs): + return self.__constructor__( + self._service.cumsum(self._id, fold_axis, skipna, *args, **kwargs) + ) + + def cumprod(self, fold_axis, skipna, *args, **kwargs): + return self.__constructor__( + self._service.cumprod(self._id, fold_axis, skipna, *args, **kwargs) + ) + + def get_index_names(self, axis=0): + if axis == 0: + return self.index.names + else: + return self.columns.names + + def is_monotonic_increasing(self): + return self.__constructor__(self._service.is_monotonic_increasing(self._id)) + + def is_monotonic_decreasing(self): + return self.__constructor__(self._service.is_monotonic_decreasing(self._id)) + + def idxmin(self, **kwargs): + return self.__constructor__(self._service.idxmin(self._id, **kwargs)) + + def idxmax(self, **kwargs): + return self.__constructor__(self._service.idxmax(self._id, **kwargs)) + + def query(self, expr, **kwargs): + is_variable = False + variable_list = [] + for k, v in tokenize_string(expr): + if v == "" or v == " ": + continue 
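+            # A token that follows "@" names a variable in some calling frame;
+            # walk up the stack and inline its value so that the service
+            # receives a fully self-contained expression string.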
+ if is_variable: + frame = inspect.currentframe() + identified = False + while frame: + if v in frame.f_locals: + value = frame.f_locals[v] + if isinstance(value, list): + value = tuple(value) + variable_list.append(str(value)) + identified = True + break + frame = frame.f_back + if not identified: + # TODO this error does not quite match pandas + raise ValueError(f"{v} not found") + is_variable = False + elif v == "@": + is_variable = True + continue + else: + variable_list.append(v) + expr = " ".join(variable_list) + return self.__constructor__(self._service.query(self._id, expr, **kwargs)) + + def finalize(self): + raise NotImplementedError + + def free(self): + raise NotImplementedError + + @classmethod + def from_arrow(cls, at, data_cls): + raise NotImplementedError + + @classmethod + def from_dataframe(cls, df, data_cls): + raise NotImplementedError + + def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): + raise NotImplementedError diff --git a/modin/core/execution/dispatching/factories/factories.py b/modin/core/execution/dispatching/factories/factories.py index 686832fead0..8c2aa2c5798 100644 --- a/modin/core/execution/dispatching/factories/factories.py +++ b/modin/core/execution/dispatching/factories/factories.py @@ -469,6 +469,16 @@ def prepare(cls): cls.io_cls = PandasOnDaskIO +@doc(_doc_factory_class, execution_name="Client") +class ClientFactory(BaseFactory): + @classmethod + @doc(_doc_factory_prepare_method, io_module_name="`Client`") + def prepare(cls): + from modin.core.execution.client.io import ClientIO + + cls.io_cls = ClientIO + + @doc(_doc_abstract_factory_class, role="experimental") class ExperimentalBaseFactory(BaseFactory): @classmethod diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py index 1305ff828ab..c4f8b4dccbf 100644 --- a/modin/pandas/__init__.py +++ b/modin/pandas/__init__.py @@ -104,11 +104,12 @@ _is_first_update = {} _NOINIT_ENGINES = { "Python", + "Client", } # engines that don't require initialization, useful for unit tests def _update_engine(publisher: Parameter): - from modin.config import StorageFormat, CpuCount + from modin.config import StorageFormat, CpuCount, Engine from modin.config.envvars import IsExperimental from modin.config.pubsub import ValueSource @@ -129,6 +130,11 @@ def _update_engine(publisher: Parameter): else: is_hdk = False + if Engine.get() == "Client": + if publisher.get_value_source() == ValueSource.DEFAULT: + StorageFormat.put("") + return + if is_hdk and publisher.get_value_source() == ValueSource.DEFAULT: publisher.put("Native") IsExperimental.put(True) diff --git a/modin/utils.py b/modin/utils.py index 7f039172cf9..1e6acf72f51 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -593,7 +593,7 @@ def get_current_execution() -> str: str Returns On-like string. 
""" - return f"{'Experimental' if IsExperimental.get() else ''}{StorageFormat.get()}On{Engine.get()}" + return f"{'Experimental' if IsExperimental.get() else ''}{StorageFormat.get()}{'On' if StorageFormat.get() != '' else ''}{Engine.get()}" def instancer(_class: Callable[[], T]) -> T: From 3797403fcced3450505eacff39d7c8ee742c2fc4 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Tue, 6 Sep 2022 14:52:07 -0500 Subject: [PATCH 02/77] Fixes to pass CI + docs for io.py --- modin/config/envvars.py | 18 +---- modin/core/execution/client/io.py | 69 +++++++++++++++++++ modin/core/execution/client/query_compiler.py | 13 ++++ 3 files changed, 83 insertions(+), 17 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 69752d294d1..b61925a825b 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -21,7 +21,7 @@ import secrets from .pubsub import Parameter, _TYPE_PARAMS, ExactStr, ValueSource -from typing import Optional, Any +from typing import Optional class EnvironmentVariable(Parameter, type=str, abstract=True): @@ -77,22 +77,6 @@ class Engine(EnvironmentVariable, type=str): varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python", "Native", "Client") - @classmethod - def put(cls, value: Any) -> None: - """ - Set config value. - - Parameters - ---------- - value : Any - Config value to set. - """ - if cls._value_source == ValueSource.SET_BY_USER: - cls._check_callbacks(cls._put_nocallback(value)) - else: - cls._value = value - cls._value_source = ValueSource.SET_BY_USER - @classmethod def _get_default(cls) -> str: """ diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 9e6163a596f..a72924e0cc9 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -1,22 +1,70 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +"""The module holds the factory which performs I/O using pandas on a Client.""" + from modin.core.io.io import BaseIO import os from .query_compiler import ClientQueryCompiler class ClientIO(BaseIO): + """Factory providing methods for performing I/O operations using a given Client as the execution engine.""" + _server_conn = None _data_conn = None @classmethod def set_server_connection(cls, conn): + """ + Set the server connection for the I/O object. + + Parameters + ---------- + conn : Any + Connection object that implements various methods. + """ cls._server_conn = conn @classmethod def set_data_connection(cls, conn): + """ + Set the data connection for the I/O object. + + Parameters + ---------- + conn : Any + Connection object that is implementation specific. + """ cls._data_conn = conn @classmethod def read_csv(cls, filepath_or_buffer, **kwargs): + """ + Read CSV data from given filepath or buffer. 
+ + Parameters + ---------- + filepath_or_buffer : str, path object or file-like object + `filepath_or_buffer` parameter of read functions. + **kwargs : dict + Parameters of ``read_csv`` function. + + Returns + ------- + ClientQueryCompiler + Query compiler with CSV data read in. + """ if isinstance(filepath_or_buffer, str): filepath_or_buffer = os.path.abspath(filepath_or_buffer) else: @@ -31,6 +79,27 @@ def read_csv(cls, filepath_or_buffer, **kwargs): @classmethod def read_sql(cls, sql, con, **kwargs): + """ + Read data from a SQL connection. + + Parameters + ---------- + sql : str or SQLAlchemy Selectable (select or text object) + SQL query to be executed or a table name. + con : SQLAlchemy connectable, str, or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible + for engine disposal and connection closure for the SQLAlchemy + connectable; str connections are closed automatically. See + `here `_. + **kwargs : dict + Parameters of ``read_sql`` function. + + Returns + ------- + ClientQueryCompiler + Query compiler with data read in from SQL connection. + """ if isinstance(con, str) and con.lower() == "auto" and cls._data_conn is None: raise ConnectionError( "Cannot connect with parameter 'auto' because connection is not set. Did you initialize it?" diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 6e35a083746..4045daa0472 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -1,3 +1,16 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
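+
+"""
+The module holds ``ClientQueryCompiler``, which forwards every query compiler
+operation to an external service and tracks each intermediate result as an
+opaque ID handed back by that service.
+
+A minimal wiring sketch, assuming ``conn`` and ``data_conn`` are user-provided
+connection objects implementing the service protocol (neither ships with
+Modin)::
+
+    import modin.config as cfg
+    cfg.Engine.put("Client")
+    from modin.core.execution.client.io import ClientIO
+    from modin.core.execution.client.query_compiler import ClientQueryCompiler
+
+    ClientQueryCompiler.set_server_connection(conn)
+    ClientIO.set_server_connection(conn)
+    ClientIO.set_data_connection(data_conn)
+
+    import modin.pandas as pd
+    df = pd.read_csv("data.csv")  # dispatched to ``conn.read_csv(...)``
+"""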
+ from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler import numpy as np import pickle From dd0e7a528f5ec17270ca3131aa4b44afe2ce2d19 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Tue, 6 Sep 2022 15:58:39 -0500 Subject: [PATCH 03/77] Update implementation Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 4045daa0472..80dc9eddf0d 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -24,10 +24,6 @@ class ClientQueryCompiler(BaseQueryCompiler): def set_server_connection(cls, conn): cls._service = conn - @classmethod - def create_table(cls, table_name): - return cls(cls._service.create_query_compiler(table_name)) - def __init__(self, id): assert ( id is not None @@ -38,7 +34,9 @@ def _set_columns(self, new_columns): self._id = self._service.rename(self._id, new_col_labels=new_columns) def _get_columns(self): - return self._service.columns(self._id) + if self._columns_cache is None: + self._columns_cache = pickle.loads(pickle.dumps(self._service.columns(self._id))) + return self._columns_cache def _set_index(self, new_index): self._id = self._service.rename(self._id, new_row_labels=new_index) @@ -47,6 +45,7 @@ def _get_index(self): return self._service.index(self._id) columns = property(_get_columns, _set_columns) + _columns_cache = None index = property(_get_index, _set_index) _dtypes_cache = None From 026a91c6ade1b9a1408ecbbafaf1c1890c80da51 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Tue, 6 Sep 2022 17:29:02 -0500 Subject: [PATCH 04/77] Fix some things Signed-off-by: Devin Petersohn --- modin/pandas/base.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index d318334179e..3162a208e41 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -147,7 +147,7 @@ def _build_repr_df(self, num_rows, num_cols): """ # Fast track for empty dataframe. 
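+        # ``_query_compiler.columns`` is available for both DataFrame and
+        # Series, so no ``hasattr`` check on the frontend object is needed.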
if len(self.index) == 0 or ( - hasattr(self, "columns") and len(self.columns) == 0 + len(self._query_compiler.columns) == 0 ): return pandas.DataFrame( index=self.index, @@ -172,26 +172,23 @@ def _build_repr_df(self, num_rows, num_cols): if num_rows_for_tail is not None else [] ) - if hasattr(self, "columns"): - if len(self.columns) <= num_cols: - col_indexer = slice(None) - else: - num_cols_for_front = num_cols // 2 + 1 - num_cols_for_back = ( - num_cols_for_front - if len(self.columns) > num_cols - else len(self.columns) - num_cols_for_front - if len(self.columns) - num_cols_for_front >= 0 - else None - ) - col_indexer = list(range(len(self.columns))[:num_cols_for_front]) + ( - list(range(len(self.columns))[-num_cols_for_back:]) - if num_cols_for_back is not None - else [] - ) - indexer = row_indexer, col_indexer + if len(self._query_compiler.columns) <= num_cols: + col_indexer = slice(None) else: - indexer = row_indexer + num_cols_for_front = num_cols // 2 + 1 + num_cols_for_back = ( + num_cols_for_front + if len(self.columns) > num_cols + else len(self.columns) - num_cols_for_front + if len(self.columns) - num_cols_for_front >= 0 + else None + ) + col_indexer = list(range(len(self.columns))[:num_cols_for_front]) + ( + list(range(len(self.columns))[-num_cols_for_back:]) + if num_cols_for_back is not None + else [] + ) + indexer = row_indexer, col_indexer return self.iloc[indexer]._query_compiler.to_pandas() def _update_inplace(self, new_query_compiler): From ea0ac1db4e7feb712e3bcc348fef4b31bc373558 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Tue, 6 Sep 2022 20:22:46 -0500 Subject: [PATCH 05/77] Lint fixes --- modin/core/execution/client/query_compiler.py | 4 +++- modin/pandas/base.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 80dc9eddf0d..726595186b8 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -35,7 +35,9 @@ def _set_columns(self, new_columns): def _get_columns(self): if self._columns_cache is None: - self._columns_cache = pickle.loads(pickle.dumps(self._service.columns(self._id))) + self._columns_cache = pickle.loads( + pickle.dumps(self._service.columns(self._id)) + ) return self._columns_cache def _set_index(self, new_index): diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 3162a208e41..46539e209b0 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -146,9 +146,7 @@ def _build_repr_df(self, num_rows, num_cols): A pandas dataset with `num_rows` or fewer rows and `num_cols` or fewer columns. """ # Fast track for empty dataframe. 
- if len(self.index) == 0 or ( - len(self._query_compiler.columns) == 0 - ): + if len(self.index) == 0 or (len(self._query_compiler.columns) == 0): return pandas.DataFrame( index=self.index, columns=self.columns if hasattr(self, "columns") else None, From c18342edc798d8c1e05272bdcb1907f86e85cfa4 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Tue, 6 Sep 2022 20:58:56 -0500 Subject: [PATCH 06/77] Fix put Signed-off-by: Devin Petersohn --- modin/config/envvars.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index b61925a825b..69752d294d1 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -21,7 +21,7 @@ import secrets from .pubsub import Parameter, _TYPE_PARAMS, ExactStr, ValueSource -from typing import Optional +from typing import Optional, Any class EnvironmentVariable(Parameter, type=str, abstract=True): @@ -77,6 +77,22 @@ class Engine(EnvironmentVariable, type=str): varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python", "Native", "Client") + @classmethod + def put(cls, value: Any) -> None: + """ + Set config value. + + Parameters + ---------- + value : Any + Config value to set. + """ + if cls._value_source == ValueSource.SET_BY_USER: + cls._check_callbacks(cls._put_nocallback(value)) + else: + cls._value = value + cls._value_source = ValueSource.SET_BY_USER + @classmethod def _get_default(cls) -> str: """ From 711c819d604e3873dd3948ba58573c26f0bc9dd6 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 15 Sep 2022 15:05:31 -0500 Subject: [PATCH 07/77] Clean up and add new details Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 11 +++-------- modin/pandas/base.py | 5 +++++ modin/pandas/series.py | 4 +++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 726595186b8..969a504f92b 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -13,7 +13,6 @@ from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler import numpy as np -import pickle import inspect from pandas.api.types import is_list_like from pandas.core.computation.parsing import tokenize_string @@ -35,9 +34,7 @@ def _set_columns(self, new_columns): def _get_columns(self): if self._columns_cache is None: - self._columns_cache = pickle.loads( - pickle.dumps(self._service.columns(self._id)) - ) + self._columns_cache = self._service.columns(self._id) return self._columns_cache def _set_index(self, new_index): @@ -54,8 +51,7 @@ def _get_index(self): @property def dtypes(self): if self._dtypes_cache is None: - ref = self._service.dtypes(self._id) - self._dtypes_cache = pickle.loads(pickle.dumps(ref)) + self._dtypes_cache = self._service.dtypes(self._id) return self._dtypes_cache @classmethod @@ -63,8 +59,7 @@ def from_pandas(cls, df, data_cls): raise NotImplementedError def to_pandas(self): - remote_obj = self._service.to_pandas(self._id) - return pickle.loads(pickle.dumps(remote_obj)) + return self._service.to_pandas(self._id) def default_to_pandas(self, pandas_op, *args, **kwargs): raise NotImplementedError diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 46539e209b0..d37a51b730b 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -3179,6 +3179,11 @@ def __getitem__(self, key): """ if not self._query_compiler.lazy_execution and len(self) == 0: return 
self._default_to_pandas("__getitem__", key) + # fastpath for common case + if isinstance(key, str) and key in self._query_compiler.columns: + return self._getitem(key) + elif is_list_like(key) and all(k in self._query_compiler.columns for k in key): + return self._getitem(key) # see if we can slice the rows # This lets us reuse code in pandas to error check indexer = None diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 0e8ff29a920..2169e3deb33 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -311,7 +311,9 @@ def __getattr__(self, key): try: return object.__getattribute__(self, key) except AttributeError as err: - if key not in _ATTRS_NO_LOOKUP and key in self.index: + if not self._query_compiler.lazy_execution and ( + key not in _ATTRS_NO_LOOKUP and key in self.index + ): return self[key] raise err From e5c5f61ed36b03aa6759e4c3640fa606e397e875 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 15 Sep 2022 21:53:19 -0500 Subject: [PATCH 08/77] Use fsspec to get full path and allow URLs Signed-off-by: Devin Petersohn --- modin/core/execution/client/io.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index a72924e0cc9..8ba76b56f7c 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -15,6 +15,7 @@ from modin.core.io.io import BaseIO import os +import fsspec from .query_compiler import ClientQueryCompiler @@ -66,7 +67,11 @@ def read_csv(cls, filepath_or_buffer, **kwargs): Query compiler with CSV data read in. """ if isinstance(filepath_or_buffer, str): - filepath_or_buffer = os.path.abspath(filepath_or_buffer) + filepath_or_buffer = fsspec.open(filepath_or_buffer).full_name + if filepath_or_buffer.startswith("file://"): + # We will do this so that the backend can know whether this + # is a path or a URL. 
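+                # len("file://") == 7, so slicing at index 7 drops exactly the scheme.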
+ filepath_or_buffer = filepath_or_buffer[7:] else: raise NotImplementedError("Only filepaths are supported for read_csv") if cls._server_conn is None: From 538dd5404b5884f99cb7eee8ec028f10d2030af7 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Fri, 16 Sep 2022 12:17:46 -0500 Subject: [PATCH 09/77] Add lazy loc Signed-off-by: Devin Petersohn --- modin/pandas/indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 16b3003f0f9..374e5295069 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -677,7 +677,10 @@ def __getitem__(self, key): if isinstance(row_loc, Series) and is_boolean_array(row_loc): return self._handle_boolean_masking(row_loc, col_loc) - + if self.qc.lazy_execution: + # Since we don't know if the row labels are present or not in lazy evaluation, + # immediately hand off computation to the engine + return type(self.df)(query_compiler=self.qc.getitem_row_labels_array(row_loc).getitem_column_array(col_loc)) row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc) result = self._getitem_positional( From 4c3dec6164a3d9b7b112627f8655b1b402a30582 Mon Sep 17 00:00:00 2001 From: Bala Atur Date: Mon, 19 Sep 2022 18:24:38 -0700 Subject: [PATCH 10/77] fixes for tests --- modin/core/execution/client/query_compiler.py | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 969a504f92b..c0220ff76f1 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -73,6 +73,12 @@ def transpose(self): def copy(self): return self.__constructor__(self._id) + def add_prefix(self, prefix, axis=1): + return self.__constructor__(self._service.add_prefix(self._id, prefix, axis)) + + def add_suffix(self, suffix, axis=1): + return self.__constructor__(self._service.add_prefix(self._id, suffix, axis)) + def insert(self, loc, column, value): if isinstance(value, ClientQueryCompiler): value = value._id @@ -535,13 +541,7 @@ def merge(self, right, **kwargs): return self.__constructor__(self._service.merge(self._id, right._id, **kwargs)) def groupby_mean( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, + self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, ): return self.__constructor__( self._service.groupby_mean( @@ -550,13 +550,7 @@ def groupby_mean( ) def groupby_count( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, + self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, ): return self.__constructor__( self._service.groupby_count( @@ -565,13 +559,7 @@ def groupby_count( ) def groupby_max( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, + self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, ): return self.__constructor__( self._service.groupby_max( @@ -580,13 +568,7 @@ def groupby_max( ) def groupby_min( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, + self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, ): return self.__constructor__( self._service.groupby_min( @@ -595,17 +577,36 @@ def groupby_min( ) def groupby_sum( + self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + ): + return self.__constructor__( + self._service.groupby_sum( + self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + ) + + def 
groupby_agg( self, by, + agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, + how="axis_wise", drop=False, ): return self.__constructor__( - self._service.groupby_sum( - self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + self._service.groupby_agg( + self._id, + by._id, + agg_func, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + how, + drop, ) ) From 1f9797cfe859845dd9a4f3639c3f365f87cbe307 Mon Sep 17 00:00:00 2001 From: Bala Atur Date: Tue, 20 Sep 2022 19:58:35 -0700 Subject: [PATCH 11/77] porting more tests --- modin/core/execution/client/io.py | 4 +++ modin/core/execution/client/query_compiler.py | 28 ++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 8ba76b56f7c..462acdea0d6 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -118,3 +118,7 @@ def read_sql(cls, sql, con, **kwargs): return ClientQueryCompiler( cls._server_conn.read_sql(sql, cls._data_conn, **kwargs) ) + + @classmethod + def to_sql(cls, qc, **kwargs): + qc.to_sql(**kwargs) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index c0220ff76f1..526221f1f0f 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -58,7 +58,33 @@ def dtypes(self): def from_pandas(cls, df, data_cls): raise NotImplementedError + def to_sql( + self, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, + ): + return self._service.to_sql( + self._id, + name, + con, + schema, + if_exists, + index, + index_label, + chunksize, + dtype, + method, + ) + def to_pandas(self): + print("calling to_pandas in server") return self._service.to_pandas(self._id) def default_to_pandas(self, pandas_op, *args, **kwargs): @@ -77,7 +103,7 @@ def add_prefix(self, prefix, axis=1): return self.__constructor__(self._service.add_prefix(self._id, prefix, axis)) def add_suffix(self, suffix, axis=1): - return self.__constructor__(self._service.add_prefix(self._id, suffix, axis)) + return self.__constructor__(self._service.add_suffix(self._id, suffix, axis)) def insert(self, loc, column, value): if isinstance(value, ClientQueryCompiler): From 26d0ddca3de545d73ff39bebf85d5b852f9592cd Mon Sep 17 00:00:00 2001 From: Bala Atur Date: Wed, 21 Sep 2022 08:13:02 -0700 Subject: [PATCH 12/77] more fixes --- modin/core/execution/client/query_compiler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 526221f1f0f..358632646cd 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -84,7 +84,6 @@ def to_sql( ) def to_pandas(self): - print("calling to_pandas in server") return self._service.to_pandas(self._id) def default_to_pandas(self, pandas_op, *args, **kwargs): From 2489b33c0d0589f878c79f6b95fed7f8e0304db9 Mon Sep 17 00:00:00 2001 From: Bala Atur Date: Wed, 21 Sep 2022 11:00:26 -0700 Subject: [PATCH 13/77] moar fixes --- modin/core/execution/client/query_compiler.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 358632646cd..b553b6d983e 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -505,6 +505,9 @@ 
def str_endswith(self, pat, na=np.nan): def str_find(self, sub, start=0, end=None): return self.__constructor__(self._service.str_find(self._id, sub, start, end)) + def str_rfind(self, sub, start=0, end=None): + return self.__constructor__(self._service.str_rfind(self._id, sub, start, end)) + def str_findall(self, pat, flags=0, **kwargs): return self.__constructor__( self._service.str_findall(self._id, pat, flags, **kwargs) @@ -524,6 +527,9 @@ def str_lstrip(self, to_strip=None): def str_ljust(self, width, fillchar=" "): return self.__constructor__(self._service.str_ljust(self._id, width, fillchar)) + def str_rjust(self, width, fillchar=" "): + return self.__constructor__(self._service.str_rjust(self._id, width, fillchar)) + def str_match(self, pat, case=True, flags=0, na=np.nan): return self.__constructor__( self._service.str_match(self._id, pat, case, flags, na) @@ -537,6 +543,9 @@ def str_pad(self, width, side="left", fillchar=" "): def str_repeat(self, repeats): return self.__constructor__(self._service.str_repeat(self._id, repeats)) + def str_split(self, pat=None, n=-1, expand=False): + return self.__constructor__(self._service.str_split(self._id, pat, n, expand)) + def str_rsplit(self, pat=None, n=-1, expand=False): return self.__constructor__(self._service.str_rsplit(self._id, pat, n, expand)) From 3699df42518934cd600e35899c97ac4295370684 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Thu, 22 Sep 2022 14:24:47 -0500 Subject: [PATCH 14/77] Raise exception Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index b553b6d983e..75edfa5f23d 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -27,6 +27,8 @@ def __init__(self, id): assert ( id is not None ), "Make sure the client is properly connected and returns and ID" + if isinstance(id, Exception): + raise id self._id = id def _set_columns(self, new_columns): From c399ce26f2eada39525a5d4c0b9717972ba81e72 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Thu, 22 Sep 2022 16:45:07 -0500 Subject: [PATCH 15/77] Lint fixes --- modin/core/execution/client/io.py | 1 - modin/core/execution/client/query_compiler.py | 40 ++++++++++++++++--- modin/pandas/indexing.py | 6 ++- modin/pandas/series.py | 2 +- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 462acdea0d6..f7c6aec4974 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -14,7 +14,6 @@ """The module holds the factory which performs I/O using pandas on a Client.""" from modin.core.io.io import BaseIO -import os import fsspec from .query_compiler import ClientQueryCompiler diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 75edfa5f23d..2af90f45221 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -577,7 +577,13 @@ def merge(self, right, **kwargs): return self.__constructor__(self._service.merge(self._id, right._id, **kwargs)) def groupby_mean( - self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, ): return self.__constructor__( self._service.groupby_mean( @@ -586,7 +592,13 @@ def groupby_mean( ) def 
groupby_count( - self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, ): return self.__constructor__( self._service.groupby_count( @@ -595,7 +607,13 @@ def groupby_count( ) def groupby_max( - self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, ): return self.__constructor__( self._service.groupby_max( @@ -604,7 +622,13 @@ def groupby_max( ) def groupby_min( - self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, ): return self.__constructor__( self._service.groupby_min( @@ -613,7 +637,13 @@ def groupby_min( ) def groupby_sum( - self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=False, + self, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, ): return self.__constructor__( self._service.groupby_sum( diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 374e5295069..20b5952b6f5 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -680,7 +680,11 @@ def __getitem__(self, key): if self.qc.lazy_execution: # Since we don't know if the row labels are present or not in lazy evaluation, # immediately hand off computation to the engine - return type(self.df)(query_compiler=self.qc.getitem_row_labels_array(row_loc).getitem_column_array(col_loc)) + return type(self.df)( + query_compiler=self.qc.getitem_row_labels_array( + row_loc + ).getitem_column_array(col_loc) + ) row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc) result = self._getitem_positional( diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 2169e3deb33..023b76ed120 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -312,7 +312,7 @@ def __getattr__(self, key): return object.__getattribute__(self, key) except AttributeError as err: if not self._query_compiler.lazy_execution and ( - key not in _ATTRS_NO_LOOKUP and key in self.index + key not in _ATTRS_NO_LOOKUP and key in self.index ): return self[key] raise err From c7858103fefc39b7619226478baaba959f79e854 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 23 Sep 2022 09:40:14 -0500 Subject: [PATCH 16/77] Return Python as the default modin engine --- modin/config/envvars.py | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 69752d294d1..7c7b8a3708d 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -21,7 +21,7 @@ import secrets from .pubsub import Parameter, _TYPE_PARAMS, ExactStr, ValueSource -from typing import Optional, Any +from typing import Optional class EnvironmentVariable(Parameter, type=str, abstract=True): @@ -77,22 +77,6 @@ class Engine(EnvironmentVariable, type=str): varname = "MODIN_ENGINE" choices = ("Ray", "Dask", "Python", "Native", "Client") - @classmethod - def put(cls, value: Any) -> None: - """ - Set config value. - - Parameters - ---------- - value : Any - Config value to set. 
- """ - if cls._value_source == ValueSource.SET_BY_USER: - cls._check_callbacks(cls._put_nocallback(value)) - else: - cls._value = value - cls._value_source = ValueSource.SET_BY_USER - @classmethod def _get_default(cls) -> str: """ @@ -147,9 +131,9 @@ def _get_default(cls) -> str: pass else: return "Native" - raise ImportError( - "Please refer to installation documentation page to install an engine" - ) + + warnings.warn("No other engine was found so defaulting backend to Python.") + return "Python" class StorageFormat(EnvironmentVariable, type=str): @@ -157,7 +141,7 @@ class StorageFormat(EnvironmentVariable, type=str): varname = "MODIN_STORAGE_FORMAT" default = "Pandas" - choices = ("Pandas", "OmniSci", "Pyarrow", "Cudf", "") + choices = ("Pandas", "Hdk", "Pyarrow", "Cudf", "") class IsExperimental(EnvironmentVariable, type=bool): From 3e09a7ff4850a1b1f869051d5466507ee21aead1 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 23 Sep 2022 10:11:24 -0500 Subject: [PATCH 17/77] Handle indexing case for client qc --- modin/pandas/indexing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 20b5952b6f5..d51b749e10b 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -35,6 +35,7 @@ from pandas.api.types import is_list_like, is_bool from pandas.core.dtypes.common import is_integer, is_bool_dtype, is_integer_dtype from pandas.core.indexing import IndexingError +from modin.core.execution.client.query_compiler import ClientQueryCompiler from modin.error_message import ErrorMessage from modin.logging import ClassLogger @@ -677,7 +678,7 @@ def __getitem__(self, key): if isinstance(row_loc, Series) and is_boolean_array(row_loc): return self._handle_boolean_masking(row_loc, col_loc) - if self.qc.lazy_execution: + if isinstance(self.qc, ClientQueryCompiler) and self.qc.lazy_execution: # Since we don't know if the row labels are present or not in lazy evaluation, # immediately hand off computation to the engine return type(self.df)( From ad0bc7be9ba7e6f81897f10d3f778243f1c5c782 Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Fri, 23 Sep 2022 11:15:47 -0500 Subject: [PATCH 18/77] Call fast path for __getitem__ if not lazy --- modin/pandas/base.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index d37a51b730b..f6bbcd1310d 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -3177,13 +3177,16 @@ def __getitem__(self, key): BasePandasDataset Located dataset. 
""" - if not self._query_compiler.lazy_execution and len(self) == 0: - return self._default_to_pandas("__getitem__", key) - # fastpath for common case - if isinstance(key, str) and key in self._query_compiler.columns: - return self._getitem(key) - elif is_list_like(key) and all(k in self._query_compiler.columns for k in key): - return self._getitem(key) + if not self._query_compiler.lazy_execution: + if len(self) == 0: + return self._default_to_pandas("__getitem__", key) + # fastpath for common case + if isinstance(key, str) and key in self._query_compiler.columns: + return self._getitem(key) + elif is_list_like(key) and all( + k in self._query_compiler.columns for k in key + ): + return self._getitem(key) # see if we can slice the rows # This lets us reuse code in pandas to error check indexer = None From 2f4fbf0d6f0b52a5be3c3c0b1ec0fb3441c6381e Mon Sep 17 00:00:00 2001 From: Karthik Velayutham Date: Mon, 26 Sep 2022 09:51:28 -0500 Subject: [PATCH 19/77] Remove user warning for Python-engine fall back --- modin/config/envvars.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 7c7b8a3708d..c77792cc61a 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -132,7 +132,8 @@ def _get_default(cls) -> str: else: return "Native" - warnings.warn("No other engine was found so defaulting backend to Python.") + # If we can't import any other engines we should go ahead and default to Python being + # the default backend engine. return "Python" From 4b613742880959c34183cdba0b0c8ca493cf5271 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Sat, 24 Sep 2022 14:42:18 -0500 Subject: [PATCH 20/77] Add init Signed-off-by: Devin Petersohn --- modin/core/execution/client/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 modin/core/execution/client/__init__.py diff --git a/modin/core/execution/client/__init__.py b/modin/core/execution/client/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From 485793c87ee4d9055542309ce9805f2075f4c460 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Mon, 26 Sep 2022 09:30:59 -0500 Subject: [PATCH 21/77] Implement free as a no-op Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 2af90f45221..4375b68f394 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -748,7 +748,7 @@ def finalize(self): raise NotImplementedError def free(self): - raise NotImplementedError + return @classmethod def from_arrow(cls, at, data_cls): From 5d5a617cb2b69bcaa8c793a6d3a6582ff1b8c5ea Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 23 Sep 2022 17:30:28 -0700 Subject: [PATCH 22/77] Add support for replace - client side --- modin/core/execution/client/query_compiler.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 4375b68f394..d1337edfd5c 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -14,6 +14,7 @@ from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler import numpy as np import inspect +from pandas._libs.lib import no_default, NoDefault from pandas.api.types import is_list_like from 
pandas.core.computation.parsing import tokenize_string @@ -171,6 +172,37 @@ def isna(self): def notna(self): return self.__constructor__(self._service.notna(self._id)) + def replace( + self, + to_replace=None, + value=no_default, + inplace=False, + limit=None, + regex=False, + method: "str | NoDefault" = no_default, + ): + if isinstance(to_replace, ClientQueryCompiler): + is_to_replace_qc = True + else: + is_to_replace_qc = False + if isinstance(regex, ClientQueryCompiler): + is_regex_qc = True + else: + is_regex_qc = False + return self.__constructor__( + self._service.replace( + self._id, + to_replace, + value, + inplace, + limit, + regex, + method, + is_to_replace_qc, + is_regex_qc, + ) + ) + def fillna( self, squeeze_self, From 8b169880a1c2f2b8312e76757118e4cc829c0663 Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Mon, 26 Sep 2022 10:33:13 -0500 Subject: [PATCH 23/77] Fix a couple of issues with Client Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 16 ++++++++-------- modin/pandas/base.py | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index d1337edfd5c..30a01e23527 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -708,24 +708,24 @@ def groupby_agg( ) ) - def cummax(self, fold_axis, skipna, *args, **kwargs): + def cummax(self, fold_axis, axis, skipna, *args, **kwargs): return self.__constructor__( - self._service.cummax(self._id, fold_axis, skipna, *args, **kwargs) + self._service.cummax(self._id, fold_axis, axis, skipna, *args, **kwargs) ) - def cummin(self, fold_axis, skipna, *args, **kwargs): + def cummin(self, fold_axis, axis, skipna, *args, **kwargs): return self.__constructor__( - self._service.cummin(self._id, fold_axis, skipna, *args, **kwargs) + self._service.cummin(self._id, fold_axis, axis, skipna, *args, **kwargs) ) - def cumsum(self, fold_axis, skipna, *args, **kwargs): + def cumsum(self, fold_axis, axis, skipna, *args, **kwargs): return self.__constructor__( - self._service.cumsum(self._id, fold_axis, skipna, *args, **kwargs) + self._service.cumsum(self._id, fold_axis, axis, skipna, *args, **kwargs) ) - def cumprod(self, fold_axis, skipna, *args, **kwargs): + def cumprod(self, fold_axis, axis, skipna, *args, **kwargs): return self.__constructor__( - self._service.cumprod(self._id, fold_axis, skipna, *args, **kwargs) + self._service.cumprod(self._id, fold_axis, axis, skipna, *args, **kwargs) ) def get_index_names(self, axis=0): diff --git a/modin/pandas/base.py b/modin/pandas/base.py index f6bbcd1310d..6022956e2e0 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -1227,6 +1227,10 @@ def drop( elif axes[axis] is not None: if not is_list_like(axes[axis]): axes[axis] = [axes[axis]] + # In case of lazy execution we should bypass these error checking components + # because they can force the materialization of the row or column labels. 
+ if self._query_compiler.lazy_execution: + continue if errors == "raise": non_existent = pandas.Index(axes[axis]).difference( getattr(self, axis) From 4485cc8881de7d10c27869b53bd6054ab3254fdc Mon Sep 17 00:00:00 2001 From: Devin Petersohn Date: Mon, 26 Sep 2022 13:24:05 -0500 Subject: [PATCH 24/77] Throw errors on to_pandas Signed-off-by: Devin Petersohn --- modin/core/execution/client/query_compiler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 30a01e23527..59100df1fe4 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -87,7 +87,10 @@ def to_sql( ) def to_pandas(self): - return self._service.to_pandas(self._id) + value = self._service.to_pandas(self._id) + if isinstance(value, Exception): + raise value + return value def default_to_pandas(self, pandas_op, *args, **kwargs): raise NotImplementedError From 7fd51b2d770d32ecf79fd002e7554efa1372948d Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Mon, 26 Sep 2022 21:17:46 -0700 Subject: [PATCH 25/77] Do not default to pandas for str_repeat --- modin/pandas/series_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py index ae01e884835..5bbb85cf2ae 100644 --- a/modin/pandas/series_utils.py +++ b/modin/pandas/series_utils.py @@ -331,7 +331,9 @@ def partition(self, sep=" ", expand=True): ) def repeat(self, repeats): - return self._default_to_pandas(pandas.Series.str.repeat, repeats) + return Series( + query_compiler=self._query_compiler.str_repeat(repeats) + ) def rpartition(self, sep=" ", expand=True): if sep is not None and len(sep) == 0: From a12fb00bec9ccd70800babef1869a6cdd57f6766 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Fri, 30 Sep 2022 10:22:05 -0700 Subject: [PATCH 26/77] Add support for 18 datetime functions/properties --- modin/core/execution/client/query_compiler.py | 54 +++++++++++++++++++ .../storage_formats/base/query_compiler.py | 4 ++ 2 files changed, 58 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 59100df1fe4..65a01787bfd 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -481,6 +481,60 @@ def sort_rows_by_column_values(self, columns, ascending=True, **kwargs): def sort_index(self, **kwargs): return self.__constructor__(self._service.sort_index(self._id, **kwargs)) + def dt_nanosecond(self): + return self.__constructor__(self._service.dt_nanosecond(self._id)) + + def dt_microsecond(self): + return self.__constructor__(self._service.dt_microsecond(self._id)) + + def dt_second(self): + return self.__constructor__(self._service.dt_second(self._id)) + + def dt_minute(self): + return self.__constructor__(self._service.dt_minute(self._id)) + + def dt_hour(self): + return self.__constructor__(self._service.dt_hour(self._id)) + + def dt_day(self): + return self.__constructor__(self._service.dt_day(self._id)) + + def dt_dayofweek(self): + return self.__constructor__(self._service.dt_dayofweek(self._id)) + + def dt_day_of_week(self): + return self.__constructor__(self._service.dt_day_of_week(self._id)) + + def dt_weekday(self): + return self.__constructor__(self._service.dt_weekday(self._id)) + + def dt_day_name(self): + return self.__constructor__(self._service.dt_day_name(self._id)) + + def dt_dayofyear(self): 
+ return self.__constructor__(self._service.dt_dayofyear(self._id)) + + def dt_day_of_year(self): + return self.__constructor__(self._service.dt_day_of_year(self._id)) + + def dt_week(self): + return self.__constructor__(self._service.dt_week(self._id)) + + def dt_weekofyear(self): + return self.__constructor__(self._service.dt_weekofyear(self._id)) + + def dt_month(self): + return self.__constructor__(self._service.dt_month(self._id)) + + def dt_month_name(self): + return self.__constructor__(self._service.dt_month_name(self._id)) + + def dt_quarter(self): + return self.__constructor__(self._service.dt_quarter(self._id)) + + def dt_year(self): + return self.__constructor__(self._service.dt_year(self._id)) + def str_capitalize(self): return self.__constructor__(self._service.str_capitalize(self._id)) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index a0e05638a8e..d17d1e32b85 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -4066,6 +4066,10 @@ def resample_var(self, resample_kwargs, ddof, *args, **kwargs): # End of Resample methods + @doc_utils.doc_str_method(refer_to="capitalize", params="") + def str_capitalize(self): + return StrDefault.register(pandas.Series.str.capitalize)(self) + # Str methods @doc_utils.doc_str_method(refer_to="capitalize", params="") From 613ba25e3537b5d9134584e1ff44aa9032de2717 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Tue, 4 Oct 2022 20:31:31 -0700 Subject: [PATCH 27/77] Fix columns caching when renaming columns --- modin/core/execution/client/query_compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 65a01787bfd..adf451a8519 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -34,6 +34,7 @@ def __init__(self, id): def _set_columns(self, new_columns): self._id = self._service.rename(self._id, new_col_labels=new_columns) + self._columns_cache = self._service.columns(self._id) def _get_columns(self): if self._columns_cache is None: From 0450f7c479f774b4df819a0896116885a8df9633 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Wed, 5 Oct 2022 14:02:12 -0700 Subject: [PATCH 28/77] Fix test_query: put backticks back for col names --- modin/core/execution/client/query_compiler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index adf451a8519..0b1fc5161c0 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -830,6 +830,8 @@ def query(self, expr, **kwargs): is_variable = True continue else: + if v in self.columns: + v = f'`{v}`' variable_list.append(v) expr = " ".join(variable_list) return self.__constructor__(self._service.query(self._id, expr, **kwargs)) From 679813cd45dcb88706e4d72e7446cd080c3727f9 Mon Sep 17 00:00:00 2001 From: Hazem Elmeleegy Date: Mon, 17 Oct 2022 05:47:17 -0700 Subject: [PATCH 29/77] Add support for astype -- client side --- modin/core/execution/client/query_compiler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 0b1fc5161c0..217476793cd 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -176,6 
+176,9 @@ def isna(self): def notna(self): return self.__constructor__(self._service.notna(self._id)) + def astype(self, col_dtypes, **kwargs): + return self.__constructor__(self._service.astype(self._id, col_dtypes, **kwargs)) + def replace( self, to_replace=None, From dff3d54ebf631d0114337e80d123e67549d8e29d Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 25 Oct 2022 12:23:31 -0500 Subject: [PATCH 30/77] Make client query compiler consistent with other query compiler. consistency check passes. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 13 ++++- modin/core/execution/client/query_compiler.py | 48 +++---------------- .../storage_formats/base/query_compiler.py | 13 ++++- .../storage_formats/pandas/query_compiler.py | 9 +++- modin/test/test_executions_api.py | 19 ++++++-- 5 files changed, 51 insertions(+), 51 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index f7c6aec4974..34a244d946d 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -120,4 +120,15 @@ def read_sql(cls, sql, con, **kwargs): @classmethod def to_sql(cls, qc, **kwargs): - qc.to_sql(**kwargs) + self._server_conn.to_sql( + qc._id, + name, + con, + schema, + if_exists, + index, + index_label, + chunksize, + dtype, + method, + ) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 217476793cd..d07168078af 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -62,31 +62,6 @@ def dtypes(self): def from_pandas(cls, df, data_cls): raise NotImplementedError - def to_sql( - self, - name, - con, - schema=None, - if_exists="fail", - index=True, - index_label=None, - chunksize=None, - dtype=None, - method=None, - ): - return self._service.to_sql( - self._id, - name, - con, - schema, - if_exists, - index, - index_label, - chunksize, - dtype, - method, - ) - def to_pandas(self): value = self._service.to_pandas(self._id) if isinstance(value, Exception): @@ -144,14 +119,11 @@ def getitem_column_array(self, key, numeric=False): self._service.getitem_column_array(self._id, key, numeric) ) - def getitem_row_labels_array(self, labels): + def getitem_row_array(self, key, numeric=False): return self.__constructor__( - self._service.getitem_row_labels_array(self._id, labels) + self._service.getitem_row_array(self._id, key, numeric) ) - def getitem_row_array(self, key): - return self.__constructor__(self._service.getitem_row_array(self._id, key)) - def pivot(self, index, columns, values): return self.__constructor__( self._service.pivot(self._id, index, columns, values) @@ -162,11 +134,9 @@ def get_dummies(self, columns, **kwargs): self._service.get_dummies(self._id, columns, **kwargs) ) - def view(self, index=None, columns=None): + def take_2d(self, index=None, columns=None): return self.__constructor__(self._service.view(self._id, index, columns)) - take_2d = view - def drop(self, index=None, columns=None): return self.__constructor__(self._service.drop(self._id, index, columns)) @@ -177,7 +147,9 @@ def notna(self): return self.__constructor__(self._service.notna(self._id)) def astype(self, col_dtypes, **kwargs): - return self.__constructor__(self._service.astype(self._id, col_dtypes, **kwargs)) + return self.__constructor__( + self._service.astype(self._id, col_dtypes, **kwargs) + ) def replace( self, @@ -506,9 +478,6 @@ def dt_day(self): def dt_dayofweek(self): return 
self.__constructor__(self._service.dt_dayofweek(self._id)) - def dt_day_of_week(self): - return self.__constructor__(self._service.dt_day_of_week(self._id)) - def dt_weekday(self): return self.__constructor__(self._service.dt_weekday(self._id)) @@ -518,9 +487,6 @@ def dt_day_name(self): def dt_dayofyear(self): return self.__constructor__(self._service.dt_dayofyear(self._id)) - def dt_day_of_year(self): - return self.__constructor__(self._service.dt_day_of_year(self._id)) - def dt_week(self): return self.__constructor__(self._service.dt_week(self._id)) @@ -834,7 +800,7 @@ def query(self, expr, **kwargs): continue else: if v in self.columns: - v = f'`{v}`' + v = f"`{v}`" variable_list.append(v) expr = " ".join(variable_list) return self.__constructor__(self._service.query(self._id, expr, **kwargs)) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index d17d1e32b85..8dbdaf99586 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -529,6 +529,11 @@ def mod(self, other, **kwargs): # noqa: PR02 def mul(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.mul)(self, other=other, **kwargs) + @doc_utils.doc_binary_method(operation="multiplication", sign="*", self_on_right=True) + def rmul(self, other, **kwargs): # noqa: PR02 + return BinaryDefault.register(pandas.DataFrame.rmul)(self, other=other, **kwargs) + + @doc_utils.add_refer_to("DataFrame.corr") def corr(self, **kwargs): # noqa: PR02 """ @@ -2143,7 +2148,7 @@ def get_column(df, key): return DataFrameDefault.register(get_column)(self, key=key) - def getitem_row_array(self, key): + def getitem_row_array(self, key: List[Hashable], numeric: bool = False): """ Get row data for target indices. @@ -2151,6 +2156,7 @@ def getitem_row_array(self, key): ---------- key : list-like Numeric indices of the rows to pick. 
+ numeric : bool, default: False Returns ------- @@ -2159,7 +2165,10 @@ def getitem_row_array(self, key): """ def get_row(df, key): - return df.iloc[key] + if numeric: + return df.iloc[key] + else: + return df.loc[key] return DataFrameDefault.register(get_row)(self, key=key) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 0e4a4481193..2f944c26801 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -404,6 +404,7 @@ def to_numpy(self, **kwargs): lt = Binary.register(pandas.DataFrame.lt) mod = Binary.register(pandas.DataFrame.mod) mul = Binary.register(pandas.DataFrame.mul) + rmul = Binary.register(pandas.DataFrame.rmul) ne = Binary.register(pandas.DataFrame.ne) pow = Binary.register(pandas.DataFrame.pow) radd = Binary.register(pandas.DataFrame.radd) @@ -2246,9 +2247,13 @@ def getitem_column_array(self, key, numeric=False): ) return self.__constructor__(new_modin_frame) - def getitem_row_array(self, key): + def getitem_row_array(self, key: List[Hashable], numeric: bool = False): + if numeric: + kwargs = {"row_positions": key} + else: + kwargs = {"row_labels": key} return self.__constructor__( - self._modin_frame.take_2d_labels_or_positional(row_positions=key) + self._modin_frame.take_2d_labels_or_positional(**kwargs) ) def setitem(self, axis, key, value): diff --git a/modin/test/test_executions_api.py b/modin/test/test_executions_api.py index 949834ba9e6..f109801d084 100644 --- a/modin/test/test_executions_api.py +++ b/modin/test/test_executions_api.py @@ -13,6 +13,7 @@ import pytest +from modin.core.execution.client.query_compiler import ClientQueryCompiler from modin.core.storage_formats import ( BaseQueryCompiler, PandasQueryCompiler, @@ -21,7 +22,7 @@ BASE_EXECUTION = BaseQueryCompiler -EXECUTIONS = [PandasQueryCompiler, PyarrowQueryCompiler] +EXECUTIONS = [PandasQueryCompiler, PyarrowQueryCompiler, ClientQueryCompiler] def test_base_abstract_methods(): @@ -50,15 +51,23 @@ def test_base_abstract_methods(): ), f"{BASE_EXECUTION} has not implemented abstract methods: {not_implemented_methods}" -@pytest.mark.parametrize("execution", EXECUTIONS) -def test_api_consistent(execution): +@pytest.mark.parametrize( + "execution,expected_extra_methods", + [ + (PandasQueryCompiler, set()), + (PyarrowQueryCompiler, set()), + # client query compiler exposes set_server_connection, + # which the other compilers should not + (ClientQueryCompiler, {"set_server_connection"}), + ], +) +def test_api_consistent(execution, expected_extra_methods): base_methods = set(BASE_EXECUTION.__dict__) custom_methods = set( [key for key in execution.__dict__.keys() if not key.startswith("_")] ) extra_methods = custom_methods.difference(base_methods) - # checking that custom execution do not implements extra api methods assert ( - len(extra_methods) == 0 + extra_methods == expected_extra_methods ), f"{execution} implement these extra methods: {extra_methods}" From 18cf7254b949bfe5dec98eb3899e7d93b40a3a5e Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 25 Oct 2022 12:59:39 -0500 Subject: [PATCH 31/77] Fix black. 
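The `rmul` registration added in the previous commit does not satisfy
black: the decorator call and the return expression both run past the
default 88-character line length, so black reflows them. A minimal,
runnable illustration of the reflow (the names here are illustrative,
not code from this repo):

    # Just over black's default 88-character limit, so black splits the call:
    multiplication_doc_settings = dict(operation="multiplication", sign="*", self_on_right=True)

    # What black produces: the arguments move onto their own indented line.
    multiplication_doc_settings = dict(
        operation="multiplication", sign="*", self_on_right=True
    )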
Signed-off-by: mvashishtha --- modin/core/storage_formats/base/query_compiler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 8dbdaf99586..bc9e349db2c 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -529,10 +529,13 @@ def mod(self, other, **kwargs): # noqa: PR02 def mul(self, other, **kwargs): # noqa: PR02 return BinaryDefault.register(pandas.DataFrame.mul)(self, other=other, **kwargs) - @doc_utils.doc_binary_method(operation="multiplication", sign="*", self_on_right=True) + @doc_utils.doc_binary_method( + operation="multiplication", sign="*", self_on_right=True + ) def rmul(self, other, **kwargs): # noqa: PR02 - return BinaryDefault.register(pandas.DataFrame.rmul)(self, other=other, **kwargs) - + return BinaryDefault.register(pandas.DataFrame.rmul)( + self, other=other, **kwargs + ) @doc_utils.add_refer_to("DataFrame.corr") def corr(self, **kwargs): # noqa: PR02 From ea5dc77284d0c2b0e7097dea330cfd89d9bcbf60 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 25 Oct 2022 15:23:54 -0500 Subject: [PATCH 32/77] Fix black and flake8. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 13 +------------ modin/core/storage_formats/base/query_compiler.py | 4 ---- modin/pandas/series_utils.py | 4 +--- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 34a244d946d..689ef92c6ad 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -120,15 +120,4 @@ def read_sql(cls, sql, con, **kwargs): @classmethod def to_sql(cls, qc, **kwargs): - self._server_conn.to_sql( - qc._id, - name, - con, - schema, - if_exists, - index, - index_label, - chunksize, - dtype, - method, - ) + cls._server_conn.to_sql(qc._id, **kwargs) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index bc9e349db2c..54b57bfa921 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -4078,10 +4078,6 @@ def resample_var(self, resample_kwargs, ddof, *args, **kwargs): # End of Resample methods - @doc_utils.doc_str_method(refer_to="capitalize", params="") - def str_capitalize(self): - return StrDefault.register(pandas.Series.str.capitalize)(self) - # Str methods @doc_utils.doc_str_method(refer_to="capitalize", params="") diff --git a/modin/pandas/series_utils.py b/modin/pandas/series_utils.py index 5bbb85cf2ae..368f77684fd 100644 --- a/modin/pandas/series_utils.py +++ b/modin/pandas/series_utils.py @@ -331,9 +331,7 @@ def partition(self, sep=" ", expand=True): ) def repeat(self, repeats): - return Series( - query_compiler=self._query_compiler.str_repeat(repeats) - ) + return Series(query_compiler=self._query_compiler.str_repeat(repeats)) def rpartition(self, sep=" ", expand=True): if sep is not None and len(sep) == 0: From 773eff09a80dc52d07e112b7d5e1334ac09f4289 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 26 Oct 2022 13:55:33 -0500 Subject: [PATCH 33/77] Hook up IO and test query compiler, but service missing methods that take id. 
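The test service added to conftest.py stands in for a real server: it
owns the concrete query compilers and hands the client opaque UUIDs, so
the client query compiler only ever stores an id and forwards every
operation by name. A minimal sketch of that pattern (illustrative names,
not part of this patch):

    from uuid import uuid4

    class MinimalService:
        # Holds the real query compilers; clients hold only the ids.
        def __init__(self):
            self._qc_by_id = {}

        def add_query_compiler(self, qc):
            new_id = uuid4()
            self._qc_by_id[new_id] = qc
            return new_id

        def transpose(self, id):
            # Run the operation server-side, register the result under a
            # fresh id, and return that id for the client to wrap.
            return self.add_query_compiler(self._qc_by_id[id].transpose())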
Signed-off-by: mvashishtha --- modin/conftest.py | 87 ++++++++++++++++--- modin/core/execution/client/query_compiler.py | 2 + 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index 4a9687a2f7f..2fb32457ee1 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -21,7 +21,8 @@ from pandas.util._decorators import doc import numpy as np import shutil -from typing import Optional +from typing import Any, NamedTuple, Optional +from uuid import uuid4, UUID assert ( "modin.utils" not in sys.modules @@ -46,12 +47,16 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url): import modin # noqa: E402 import modin.config # noqa: E402 from modin.config import IsExperimental, TestRayClient # noqa: E402 -import uuid # noqa: E402 from modin.core.storage_formats import ( # noqa: E402 PandasQueryCompiler, BaseQueryCompiler, ) +from modin.core.execution.client.io import ClientIO # noqa: E402 +from modin.core.execution.client.query_compiler import ClientQueryCompiler # noqa: E402 +from modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe import ( # noqa: E402 + PandasOnPythonDataframe, +) from modin.core.execution.python.implementations.pandas_on_python.io import ( # noqa: E402 PandasOnPythonIO, ) @@ -223,9 +228,6 @@ def __iter__(self): os.environ = orig_env -BASE_EXECUTION_NAME = "BaseOnPython" - - class TestQC(BaseQueryCompiler): def __init__(self, modin_frame): self._modin_frame = modin_frame @@ -269,16 +271,77 @@ def prepare(cls): cls.io_cls = BaseOnPythonIO -def set_base_execution(name=BASE_EXECUTION_NAME): - setattr(factories, f"{name}Factory", BaseOnPythonFactory) - modin.set_execution(engine="python", storage_format=name.split("On")[0]) +def set_base_on_python_execution(): + factories.BaseOnPythonFactory = BaseOnPythonFactory + modin.set_execution(engine="python", storage_format="Base") + + +class BaseExecutionService: + class DefaultToPandasResult(NamedTuple): + result: Optional[Any] + result_is_qc_id: bool + + def __init__(self): + self._base_query_compiler_by_id = {} + + def add_query_compiler(self, qc) -> UUID: + id = self._generate_id() + self._base_query_compiler_by_id[self._generate_id()] = qc + return id + + def default_to_pandas( + self, id: UUID, pandas_op, *args, **kwargs + ) -> DefaultToPandasResult: + result = self._base_query_compiler_by_id[id].default_to_pandas( + pandas_op, *args, **kwargs + ) + result_is_qc_id = isinstance(result, BaseQueryCompiler) + if result_is_qc_id: + new_id = self._generate_id() + self._base_query_compiler_by_id[new_id] = result + result = new_id + return self.DefaultToPandasResult(result=result, result_is_qc_id=False) + + def _generate_id(self): + id = uuid4() + while id in self._base_query_compiler_by_id: + id = uuid4() + return id + + +class TestClientQueryCompiler(ClientQueryCompiler): + @classmethod + def from_pandas(cls, df, data_cls): + return cls(cls._service.add_query_compiler(TestQC.from_pandas(df, data_cls))) + + def default_to_pandas(self, pandas_op, *args, **kwargs): + result = self._service.default_to_pandas(self._id, pandas_op, *args, **kwargs) + if result.result_is_qc_id: + return self.__constructor__(result.result) + return result.result + + +class ClientFactory(factories.BaseFactory): + @classmethod + def prepare(cls): + cls.io_cls = ClientIO + + +def set_client_execution(): + service = BaseExecutionService() + ClientQueryCompiler.set_server_connection(service) + ClientIO.query_compiler_cls = TestClientQueryCompiler + 
ClientIO.set_server_connection(service) + ClientIO.frame_cls = PandasOnPythonDataframe + factories.ClientFactory = ClientFactory + modin.set_execution(engine="Client", storage_format="") @pytest.fixture(scope="function") def get_unique_base_execution(): """Setup unique execution for a single function and yield its QueryCompiler that's suitable for inplace modifications.""" # It's better to use decimal IDs rather than hex ones due to factory names formatting - execution_id = int(uuid.uuid4().hex, 16) + execution_id = int(uuid4().hex, 16) format_name = f"Base{execution_id}" engine_name = "Python" execution_name = f"{format_name}On{engine_name}" @@ -319,11 +382,13 @@ def pytest_configure(config): if execution is None: return - if execution == BASE_EXECUTION_NAME: - set_base_execution(BASE_EXECUTION_NAME) + if execution == "BaseOnPython": + set_base_on_python_execution() config.addinivalue_line( "filterwarnings", "default:.*defaulting to pandas.*:UserWarning" ) + elif execution == "Client": + set_client_execution() else: partition, engine = execution.split("On") modin.set_execution(engine=engine, storage_format=partition) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index d07168078af..e8fc0ec20c2 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -20,6 +20,8 @@ class ClientQueryCompiler(BaseQueryCompiler): + lazy_execution = True + @classmethod def set_server_connection(cls, conn): cls._service = conn From 87699b864d47725a005837e91f1aafecc49ea5fd Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 26 Oct 2022 15:36:16 -0500 Subject: [PATCH 34/77] Fix up the service and test_general passes with execution 'client'. Signed-off-by: mvashishtha --- modin/conftest.py | 48 ++++-------------- modin/core/execution/client/query_compiler.py | 50 +++++++++---------- 2 files changed, 34 insertions(+), 64 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index 2fb32457ee1..58b8c04aa70 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -21,8 +21,8 @@ from pandas.util._decorators import doc import numpy as np import shutil -from typing import Any, NamedTuple, Optional -from uuid import uuid4, UUID +from typing import Optional +from uuid import uuid4 assert ( "modin.utils" not in sys.modules @@ -54,6 +54,9 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url): ) from modin.core.execution.client.io import ClientIO # noqa: E402 from modin.core.execution.client.query_compiler import ClientQueryCompiler # noqa: E402 +from modin.core.execution.client.service import ( # noqa: E402 + ForwardingQueryCompilerService, +) from modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe import ( # noqa: E402 PandasOnPythonDataframe, ) @@ -68,6 +71,7 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url): make_default_file, teardown_test_files, NROWS, + default_to_pandas_ignore_string, ) @@ -276,39 +280,6 @@ def set_base_on_python_execution(): modin.set_execution(engine="python", storage_format="Base") -class BaseExecutionService: - class DefaultToPandasResult(NamedTuple): - result: Optional[Any] - result_is_qc_id: bool - - def __init__(self): - self._base_query_compiler_by_id = {} - - def add_query_compiler(self, qc) -> UUID: - id = self._generate_id() - self._base_query_compiler_by_id[self._generate_id()] = qc - return id - - def default_to_pandas( - self, id: UUID, pandas_op, *args, **kwargs - ) -> 
DefaultToPandasResult: - result = self._base_query_compiler_by_id[id].default_to_pandas( - pandas_op, *args, **kwargs - ) - result_is_qc_id = isinstance(result, BaseQueryCompiler) - if result_is_qc_id: - new_id = self._generate_id() - self._base_query_compiler_by_id[new_id] = result - result = new_id - return self.DefaultToPandasResult(result=result, result_is_qc_id=False) - - def _generate_id(self): - id = uuid4() - while id in self._base_query_compiler_by_id: - id = uuid4() - return id - - class TestClientQueryCompiler(ClientQueryCompiler): @classmethod def from_pandas(cls, df, data_cls): @@ -328,7 +299,7 @@ def prepare(cls): def set_client_execution(): - service = BaseExecutionService() + service = ForwardingQueryCompilerService(BaseQueryCompiler) ClientQueryCompiler.set_server_connection(service) ClientIO.query_compiler_cls = TestClientQueryCompiler ClientIO.set_server_connection(service) @@ -384,10 +355,9 @@ def pytest_configure(config): if execution == "BaseOnPython": set_base_on_python_execution() - config.addinivalue_line( - "filterwarnings", "default:.*defaulting to pandas.*:UserWarning" - ) + config.addinivalue_line("filterwarnings", default_to_pandas_ignore_string) elif execution == "Client": + config.addinivalue_line("filterwarnings", default_to_pandas_ignore_string) set_client_execution() else: partition, engine = execution.split("On") diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index e8fc0ec20c2..2d35762f55e 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -89,7 +89,7 @@ def add_suffix(self, suffix, axis=1): return self.__constructor__(self._service.add_suffix(self._id, suffix, axis)) def insert(self, loc, column, value): - if isinstance(value, ClientQueryCompiler): + if isinstance(value, type(self)): value = value._id is_qc = True else: @@ -99,7 +99,7 @@ def insert(self, loc, column, value): ) def setitem(self, axis, key, value): - if isinstance(value, ClientQueryCompiler): + if isinstance(value, type(self)): value = value._id is_qc = True else: @@ -109,7 +109,7 @@ def setitem(self, axis, key, value): ) def getitem_array(self, key): - if isinstance(key, ClientQueryCompiler): + if isinstance(key, type(self)): key = key._id is_qc = True else: @@ -162,11 +162,11 @@ def replace( regex=False, method: "str | NoDefault" = no_default, ): - if isinstance(to_replace, ClientQueryCompiler): + if isinstance(to_replace, type(self)): is_to_replace_qc = True else: is_to_replace_qc = False - if isinstance(regex, ClientQueryCompiler): + if isinstance(regex, type(self)): is_regex_qc = True else: is_regex_qc = False @@ -195,7 +195,7 @@ def fillna( limit=None, downcast=None, ): - if isinstance(value, ClientQueryCompiler): + if isinstance(value, type(self)): is_qc = True else: is_qc = False @@ -278,7 +278,7 @@ def concat(self, axis, other, **kwargs): ) def eq(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -286,7 +286,7 @@ def eq(self, other, **kwargs): return self.__constructor__(self._service.eq(self._id, other, is_qc, **kwargs)) def lt(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -294,7 +294,7 @@ def lt(self, other, **kwargs): return self.__constructor__(self._service.lt(self._id, other, is_qc, **kwargs)) def le(self, other, **kwargs): - if isinstance(other, 
ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -302,7 +302,7 @@ def le(self, other, **kwargs): return self.__constructor__(self._service.le(self._id, other, is_qc, **kwargs)) def gt(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -310,7 +310,7 @@ def gt(self, other, **kwargs): return self.__constructor__(self._service.gt(self._id, other, is_qc, **kwargs)) def ge(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -318,7 +318,7 @@ def ge(self, other, **kwargs): return self.__constructor__(self._service.ge(self._id, other, is_qc, **kwargs)) def ne(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -326,7 +326,7 @@ def ne(self, other, **kwargs): return self.__constructor__(self._service.ne(self._id, other, is_qc, **kwargs)) def __and__(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -336,7 +336,7 @@ def __and__(self, other, **kwargs): ) def __or__(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -346,7 +346,7 @@ def __or__(self, other, **kwargs): ) def add(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -354,7 +354,7 @@ def add(self, other, **kwargs): return self.__constructor__(self._service.add(self._id, other, is_qc, **kwargs)) def radd(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -364,7 +364,7 @@ def radd(self, other, **kwargs): ) def truediv(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -374,7 +374,7 @@ def truediv(self, other, **kwargs): ) def mod(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -382,7 +382,7 @@ def mod(self, other, **kwargs): return self.__constructor__(self._service.mod(self._id, other, is_qc, **kwargs)) def rmod(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -392,7 +392,7 @@ def rmod(self, other, **kwargs): ) def sub(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -402,7 +402,7 @@ def sub(self, other, **kwargs): ) def rsub(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -412,7 +412,7 @@ def rsub(self, other, **kwargs): ) def mul(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -420,7 +420,7 @@ def mul(self, other, **kwargs): return self.__constructor__(self._service.mul(self._id, other, is_qc, **kwargs)) def rmul(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -430,7 +430,7 @@ def 
rmul(self, other, **kwargs): ) def floordiv(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: @@ -440,7 +440,7 @@ def floordiv(self, other, **kwargs): ) def rfloordiv(self, other, **kwargs): - if isinstance(other, ClientQueryCompiler): + if isinstance(other, type(self)): other = other._id is_qc = True else: From d56db3f744ed9969491418004193707f27077e34 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 26 Oct 2022 19:25:08 -0500 Subject: [PATCH 35/77] got test_indexing.py to pass, going in order through test-defaults. Signed-off-by: mvashishtha --- modin/conftest.py | 2 +- modin/core/execution/client/io.py | 4 ++-- modin/core/execution/client/query_compiler.py | 13 ++++++++----- modin/pandas/indexing.py | 10 +--------- modin/pandas/test/dataframe/test_default.py | 10 +++++++--- modin/pandas/test/dataframe/test_indexing.py | 7 +++++++ 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index 58b8c04aa70..f0cf0a49b26 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -299,7 +299,7 @@ def prepare(cls): def set_client_execution(): - service = ForwardingQueryCompilerService(BaseQueryCompiler) + service = ForwardingQueryCompilerService(BaseQueryCompiler, PandasOnPythonIO) ClientQueryCompiler.set_server_connection(service) ClientIO.query_compiler_cls = TestClientQueryCompiler ClientIO.set_server_connection(service) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 689ef92c6ad..866d016b13e 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -62,7 +62,7 @@ def read_csv(cls, filepath_or_buffer, **kwargs): Returns ------- - ClientQueryCompiler + query_compiler_cls Query compiler with CSV data read in. """ if isinstance(filepath_or_buffer, str): @@ -77,7 +77,7 @@ def read_csv(cls, filepath_or_buffer, **kwargs): raise ConnectionError( "Missing server connection, did you initialize the connection?" 
) - return ClientQueryCompiler( + return cls.query_compiler_cls( cls._server_conn.read_csv(cls._data_conn, filepath_or_buffer, **kwargs) ) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 2d35762f55e..3083110a47d 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -137,7 +137,7 @@ def get_dummies(self, columns, **kwargs): ) def take_2d(self, index=None, columns=None): - return self.__constructor__(self._service.view(self._id, index, columns)) + return self.__constructor__(self._service.take_2d(self._id, index, columns)) def drop(self, index=None, columns=None): return self.__constructor__(self._service.drop(self._id, index, columns)) @@ -397,9 +397,7 @@ def sub(self, other, **kwargs): is_qc = True else: is_qc = False - return self.__constructor__( - self._service.rsub(self._id, other, is_qc, **kwargs) - ) + return self.__constructor__(self._service.sub(self._id, other, is_qc, **kwargs)) def rsub(self, other, **kwargs): if isinstance(other, type(self)): @@ -706,9 +704,14 @@ def groupby_sum( agg_kwargs, drop=False, ): + if isinstance(by, type(self)): + by = by._id + is_qc = True + else: + is_qc = False return self.__constructor__( self._service.groupby_sum( - self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc ) ) diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index d51b749e10b..16b3003f0f9 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -35,7 +35,6 @@ from pandas.api.types import is_list_like, is_bool from pandas.core.dtypes.common import is_integer, is_bool_dtype, is_integer_dtype from pandas.core.indexing import IndexingError -from modin.core.execution.client.query_compiler import ClientQueryCompiler from modin.error_message import ErrorMessage from modin.logging import ClassLogger @@ -678,14 +677,7 @@ def __getitem__(self, key): if isinstance(row_loc, Series) and is_boolean_array(row_loc): return self._handle_boolean_masking(row_loc, col_loc) - if isinstance(self.qc, ClientQueryCompiler) and self.qc.lazy_execution: - # Since we don't know if the row labels are present or not in lazy evaluation, - # immediately hand off computation to the engine - return type(self.df)( - query_compiler=self.qc.getitem_row_labels_array( - row_loc - ).getitem_column_array(col_loc) - ) + row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc) result = self._getitem_positional( diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py index 710eb4b152a..14897291558 100644 --- a/modin/pandas/test/dataframe/test_default.py +++ b/modin/pandas/test/dataframe/test_default.py @@ -129,6 +129,10 @@ def test_to_numpy(data): assert_array_equal(modin_df.values, pandas_df.values) +@pytest.mark.skipif( + get_current_execution() == "Client", + reason="Client query compiler does not have partitions", +) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_partition_to_numpy(data): frame = pd.DataFrame(data) @@ -1200,9 +1204,9 @@ def test_setattr_axes(): # Test that setting .index or .columns does not warn df = pd.DataFrame([[1, 2], [3, 4]]) with warnings.catch_warnings(): - if get_current_execution() != "BaseOnPython": - # In BaseOnPython, setting columns raises a warning because get_axis - # defaults to pandas. 
+ if get_current_execution() not in ("BaseOnPython", "Client"): + # In BaseOnPython and Client executions, setting columns raises a + # warning because get_axis defaults to pandas. warnings.simplefilter("error") df.index = ["foo", "bar"] df.columns = [9, 10] diff --git a/modin/pandas/test/dataframe/test_indexing.py b/modin/pandas/test/dataframe/test_indexing.py index 88ab6f5a1cd..61492e400f3 100644 --- a/modin/pandas/test/dataframe/test_indexing.py +++ b/modin/pandas/test/dataframe/test_indexing.py @@ -1993,6 +1993,13 @@ def test___setitem__mask(): ) @pytest.mark.parametrize("convert_to_series", [False, True]) @pytest.mark.parametrize("new_col_id", [123, "new_col"], ids=["integer", "string"]) +@pytest.mark.skipif( + condition=get_current_execution() == "Client", + reason=( + "client query compiler uses lazy execution, so we don't default " + + "to pandas for the empty frame because we don't check whether the frame is empty. we can't do the insertion correctly right now without defaulting to pandas." + ), +) def test_setitem_on_empty_df(data, value, convert_to_series, new_col_id): pandas_df = pandas.DataFrame(data) modin_df = pd.DataFrame(data) From 3f9bf8c98b764a47e4738fe44e58898491bb25e7 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Thu, 27 Oct 2022 01:34:42 -0500 Subject: [PATCH 36/77] ci.yml tests pass through test_map_metadata. Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 6 +- .../storage_formats/base/query_compiler.py | 4 +- .../storage_formats/pandas/query_compiler.py | 6 +- modin/pandas/base.py | 2 +- modin/pandas/test/dataframe/test_iter.py | 11 +-- .../test/dataframe/test_map_metadata.py | 67 ++++++++++++++----- 6 files changed, 69 insertions(+), 27 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 3083110a47d..7dbccdd4124 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -139,8 +139,10 @@ def get_dummies(self, columns, **kwargs): def take_2d(self, index=None, columns=None): return self.__constructor__(self._service.take_2d(self._id, index, columns)) - def drop(self, index=None, columns=None): - return self.__constructor__(self._service.drop(self._id, index, columns)) + def drop(self, index=None, columns=None, errors: str = "raise"): + return self.__constructor__( + self._service.drop(self._id, index, columns, errors) + ) def isna(self): return self.__constructor__(self._service.isna(self._id)) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 54b57bfa921..47f08a8e616 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2214,7 +2214,7 @@ def inserter(df, loc, column, value): # END Abstract insert # Abstract drop - def drop(self, index=None, columns=None): + def drop(self, index=None, columns=None, errors: str = "raise"): """ Drop specified rows or columns. 
@@ -2234,7 +2234,7 @@ def drop(self, index=None, columns=None):
             return self
         else:
             return DataFrameDefault.register(pandas.DataFrame.drop)(
-                self, index=index, columns=columns
+                self, index=index, columns=columns, errors=errors
             )

     # END drop
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 2f944c26801..3a8e78822d5 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -2355,7 +2355,11 @@ def dropna(self, **kwargs):
             )
         )

-    def drop(self, index=None, columns=None):
+    def drop(self, index=None, columns=None, errors: str = "raise"):
+        # `errors` parameter needs to be part of the function signature because
+        # other query compilers may not take care of error handling at the API
+        # layer. This query compiler assumes there won't be any errors due to
+        # invalid keys.
         if index is not None:
             index = np.sort(self.index.get_indexer_for(self.index.difference(index)))
         if columns is not None:
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 6022956e2e0..b1609a35684 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -1246,7 +1246,7 @@ def drop(
             axes[axis] = None

         new_query_compiler = self._query_compiler.drop(
-            index=axes["index"], columns=axes["columns"]
+            index=axes["index"], columns=axes["columns"], errors=errors
         )
         return self._create_or_update_from_compiler(new_query_compiler, inplace)

diff --git a/modin/pandas/test/dataframe/test_iter.py b/modin/pandas/test/dataframe/test_iter.py
index a9978fbc4a0..d678ee11efa 100644
--- a/modin/pandas/test/dataframe/test_iter.py
+++ b/modin/pandas/test/dataframe/test_iter.py
@@ -17,7 +17,7 @@
 import pandas
 import matplotlib
 import modin.pandas as pd
-import io
+from pandas._testing import ensure_clean
 import warnings

 from modin.pandas.test.utils import (
@@ -226,9 +226,12 @@ def test___repr__():
 "2016-08-26 09:00:15.816",1,60.166254,24.700671,3,"WALKING",75,"IN_VEHICLE",5,"ON_BICYCLE",5
 "2016-08-26 09:00:16.413",5,60.193055,24.767427,5,"WALKING",85,"ON_BICYCLE",15,"UNKNOWN",0
 "2016-08-26 09:00:20.578",3,60.152996,24.745216,3.90000009536743,"STILL",69,"IN_VEHICLE",31,"UNKNOWN",0"""
-    pandas_df = pandas.read_csv(io.StringIO(string_data))
-    with warns_that_defaulting_to_pandas():
-        modin_df = pd.read_csv(io.StringIO(string_data))
+    with ensure_clean(".csv") as path:
+        with open(path, "w") as f:
+            f.write(string_data)
+        pandas_df = pandas.read_csv(path)
+        with warns_that_defaulting_to_pandas():
+            modin_df = pd.read_csv(path)
     assert repr(pandas_df) == repr(modin_df)

diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index 8cdf61fb4c1..10fa9880b16 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -162,17 +162,22 @@ def test_empty_df():
     assert len(df.index) == 0
     assert len(df.columns) == 0

-    df = pd.DataFrame()
-    pd_df = pandas.DataFrame()
-    df["a"] = [1, 2, 3, 4, 5]
-    pd_df["a"] = [1, 2, 3, 4, 5]
-    df_equals(df, pd_df)
-
-    df = pd.DataFrame()
-    pd_df = pandas.DataFrame()
-    df["a"] = list("ABCDEF")
-    pd_df["a"] = list("ABCDEF")
-    df_equals(df, pd_df)
+    # The client query compiler uses lazy execution, so we don't default to
+    # pandas for the empty frame because we don't check whether the frame is
+    # empty. We can't do the insertion correctly right now without defaulting
+    # to pandas.
+    if get_current_execution() != "Client":
+        df = pd.DataFrame()
+        pd_df = pandas.DataFrame()
+        df["a"] = [1, 2, 3, 4, 5]
+        pd_df["a"] = [1, 2, 3, 4, 5]
+        df_equals(df, pd_df)
+
+        df = pd.DataFrame()
+        pd_df = pandas.DataFrame()
+        df["a"] = list("ABCDEF")
+        pd_df["a"] = list("ABCDEF")
+        df_equals(df, pd_df)

     df = pd.DataFrame()
     pd_df = pandas.DataFrame()
@@ -293,7 +298,7 @@ def test_copy(data):
     new_modin_df = modin_df.copy()
     assert new_modin_df is not modin_df

-    if get_current_execution() != "BaseOnPython":
+    if get_current_execution() not in ("BaseOnPython", "Client"):
         assert np.array_equal(
             new_modin_df._query_compiler._modin_frame._partitions,
             modin_df._query_compiler._modin_frame._partitions,
@@ -628,6 +633,10 @@ def test_convert_dtypes_single_partition(
     get_current_execution() == "BaseOnPython",
     reason="BaseOnPython cannot not have multiple partitions.",
 )
+@pytest.mark.skipif(
+    get_current_execution() == "Client",
+    reason="Client query compiler doesn't have partitions at all.",
+)
 def test_convert_dtypes_multiple_row_partitions():
     # Column 0 should have string dtype
     modin_part1 = pd.DataFrame(["a"]).convert_dtypes()
@@ -708,12 +717,36 @@ def test_drop():
     df_equals(modin_simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
     df_equals(modin_simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :])

-    pytest.raises(ValueError, modin_simple.drop, 5)
-    pytest.raises(ValueError, modin_simple.drop, "C", 1)
-    pytest.raises(ValueError, modin_simple.drop, [1, 5])
-    pytest.raises(ValueError, modin_simple.drop, ["A", "C"], 1)
+    # TODO(https://github.com/modin-project/modin/issues/5163): raise a
+    # KeyError like pandas does when a label is not found and lazy_execution
+    # is off. Also use df_equals instead of eval_general.
+    check_exception_type = modin_simple._query_compiler.lazy_execution
+    eval_general(
+        modin_simple,
+        simple,
+        lambda df: df.drop(5),
+        check_exception_type=check_exception_type,
+    )
+    eval_general(
+        modin_simple,
+        simple,
+        lambda df: df.drop("C", axis=1),
+        check_exception_type=check_exception_type,
+    )
+    eval_general(
+        modin_simple,
+        simple,
+        lambda df: df.drop([1, 5], axis=1),
+        check_exception_type=check_exception_type,
+    )
+    eval_general(
+        modin_simple,
+        simple,
+        lambda df: df.drop(["A", "C"], axis=1),
+        check_exception_type=check_exception_type,
+    )

-    # errors = 'ignore'
+    # test errors = 'ignore'
     df_equals(modin_simple.drop(5, errors="ignore"), simple)
     df_equals(modin_simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :])
     df_equals(modin_simple.drop("C", axis=1, errors="ignore"), simple)

From 86d489a5aa617f22f3473e94a18a1f49c94ead41 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Thu, 27 Oct 2022 01:43:24 -0500
Subject: [PATCH 37/77] Tests pass through test_reduce.
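groupby_count now follows the same marshalling convention as the binary
operators: when `by` is itself a query compiler, only its id crosses the
wire, along with an `is_qc` flag telling the service to swap the id back
for its own query compiler. A sketch of the receiving side, assuming a
service shaped like the one in conftest.py (illustrative, not part of
this patch):

    # A service-side method; self._qc_by_id maps ids to query compilers
    # and add_query_compiler registers a result and returns its new id.
    def groupby_count(
        self, id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc
    ):
        if is_qc:
            # The client sent an id rather than a real query compiler.
            by = self._qc_by_id[by]
        return self.add_query_compiler(
            self._qc_by_id[id].groupby_count(
                by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
            )
        )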
Signed-off-by: mvashishtha
---
 modin/core/execution/client/query_compiler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py
index 7dbccdd4124..018e7cb0483 100644
--- a/modin/core/execution/client/query_compiler.py
+++ b/modin/core/execution/client/query_compiler.py
@@ -661,9 +661,14 @@ def groupby_count(
         agg_kwargs,
         drop=False,
     ):
+        if isinstance(by, type(self)):
+            by = by._id
+            is_qc = True
+        else:
+            is_qc = False
         return self.__constructor__(
             self._service.groupby_count(
-                self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+                self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc
             )
         )

From 6946ae806e5d4cf559e32e600935a2cd5da32b37 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Thu, 27 Oct 2022 10:39:30 -0500
Subject: [PATCH 38/77] pass through test_udf.py and enable another skipped test.

Signed-off-by: mvashishtha
---
 modin/core/execution/client/query_compiler.py | 31 -------------------
 modin/pandas/test/dataframe/test_default.py   | 11 ++++---
 .../test/dataframe/test_map_metadata.py       |  8 ++---
 modin/pandas/test/dataframe/test_udf.py       |  9 ++++--
 4 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py
index 018e7cb0483..193a6534f8c 100644
--- a/modin/core/execution/client/query_compiler.py
+++ b/modin/core/execution/client/query_compiler.py
@@ -13,10 +13,8 @@
 from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler
 import numpy as np
-import inspect
 from pandas._libs.lib import no_default, NoDefault
 from pandas.api.types import is_list_like
-from pandas.core.computation.parsing import tokenize_string


 class ClientQueryCompiler(BaseQueryCompiler):
@@ -786,35 +784,6 @@ def idxmax(self, **kwargs):
         return self.__constructor__(self._service.idxmax(self._id, **kwargs))

     def query(self, expr, **kwargs):
-        is_variable = False
-        variable_list = []
-        for k, v in tokenize_string(expr):
-            if v == "" or v == " ":
-                continue
-            if is_variable:
-                frame = inspect.currentframe()
-                identified = False
-                while frame:
-                    if v in frame.f_locals:
-                        value = frame.f_locals[v]
-                        if isinstance(value, list):
-                            value = tuple(value)
-                        variable_list.append(str(value))
-                        identified = True
-                        break
-                    frame = frame.f_back
-                if not identified:
-                    # TODO this error does not quite match pandas
-                    raise ValueError(f"{v} not found")
-                is_variable = False
-            elif v == "@":
-                is_variable = True
-                continue
-            else:
-                if v in self.columns:
-                    v = f"`{v}`"
-                variable_list.append(v)
-        expr = " ".join(variable_list)
         return self.__constructor__(self._service.query(self._id, expr, **kwargs))

     def finalize(self):
diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py
index 14897291558..f1b1293eea7 100644
--- a/modin/pandas/test/dataframe/test_default.py
+++ b/modin/pandas/test/dataframe/test_default.py
@@ -129,14 +129,15 @@ def test_to_numpy(data):
     assert_array_equal(modin_df.values, pandas_df.values)


-@pytest.mark.skipif(
-    get_current_execution() == "Client",
-    reason="Client query compiler does not have partitions",
-)
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 def test_partition_to_numpy(data):
     frame = pd.DataFrame(data)
-    for partition in frame._query_compiler._modin_frame._partitions.flatten().tolist():
+    qc = frame._query_compiler
+    if get_current_execution() == "Client":
+        modin_frame = qc._service._qc[qc._id]._modin_frame
+    else:
+        modin_frame = qc._modin_frame
+    for partition in modin_frame._partitions.flatten().tolist():
         assert_array_equal(partition.to_pandas().values, partition.to_numpy())

diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py
index 10fa9880b16..37a3a1cd0d4 100644
--- a/modin/pandas/test/dataframe/test_map_metadata.py
+++ b/modin/pandas/test/dataframe/test_map_metadata.py
@@ -630,12 +630,8 @@ def test_convert_dtypes_single_partition(


 @pytest.mark.skipif(
-    get_current_execution() == "BaseOnPython",
-    reason="BaseOnPython cannot not have multiple partitions.",
-)
-@pytest.mark.skipif(
-    get_current_execution() == "Client",
-    reason="Client query compiler doesn't have partitions at all.",
+    get_current_execution() in ("BaseOnPython", "Client"),
+    reason="These executions will not have multiple partitions.",
 )
 def test_convert_dtypes_multiple_row_partitions():
     # Column 0 should have string dtype
     modin_part1 = pd.DataFrame(["a"]).convert_dtypes()
diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py
index adda540b248..13f7e59a5e1 100644
--- a/modin/pandas/test/dataframe/test_udf.py
+++ b/modin/pandas/test/dataframe/test_udf.py
@@ -360,7 +360,7 @@ def f(x, arg2=0, arg3=0):
 @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
 @pytest.mark.parametrize("funcs", query_func_values, ids=query_func_keys)
 def test_query(data, funcs):
-    if get_current_execution() == "BaseOnPython" and funcs != "col3 > col4":
+    if get_current_execution() in ("BaseOnPython", "Client") and funcs != "col3 > col4":
         pytest.xfail(
             reason="In this case, we are faced with the problem of handling empty data frames - #4934"
         )
@@ -374,8 +374,13 @@ def test_query(data, funcs):
         modin_df.query(funcs)
     else:
         modin_result = modin_df.query(funcs)
+        qc = modin_result._query_compiler
+        if get_current_execution() == "Client":
+            modin_frame = qc._service._qc[qc._id]._modin_frame
+        else:
+            modin_frame = qc._modin_frame
         # `dtypes` must be evaluated after `query` so we need to check cache
-        assert modin_result._query_compiler._modin_frame._dtypes is not None
+        assert modin_frame._dtypes is not None
         df_equals(modin_result, pandas_result)
         df_equals(modin_result.dtypes, pandas_result.dtypes)

From 4f4831be0988bef8fd4e43c70ef02f28e9657073 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Thu, 27 Oct 2022 11:33:30 -0500
Subject: [PATCH 39/77] Pass through test_series, skipping pickle.
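Series.__getitem__ now looks rows up by position: `Series._getitem`
passes `numeric=True` so that `getitem_row_array` uses `iloc` rather
than `loc`. The distinction the new flag encodes, in plain pandas terms
(a small illustrative example):

    import pandas as pd

    s = pd.Series([10, 20, 30], index=[2, 1, 0])
    print(s.iloc[[0]])  # positional: the first row, value 10 (label 2)
    print(s.loc[[0]])   # label-based: the row labelled 0, value 30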
Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 4 +++- .../storage_formats/base/query_compiler.py | 18 ++++++++++-------- modin/pandas/series.py | 6 ++---- modin/pandas/test/test_series.py | 4 ++-- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 193a6534f8c..8c174b30a67 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -196,6 +196,7 @@ def fillna( downcast=None, ): if isinstance(value, type(self)): + value = value._id is_qc = True else: is_qc = False @@ -577,7 +578,8 @@ def str_findall(self, pat, flags=0, **kwargs): def str_get(self, i): return self.__constructor__(self._service.str_get(self._id, i)) - str_index = str_find + def str_index(self, sub, start=0, end=None): + return self.__constructor__(self._service.str_index(self._id, sub, start, end)) def str_join(self, sep): return self.__constructor__(self._service.str_join(self._id, sep)) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 47f08a8e616..848e12e0560 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -1748,20 +1748,22 @@ def describe(self, **kwargs): # noqa: PR02 # data in the same place. @doc_utils.doc_cum_agg(method="sum", refer_to="cumsum") - def cumsum(self, fold_axis, **kwargs): # noqa: PR02 - return DataFrameDefault.register(pandas.DataFrame.cumsum)(self, **kwargs) + def cumsum(self, fold_axis, *args, **kwargs): # noqa: PR02 + return DataFrameDefault.register(pandas.DataFrame.cumsum)(self, *args, **kwargs) @doc_utils.doc_cum_agg(method="maximum", refer_to="cummax") - def cummax(self, fold_axis, **kwargs): # noqa: PR02 - return DataFrameDefault.register(pandas.DataFrame.cummax)(self, **kwargs) + def cummax(self, fold_axis, *args, **kwargs): # noqa: PR02 + return DataFrameDefault.register(pandas.DataFrame.cummax)(self, *args, **kwargs) @doc_utils.doc_cum_agg(method="minimum", refer_to="cummin") - def cummin(self, fold_axis, **kwargs): # noqa: PR02 - return DataFrameDefault.register(pandas.DataFrame.cummin)(self, **kwargs) + def cummin(self, fold_axis, *args, **kwargs): # noqa: PR02 + return DataFrameDefault.register(pandas.DataFrame.cummin)(self, *args, **kwargs) @doc_utils.doc_cum_agg(method="product", refer_to="cumprod") - def cumprod(self, fold_axis, **kwargs): # noqa: PR02 - return DataFrameDefault.register(pandas.DataFrame.cumprod)(self, **kwargs) + def cumprod(self, fold_axis, *args, **kwargs): # noqa: PR02 + return DataFrameDefault.register(pandas.DataFrame.cumprod)( + self, *args, **kwargs + ) @doc_utils.add_refer_to("DataFrame.diff") def diff(self, fold_axis, **kwargs): # noqa: PR02 diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 023b76ed120..17ffa05310f 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -311,9 +311,7 @@ def __getattr__(self, key): try: return object.__getattribute__(self, key) except AttributeError as err: - if not self._query_compiler.lazy_execution and ( - key not in _ATTRS_NO_LOOKUP and key in self.index - ): + if key not in _ATTRS_NO_LOOKUP and key in self.index: return self[key] raise err @@ -2467,7 +2465,7 @@ def _getitem(self, key): row_positions = self.index.get_indexer_for(key) if is_indexer else key if not all(is_integer(x) for x in row_positions): raise KeyError(key[0] if reduce_dimension else key) - 
result = self._query_compiler.getitem_row_array(row_positions) + result = self._query_compiler.getitem_row_array(row_positions, numeric=True) if reduce_dimension: return self._reduce_dimension(result) diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index 22e551d6a7a..61ff44770b4 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -519,7 +519,7 @@ def test___repr__(name, dt_index, data): ) pandas_series.index = modin_series.index = index - if get_current_execution() == "BaseOnPython" and data == "empty": + if get_current_execution() in ("BaseOnPython", "Client") and data == "empty": # TODO: Remove this when default `dtype` of empty Series will be `object` in pandas (see #3142). assert modin_series.dtype == np.object assert pandas_series.dtype == np.float64 @@ -1631,7 +1631,7 @@ def test_dropna_inplace(data): def test_dtype_empty(): modin_series, pandas_series = pd.Series(), pandas.Series() - if get_current_execution() == "BaseOnPython": + if get_current_execution() in ("BaseOnPython", "Client"): # TODO: Remove this when default `dtype` of empty Series will be `object` in pandas (see #3142). assert modin_series.dtype == np.object assert pandas_series.dtype == np.float64 From 577c9896182f350f24bc1765e3db10578c5fc732 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Thu, 27 Oct 2022 21:25:05 -0500 Subject: [PATCH 40/77] Tests pass through test_general. Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 36 ++++++++++++++++--- modin/pandas/general.py | 3 +- modin/pandas/groupby.py | 4 +-- modin/pandas/test/test_groupby.py | 23 +++++++++--- 4 files changed, 53 insertions(+), 13 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 8c174b30a67..df75d5d2ff3 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -646,9 +646,14 @@ def groupby_mean( agg_kwargs, drop=False, ): + if isinstance(by, type(self)): + by = by._id + is_qc = True + else: + is_qc = False return self.__constructor__( self._service.groupby_mean( - self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc ) ) @@ -681,9 +686,21 @@ def groupby_max( agg_kwargs, drop=False, ): + if isinstance(by, type(self)): + by = by._id + is_qc = True + else: + is_qc = False return self.__constructor__( self._service.groupby_max( - self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + self._id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop, + is_qc, ) ) @@ -696,9 +713,14 @@ def groupby_min( agg_kwargs, drop=False, ): + if isinstance(by, type(self)): + by = by._id + is_qc = True + else: + is_qc = False return self.__constructor__( self._service.groupby_min( - self._id, by._id, axis, groupby_kwargs, agg_args, agg_kwargs, drop + self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc ) ) @@ -733,10 +755,15 @@ def groupby_agg( how="axis_wise", drop=False, ): + if isinstance(by, type(self)): + by = by._id + is_qc = True + else: + is_qc = False return self.__constructor__( self._service.groupby_agg( self._id, - by._id, + by, agg_func, axis, groupby_kwargs, @@ -744,6 +771,7 @@ def groupby_agg( agg_kwargs, how, drop, + is_qc, ) ) diff --git a/modin/pandas/general.py b/modin/pandas/general.py index a34dac3a2e5..b2993b4c9bc 100644 --- a/modin/pandas/general.py +++ b/modin/pandas/general.py @@ -472,8 +472,7 @@ 
def concat( list_of_objs = [ obj._query_compiler for obj in list_of_objs - if (not obj._query_compiler.lazy_execution and len(obj.index)) - or len(obj.columns) + if len(obj.index) or len(obj.columns) ] if keys is not None: if all_series: diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index dcd990f13b2..fed97028282 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -918,7 +918,7 @@ def _iter(self): k, DataFrame( query_compiler=self._query_compiler.getitem_row_array( - indices[k] + indices[k], numeric=True ) ), ) @@ -1228,7 +1228,7 @@ def _iter(self): k, Series( query_compiler=self._query_compiler.getitem_row_array( - indices[k] + indices[k], numeric=True ) ), ) diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py index b805c8e4238..fdd09afe610 100644 --- a/modin/pandas/test/test_groupby.py +++ b/modin/pandas/test/test_groupby.py @@ -489,7 +489,7 @@ def maybe_get_columns(df, by): ) eval_fillna(modin_groupby, pandas_groupby) eval_count(modin_groupby, pandas_groupby) - if get_current_execution() != "BaseOnPython": + if get_current_execution() not in ("BaseOnPython", "Client"): eval_general( modin_groupby, pandas_groupby, @@ -1276,7 +1276,7 @@ def eval_shift(modin_groupby, pandas_groupby): # groupby.shift internally masks the source frame with a Series boolean mask, # doing so ends up in the `getitem_array` method, that is broken for `BaseOnPython`: # https://github.com/modin-project/modin/issues/3701 - if get_current_execution() != "BaseOnPython": + if get_current_execution() not in ("BaseOnPython", "Client"): if isinstance(pandas_groupby, pandas.core.groupby.DataFrameGroupBy): pandas_res = pandas_groupby.shift(axis=1, fill_value=777) modin_res = modin_groupby.shift(axis=1, fill_value=777) @@ -1441,7 +1441,7 @@ def test_groupby_with_kwarg_dropna(groupby_kwargs, dropna): # https://github.com/modin-project/modin/issues/2912 # "BaseOnPython" tests are disabled because of the bug: # https://github.com/modin-project/modin/issues/3827 - if get_current_execution() != "BaseOnPython" and any( + if get_current_execution() not in ("BaseOnPython", "Client") and any( col in modin_df.columns for col in by_kwarg ): df_equals(md_grp.quantile(), pd_grp.quantile()) @@ -1555,7 +1555,7 @@ def test_agg_func_None_rename(by_and_agg_dict, as_index): pytest.param( False, marks=pytest.mark.xfail_executions( - ["BaseOnPython"], reason="See Pandas issue #39103" + ["BaseOnPython", "Client"], reason="See Pandas issue #39103" ), ), ], @@ -1910,6 +1910,16 @@ def test_multi_column_groupby_different_partitions( eval___getitem__(md_grp, pd_grp, [md_df.columns[1], md_df.columns[2]]) +# TODO(https://github.com/modin-project/modin/issues/5165): Consider +# making the dataframe not empty and fixing the resulting bugs. +@pytest.mark.skipif( + get_current_execution() == "Client", + reason=( + "Dataframe is empty, so other executions default to pandas and " + + "behave correctly, but Client execution has lazy_execution=True, so it " + + "doesn't default to pandas and it has bugs." 
+ ), +) @pytest.mark.parametrize( "by", [ @@ -2120,8 +2130,11 @@ def test_groupby_with_virtual_partitions(): PandasDataframeAxisPartition, ) else: + qc = big_modin_df._query_compiler + if get_current_execution() == "Client": + qc = qc._service._qc[qc._id] assert not issubclass( - type(big_modin_df._query_compiler._modin_frame._partitions[0][0]), + type(qc._modin_frame._partitions[0][0]), PandasDataframeAxisPartition, ) eval_general( From b74a95c0cdcf318864d22613580d5178786e3b6a Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 10:59:42 -0500 Subject: [PATCH 41/77] TestCsv and TestSql pass. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 11 +++++++++-- modin/core/io/io.py | 11 +++++++++++ modin/pandas/test/test_io.py | 29 +++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 866d016b13e..9bcbd18111b 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -15,6 +15,8 @@ from modin.core.io.io import BaseIO import fsspec +import pandas + from .query_compiler import ClientQueryCompiler @@ -77,9 +79,14 @@ def read_csv(cls, filepath_or_buffer, **kwargs): raise ConnectionError( "Missing server connection, did you initialize the connection?" ) - return cls.query_compiler_cls( - cls._server_conn.read_csv(cls._data_conn, filepath_or_buffer, **kwargs) + server_result = cls._server_conn.read_csv( + cls._data_conn, filepath_or_buffer, **kwargs ) + # This happens when `read_csv` returns a TextFileReader object for + # iterating through, e.g. because iterator=True + if isinstance(server_result, pandas.io.parsers.TextFileReader): + return server_result + return cls.query_compiler_cls(server_result) @classmethod def read_sql(cls, sql, con, **kwargs): diff --git a/modin/core/io/io.py b/modin/core/io/io.py index b388a2f9e6d..77d988f39e7 100644 --- a/modin/core/io/io.py +++ b/modin/core/io/io.py @@ -138,6 +138,17 @@ def _read_csv( ErrorMessage.default_to_pandas("`read_csv`") return cls._read(filepath_or_buffer=filepath_or_buffer, **kwargs) + @classmethod + @_inherit_docstrings(pandas.read_sql, apilink="pandas.read_sql") + @doc( + _doc_default_io_method, + summary="Read SQL query or database table into query compiler", + returns=_doc_returns_qc_or_parser, + ) + def _read_sql(cls, sql, con, **kwargs): # noqa: PR01 + ErrorMessage.default_to_pandas("`read_sql`") + return cls.from_pandas(pandas.read_sql(sql, con, **kwargs)) + @classmethod def _read(cls, **kwargs): """ diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py index 768a3ca4ebf..2694b0ffceb 100644 --- a/modin/pandas/test/test_io.py +++ b/modin/pandas/test/test_io.py @@ -37,7 +37,7 @@ ReadSqlEngine, ) from modin._compat import PandasCompatVersion -from modin.utils import to_pandas +from modin.utils import get_current_execution, to_pandas from modin.pandas.utils import from_arrow from modin.test.test_utils import warns_that_defaulting_to_pandas import pyarrow as pa @@ -76,7 +76,7 @@ else: from .utils import eval_io -if StorageFormat.get() == "Pandas": +if StorageFormat.get() in ("Pandas", ""): import modin.pandas as pd else: import modin.experimental.pandas as pd @@ -1056,6 +1056,9 @@ def _has_pandas_fallback_reason(self): condition="config.getoption('--simulate-cloud').lower() != 'off'", reason="The reason of tests fail in `cloud` mode is unknown for now - issue #2340", ) + @pytest.mark.xfail_executions( + "Client", reason="Client cannot read from buffer", 
raises=NotImplementedError + ) def test_read_csv_default_to_pandas(self): if self._has_pandas_fallback_reason(): warning_suffix = "buffers" @@ -1251,6 +1254,9 @@ def wrapped_read_csv(file, method): ], ) @pytest.mark.parametrize("buffer_start_pos", [0, 10]) + @pytest.mark.xfail_executions( + "Client", reason="Client cannot read from buffer", raises=NotImplementedError + ) def test_read_csv_file_handle(self, read_mode, make_csv_file, buffer_start_pos): with ensure_clean() as unique_filename: make_csv_file(filename=unique_filename) @@ -1264,7 +1270,10 @@ def test_read_csv_file_handle(self, read_mode, make_csv_file, buffer_start_pos): def test_unnamed_index(self): def get_internal_df(df): - partition = read_df._query_compiler._modin_frame._partitions[0][0] + qc = read_df._query_compiler + if get_current_execution() == "Client": + qc = qc._service._qc[qc._id] + partition = qc._modin_frame._partitions[0][0] return partition.to_pandas() path = "modin/pandas/test/data/issue_3119.csv" @@ -2096,7 +2105,19 @@ class TestSql: condition="config.getoption('--simulate-cloud').lower() != 'off'", reason="The reason of tests fail in `cloud` mode is unknown for now - issue #3264", ) - @pytest.mark.parametrize("read_sql_engine", ["Pandas", "Connectorx"]) + @pytest.mark.parametrize( + "read_sql_engine", + [ + "Pandas", + pytest.param( + "Connectorx", + marks=pytest.mark.skipif( + get_current_execution() == "Client", + reason="Client execution uses pandas.read_sql, which can't read from connectorx connections", + ), + ), + ], + ) def test_read_sql(self, make_sql_connection, read_sql_engine): with ensure_clean_dir() as dirname: filename = get_unique_filename(".db") From 81583d2eccc93d9d9e41a0df0f276b1d7d3dc3e5 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 13:05:22 -0500 Subject: [PATCH 42/77] Fix pydocstyle for qc and io. Signed-off-by: mvashishtha --- .github/workflows/ci.yml | 1 + modin/core/execution/client/io.py | 23 +++-- modin/core/execution/client/query_compiler.py | 85 +++++++++++++++---- modin/core/storage_formats/base/doc_utils.py | 1 + .../storage_formats/base/query_compiler.py | 2 + 5 files changed, 90 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7e03bf16cef..250e9fe1155 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -129,6 +129,7 @@ jobs: - run: python scripts/doc_checker.py modin/core/storage_formats/base - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow - run: python scripts/doc_checker.py modin/core/storage_formats/pandas + - run: python scripts/doc_checker.py modin/core/execution/client - run: | python scripts/doc_checker.py \ modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \ diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 9bcbd18111b..679ce7017ba 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -17,8 +17,6 @@ import fsspec import pandas -from .query_compiler import ClientQueryCompiler - class ClientIO(BaseIO): """Factory providing methods for performing I/O operations using a given Client as the execution engine.""" @@ -64,7 +62,7 @@ def read_csv(cls, filepath_or_buffer, **kwargs): Returns ------- - query_compiler_cls + self.query_compiler_cls Query compiler with CSV data read in. 
""" if isinstance(filepath_or_buffer, str): @@ -108,7 +106,7 @@ def read_sql(cls, sql, con, **kwargs): Returns ------- - ClientQueryCompiler + self.query_compiler_cls Query compiler with data read in from SQL connection. """ if isinstance(con, str) and con.lower() == "auto" and cls._data_conn is None: @@ -121,10 +119,23 @@ def read_sql(cls, sql, con, **kwargs): raise ConnectionError( "Missing server connection, did you initialize the connection?" ) - return ClientQueryCompiler( + return cls.query_compiler_cls( cls._server_conn.read_sql(sql, cls._data_conn, **kwargs) ) @classmethod - def to_sql(cls, qc, **kwargs): + def to_sql(cls, qc, **kwargs) -> None: + """ + Write records stored in a DataFrame to a SQL database. + + Databases supported by SQLAlchemy [1]_ are supported. Tables can be + newly created, appended to, or overwritten. + + Parameters + ---------- + qc : self.query_compiler_cls + Query compiler with data to write to SQL. + **kwargs : dict + Parameters of ``read_sql`` function. + """ cls._server_conn.to_sql(qc._id, **kwargs) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index df75d5d2ff3..635f9b00ad2 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -11,46 +11,99 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +"""Module contains ``ClientQueryCompiler`` class.""" + import numpy as np +import pandas from pandas._libs.lib import no_default, NoDefault from pandas.api.types import is_list_like +from typing import Any +import uuid + +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler +from modin.utils import _inherit_docstrings +@_inherit_docstrings(BaseQueryCompiler) class ClientQueryCompiler(BaseQueryCompiler): - lazy_execution = True + """ + Query compiler for sending queries to a remote server. + + This class translates the query compiler API to function calls on a service + object, which may be a remote service. + + Parameters + ---------- + id : uuid.UUID + ID of this query compiler. + """ + + lazy_execution: bool = True + + def __init__(self, id: uuid.UUID): + self._id = id @classmethod - def set_server_connection(cls, conn): + def set_server_connection(cls, conn: Any): + """ + Set the connection to the service. + + Parameters + ---------- + conn : Any + Connection to the service. + """ cls._service = conn - def __init__(self, id): - assert ( - id is not None - ), "Make sure the client is properly connected and returns and ID" - if isinstance(id, Exception): - raise id - self._id = id + def _set_columns(self, new_columns: pandas.Index) -> None: + """ + Set this query compiler's columns. - def _set_columns(self, new_columns): + Parameters + ---------- + new_columns : pandas.Index + New columns to set. + """ self._id = self._service.rename(self._id, new_col_labels=new_columns) self._columns_cache = self._service.columns(self._id) - def _get_columns(self): + def _get_columns(self) -> pandas.Index: + """ + Get the columns of this query compiler. + + Returns + ------- + pandas.Index : The columns of this query compiler. + """ if self._columns_cache is None: self._columns_cache = self._service.columns(self._id) return self._columns_cache - def _set_index(self, new_index): + def _set_index(self, new_index: pandas.Index): + """ + Set this query compiler's index. 
+ + Parameters + ---------- + new_index : pandas.Index + New index to set. + """ self._id = self._service.rename(self._id, new_row_labels=new_index) - def _get_index(self): + def _get_index(self) -> pandas.Index: + """ + Get the index of this query compiler. + + Returns + ------- + pandas.Index : The index of this query compiler. + """ return self._service.index(self._id) columns = property(_get_columns, _set_columns) - _columns_cache = None + _columns_cache: pandas.Index = None index = property(_get_index, _set_index) - _dtypes_cache = None + _dtypes_cache: pandas.Index = None @property def dtypes(self): diff --git a/modin/core/storage_formats/base/doc_utils.py b/modin/core/storage_formats/base/doc_utils.py index b538c47c92b..3efe8fd2294 100644 --- a/modin/core/storage_formats/base/doc_utils.py +++ b/modin/core/storage_formats/base/doc_utils.py @@ -288,6 +288,7 @@ def doc_reduce_agg(method, refer_to, params=None, extra_params=None): ---------- fold_axis : {{0, 1}} skipna : bool + *args : iterable **kwargs : dict Serves the compatibility purpose. Does not affect the result. diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 848e12e0560..4d674d8c4d2 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2226,6 +2226,8 @@ def drop(self, index=None, columns=None, errors: str = "raise"): Labels of rows to drop. columns : list of labels, optional Labels of columns to drop. + errors : str, default: "raise" + If 'ignore', suppress error and only existing labels are dropped. Returns ------- From 7dc093d4f9d69f3f4def579218a7e6eaa4c45c07 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 14:44:14 -0500 Subject: [PATCH 43/77] REFACTOR: Dedupe single ID service methods. 
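
Most of the service's methods share one shape: look up the stored query
compiler by its ID, forward the call, register the result under a fresh
UUID, and return that UUID to the client. `_set_forwarding_method_for_single_id`
now generates those methods in a loop instead of spelling each one out.
As a sketch, the generated method for "dropna" is equivalent to:

    def dropna(self, id, *args, **kwargs):
        # Resolve the ID to the stored compiler, forward the call,
        # and register the result under a new ID for the client.
        new_id = self._generate_id()
        self._qc[new_id] = self._qc[id].dropna(*args, **kwargs)
        return new_id
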
Signed-off-by: mvashishtha --- modin/core/execution/client/service.py | 566 +++++++++++++++++++++++++ 1 file changed, 566 insertions(+) create mode 100644 modin/core/execution/client/service.py diff --git a/modin/core/execution/client/service.py b/modin/core/execution/client/service.py new file mode 100644 index 00000000000..f376f334562 --- /dev/null +++ b/modin/core/execution/client/service.py @@ -0,0 +1,566 @@ +import numpy as np +import pickle +from typing import Any, NamedTuple, Optional +from uuid import UUID, uuid4 +from modin.core.io.io import BaseIO + +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler + + +class ForwardingQueryCompilerService: + def __init__(self, query_compiler_type: BaseQueryCompiler, io_type: BaseIO): + self._qc = {} + self._qc_type = query_compiler_type + self._io_type = io_type + + def _generate_id(self) -> UUID: + id = uuid4() + while id in self._qc: + id = uuid4() + return id + + def add_query_compiler(self, qc) -> UUID: + id = self._generate_id() + self._qc[id] = qc + return id + + def to_pandas(self, id): + return self._qc[id].to_pandas() + + class DefaultToPandasResult(NamedTuple): + result: Optional[Any] + result_is_qc_id: bool + + def default_to_pandas( + self, id: UUID, pandas_op, *args, **kwargs + ) -> DefaultToPandasResult: + result = self._qc[id].default_to_pandas(pandas_op, *args, **kwargs) + result_is_qc_id = isinstance(result, self._qc_type) + if result_is_qc_id: + new_id = self._generate_id() + self._qc[new_id] = result + result = new_id + return self.DefaultToPandasResult( + result=result, result_is_qc_id=result_is_qc_id + ) + + def rename(self, id, new_col_labels=None, new_row_labels=None): + new_id = self._generate_id() + new_qc = self._qc[new_id] = self._qc[id].copy() + if new_col_labels is not None: + new_qc.columns = new_col_labels + if new_row_labels is not None: + new_qc.index = new_row_labels + return new_id + + def columns(self, id): + return self._qc[id].columns + + def index(self, id): + return self._qc[id].index + + def dtypes(self, id): + return self._qc[id].dtypes + + def insert(self, id, loc, column, value, is_qc): + if is_qc: + value = self._qc[value] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].insert(loc, column, value) + return new_id + + def setitem(self, id, axis, key, value, is_qc): + if is_qc: + value = self._qc[value] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].setitem(axis, key, value) + return new_id + + def getitem_array(self, id, key, is_qc): + if is_qc: + key = self._qc[key] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].getitem_array(key) + return new_id + + def replace( + self, + id, + to_replace, + value, + inplace, + limit, + regex, + method, + is_to_replace_qc, + is_regex_qc, + ): + if is_to_replace_qc: + to_replace = self._qc[to_replace] + if is_regex_qc: + regex = self._qc[regex] + new_id = self._generate_id() + # TODO(GH#3108): Use positional arguments instead of keyword arguments + # in the query compilers so we don't have to name all the arguments + # here. 
+ self._qc[new_id] = self._qc[id].replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + return new_id + + def fillna( + self, + id, + squeeze_self, + squeeze_value, + value, + method, + axis, + inplace, + limit, + downcast, + is_qc, + ): + if is_qc: + value = self._qc[value] + new_id = self._generate_id() + # TODO(GH#3108): Use positional arguments instead of keyword arguments + # in the query compilers so we don't have to name all the + # arguments here. + self._qc[new_id] = self._qc[id].fillna( + squeeze_self=squeeze_self, + squeeze_value=squeeze_value, + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + return new_id + + def concat(self, id, axis, other, **kwargs): + # convert id to query compiler + other = [self._qc[o] for o in other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].concat(axis, other, **kwargs) + return new_id + + def eq(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].eq(other, **kwargs) + return new_id + + def lt(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].lt(other, **kwargs) + return new_id + + def le(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].le(other, **kwargs) + return new_id + + def gt(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].gt(other, **kwargs) + return new_id + + def ge(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].ge(other, **kwargs) + return new_id + + def ne(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].ne(other, **kwargs) + return new_id + + def __and__(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].__and__(other, **kwargs) + return new_id + + def __or__(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].__or__(other, **kwargs) + return new_id + + def add(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].add(other, **kwargs) + return new_id + + def radd(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].radd(other, **kwargs) + return new_id + + def truediv(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].truediv(other, **kwargs) + return new_id + + def rtruediv(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].rtruediv(other, **kwargs) + return new_id + + def mod(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].mod(other, **kwargs) + return new_id + + def rmod(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + 
self._qc[new_id] = self._qc[id].rmod(other, **kwargs) + return new_id + + def sub(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].sub(other, **kwargs) + return new_id + + def rsub(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].rsub(other, **kwargs) + return new_id + + def mul(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].mul(other, **kwargs) + return new_id + + def rmul(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].rmul(other, **kwargs) + return new_id + + def floordiv(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].floordiv(other, **kwargs) + return new_id + + def rfloordiv(self, id, other, is_qc, **kwargs): + if is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].rfloordiv(other, **kwargs) + return new_id + + def merge(self, id, right, **kwargs): + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].merge(self._qc[right], **kwargs) + return new_id + + def groupby_mean( + self, + id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_mean( + by, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + return new_id + + def groupby_count( + self, + id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_count( + by, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + return new_id + + def groupby_max( + self, + id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_max( + by, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + return new_id + + def groupby_min( + self, + id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_min( + by, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + return new_id + + def groupby_sum( + self, + id, + by, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_sum( + by, axis, groupby_kwargs, agg_args, agg_kwargs, drop + ) + return new_id + + def groupby_agg( + self, + id, + by, + agg_func, + axis, + groupby_kwargs, + agg_args, + agg_kwargs, + how="axis_wise", + drop=False, + is_qc: bool = False, + ): + if is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].groupby_agg( + by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop + ) + return new_id + + def read_csv(self, connection, filepath, **kwargs) -> UUID: + io_result = self._io_type._read_csv(filepath, **kwargs) + if isinstance(io_result, self._qc_type): + new_id = self._generate_id() + 
self._qc[new_id] = io_result + return new_id + return io_result + + def read_sql(self, sql, connection, **kwargs) -> UUID: + new_id = self._generate_id() + self._qc[new_id] = self._io_type._read_sql(sql, connection, **kwargs) + return new_id + + def to_sql( + self, + id, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, + ): + self._io_type.to_sql( + self._qc[id], + name, + con, + schema, + if_exists, + index, + index_label, + chunksize, + dtype, + method, + ) + + +def _set_forwarding_method_for_single_id(method_name: str): + def forwarding_method( + self: "ForwardingQueryCompilerService", id: UUID, *args, **kwargs + ): + new_id = self._generate_id() + self._qc[new_id] = getattr(self._qc[id], method_name)(*args, **kwargs) + return new_id + + setattr(ForwardingQueryCompilerService, method_name, forwarding_method) + + +_SINGLE_ID_FORWARDING_METHODS = frozenset( + { + "columnarize", + "transpose", + "take_2d", + "getitem_column_array", + "getitem_row_array", + "pivot", + "get_dummies", + "drop", + "isna", + "notna", + "add_prefix", + "add_suffix", + "astype", + "dropna", + "sum", + "prod", + "count", + "mean", + "median", + "std", + "min", + "max", + "any", + "all", + "quantile_for_single_value", + "quantile_for_list_of_values", + "describe", + "set_index_from_columns", + "reset_index", + "sort_rows_by_column_values", + "sort_index", + "dt_nanosecond", + "dt_microsecond", + "dt_second", + "dt_minute", + "dt_hour", + "dt_day", + "dt_dayofweek", + "dt_weekday", + "dt_day_name", + "dt_dayofyear", + "dt_week", + "dt_weekofyear", + "dt_month", + "dt_month_name", + "dt_quarter", + "dt_year", + "str_capitalize", + "str_isalnum", + "str_isalpha", + "str_isdecimal", + "str_isdigit", + "str_islower", + "str_isnumeric", + "str_isspace", + "str_istitle", + "str_isupper", + "str_len", + "str_lower", + "str_title", + "str_upper", + "str_center", + "str_contains", + "str_count", + "str_endswith", + "str_find", + "str_index", + "str_rfind", + "str_findall", + "str_get", + "str_join", + "str_lstrip", + "str_ljust", + "str_rjust", + "str_match", + "str_pad", + "str_repeat", + "str_split", + "str_rsplit", + "str_rstrip", + "str_slice", + "str_slice_replace", + "str_startswith", + "str_strip", + "str_zfill", + "cummax", + "cummin", + "cumsum", + "cumprod", + "is_monotonic_increasing", + "is_monotonic_decreasing", + "idxmax", + "idxmin", + "query", + } +) + +for method in _SINGLE_ID_FORWARDING_METHODS: + _set_forwarding_method_for_single_id(method) From 7405550c40c12427b8e3707c0bc735a021cdcde6 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 15:51:13 -0500 Subject: [PATCH 44/77] REFACTOR: Dedupe binary code and refactor some is_qc. 
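
Binary methods follow a second shared shape on both sides of the
connection: the client replaces a query-compiler operand with its ID plus
an `other_is_qc` flag, and the service resolves that ID back into a query
compiler before dispatching. As a sketch, the generated pair for "add"
behaves like:

    # Client side: only IDs cross the wire, never compiler objects.
    def add(self, other, **kwargs):
        other_is_qc = isinstance(other, type(self))
        if other_is_qc:
            other = other._id
        return self.__constructor__(
            self._service.add(self._id, other_is_qc, other, **kwargs)
        )

    # Service side: resolve the ID and forward to the stored compiler.
    def add(self, id, other_is_qc, other, **kwargs):
        if other_is_qc:
            other = self._qc[other]
        new_id = self._generate_id()
        self._qc[new_id] = self._qc[id].add(other, **kwargs)
        return new_id
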
Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 273 +++++------------- modin/core/execution/client/service.py | 158 ++++------ 2 files changed, 128 insertions(+), 303 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 635f9b00ad2..095aea6aab0 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -18,7 +18,7 @@ from pandas._libs.lib import no_default, NoDefault from pandas.api.types import is_list_like from typing import Any -import uuid +from uuid import UUID from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler from modin.utils import _inherit_docstrings @@ -34,13 +34,13 @@ class ClientQueryCompiler(BaseQueryCompiler): Parameters ---------- - id : uuid.UUID + id : UUID ID of this query compiler. """ lazy_execution: bool = True - def __init__(self, id: uuid.UUID): + def __init__(self, id: UUID): self._id = id @classmethod @@ -140,32 +140,28 @@ def add_suffix(self, suffix, axis=1): return self.__constructor__(self._service.add_suffix(self._id, suffix, axis)) def insert(self, loc, column, value): - if isinstance(value, type(self)): + value_is_qc = isinstance(value, type(self)) + if value_is_qc: value = value._id - is_qc = True - else: - is_qc = False return self.__constructor__( - self._service.insert(self._id, loc, column, value, is_qc) + self._service.insert(self._id, value_is_qc, loc, column, value) ) def setitem(self, axis, key, value): - if isinstance(value, type(self)): + value_is_qc = isinstance(value, type(self)) + if value_is_qc: value = value._id - is_qc = True - else: - is_qc = False return self.__constructor__( - self._service.setitem(self._id, axis, key, value, is_qc) + self._service.setitem(self._id, value_is_qc, axis, key, value) ) def getitem_array(self, key): - if isinstance(key, type(self)): + key_is_qc = isinstance(key, type(self)) + if key_is_qc: key = key._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.getitem_array(self._id, key, is_qc)) + return self.__constructor__( + self._service.getitem_array(self._id, key_is_qc, key) + ) def getitem_column_array(self, key, numeric=False): return self.__constructor__( @@ -215,25 +211,23 @@ def replace( regex=False, method: "str | NoDefault" = no_default, ): - if isinstance(to_replace, type(self)): - is_to_replace_qc = True - else: - is_to_replace_qc = False - if isinstance(regex, type(self)): - is_regex_qc = True - else: - is_regex_qc = False + to_replace_is_qc = isinstance(to_replace, type(self)) + if to_replace_is_qc: + to_replace = to_replace._id + regex_is_qc = isinstance(regex, type(self)) + if regex_is_qc: + regex = regex._id return self.__constructor__( self._service.replace( self._id, + to_replace_is_qc, + regex_is_qc, to_replace, value, inplace, limit, regex, method, - is_to_replace_qc, - is_regex_qc, ) ) @@ -248,14 +242,13 @@ def fillna( limit=None, downcast=None, ): - if isinstance(value, type(self)): + value_is_qc = isinstance(value, type(self)) + if value_is_qc: value = value._id - is_qc = True - else: - is_qc = False return self.__constructor__( self._service.fillna( self._id, + value_is_qc, squeeze_self, squeeze_value, value, @@ -264,7 +257,6 @@ def fillna( inplace, limit, downcast, - is_qc, ) ) @@ -331,176 +323,6 @@ def concat(self, axis, other, **kwargs): self._service.concat(self._id, axis, other, **kwargs) ) - def eq(self, other, **kwargs): - if isinstance(other, type(self)): - other = 
other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.eq(self._id, other, is_qc, **kwargs)) - - def lt(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.lt(self._id, other, is_qc, **kwargs)) - - def le(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.le(self._id, other, is_qc, **kwargs)) - - def gt(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.gt(self._id, other, is_qc, **kwargs)) - - def ge(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.ge(self._id, other, is_qc, **kwargs)) - - def ne(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.ne(self._id, other, is_qc, **kwargs)) - - def __and__(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.__and__(self._id, other, is_qc, **kwargs) - ) - - def __or__(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.__or__(self._id, other, is_qc, **kwargs) - ) - - def add(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.add(self._id, other, is_qc, **kwargs)) - - def radd(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.radd(self._id, other, is_qc, **kwargs) - ) - - def truediv(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.truediv(self._id, other, is_qc, **kwargs) - ) - - def mod(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.mod(self._id, other, is_qc, **kwargs)) - - def rmod(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.rmod(self._id, other, is_qc, **kwargs) - ) - - def sub(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.sub(self._id, other, is_qc, **kwargs)) - - def rsub(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.rsub(self._id, other, is_qc, **kwargs) - ) - - def mul(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__(self._service.mul(self._id, other, is_qc, **kwargs)) - - def rmul(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return 
self.__constructor__( - self._service.rmul(self._id, other, is_qc, **kwargs) - ) - - def floordiv(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.floordiv(self._id, other, is_qc, **kwargs) - ) - - def rfloordiv(self, other, **kwargs): - if isinstance(other, type(self)): - other = other._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.rfloordiv(self._id, other, is_qc, **kwargs) - ) - def sort_rows_by_column_values(self, columns, ascending=True, **kwargs): return self.__constructor__( self._service.sort_rows_by_column_values( @@ -885,3 +707,48 @@ def from_dataframe(cls, df, data_cls): def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): raise NotImplementedError + + +def _set_forwarding_method_for_binary_function(method_name: str): + def forwarding_method( + self: ClientQueryCompiler, + other: Any, + **kwargs, + ): + other_is_qc = isinstance(other, type(self)) + if other_is_qc: + other = other._id + return self.__constructor__( + getattr(self._service, method_name)(self._id, other_is_qc, other, **kwargs) + ) + + setattr(ClientQueryCompiler, method_name, forwarding_method) + + +_BINARY_FORWARDING_METHODS = frozenset( + { + "eq", + "lt", + "le", + "gt", + "ge", + "ne", + "__and__", + "__or__", + "add", + "radd", + "truediv", + "rtruediv", + "mod", + "rmod", + "sub", + "rsub", + "mul", + "rmul", + "floordiv", + "rfloordiv", + } +) + +for method in _BINARY_FORWARDING_METHODS: + _set_forwarding_method_for_binary_function(method) diff --git a/modin/core/execution/client/service.py b/modin/core/execution/client/service.py index f376f334562..94f64a4a848 100644 --- a/modin/core/execution/client/service.py +++ b/modin/core/execution/client/service.py @@ -1,6 +1,4 @@ -import numpy as np -import pickle -from typing import Any, NamedTuple, Optional +from typing import Any, NamedTuple, Optional, Union from uuid import UUID, uuid4 from modin.core.io.io import BaseIO @@ -62,22 +60,22 @@ def index(self, id): def dtypes(self, id): return self._qc[id].dtypes - def insert(self, id, loc, column, value, is_qc): - if is_qc: + def insert(self, id, value_is_qc: bool, loc, column, value): + if value_is_qc: value = self._qc[value] new_id = self._generate_id() self._qc[new_id] = self._qc[id].insert(loc, column, value) return new_id - def setitem(self, id, axis, key, value, is_qc): - if is_qc: + def setitem(self, id, value_is_qc: bool, axis, key, value): + if value_is_qc: value = self._qc[value] new_id = self._generate_id() self._qc[new_id] = self._qc[id].setitem(axis, key, value) return new_id - def getitem_array(self, id, key, is_qc): - if is_qc: + def getitem_array(self, key_is_qc: bool, id, key): + if key_is_qc: key = self._qc[key] new_id = self._generate_id() self._qc[new_id] = self._qc[id].getitem_array(key) @@ -86,18 +84,18 @@ def getitem_array(self, id, key, is_qc): def replace( self, id, + to_replace_is_qc: bool, + regex_is_qc: bool, to_replace, value, inplace, limit, regex, method, - is_to_replace_qc, - is_regex_qc, ): - if is_to_replace_qc: + if to_replace_is_qc: to_replace = self._qc[to_replace] - if is_regex_qc: + if regex_is_qc: regex = self._qc[regex] new_id = self._generate_id() # TODO(GH#3108): Use positional arguments instead of keyword arguments @@ -116,6 +114,7 @@ def replace( def fillna( self, id, + value_is_qc: bool, squeeze_self, squeeze_value, value, @@ -124,9 +123,8 @@ def fillna( inplace, limit, downcast, - is_qc, 
): - if is_qc: + if value_is_qc: value = self._qc[value] new_id = self._generate_id() # TODO(GH#3108): Use positional arguments instead of keyword arguments @@ -145,96 +143,11 @@ def fillna( return new_id def concat(self, id, axis, other, **kwargs): - # convert id to query compiler other = [self._qc[o] for o in other] new_id = self._generate_id() self._qc[new_id] = self._qc[id].concat(axis, other, **kwargs) return new_id - def eq(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].eq(other, **kwargs) - return new_id - - def lt(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].lt(other, **kwargs) - return new_id - - def le(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].le(other, **kwargs) - return new_id - - def gt(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].gt(other, **kwargs) - return new_id - - def ge(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].ge(other, **kwargs) - return new_id - - def ne(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].ne(other, **kwargs) - return new_id - - def __and__(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].__and__(other, **kwargs) - return new_id - - def __or__(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].__or__(other, **kwargs) - return new_id - - def add(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].add(other, **kwargs) - return new_id - - def radd(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].radd(other, **kwargs) - return new_id - - def truediv(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].truediv(other, **kwargs) - return new_id - - def rtruediv(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].rtruediv(other, **kwargs) - return new_id - def mod(self, id, other, is_qc, **kwargs): if is_qc: other = self._qc[other] @@ -463,6 +376,48 @@ def forwarding_method( setattr(ForwardingQueryCompilerService, method_name, forwarding_method) +def _set_forwarding_method_for_binary_function(method_name: str): + def forwarding_method( + self: ForwardingQueryCompilerService, + id: UUID, + other_is_qc: bool, + other: Union[UUID, Any], + **kwargs, + ): + if other_is_qc: + other = self._qc[other] + new_id = self._generate_id() + self._qc[new_id] = getattr(self._qc[id], method_name)(other, **kwargs) + return new_id + + setattr(ForwardingQueryCompilerService, method_name, forwarding_method) + + +_BINARY_FORWARDING_METHODS = frozenset( + { + "eq", + "lt", + "le", + "gt", + "ge", + "ne", + "__and__", + "__or__", + "add", + "radd", + "truediv", + "rtruediv", + "mod", + "rmod", + "sub", + "rsub", + "mul", + 
"rmul", + "floordiv", + "rfloordiv", + } +) + _SINGLE_ID_FORWARDING_METHODS = frozenset( { "columnarize", @@ -564,3 +519,6 @@ def forwarding_method( for method in _SINGLE_ID_FORWARDING_METHODS: _set_forwarding_method_for_single_id(method) + +for method in _BINARY_FORWARDING_METHODS: + _set_forwarding_method_for_binary_function(method) From 89ba4b08d4d1c0c251419b5bf2ef2ef8c28d65f7 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 16:23:08 -0500 Subject: [PATCH 45/77] Fix query compiler refactoring. Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 442 +++++------------- modin/core/execution/client/service.py | 2 +- 2 files changed, 116 insertions(+), 328 deletions(-) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 095aea6aab0..267a17d250d 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -13,7 +13,6 @@ """Module contains ``ClientQueryCompiler`` class.""" -import numpy as np import pandas from pandas._libs.lib import no_default, NoDefault from pandas.api.types import is_list_like @@ -124,21 +123,9 @@ def to_pandas(self): def default_to_pandas(self, pandas_op, *args, **kwargs): raise NotImplementedError - def columnarize(self): - return self.__constructor__(self._service.columnarize(self._id)) - - def transpose(self): - return self.__constructor__(self._service.transpose(self._id)) - def copy(self): return self.__constructor__(self._id) - def add_prefix(self, prefix, axis=1): - return self.__constructor__(self._service.add_prefix(self._id, prefix, axis)) - - def add_suffix(self, suffix, axis=1): - return self.__constructor__(self._service.add_suffix(self._id, suffix, axis)) - def insert(self, loc, column, value): value_is_qc = isinstance(value, type(self)) if value_is_qc: @@ -163,45 +150,6 @@ def getitem_array(self, key): self._service.getitem_array(self._id, key_is_qc, key) ) - def getitem_column_array(self, key, numeric=False): - return self.__constructor__( - self._service.getitem_column_array(self._id, key, numeric) - ) - - def getitem_row_array(self, key, numeric=False): - return self.__constructor__( - self._service.getitem_row_array(self._id, key, numeric) - ) - - def pivot(self, index, columns, values): - return self.__constructor__( - self._service.pivot(self._id, index, columns, values) - ) - - def get_dummies(self, columns, **kwargs): - return self.__constructor__( - self._service.get_dummies(self._id, columns, **kwargs) - ) - - def take_2d(self, index=None, columns=None): - return self.__constructor__(self._service.take_2d(self._id, index, columns)) - - def drop(self, index=None, columns=None, errors: str = "raise"): - return self.__constructor__( - self._service.drop(self._id, index, columns, errors) - ) - - def isna(self): - return self.__constructor__(self._service.isna(self._id)) - - def notna(self): - return self.__constructor__(self._service.notna(self._id)) - - def astype(self, col_dtypes, **kwargs): - return self.__constructor__( - self._service.astype(self._id, col_dtypes, **kwargs) - ) - def replace( self, to_replace=None, @@ -260,60 +208,6 @@ def fillna( ) ) - def dropna(self, **kwargs): - return self.__constructor__(self._service.dropna(self._id, **kwargs)) - - def sum(self, **kwargs): - return self.__constructor__(self._service.sum(self._id, **kwargs)) - - def prod(self, **kwargs): - return self.__constructor__(self._service.prod(self._id, **kwargs)) - - def count(self, **kwargs): - return 
self.__constructor__(self._service.count(self._id, **kwargs)) - - def mean(self, **kwargs): - return self.__constructor__(self._service.mean(self._id, **kwargs)) - - def median(self, **kwargs): - return self.__constructor__(self._service.median(self._id, **kwargs)) - - def std(self, **kwargs): - return self.__constructor__(self._service.std(self._id, **kwargs)) - - def min(self, **kwargs): - return self.__constructor__(self._service.min(self._id, **kwargs)) - - def max(self, **kwargs): - return self.__constructor__(self._service.max(self._id, **kwargs)) - - def any(self, **kwargs): - return self.__constructor__(self._service.any(self._id, **kwargs)) - - def all(self, **kwargs): - return self.__constructor__(self._service.all(self._id, **kwargs)) - - def quantile_for_single_value(self, **kwargs): - return self.__constructor__( - self._service.quantile_for_single_value(self._id, **kwargs) - ) - - def quantile_for_list_of_values(self, **kwargs): - return self.__constructor__( - self._service.quantile_for_list_of_values(self._id, **kwargs) - ) - - def describe(self, **kwargs): - return self.__constructor__(self._service.describe(self._id, **kwargs)) - - def set_index_from_columns(self, keys, drop: bool = True, append: bool = False): - return self.__constructor__( - self._service.set_index_from_columns(self._id, keys, drop, append) - ) - - def reset_index(self, **kwargs): - return self.__constructor__(self._service.reset_index(self._id, **kwargs)) - def concat(self, axis, other, **kwargs): if is_list_like(other): other = [o._id for o in other] @@ -323,192 +217,6 @@ def concat(self, axis, other, **kwargs): self._service.concat(self._id, axis, other, **kwargs) ) - def sort_rows_by_column_values(self, columns, ascending=True, **kwargs): - return self.__constructor__( - self._service.sort_rows_by_column_values( - self._id, columns, ascending=ascending, **kwargs - ) - ) - - def sort_index(self, **kwargs): - return self.__constructor__(self._service.sort_index(self._id, **kwargs)) - - def dt_nanosecond(self): - return self.__constructor__(self._service.dt_nanosecond(self._id)) - - def dt_microsecond(self): - return self.__constructor__(self._service.dt_microsecond(self._id)) - - def dt_second(self): - return self.__constructor__(self._service.dt_second(self._id)) - - def dt_minute(self): - return self.__constructor__(self._service.dt_minute(self._id)) - - def dt_hour(self): - return self.__constructor__(self._service.dt_hour(self._id)) - - def dt_day(self): - return self.__constructor__(self._service.dt_day(self._id)) - - def dt_dayofweek(self): - return self.__constructor__(self._service.dt_dayofweek(self._id)) - - def dt_weekday(self): - return self.__constructor__(self._service.dt_weekday(self._id)) - - def dt_day_name(self): - return self.__constructor__(self._service.dt_day_name(self._id)) - - def dt_dayofyear(self): - return self.__constructor__(self._service.dt_dayofyear(self._id)) - - def dt_week(self): - return self.__constructor__(self._service.dt_week(self._id)) - - def dt_weekofyear(self): - return self.__constructor__(self._service.dt_weekofyear(self._id)) - - def dt_month(self): - return self.__constructor__(self._service.dt_month(self._id)) - - def dt_month_name(self): - return self.__constructor__(self._service.dt_month_name(self._id)) - - def dt_quarter(self): - return self.__constructor__(self._service.dt_quarter(self._id)) - - def dt_year(self): - return self.__constructor__(self._service.dt_year(self._id)) - - def str_capitalize(self): - return 
self.__constructor__(self._service.str_capitalize(self._id)) - - def str_isalnum(self): - return self.__constructor__(self._service.str_isalnum(self._id)) - - def str_isalpha(self): - return self.__constructor__(self._service.str_isalpha(self._id)) - - def str_isdecimal(self): - return self.__constructor__(self._service.str_isdecimal(self._id)) - - def str_isdigit(self): - return self.__constructor__(self._service.str_isdigit(self._id)) - - def str_islower(self): - return self.__constructor__(self._service.str_islower(self._id)) - - def str_isnumeric(self): - return self.__constructor__(self._service.str_isnumeric(self._id)) - - def str_isspace(self): - return self.__constructor__(self._service.str_isspace(self._id)) - - def str_istitle(self): - return self.__constructor__(self._service.str_istitle(self._id)) - - def str_isupper(self): - return self.__constructor__(self._service.str_isupper(self._id)) - - def str_len(self): - return self.__constructor__(self._service.str_len(self._id)) - - def str_lower(self): - return self.__constructor__(self._service.str_lower(self._id)) - - def str_title(self): - return self.__constructor__(self._service.str_title(self._id)) - - def str_upper(self): - return self.__constructor__(self._service.str_upper(self._id)) - - def str_center(self, width, fillchar=" "): - return self.__constructor__(self._service.str_center(self._id, width, fillchar)) - - def str_contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - return self.__constructor__( - self._service.str_contains(self._id, pat, case, flags, na, regex) - ) - - def str_count(self, pat, flags=0, **kwargs): - return self.__constructor__( - self._service.str_count(self._id, pat, flags, **kwargs) - ) - - def str_endswith(self, pat, na=np.nan): - return self.__constructor__(self._service.str_endswith(self._id, pat, na)) - - def str_find(self, sub, start=0, end=None): - return self.__constructor__(self._service.str_find(self._id, sub, start, end)) - - def str_rfind(self, sub, start=0, end=None): - return self.__constructor__(self._service.str_rfind(self._id, sub, start, end)) - - def str_findall(self, pat, flags=0, **kwargs): - return self.__constructor__( - self._service.str_findall(self._id, pat, flags, **kwargs) - ) - - def str_get(self, i): - return self.__constructor__(self._service.str_get(self._id, i)) - - def str_index(self, sub, start=0, end=None): - return self.__constructor__(self._service.str_index(self._id, sub, start, end)) - - def str_join(self, sep): - return self.__constructor__(self._service.str_join(self._id, sep)) - - def str_lstrip(self, to_strip=None): - return self.__constructor__(self._service.str_lstrip(self._id, to_strip)) - - def str_ljust(self, width, fillchar=" "): - return self.__constructor__(self._service.str_ljust(self._id, width, fillchar)) - - def str_rjust(self, width, fillchar=" "): - return self.__constructor__(self._service.str_rjust(self._id, width, fillchar)) - - def str_match(self, pat, case=True, flags=0, na=np.nan): - return self.__constructor__( - self._service.str_match(self._id, pat, case, flags, na) - ) - - def str_pad(self, width, side="left", fillchar=" "): - return self.__constructor__( - self._service.str_pad(self._id, width, side, fillchar) - ) - - def str_repeat(self, repeats): - return self.__constructor__(self._service.str_repeat(self._id, repeats)) - - def str_split(self, pat=None, n=-1, expand=False): - return self.__constructor__(self._service.str_split(self._id, pat, n, expand)) - - def str_rsplit(self, pat=None, n=-1, expand=False): - 
return self.__constructor__(self._service.str_rsplit(self._id, pat, n, expand)) - - def str_rstrip(self, to_strip=None): - return self.__constructor__(self._service.str_rstrip(self._id, to_strip)) - - def str_slice(self, start=None, stop=None, step=None): - return self.__constructor__( - self._service.str_slice(self._id, start, stop, step) - ) - - def str_slice_replace(self, start=None, stop=None, repl=None): - return self.__constructor__( - self._service.str_slice_replace(self._id, start, stop, repl) - ) - - def str_startswith(self, pat, na=np.nan): - return self.__constructor__(self._service.str_startswith(self._id, pat, na)) - - def str_strip(self, to_strip=None): - return self.__constructor__(self._service.str_strip(self._id, to_strip)) - - def str_zfill(self, width): - return self.__constructor__(self._service.str_zfill(self._id, width)) - def merge(self, right, **kwargs): return self.__constructor__(self._service.merge(self._id, right._id, **kwargs)) @@ -650,47 +358,12 @@ def groupby_agg( ) ) - def cummax(self, fold_axis, axis, skipna, *args, **kwargs): - return self.__constructor__( - self._service.cummax(self._id, fold_axis, axis, skipna, *args, **kwargs) - ) - - def cummin(self, fold_axis, axis, skipna, *args, **kwargs): - return self.__constructor__( - self._service.cummin(self._id, fold_axis, axis, skipna, *args, **kwargs) - ) - - def cumsum(self, fold_axis, axis, skipna, *args, **kwargs): - return self.__constructor__( - self._service.cumsum(self._id, fold_axis, axis, skipna, *args, **kwargs) - ) - - def cumprod(self, fold_axis, axis, skipna, *args, **kwargs): - return self.__constructor__( - self._service.cumprod(self._id, fold_axis, axis, skipna, *args, **kwargs) - ) - def get_index_names(self, axis=0): if axis == 0: return self.index.names else: return self.columns.names - def is_monotonic_increasing(self): - return self.__constructor__(self._service.is_monotonic_increasing(self._id)) - - def is_monotonic_decreasing(self): - return self.__constructor__(self._service.is_monotonic_decreasing(self._id)) - - def idxmin(self, **kwargs): - return self.__constructor__(self._service.idxmin(self._id, **kwargs)) - - def idxmax(self, **kwargs): - return self.__constructor__(self._service.idxmax(self._id, **kwargs)) - - def query(self, expr, **kwargs): - return self.__constructor__(self._service.query(self._id, expr, **kwargs)) - def finalize(self): raise NotImplementedError @@ -724,6 +397,18 @@ def forwarding_method( setattr(ClientQueryCompiler, method_name, forwarding_method) +def _set_forwarding_method_for_single_id(method_name: str): + def forwarding_method( + self: ClientQueryCompiler, + *args, + **kwargs, + ): + return self.__constructor__( + getattr(self._service, method_name)(self._id, *args, **kwargs) + ) + + setattr(ClientQueryCompiler, method_name, forwarding_method) + _BINARY_FORWARDING_METHODS = frozenset( { @@ -752,3 +437,106 @@ def forwarding_method( for method in _BINARY_FORWARDING_METHODS: _set_forwarding_method_for_binary_function(method) + +_SINGLE_ID_FORWARDING_METHODS = frozenset( + { + "columnarize", + "transpose", + "take_2d", + "getitem_column_array", + "getitem_row_array", + "pivot", + "get_dummies", + "drop", + "isna", + "notna", + "add_prefix", + "add_suffix", + "astype", + "dropna", + "sum", + "prod", + "count", + "mean", + "median", + "std", + "min", + "max", + "any", + "all", + "quantile_for_single_value", + "quantile_for_list_of_values", + "describe", + "set_index_from_columns", + "reset_index", + "sort_rows_by_column_values", + "sort_index", + 
"dt_nanosecond", + "dt_microsecond", + "dt_second", + "dt_minute", + "dt_hour", + "dt_day", + "dt_dayofweek", + "dt_weekday", + "dt_day_name", + "dt_dayofyear", + "dt_week", + "dt_weekofyear", + "dt_month", + "dt_month_name", + "dt_quarter", + "dt_year", + "str_capitalize", + "str_isalnum", + "str_isalpha", + "str_isdecimal", + "str_isdigit", + "str_islower", + "str_isnumeric", + "str_isspace", + "str_istitle", + "str_isupper", + "str_len", + "str_lower", + "str_title", + "str_upper", + "str_center", + "str_contains", + "str_count", + "str_endswith", + "str_find", + "str_index", + "str_rfind", + "str_findall", + "str_get", + "str_join", + "str_lstrip", + "str_ljust", + "str_rjust", + "str_match", + "str_pad", + "str_repeat", + "str_split", + "str_rsplit", + "str_rstrip", + "str_slice", + "str_slice_replace", + "str_startswith", + "str_strip", + "str_zfill", + "cummax", + "cummin", + "cumsum", + "cumprod", + "is_monotonic_increasing", + "is_monotonic_decreasing", + "idxmax", + "idxmin", + "query", + } +) + + +for method in _SINGLE_ID_FORWARDING_METHODS: + _set_forwarding_method_for_single_id(method) diff --git a/modin/core/execution/client/service.py b/modin/core/execution/client/service.py index 94f64a4a848..610b4e43acf 100644 --- a/modin/core/execution/client/service.py +++ b/modin/core/execution/client/service.py @@ -74,7 +74,7 @@ def setitem(self, id, value_is_qc: bool, axis, key, value): self._qc[new_id] = self._qc[id].setitem(axis, key, value) return new_id - def getitem_array(self, key_is_qc: bool, id, key): + def getitem_array(self, id, key_is_qc: bool, key): if key_is_qc: key = self._qc[key] new_id = self._generate_id() From 0a3240f982ed951361583a9f14917dc1a7ccf538 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 16:26:09 -0500 Subject: [PATCH 46/77] Add a newline for black Signed-off-by: mvashishtha --- modin/core/execution/client/query_compiler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 267a17d250d..4d371177ef5 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -397,6 +397,7 @@ def forwarding_method( setattr(ClientQueryCompiler, method_name, forwarding_method) + def _set_forwarding_method_for_single_id(method_name: str): def forwarding_method( self: ClientQueryCompiler, From df5b2a5d2345b64bdc2259f0b6e1227b9800e099 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 20:32:41 -0500 Subject: [PATCH 47/77] Make doc_checker work for all new files except container groupby. 
Signed-off-by: mvashishtha --- .github/workflows/ci.yml | 5 +- modin/conftest.py | 6 +- modin/core/execution/client/container.py | 826 ++++++++++++++++++ modin/core/execution/client/query_compiler.py | 20 +- modin/core/execution/client/service.py | 524 ----------- 5 files changed, 851 insertions(+), 530 deletions(-) create mode 100644 modin/core/execution/client/container.py delete mode 100644 modin/core/execution/client/service.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 250e9fe1155..92c0768f0f6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -129,7 +129,10 @@ jobs: - run: python scripts/doc_checker.py modin/core/storage_formats/base - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow - run: python scripts/doc_checker.py modin/core/storage_formats/pandas - - run: python scripts/doc_checker.py modin/core/execution/client + - run: | + python scripts/doc_checker.py modin/core/execution/client/container.py + python scripts/doc_checker.py modin/core/execution/client/io.py + python scripts/doc_checker.py modin/core/execution/client/query_compiler.py - run: | python scripts/doc_checker.py \ modin/experimental/core/execution/native/implementations/hdk_on_native/dataframe \ diff --git a/modin/conftest.py b/modin/conftest.py index f0cf0a49b26..53c1eac3022 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -54,8 +54,8 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url): ) from modin.core.execution.client.io import ClientIO # noqa: E402 from modin.core.execution.client.query_compiler import ClientQueryCompiler # noqa: E402 -from modin.core.execution.client.service import ( # noqa: E402 - ForwardingQueryCompilerService, +from modin.core.execution.client.container import ( # noqa: E402 + ForwardingQueryCompilerContainer, ) from modin.core.execution.python.implementations.pandas_on_python.dataframe.dataframe import ( # noqa: E402 PandasOnPythonDataframe, @@ -299,7 +299,7 @@ def prepare(cls): def set_client_execution(): - service = ForwardingQueryCompilerService(BaseQueryCompiler, PandasOnPythonIO) + service = ForwardingQueryCompilerContainer(BaseQueryCompiler, PandasOnPythonIO) ClientQueryCompiler.set_server_connection(service) ClientIO.query_compiler_cls = TestClientQueryCompiler ClientIO.set_server_connection(service) diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py new file mode 100644 index 00000000000..52879e520d0 --- /dev/null +++ b/modin/core/execution/client/container.py @@ -0,0 +1,826 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. 
+ +"""Module contains ``ForwardingQueryCompilerContainer`` class.""" + +import numpy as np +import pandas +from typing import Any, NamedTuple, Optional, Union +from uuid import UUID, uuid4 + +from modin.core.io.io import BaseIO +from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler + + +class ForwardingQueryCompilerContainer: + """ + Container that forwards queries to query compilers within. + + Parameters + ---------- + query_compiler_class : BaseQueryCompiler + Query compiler class to contain. + io_class : BaseIO + The IO class to use for reading and writing data. + """ + + def __init__(self, query_compiler_class: BaseQueryCompiler, io_class: BaseIO): + self._qc = {} + self._query_compiler_class = query_compiler_class + self._io_class = io_class + + def _generate_id(self) -> UUID: + """ + Generate an ID for a new query compiler. + + Returns + ------- + UUID + The generated ID. + """ + id = uuid4() + while id in self._qc: + id = uuid4() + return id + + def add_query_compiler(self, qc: BaseQueryCompiler) -> UUID: + """ + Add a query compiler to the container. + + Parameters + ---------- + qc : BaseQueryCompiler + + Returns + ------- + UUID + The ID of the query compiler. + """ + id = self._generate_id() + self._qc[id] = qc + return id + + def to_pandas(self, id: UUID) -> pandas.DataFrame: + """ + Convert the query compiler to a pandas DataFrame. + + Parameters + ---------- + id : UUID + The ID of the query compiler to convert. + + Returns + ------- + pandas.DataFrame + The converted DataFrame. + """ + return self._qc[id].to_pandas() + + class DefaultToPandasResult(NamedTuple): + """ + The result of ``default_to_pandas``. + + Parameters + ---------- + result : Any + The result of the operation. + result_is_qc_id : bool + Whether the result is a query compiler ID. + """ + + result: Any + result_is_qc_id: bool + + def default_to_pandas( + self, id: UUID, pandas_op: Union[str, callable], *args: Any, **kwargs: dict + ) -> DefaultToPandasResult: # noqa: D401 + """ + Default to pandas for an operation on a query compiler. + + Use the inner query compiler's default_to_pandas to execute the + operation on a pandas dataframe. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + pandas_op : Union[str, callable] + The operation to perform. + *args : iterable + The arguments to pass to the operation. + **kwargs : dict + The keyword arguments to pass to the operation. + + Returns + ------- + DefaultToPandasResult + The result of the operation. The result is a query compiler ID if + and only if the result of the pandas operation is a new + query compiler. + """ + result = self._qc[id].default_to_pandas(pandas_op, *args, **kwargs) + result_is_qc_id = isinstance(result, self._query_compiler_class) + if result_is_qc_id: + new_id = self._generate_id() + self._qc[new_id] = result + result = new_id + return self.DefaultToPandasResult( + result=result, result_is_qc_id=result_is_qc_id + ) + + def rename( + self, + id: UUID, + new_col_labels: Optional[pandas.Index] = None, + new_row_labels: Optional[pandas.Index] = None, + ) -> UUID: + """ + Rename the columns and/or rows of a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + new_col_labels : pandas.Index, default: None + The new column labels. + new_row_labels : pandas.Index, default: None + The new row labels. + + Returns + ------- + UUID + The ID of the renamed query compiler. 
+ """ + new_id = self._generate_id() + new_qc = self._qc[new_id] = self._qc[id].copy() + if new_col_labels is not None: + new_qc.columns = new_col_labels + if new_row_labels is not None: + new_qc.index = new_row_labels + return new_id + + def columns(self, id) -> pandas.Index: + """ + Get the columns of the query compiler. + + Parameters + ---------- + id : UUID + The ID of a query compiler. + + Returns + ------- + pandas.Index + The columns. + """ + return self._qc[id].columns + + def index(self, id: UUID) -> pandas.Index: + """ + Get the index of a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + + Returns + ------- + pandas.Index + The index. + """ + return self._qc[id].index + + def dtypes(self, id: UUID) -> pandas.Series: + """ + Get the dtypes of a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + + Returns + ------- + pandas.Series + The dtypes. + """ + return self._qc[id].dtypes + + def insert(self, id: UUID, value_is_qc: bool, loc, column, value) -> UUID: + """ + Insert a value into a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + value_is_qc : bool + Whether ``value`` is the ID of a query compiler. + loc : int + The location to insert the value. + column : str + The column to insert the value. + value : Any + The value to insert. + + Returns + ------- + UUID + The ID of the query compiler with the inserted value. + """ + if value_is_qc: + value = self._qc[value] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].insert(loc, column, value) + return new_id + + def setitem(self, id, value_is_qc: bool, axis, key, value) -> UUID: + """ + Set a value in a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + value_is_qc : bool + Whether ``value`` is the ID of a query compiler. + axis : int + The axis to set the value. + key : Any + The key to set the value. + value : Any + The value to set. + + Returns + ------- + UUID + The ID of the query compiler with the value set. + """ + if value_is_qc: + value = self._qc[value] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].setitem(axis, key, value) + return new_id + + def getitem_array( + self, id, key_is_qc: bool, key: Union[UUID, np.ndarray, list] + ) -> UUID: + """ + Get the values at ``key`` from a query compiler. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + key_is_qc : bool + Whether ``key`` is the ID of a query compiler. + key : UUID, np.ndarray or list of column labels + Boolean mask represented by QueryCompiler UUID or ``np.ndarray`` of the same + shape as query compiler with ID ``id``, or enumerable of columns to pick. + + Returns + ------- + UUID + The ID of the new query compiler. + """ + if key_is_qc: + key = self._qc[key] + new_id = self._generate_id() + self._qc[new_id] = self._qc[id].getitem_array(key) + return new_id + + def replace( + self, + id, + to_replace_is_qc: bool, + regex_is_qc: bool, + to_replace, + value, + inplace, + limit, + regex, + method, + ): + """ + Replace values given in `to_replace` by `value`. + + Parameters + ---------- + id : UUID + The ID of the query compiler. + to_replace_is_qc : bool + Whether ``to_replace`` is the ID of a query compiler. + regex_is_qc : bool + Whether ``regex`` is the ID of a query compiler. + to_replace : scalar, list-like, regex, modin.pandas.Series, or None + Value to replace. + value : scalar, list-like, regex or dict + Value to replace matching values with. 
+        inplace : bool
+            This parameter is for compatibility. Always has to be False.
+        limit : Optional[int]
+            Maximum size gap to forward or backward fill.
+        regex : bool or same types as ``to_replace``
+            Whether to interpret ``to_replace`` and/or ``value`` as regular
+            expressions.
+        method : {"pad", "ffill", "bfill", None}
+            The method to use for replacement when ``to_replace`` is a
+            scalar, list or tuple and ``value`` is None.
+
+        Returns
+        -------
+        UUID
+            UUID of query compiler with all `to_replace` values replaced by `value`.
+        """
+        if to_replace_is_qc:
+            to_replace = self._qc[to_replace]
+        if regex_is_qc:
+            regex = self._qc[regex]
+        new_id = self._generate_id()
+        # TODO(GH#3108): Use positional arguments instead of keyword arguments
+        # in the query compilers so we don't have to name all the arguments
+        # here.
+        self._qc[new_id] = self._qc[id].replace(
+            to_replace=to_replace,
+            value=value,
+            inplace=inplace,
+            limit=limit,
+            regex=regex,
+            method=method,
+        )
+        return new_id
+
+    def fillna(
+        self,
+        id,
+        value_is_qc: bool,
+        squeeze_self: bool,
+        squeeze_value: bool,
+        value,
+        method,
+        axis,
+        inplace,
+        limit,
+        downcast,
+    ):
+        """
+        Replace NaN values using provided method.
+
+        Parameters
+        ----------
+        id : UUID
+            The ID of the query compiler.
+        value_is_qc : bool
+            Whether ``value`` is the ID of a query compiler.
+        squeeze_self : bool
+            Whether to squeeze ``self``.
+        squeeze_value : bool
+            Whether to squeeze ``value``.
+        value : scalar or dict
+        method : {"backfill", "bfill", "pad", "ffill", None}
+        axis : {0, 1}
+        inplace : {False}
+            This parameter is for compatibility. Always has to be False.
+        limit : int, optional
+        downcast : dict, optional
+
+        Returns
+        -------
+        UUID
+            UUID of query compiler with all null values filled.
+        """
+        if value_is_qc:
+            value = self._qc[value]
+        new_id = self._generate_id()
+        # TODO(GH#3108): Use positional arguments instead of keyword arguments
+        # in the query compilers so we don't have to name all the
+        # arguments here.
+        self._qc[new_id] = self._qc[id].fillna(
+            squeeze_self=squeeze_self,
+            squeeze_value=squeeze_value,
+            value=value,
+            method=method,
+            axis=axis,
+            inplace=inplace,
+            limit=limit,
+            downcast=downcast,
+        )
+        return new_id
+
+    def concat(self, id, axis, other, **kwargs):
+        """
+        Concatenate query compilers along the specified axis.
+
+        Parameters
+        ----------
+        id : UUID
+            The ID of the main query compiler to concatenate.
+        axis : {0, 1}
+            The axis to concatenate along.
+        other : list of UUIDs
+            The IDs of the query compilers to concatenate to the one
+            represented by ``id``.
+        **kwargs : dict
+            Additional parameters to pass to the concatenation function.
+
+        Returns
+        -------
+        UUID
+            The ID of the query compiler containing the concatenation result.
+        """
+        other = [self._qc[o] for o in other]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].concat(axis, other, **kwargs)
+        return new_id
+
+    def merge(self, id, right, **kwargs):
+        """
+        Merge two query compilers using a database-style join.
+
+        Parameters
+        ----------
+        id : UUID
+            The ID of the left query compiler.
+        right : UUID
+            The ID of the right query compiler.
+        **kwargs : dict
+            Additional parameters to pass to the merge function.
+
+        Returns
+        -------
+        UUID
+            The ID of the query compiler containing the merge result.
+        """
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].merge(self._qc[right], **kwargs)
+        return new_id
+
+    def groupby_mean(
+        self,
+        id,
+        by,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_mean(
+            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+        )
+        return new_id
+
+    def groupby_count(
+        self,
+        id,
+        by,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_count(
+            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+        )
+        return new_id
+
+    def groupby_max(
+        self,
+        id,
+        by,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_max(
+            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+        )
+        return new_id
+
+    def groupby_min(
+        self,
+        id,
+        by,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_min(
+            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+        )
+        return new_id
+
+    def groupby_sum(
+        self,
+        id,
+        by,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_sum(
+            by, axis, groupby_kwargs, agg_args, agg_kwargs, drop
+        )
+        return new_id
+
+    def groupby_agg(
+        self,
+        id,
+        by,
+        agg_func,
+        axis,
+        groupby_kwargs,
+        agg_args,
+        agg_kwargs,
+        how="axis_wise",
+        drop=False,
+        is_qc: bool = False,
+    ):
+        if is_qc:
+            by = self._qc[by]
+        new_id = self._generate_id()
+        self._qc[new_id] = self._qc[id].groupby_agg(
+            by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop
+        )
+        return new_id
+
+    def read_csv(self, connection, filepath, **kwargs) -> UUID:
+        """
+        Read a CSV file from the specified filepath.
+
+        Parameters
+        ----------
+        connection : object
+            The data connection, e.g. a connection to the database where the
+            service will store the result.
+        filepath : str
+            The filepath to read the CSV file from.
+        **kwargs : dict
+            Additional parameters to pass to the pandas read_csv function.
+
+        Returns
+        -------
+        UUID
+            The ID of the query compiler containing the read result.
+        """
+        io_result = self._io_class._read_csv(filepath, **kwargs)
+        if isinstance(io_result, self._query_compiler_class):
+            new_id = self._generate_id()
+            self._qc[new_id] = io_result
+            return new_id
+        return io_result
+
+    def read_sql(self, sql, connection, **kwargs) -> UUID:
+        """
+        Read data from a SQL connection.
+
+        Parameters
+        ----------
+        sql : str
+            SQL query to be executed or a table name.
+        connection : SQLAlchemy connectable, str, or sqlite3 connection
+            Using SQLAlchemy makes it possible to use any DB supported by that
+            library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
+            for engine disposal and connection closure for the SQLAlchemy
+            connectable; str connections are closed automatically. See
+            `here `_.
+        **kwargs : dict
+            Parameters of ``read_sql`` function.
+
+        Returns
+        -------
+        UUID
+            ID of query compiler with data read in from SQL connection.
+        """
+        new_id = self._generate_id()
+        self._qc[new_id] = self._io_class._read_sql(sql, connection, **kwargs)
+        return new_id
+
+    def to_sql(self, id, **kwargs) -> None:
+        """
+        Write records stored in a DataFrame to a SQL database.
+
+        Databases supported by SQLAlchemy [1]_ are supported. Tables can be
+        newly created, appended to, or overwritten.
+
+        Parameters
+        ----------
+        id : UUID
+            ID of query compiler to write to database.
+        **kwargs : dict
+            Parameters of ``to_sql`` function.
+        """
+        self._io_class.to_sql(self._qc[id], **kwargs)
+
+
+def _set_forwarding_method_for_single_id(method_name: str):
+    """
+    Define a method that forwards arguments to the inner query compiler.
+
+    Parameters
+    ----------
+    method_name : str
+    """
+
+    def forwarding_method(
+        self: ForwardingQueryCompilerContainer, id: UUID, *args, **kwargs
+    ):
+        new_id = self._generate_id()
+        self._qc[new_id] = getattr(self._qc[id], method_name)(*args, **kwargs)
+        return new_id
+
+    setattr(ForwardingQueryCompilerContainer, method_name, forwarding_method)
+
+
+def _set_forwarding_method_for_binary_function(method_name: str):
+    """
+    Define a binary method that forwards arguments to the inner query compiler.
+
+    Parameters
+    ----------
+    method_name : str
+    """
+
+    def forwarding_method(
+        self: ForwardingQueryCompilerContainer,
+        id: UUID,
+        other_is_qc: bool,
+        other: Union[UUID, Any],
+        **kwargs,
+    ):
+        if other_is_qc:
+            other = self._qc[other]
+        new_id = self._generate_id()
+        self._qc[new_id] = getattr(self._qc[id], method_name)(other, **kwargs)
+        return new_id
+
+    setattr(ForwardingQueryCompilerContainer, method_name, forwarding_method)
+
+
+_BINARY_FORWARDING_METHODS = frozenset(
+    {
+        "eq",
+        "lt",
+        "le",
+        "gt",
+        "ge",
+        "ne",
+        "__and__",
+        "__or__",
+        "add",
+        "radd",
+        "truediv",
+        "rtruediv",
+        "mod",
+        "rmod",
+        "sub",
+        "rsub",
+        "mul",
+        "rmul",
+        "floordiv",
+        "rfloordiv",
+    }
+)
+
+_SINGLE_ID_FORWARDING_METHODS = frozenset(
+    {
+        "columnarize",
+        "transpose",
+        "take_2d",
+        "getitem_column_array",
+        "getitem_row_array",
+        "pivot",
+        "get_dummies",
+        "drop",
+        "isna",
+        "notna",
+        "add_prefix",
+        "add_suffix",
+        "astype",
+        "dropna",
+        "sum",
+        "prod",
+        "count",
+        "mean",
+        "median",
+        "std",
+        "min",
+        "max",
+        "any",
+        "all",
+        "quantile_for_single_value",
+        "quantile_for_list_of_values",
+        "describe",
+        "set_index_from_columns",
+        "reset_index",
+        "sort_rows_by_column_values",
+        "sort_index",
+        "dt_nanosecond",
+        "dt_microsecond",
+        "dt_second",
+        "dt_minute",
+        "dt_hour",
+        "dt_day",
+        "dt_dayofweek",
+        "dt_weekday",
+        "dt_day_name",
+        "dt_dayofyear",
+        "dt_week",
+        "dt_weekofyear",
+        "dt_month",
+        "dt_month_name",
+        "dt_quarter",
+        "dt_year",
+        "str_capitalize",
+        "str_isalnum",
+        "str_isalpha",
+        "str_isdecimal",
+        "str_isdigit",
+        "str_islower",
+        "str_isnumeric",
+        "str_isspace",
+        "str_istitle",
+        "str_isupper",
+        "str_len",
+        "str_lower",
+        "str_title",
+        "str_upper",
+        "str_center",
+        "str_contains",
+        "str_count",
+        "str_endswith",
+        "str_find",
+        "str_index",
+        "str_rfind",
+        "str_findall",
+        "str_get",
+        "str_join",
+        "str_lstrip",
+        "str_ljust",
+        "str_rjust",
+        "str_match",
+        "str_pad",
+        "str_repeat",
+        "str_split",
+        "str_rsplit",
+        "str_rstrip",
+        "str_slice",
+        "str_slice_replace",
+        "str_startswith",
+        "str_strip",
+        "str_zfill",
+        "cummax",
+        "cummin",
+        "cumsum",
+        "cumprod",
+        "is_monotonic_increasing",
+        "is_monotonic_decreasing",
+        "idxmax",
+        "idxmin",
+        "query",
+    }
+)
+
+for method in _SINGLE_ID_FORWARDING_METHODS:
+    _set_forwarding_method_for_single_id(method)
+
+for
method in _BINARY_FORWARDING_METHODS: + _set_forwarding_method_for_binary_function(method) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 4d371177ef5..4d0bdb13f09 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -382,7 +382,15 @@ def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): raise NotImplementedError -def _set_forwarding_method_for_binary_function(method_name: str): +def _set_forwarding_method_for_binary_function(method_name: str) -> None: + """ + Define a binary method that forwards arguments to the service. + + Parameters + ---------- + method_name : str + """ + def forwarding_method( self: ClientQueryCompiler, other: Any, @@ -398,7 +406,15 @@ def forwarding_method( setattr(ClientQueryCompiler, method_name, forwarding_method) -def _set_forwarding_method_for_single_id(method_name: str): +def _set_forwarding_method_for_single_id(method_name: str) -> None: + """ + Define a method that forwards arguments to the service. + + Parameters + ---------- + method_name : str + """ + def forwarding_method( self: ClientQueryCompiler, *args, diff --git a/modin/core/execution/client/service.py b/modin/core/execution/client/service.py deleted file mode 100644 index 610b4e43acf..00000000000 --- a/modin/core/execution/client/service.py +++ /dev/null @@ -1,524 +0,0 @@ -from typing import Any, NamedTuple, Optional, Union -from uuid import UUID, uuid4 -from modin.core.io.io import BaseIO - -from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler - - -class ForwardingQueryCompilerService: - def __init__(self, query_compiler_type: BaseQueryCompiler, io_type: BaseIO): - self._qc = {} - self._qc_type = query_compiler_type - self._io_type = io_type - - def _generate_id(self) -> UUID: - id = uuid4() - while id in self._qc: - id = uuid4() - return id - - def add_query_compiler(self, qc) -> UUID: - id = self._generate_id() - self._qc[id] = qc - return id - - def to_pandas(self, id): - return self._qc[id].to_pandas() - - class DefaultToPandasResult(NamedTuple): - result: Optional[Any] - result_is_qc_id: bool - - def default_to_pandas( - self, id: UUID, pandas_op, *args, **kwargs - ) -> DefaultToPandasResult: - result = self._qc[id].default_to_pandas(pandas_op, *args, **kwargs) - result_is_qc_id = isinstance(result, self._qc_type) - if result_is_qc_id: - new_id = self._generate_id() - self._qc[new_id] = result - result = new_id - return self.DefaultToPandasResult( - result=result, result_is_qc_id=result_is_qc_id - ) - - def rename(self, id, new_col_labels=None, new_row_labels=None): - new_id = self._generate_id() - new_qc = self._qc[new_id] = self._qc[id].copy() - if new_col_labels is not None: - new_qc.columns = new_col_labels - if new_row_labels is not None: - new_qc.index = new_row_labels - return new_id - - def columns(self, id): - return self._qc[id].columns - - def index(self, id): - return self._qc[id].index - - def dtypes(self, id): - return self._qc[id].dtypes - - def insert(self, id, value_is_qc: bool, loc, column, value): - if value_is_qc: - value = self._qc[value] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].insert(loc, column, value) - return new_id - - def setitem(self, id, value_is_qc: bool, axis, key, value): - if value_is_qc: - value = self._qc[value] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].setitem(axis, key, value) - return new_id - - def getitem_array(self, id, key_is_qc: bool, 
key): - if key_is_qc: - key = self._qc[key] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].getitem_array(key) - return new_id - - def replace( - self, - id, - to_replace_is_qc: bool, - regex_is_qc: bool, - to_replace, - value, - inplace, - limit, - regex, - method, - ): - if to_replace_is_qc: - to_replace = self._qc[to_replace] - if regex_is_qc: - regex = self._qc[regex] - new_id = self._generate_id() - # TODO(GH#3108): Use positional arguments instead of keyword arguments - # in the query compilers so we don't have to name all the arguments - # here. - self._qc[new_id] = self._qc[id].replace( - to_replace=to_replace, - value=value, - inplace=inplace, - limit=limit, - regex=regex, - method=method, - ) - return new_id - - def fillna( - self, - id, - value_is_qc: bool, - squeeze_self, - squeeze_value, - value, - method, - axis, - inplace, - limit, - downcast, - ): - if value_is_qc: - value = self._qc[value] - new_id = self._generate_id() - # TODO(GH#3108): Use positional arguments instead of keyword arguments - # in the query compilers so we don't have to name all the - # arguments here. - self._qc[new_id] = self._qc[id].fillna( - squeeze_self=squeeze_self, - squeeze_value=squeeze_value, - value=value, - method=method, - axis=axis, - inplace=inplace, - limit=limit, - downcast=downcast, - ) - return new_id - - def concat(self, id, axis, other, **kwargs): - other = [self._qc[o] for o in other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].concat(axis, other, **kwargs) - return new_id - - def mod(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].mod(other, **kwargs) - return new_id - - def rmod(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].rmod(other, **kwargs) - return new_id - - def sub(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].sub(other, **kwargs) - return new_id - - def rsub(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].rsub(other, **kwargs) - return new_id - - def mul(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].mul(other, **kwargs) - return new_id - - def rmul(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].rmul(other, **kwargs) - return new_id - - def floordiv(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].floordiv(other, **kwargs) - return new_id - - def rfloordiv(self, id, other, is_qc, **kwargs): - if is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].rfloordiv(other, **kwargs) - return new_id - - def merge(self, id, right, **kwargs): - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].merge(self._qc[right], **kwargs) - return new_id - - def groupby_mean( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_mean( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def 
groupby_count( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_count( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_max( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_max( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_min( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_min( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_sum( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_sum( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_agg( - self, - id, - by, - agg_func, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - how="axis_wise", - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_agg( - by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop - ) - return new_id - - def read_csv(self, connection, filepath, **kwargs) -> UUID: - io_result = self._io_type._read_csv(filepath, **kwargs) - if isinstance(io_result, self._qc_type): - new_id = self._generate_id() - self._qc[new_id] = io_result - return new_id - return io_result - - def read_sql(self, sql, connection, **kwargs) -> UUID: - new_id = self._generate_id() - self._qc[new_id] = self._io_type._read_sql(sql, connection, **kwargs) - return new_id - - def to_sql( - self, - id, - name, - con, - schema=None, - if_exists="fail", - index=True, - index_label=None, - chunksize=None, - dtype=None, - method=None, - ): - self._io_type.to_sql( - self._qc[id], - name, - con, - schema, - if_exists, - index, - index_label, - chunksize, - dtype, - method, - ) - - -def _set_forwarding_method_for_single_id(method_name: str): - def forwarding_method( - self: "ForwardingQueryCompilerService", id: UUID, *args, **kwargs - ): - new_id = self._generate_id() - self._qc[new_id] = getattr(self._qc[id], method_name)(*args, **kwargs) - return new_id - - setattr(ForwardingQueryCompilerService, method_name, forwarding_method) - - -def _set_forwarding_method_for_binary_function(method_name: str): - def forwarding_method( - self: ForwardingQueryCompilerService, - id: UUID, - other_is_qc: bool, - other: Union[UUID, Any], - **kwargs, - ): - if other_is_qc: - other = self._qc[other] - new_id = self._generate_id() - self._qc[new_id] = getattr(self._qc[id], method_name)(other, **kwargs) - return new_id - - setattr(ForwardingQueryCompilerService, method_name, forwarding_method) - - -_BINARY_FORWARDING_METHODS = frozenset( - { - "eq", - "lt", - "le", - "gt", - "ge", - "ne", - "__and__", - "__or__", - "add", - "radd", - "truediv", - "rtruediv", - "mod", - "rmod", - "sub", - "rsub", - "mul", - "rmul", - "floordiv", - "rfloordiv", - } -) - -_SINGLE_ID_FORWARDING_METHODS = frozenset( - { - "columnarize", - "transpose", - "take_2d", 
- "getitem_column_array", - "getitem_row_array", - "pivot", - "get_dummies", - "drop", - "isna", - "notna", - "add_prefix", - "add_suffix", - "astype", - "dropna", - "sum", - "prod", - "count", - "mean", - "median", - "std", - "min", - "max", - "any", - "all", - "quantile_for_single_value", - "quantile_for_list_of_values", - "describe", - "set_index_from_columns", - "reset_index", - "sort_rows_by_column_values", - "sort_index", - "dt_nanosecond", - "dt_microsecond", - "dt_second", - "dt_minute", - "dt_hour", - "dt_day", - "dt_dayofweek", - "dt_weekday", - "dt_day_name", - "dt_dayofyear", - "dt_week", - "dt_weekofyear", - "dt_month", - "dt_month_name", - "dt_quarter", - "dt_year", - "str_capitalize", - "str_isalnum", - "str_isalpha", - "str_isdecimal", - "str_isdigit", - "str_islower", - "str_isnumeric", - "str_isspace", - "str_istitle", - "str_isupper", - "str_len", - "str_lower", - "str_title", - "str_upper", - "str_center", - "str_contains", - "str_count", - "str_endswith", - "str_find", - "str_index", - "str_rfind", - "str_findall", - "str_get", - "str_join", - "str_lstrip", - "str_ljust", - "str_rjust", - "str_match", - "str_pad", - "str_repeat", - "str_split", - "str_rsplit", - "str_rstrip", - "str_slice", - "str_slice_replace", - "str_startswith", - "str_strip", - "str_zfill", - "cummax", - "cummin", - "cumsum", - "cumprod", - "is_monotonic_increasing", - "is_monotonic_decreasing", - "idxmax", - "idxmin", - "query", - } -) - -for method in _SINGLE_ID_FORWARDING_METHODS: - _set_forwarding_method_for_single_id(method) - -for method in _BINARY_FORWARDING_METHODS: - _set_forwarding_method_for_binary_function(method) From 7d2751ae300edd9f7a018b9698d16d66033131e3 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Fri, 28 Oct 2022 21:23:15 -0500 Subject: [PATCH 48/77] Fix all docstrings and add ci.yml and push.yml. 
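
The six near-identical groupby_* methods on both the client and the container
collapse into factories that attach generated forwarding methods with setattr.
A self-contained toy version of the pattern, to make the closure behavior
explicit (Echo and Proxy are hypothetical stand-ins, not Modin classes):

    class Echo:
        def groupby_sum(self, id, by_is_qc, by):
            return f"groupby_sum(id={id}, by_is_qc={by_is_qc}, by={by})"

    class Proxy:
        def __init__(self, service, id):
            self._service = service
            self._id = id

    def _set_forwarding_groupby_method(method_name: str):
        # The generated method closes over method_name and ships self._id along.
        def forwarding_method(self, by, *args, **kwargs):
            return getattr(self._service, method_name)(
                self._id, False, by, *args, **kwargs
            )

        setattr(Proxy, method_name, forwarding_method)

    for name in ("groupby_sum",):
        _set_forwarding_groupby_method(name)

    assert Proxy(Echo(), 42).groupby_sum("col") == "groupby_sum(id=42, by_is_qc=False, by=col)"

Because method_name is a parameter of the factory rather than a bare loop
variable, each generated method binds the right name; building the closures
directly in the loop would late-bind and send every call to the last method
in the list.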
Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml                      |   8 +-
 .github/workflows/push.yml                    |   4 +
 modin/core/execution/client/container.py      | 144 +++-------------
 modin/core/execution/client/query_compiler.py | 168 +++---------------
 4 files changed, 65 insertions(+), 259 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 92c0768f0f6..ec006744465 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -339,7 +339,7 @@ jobs:
     shell: bash -l {0}
     strategy:
       matrix:
-        execution: [BaseOnPython]
+        execution: [BaseOnPython, Client]
     env:
       MODIN_TEST_DATASET_SIZE: "small"
     name: Test ${{ matrix.execution }} execution, Python 3.8
@@ -372,7 +372,11 @@ jobs:
       - name: Install HDF5
         run: sudo apt update && sudo apt install -y libhdf5-dev
       - run: pytest modin/experimental/xgboost/test/test_default.py --execution=${{ matrix.execution }}
+        # Client execution doesn't need to work with xgboost
+        if: matrix.execution != 'Client'
       - run: python -m pytest -n 2 modin/test/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
+        # Client execution has different internals that we don't test yet
+        if: matrix.execution != 'Client'
       - run: pytest -n 2 modin/pandas/test/dataframe/test_binary.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_default.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_indexing.py --execution=${{ matrix.execution }}
@@ -383,6 +387,8 @@ jobs:
       - run: pytest -n 2 modin/pandas/test/dataframe/test_udf.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_window.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_pickle.py --execution=${{ matrix.execution }}
+        # Client execution doesn't need to pickle modin.pandas objects.
+        if: matrix.execution != 'Client'
       - run: python -m pytest -n 2 modin/pandas/test/test_series.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_concat.py --execution=${{ matrix.execution }}
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index eb3ea952f2e..04fc359419b 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -84,6 +84,8 @@ jobs:
       - name: Install HDF5
         run: sudo apt update && sudo apt install -y libhdf5-dev
       - run: pytest -n 2 modin/experimental/xgboost/test/test_default.py --execution=${{ matrix.execution }}
+        # Client execution doesn't need to work with xgboost
+        if: matrix.execution != 'Client'
       - run: pytest -n 2 modin/pandas/test/dataframe/test_binary.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_default.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_indexing.py --execution=${{ matrix.execution }}
@@ -94,6 +96,8 @@ jobs:
      - run: pytest -n 2 modin/pandas/test/dataframe/test_udf.py --execution=${{ matrix.execution }}
      - run: pytest -n 2 modin/pandas/test/dataframe/test_window.py --execution=${{ matrix.execution }}
      - run: pytest -n 2 modin/pandas/test/dataframe/test_pickle.py --execution=${{ matrix.execution }}
+        # Client execution doesn't need to pickle modin.pandas objects.
+ if: matrix.execution != 'Client' - run: python -m pytest -n 2 modin/pandas/test/test_series.py --execution=${{ matrix.execution }} - run: python -m pytest -n 2 modin/pandas/test/test_rolling.py --execution=${{ matrix.execution }} - run: python -m pytest -n 2 modin/pandas/test/test_concat.py --execution=${{ matrix.execution }} diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py index 52879e520d0..65e86837172 100644 --- a/modin/core/execution/client/container.py +++ b/modin/core/execution/client/container.py @@ -466,121 +466,7 @@ def merge(self, id, right, **kwargs): self._qc[new_id] = self._qc[id].merge(self._qc[right], **kwargs) return new_id - def groupby_mean( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_mean( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_count( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_count( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_max( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_max( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_min( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_min( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_sum( - self, - id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_sum( - by, axis, groupby_kwargs, agg_args, agg_kwargs, drop - ) - return new_id - - def groupby_agg( - self, - id, - by, - agg_func, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - how="axis_wise", - drop=False, - is_qc: bool = False, - ): - if is_qc: - by = self._qc[by] - new_id = self._generate_id() - self._qc[new_id] = self._qc[id].groupby_agg( - by, agg_func, axis, groupby_kwargs, agg_args, agg_kwargs, how, drop - ) - return new_id + ### I/O methods go below. ### def read_csv(self, connection, filepath, **kwargs) -> UUID: """ @@ -651,9 +537,28 @@ def to_sql(self, id, **kwargs) -> None: self._io_class.to_sql(self._qc[id], **kwargs) +def _set_forwarding_groupby_method(method_name: str): + """ + Define a groupby method that forwards arguments to an inner query compiler. + + Parameters + ---------- + method_name : str + """ + + def forwarding_method(self, id, by_is_qc, by, *args, **kwargs): + if by_is_qc: + by = self._qc[by] + new_id = self._generate_id() + self._qc[new_id] = getattr(self._qc[id], method_name)(by, *args, **kwargs) + return new_id + + setattr(ForwardingQueryCompilerContainer, method_name, forwarding_method) + + def _set_forwarding_method_for_single_id(method_name: str): """ - Define a method that forwards arguments to the inner query compiler. 
+ Define a method that forwards arguments to an inner query compiler. Parameters ---------- @@ -672,7 +577,7 @@ def forwarding_method( def _set_forwarding_method_for_binary_function(method_name: str): """ - Define a binary method that forwards arguments to the inner query compiler. + Define a binary method that forwards arguments to an inner query compiler. Parameters ---------- @@ -695,6 +600,8 @@ def forwarding_method( setattr(ForwardingQueryCompilerContainer, method_name, forwarding_method) +_GROUPBY_FORWARDING_METHODS = frozenset({"mean", "count", "max", "min", "sum", "agg"}) + _BINARY_FORWARDING_METHODS = frozenset( { "eq", @@ -824,3 +731,6 @@ def forwarding_method( for method in _BINARY_FORWARDING_METHODS: _set_forwarding_method_for_binary_function(method) + +for method in _GROUPBY_FORWARDING_METHODS: + _set_forwarding_groupby_method("groupby_" + method) diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 4d0bdb13f09..68b5474a4ab 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -220,144 +220,6 @@ def concat(self, axis, other, **kwargs): def merge(self, right, **kwargs): return self.__constructor__(self._service.merge(self._id, right._id, **kwargs)) - def groupby_mean( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_mean( - self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc - ) - ) - - def groupby_count( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_count( - self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc - ) - ) - - def groupby_max( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_max( - self._id, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop, - is_qc, - ) - ) - - def groupby_min( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_min( - self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc - ) - ) - - def groupby_sum( - self, - by, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_sum( - self._id, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop, is_qc - ) - ) - - def groupby_agg( - self, - by, - agg_func, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - how="axis_wise", - drop=False, - ): - if isinstance(by, type(self)): - by = by._id - is_qc = True - else: - is_qc = False - return self.__constructor__( - self._service.groupby_agg( - self._id, - by, - agg_func, - axis, - groupby_kwargs, - agg_args, - agg_kwargs, - how, - drop, - is_qc, - ) - ) - def get_index_names(self, axis=0): if axis == 0: return self.index.names @@ -382,6 +244,26 @@ def to_dataframe(self, nan_as_null: bool = False, allow_copy: bool = True): raise 
NotImplementedError
+
+def _set_forwarding_groupby_method(method_name: str):
+    """
+    Define a groupby method that forwards arguments to the service.
+
+    Parameters
+    ----------
+    method_name : str
+    """
+
+    def forwarding_method(self, by, *args, **kwargs):
+        by_is_qc: bool = isinstance(by, type(self))
+        if by_is_qc:
+            by = by._id
+        return self.__constructor__(
+            getattr(self._service, method_name)(self._id, by_is_qc, by, *args, **kwargs)
+        )
+
+    setattr(ClientQueryCompiler, method_name, forwarding_method)
+
+
 def _set_forwarding_method_for_binary_function(method_name: str) -> None:
     """
     Define a binary method that forwards arguments to the service.
@@ -427,6 +309,8 @@ def forwarding_method(
     setattr(ClientQueryCompiler, method_name, forwarding_method)
 
 
+_GROUPBY_FORWARDING_METHODS = frozenset({"mean", "count", "max", "min", "sum", "agg"})
+
 _BINARY_FORWARDING_METHODS = frozenset(
     {
         "eq",
@@ -452,9 +336,6 @@ def forwarding_method(
     }
 )
 
-for method in _BINARY_FORWARDING_METHODS:
-    _set_forwarding_method_for_binary_function(method)
-
 _SINGLE_ID_FORWARDING_METHODS = frozenset(
     {
         "columnarize",
@@ -554,6 +435,11 @@ def forwarding_method(
     }
 )
 
+for method in _BINARY_FORWARDING_METHODS:
+    _set_forwarding_method_for_binary_function(method)
 
 for method in _SINGLE_ID_FORWARDING_METHODS:
     _set_forwarding_method_for_single_id(method)
+
+for method in _GROUPBY_FORWARDING_METHODS:
+    _set_forwarding_groupby_method("groupby_" + method)
From aa5be586423282abf5c0eea20ff3c5030965d79d Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Fri, 28 Oct 2022 21:26:47 -0500
Subject: [PATCH 49/77] Add binary methods from Hazem's dfce9189226190bddf6aacab35cbcf44e1a74977.

Signed-off-by: mvashishtha
---
 modin/core/execution/client/container.py      | 6 ++++++
 modin/core/execution/client/query_compiler.py | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py
index 65e86837172..e06f6dd6278 100644
--- a/modin/core/execution/client/container.py
+++ b/modin/core/execution/client/container.py
@@ -624,6 +624,12 @@ def forwarding_method(
         "rmul",
         "floordiv",
         "rfloordiv",
+        "__rand__",
+        "__ror__",
+        "__xor__",
+        "__rxor__",
+        "pow",
+        "rpow",
     }
 )
 
diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py
index 68b5474a4ab..546bc2685e9 100644
--- a/modin/core/execution/client/query_compiler.py
+++ b/modin/core/execution/client/query_compiler.py
@@ -333,6 +333,12 @@ def forwarding_method(
         "rmul",
         "floordiv",
         "rfloordiv",
+        "__rand__",
+        "__ror__",
+        "__xor__",
+        "__rxor__",
+        "pow",
+        "rpow",
     }
 )
 
From 1e3bdc640a0ef8d1d4bcdef2f8452e7bd75322f6 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Fri, 28 Oct 2022 22:50:46 -0500
Subject: [PATCH 50/77] Fix CI failures.
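
A pattern behind several of these test fixes: executions that default to
pandas must assert the warning, the rest must not, so the assertion context
is chosen at runtime. A minimal runnable sketch of that pattern (the warning
text and trigger are illustrative; the real tests use
warns_that_defaulting_to_pandas from Modin's test utilities):

    import contextlib
    import warnings

    import pytest

    @contextlib.contextmanager
    def _nullcontext():
        # Shim for contextlib.nullcontext, which older Python versions lack.
        yield

    def check_read(defaults_to_pandas: bool):
        ctx = pytest.warns(UserWarning) if defaults_to_pandas else _nullcontext()
        with ctx:
            if defaults_to_pandas:
                warnings.warn("Defaulting to pandas implementation.", UserWarning)

    check_read(True)   # passes: the expected warning is raised
    check_read(False)  # passes: no warning is required or raised

The same test body then serves BaseOnPython, Client, and the distributed
executions without duplication.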
Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml                   |  2 --
 modin/conftest.py                          | 33 +++++++++++--------
 .../storage_formats/base/query_compiler.py |  2 +-
 .../storage_formats/hdk/query_compiler.py  |  6 +++-
 modin/pandas/test/dataframe/test_iter.py   | 13 +++++++-
 5 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ec006744465..3e4ff320f08 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -375,8 +375,6 @@ jobs:
         # Client execution doesn't need to work with xgboost
         if: matrix.execution != 'Client'
       - run: python -m pytest -n 2 modin/test/storage_formats/base/test_internals.py --execution=${{ matrix.execution }}
-        # Client execution has different internals that we don't test yet
-        if: matrix.execution != 'Client'
       - run: pytest -n 2 modin/pandas/test/dataframe/test_binary.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_default.py --execution=${{ matrix.execution }}
       - run: pytest -n 2 modin/pandas/test/dataframe/test_indexing.py --execution=${{ matrix.execution }}
diff --git a/modin/conftest.py b/modin/conftest.py
index 53c1eac3022..b6e7be44ec8 100644
--- a/modin/conftest.py
+++ b/modin/conftest.py
@@ -53,7 +53,6 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url):
     BaseQueryCompiler,
 )
 from modin.core.execution.client.io import ClientIO  # noqa: E402
-from modin.core.execution.client.query_compiler import ClientQueryCompiler  # noqa: E402
 from modin.core.execution.client.container import (  # noqa: E402
     ForwardingQueryCompilerContainer,
 )
@@ -280,18 +279,6 @@ def set_base_on_python_execution():
     modin.set_execution(engine="python", storage_format="Base")
 
 
-class TestClientQueryCompiler(ClientQueryCompiler):
-    @classmethod
-    def from_pandas(cls, df, data_cls):
-        return cls(cls._service.add_query_compiler(TestQC.from_pandas(df, data_cls)))
-
-    def default_to_pandas(self, pandas_op, *args, **kwargs):
-        result = self._service.default_to_pandas(self._id, pandas_op, *args, **kwargs)
-        if result.result_is_qc_id:
-            return self.__constructor__(result.result)
-        return result.result
-
-
 class ClientFactory(factories.BaseFactory):
     @classmethod
     def prepare(cls):
@@ -299,6 +286,26 @@ def prepare(cls):
 
 
 def set_client_execution():
+    # Can't always import ClientQueryCompiler, because it uses NoDefault, which
+    # is not available on older pandas.
+
+    from modin.core.execution.client.query_compiler import ClientQueryCompiler
+
+    class TestClientQueryCompiler(ClientQueryCompiler):
+        @classmethod
+        def from_pandas(cls, df, data_cls):
+            return cls(
+                cls._service.add_query_compiler(TestQC.from_pandas(df, data_cls))
+            )
+
+        def default_to_pandas(self, pandas_op, *args, **kwargs):
+            result = self._service.default_to_pandas(
+                self._id, pandas_op, *args, **kwargs
+            )
+            if result.result_is_qc_id:
+                return self.__constructor__(result.result)
+            return result.result
+
     service = ForwardingQueryCompilerContainer(BaseQueryCompiler, PandasOnPythonIO)
     ClientQueryCompiler.set_server_connection(service)
     ClientIO.query_compiler_cls = TestClientQueryCompiler
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index 4d674d8c4d2..bb10ef73385 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -3155,7 +3155,7 @@ def mask(idx):
             return (
                 self.getitem_column_array(idx, numeric=True)
                 if axis
-                else self.getitem_row_array(idx)
+                else self.getitem_row_array(idx, numeric=True)
             )
 
         if 0 <= loc < len(self.get_axis(axis)):
diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py
index 7ac735b4a02..b07348e84da 100644
--- a/modin/experimental/core/storage_formats/hdk/query_compiler.py
+++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py
@@ -555,7 +555,11 @@ def concat(self, axis, other, **kwargs):
         )
         return self.__constructor__(new_modin_frame)
 
-    def drop(self, index=None, columns=None):
+    def drop(self, index=None, columns=None, errors: str = "raise"):
+        # `errors` parameter needs to be part of the function signature because
+        # other query compilers may not take care of error handling at the API
+        # layer. This query compiler assumes there won't be any errors due to
+        # invalid keys.
         assert index is None, "Only column drop is supported"
         return self.__constructor__(
             self._modin_frame.take_2d_labels_or_positional(
diff --git a/modin/pandas/test/dataframe/test_iter.py b/modin/pandas/test/dataframe/test_iter.py
index d678ee11efa..9b6d5ad15bb 100644
--- a/modin/pandas/test/dataframe/test_iter.py
+++ b/modin/pandas/test/dataframe/test_iter.py
@@ -13,10 +13,12 @@
 
 import pytest
 
+import contextlib
 import numpy as np
 import pandas
 import matplotlib
 import modin.pandas as pd
+from modin.utils import get_current_execution
 from pandas._testing import ensure_clean
 import warnings
 
@@ -39,6 +41,12 @@
 matplotlib.use("Agg")
 
 
+@contextlib.contextmanager
+def _nullcontext():
+    """Replacement for contextlib.nullcontext missing in older Python."""
+    yield
+
+
 @pytest.mark.parametrize("method", ["items", "iteritems", "iterrows"])
 def test_items_iteritems_iterrows(method):
     data = test_data["float_nan_data"]
@@ -230,7 +238,10 @@ def test___repr__():
     with open(path, "w") as f:
         f.write(string_data)
     pandas_df = pandas.read_csv(path)
-    with warns_that_defaulting_to_pandas():
+    with warns_that_defaulting_to_pandas() if get_current_execution() in (
+        "BaseOnPython",
+        "Client",
+    ) else _nullcontext():
         modin_df = pd.read_csv(path)
     assert repr(pandas_df) == repr(modin_df)
 
From f9e0605818fc7be05091dd76a94f25190238dc3d Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Fri, 28 Oct 2022 23:44:22 -0500
Subject: [PATCH 51/77] Fix more tests.
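
The container docstring now annotates query_compiler_class and io_class as
plain type, which matches how they are used: the container never instantiates
either, it only calls classmethods on whatever classes it was handed. A toy
version of that dependency-injection pattern (both classes below are
hypothetical):

    class CsvIO:
        @classmethod
        def _read_csv(cls, path, **kwargs):
            return f"frame from {path}"

    class Container:
        def __init__(self, io_class: type):
            # Store the class itself; classmethods are looked up on it later.
            self._io_class = io_class

        def read_csv(self, path, **kwargs):
            return self._io_class._read_csv(path, **kwargs)

    assert Container(CsvIO).read_csv("data.csv") == "frame from data.csv"

Any class exposing the same classmethods is interchangeable, which is exactly
what lets the test harness swap PandasOnPythonIO for BaseOnPythonIO without
touching the container.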
Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml                 |  5 +++++
 modin/conftest.py                        |  2 +-
 modin/core/execution/client/container.py | 11 ++++++-----
 modin/pandas/test/dataframe/test_iter.py |  1 +
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3e4ff320f08..b4a60af3820 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -393,6 +393,11 @@ jobs:
       - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
+      - run: |
+          python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
+          python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
+        # Client has to be able to do CSV and SQL I/O.
+        if: matrix.execution == 'Client'
       - uses: codecov/codecov-action@v2

   test-hdk:
diff --git a/modin/conftest.py b/modin/conftest.py
index b6e7be44ec8..da83c4c8a68 100644
--- a/modin/conftest.py
+++ b/modin/conftest.py
@@ -306,7 +306,7 @@ def default_to_pandas(self, pandas_op, *args, **kwargs):
             return self.__constructor__(result.result)
         return result.result

-    service = ForwardingQueryCompilerContainer(BaseQueryCompiler, PandasOnPythonIO)
+    service = ForwardingQueryCompilerContainer(BaseQueryCompiler, BaseOnPythonIO)
     ClientQueryCompiler.set_server_connection(service)
     ClientIO.query_compiler_cls = TestClientQueryCompiler
     ClientIO.set_server_connection(service)
diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py
index e06f6dd6278..ea341c64255 100644
--- a/modin/core/execution/client/container.py
+++ b/modin/core/execution/client/container.py
@@ -28,13 +28,14 @@ class ForwardingQueryCompilerContainer:

     Parameters
     ----------
-    query_compiler_class : BaseQueryCompiler
-        Query compiler class to contain.
-    io_class : BaseIO
-        The IO class to use for reading and writing data.
+    query_compiler_class : type
+        Query compiler class to contain. Should be a subclass of BaseQueryCompiler.
+    io_class : type
+        The IO class to use for reading and writing data. Should be a subclass
+        of modin.core.io.io.BaseIO.
     """

-    def __init__(self, query_compiler_class: BaseQueryCompiler, io_class: BaseIO):
+    def __init__(self, query_compiler_class: type, io_class: type):
         self._qc = {}
         self._query_compiler_class = query_compiler_class
         self._io_class = io_class
diff --git a/modin/pandas/test/dataframe/test_iter.py b/modin/pandas/test/dataframe/test_iter.py
index 9b6d5ad15bb..4fab87739cf 100644
--- a/modin/pandas/test/dataframe/test_iter.py
+++ b/modin/pandas/test/dataframe/test_iter.py
@@ -240,6 +240,7 @@ def test___repr__():
     pandas_df = pandas.read_csv(path)
     with warns_that_defaulting_to_pandas() if get_current_execution() in (
         "BaseOnPython",
+        "PandasOnPython",
         "Client",
     ) else _nullcontext():
         modin_df = pd.read_csv(path)

From 09c07f93852f7acc4981800dd522c9620a537abf Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Fri, 28 Oct 2022 23:46:27 -0500
Subject: [PATCH 52/77] Fix flake8.
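flake8 flags modin.core.io.io.BaseIO as an unused import (F401) now that the
previous patch annotates the container's arguments as plain `type`. For
reference, a sketch of the signature that makes the import unnecessary:

    # The container receives classes, not instances, so `type` is the
    # honest annotation and no BaseIO import is needed.
    def __init__(self, query_compiler_class: type, io_class: type):
        self._qc = {}
        self._query_compiler_class = query_compiler_class
        self._io_class = io_class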
Signed-off-by: mvashishtha
---
 modin/core/execution/client/container.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py
index ea341c64255..8c998bd5c24 100644
--- a/modin/core/execution/client/container.py
+++ b/modin/core/execution/client/container.py
@@ -18,7 +18,6 @@
 from typing import Any, NamedTuple, Optional, Union
 from uuid import UUID, uuid4

-from modin.core.io.io import BaseIO
 from modin.core.storage_formats.base.query_compiler import BaseQueryCompiler

From 0f53343f77cbd4bdc1bf0cc0d82c6cacaf846e4b Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Sat, 29 Oct 2022 00:19:30 -0500
Subject: [PATCH 53/77] Fix some tests.

Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml         |  3 ++-
 .github/workflows/push.yml       |  6 ++++++
 modin/pandas/test/test_series.py | 11 +++++++----
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b4a60af3820..f97e5f77ed4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -393,7 +393,8 @@ jobs:
       - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
-      - run: |
+      - name: I/O tests
+        run: |
           python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
           python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
         # Client has to be able to do CSV and SQL I/O.
         if: matrix.execution == 'Client'
diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 04fc359419b..92e5c042221 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -104,6 +104,12 @@ jobs:
       - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
+      - name: I/O tests
+        run: |
+          python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
+          python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
+        # Client has to be able to do CSV and SQL I/O.
+        if: matrix.execution == 'Client'
       - uses: codecov/codecov-action@v2

   test-hdk:
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 61ff44770b4..458552bbe2f 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1739,11 +1739,14 @@ def dt_with_empty_partition(lib):
     df_b = lib.DataFrame({"B": [lib.to_datetime("27/10/2020")]})
     df = lib.concat([df_a, df_b], axis=1)
     eval_result = df.eval("B - A", engine="python")
-    # BaseOnPython had a single partition after the concat, and it
-    # maintains that partition after eval. In other execution modes,
-    # eval() should re-split the result into two column partitions,
+    # BaseOnPython and Client have a single partition after the concat,
+    # and they maintain that partition after eval. In other execution
+    # modes, eval() should re-split the result into two column partitions,
     # one of which is empty.
- if isinstance(df, pd.DataFrame) and get_current_execution() != "BaseOnPython": + if isinstance(df, pd.DataFrame) and get_current_execution() not in ( + "BaseOnPython", + "Client", + ): assert eval_result._query_compiler._modin_frame._partitions.shape == (1, 2) return eval_result.dt.days From ff9478200cb51372a9ad468b6bf9b7b4ca75c4cf Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Sat, 29 Oct 2022 00:39:36 -0500 Subject: [PATCH 54/77] Update modin/core/execution/client/io.py --- modin/core/execution/client/io.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 679ce7017ba..80af63e1f98 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -67,10 +67,11 @@ def read_csv(cls, filepath_or_buffer, **kwargs): """ if isinstance(filepath_or_buffer, str): filepath_or_buffer = fsspec.open(filepath_or_buffer).full_name - if filepath_or_buffer.startswith("file://"): + file_protocol = "file://" + if filepath_or_buffer.startswith(file_protocol): # We will do this so that the backend can know whether this # is a path or a URL. - filepath_or_buffer = filepath_or_buffer[7:] + filepath_or_buffer = filepath_or_buffer[len(file_protocol):] else: raise NotImplementedError("Only filepaths are supported for read_csv") if cls._server_conn is None: From d4fbf0acdd6ba42d663ffe69730863b007e8ac8b Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Sat, 29 Oct 2022 00:42:05 -0500 Subject: [PATCH 55/77] Fix black. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 80af63e1f98..091d33fbe52 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -71,7 +71,7 @@ def read_csv(cls, filepath_or_buffer, **kwargs): if filepath_or_buffer.startswith(file_protocol): # We will do this so that the backend can know whether this # is a path or a URL. - filepath_or_buffer = filepath_or_buffer[len(file_protocol):] + filepath_or_buffer = filepath_or_buffer[len(file_protocol) :] else: raise NotImplementedError("Only filepaths are supported for read_csv") if cls._server_conn is None: From 6119b8eb6c4ab6c6e87df7d92677ea2aab9ff65d Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Sat, 29 Oct 2022 01:22:29 -0500 Subject: [PATCH 56/77] Fix omnisci by restoring lazy execution check. 
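Checking `len(obj.index)` forces a lazy query compiler to materialize its
index, which the OmniSci execution relies on avoiding, so the emptiness filter
in concat() is gated on `lazy_execution` again. The call shape this affects,
as exercised by the newly xfailed test below:

    import modin.pandas as pd

    # Eager executions filter the empty frame out before concatenating;
    # lazy ones (OmniSci, Client) forward every object to the backend.
    pd.concat([pd.DataFrame(), pd.DataFrame({"a": [1, 2]})])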
Signed-off-by: mvashishtha
---
 modin/pandas/general.py          | 3 ++-
 modin/pandas/test/test_concat.py | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/general.py b/modin/pandas/general.py
index b2993b4c9bc..a34dac3a2e5 100644
--- a/modin/pandas/general.py
+++ b/modin/pandas/general.py
@@ -472,7 +472,8 @@ def concat(
     list_of_objs = [
         obj._query_compiler
         for obj in list_of_objs
-        if len(obj.index) or len(obj.columns)
+        if (not obj._query_compiler.lazy_execution and len(obj.index))
+        or len(obj.columns)
     ]
     if keys is not None:
         if all_series:
diff --git a/modin/pandas/test/test_concat.py b/modin/pandas/test/test_concat.py
index 12d23892b2e..4ed4af2c5e5 100644
--- a/modin/pandas/test/test_concat.py
+++ b/modin/pandas/test/test_concat.py
@@ -170,6 +170,10 @@ def test_concat_series_only():
     )


+@pytest.mark.xfail_executions(
+    "Client",
+    reason="Client query compiler has lazy_execution=True, so it doesn't detect any frames when looking for query compilers here: https://github.com/modin-project/modin/blob/f492ba9888fc05ff7c224db8a22faac8c0106a4b/modin/pandas/general.py#L472-L477",
+)
 def test_concat_with_empty_frame():
     modin_empty_df = pd.DataFrame()
     pandas_empty_df = pandas.DataFrame()

From 7db25b70638b65f1db173f08ad1f60e1ec22107d Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Sat, 29 Oct 2022 02:11:51 -0500
Subject: [PATCH 57/77] Try fixing Client io yml.

Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f97e5f77ed4..93593fe3a65 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -393,11 +393,11 @@ jobs:
       - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
-      - name: I/O tests
-        run: |
-          python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
-          python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
-        # Client has to be able to do CSV and SQL I/O.
+      - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
+        # Client has to be able to do CSV I/O.
         if: matrix.execution == 'Client'
+      - run: python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
+        # Client has to be able to do SQL I/O.
+        if: matrix.execution == 'Client'
       - uses: codecov/codecov-action@v2

   test-hdk:

From 86bbc75e331f1d9190cf9baed3fbf3141e43638b Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Sat, 29 Oct 2022 02:37:03 -0500
Subject: [PATCH 58/77] Make test dataset size normal so I/O tests pass.
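The Client I/O tests only pass with a NORMAL-sized test dataset, so the
workflow sets MODIN_TEST_DATASET_SIZE inline for just these two pytest runs.
The variable has to be in the environment before the test suite starts; a
sketch of the equivalent from Python:

    import os

    # Must be set before the test utilities are imported, since they
    # read the dataset size when building their test frames.
    os.environ["MODIN_TEST_DATASET_SIZE"] = "NORMAL"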
Signed-off-by: mvashishtha
---
 .github/workflows/ci.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 93593fe3a65..10224ab1406 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -393,11 +393,13 @@ jobs:
       - run: python -m pytest -n 2 modin/pandas/test/test_groupby.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_reshape.py --execution=${{ matrix.execution }}
       - run: python -m pytest -n 2 modin/pandas/test/test_general.py --execution=${{ matrix.execution }}
-      - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
-        # Client has to be able to do CSV I/O.
-        if: matrix.execution == 'Client'
-      - run: python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
-        # Client has to be able to do SQL I/O.
+      - name: Test I/O
+        # note that if the test dataset size is small, like for the other
+        # tests in this job, the tests fail.
+        run: |
+          MODIN_TEST_DATASET_SIZE=NORMAL python -m pytest modin/pandas/test/test_io.py::TestCsv --execution=${{ matrix.execution }}
+          MODIN_TEST_DATASET_SIZE=NORMAL python -m pytest modin/pandas/test/test_io.py::TestSql --execution=${{ matrix.execution }}
+        # Client has to be able to do CSV and SQL I/O.
         if: matrix.execution == 'Client'
       - uses: codecov/codecov-action@v2

 test-hdk:

From 8826c547c5335002594e602ce87cb95b0850963e Mon Sep 17 00:00:00 2001
From: Mahesh Vashishtha
Date: Mon, 31 Oct 2022 10:06:54 -0500
Subject: [PATCH 59/77] Apply suggestions from code review

Co-authored-by: Karthik Velayutham
---
 modin/core/storage_formats/pandas/query_compiler.py           | 2 +-
 modin/experimental/core/storage_formats/hdk/query_compiler.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 3a8e78822d5..1f10ff00f7c 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -2359,7 +2359,7 @@ def drop(self, index=None, columns=None, errors: str = "raise"):
         # `errors` parameter needs to be part of the function signature because
         # other query compilers may not take care of error handling at the API
         # layer. This query compiler assumes there won't be any errors due to
-        # invald keys.
+        # invalid keys.
         if index is not None:
             index = np.sort(self.index.get_indexer_for(self.index.difference(index)))
         if columns is not None:
diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py
index b07348e84da..711068621e9 100644
--- a/modin/experimental/core/storage_formats/hdk/query_compiler.py
+++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py
@@ -555,7 +555,7 @@ def drop(self, index=None, columns=None, errors: str = "raise"):
         # `errors` parameter needs to be part of the function signature because
         # other query compilers may not take care of error handling at the API
         # layer. This query compiler assumes there won't be any errors due to
-        # invald keys.
+        # invalid keys.
         assert index is None, "Only column drop is supported"
         return self.__constructor__(
             self._modin_frame.take_2d_labels_or_positional(

From 584ef10cfcdc331ca27585c79f6497a13664c226 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Mon, 31 Oct 2022 12:02:11 -0500
Subject: [PATCH 60/77] Address comments.
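Code review asked for plain os.path handling in read_csv instead of fsspec: a
local path is made absolute before being forwarded, because the service may
run with a different working directory on the same machine, while anything
that does not exist locally is passed through untouched. A sketch of the
dispatch (the paths are illustrative):

    import os

    path = "data/table.csv"
    if os.path.exists(path):
        # Local file: resolve it so the service can open it no matter
        # what its own working directory is.
        path = os.path.abspath(path)
    # Nonexistent locally (e.g. "s3://bucket/table.csv"): forwarded
    # unchanged for the service to interpret.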
Signed-off-by: mvashishtha
---
 modin/core/execution/client/io.py           | 18 ++++++++----------
 .../storage_formats/base/query_compiler.py  |  2 ++
 modin/pandas/base.py                        | 12 ++----------
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py
index 091d33fbe52..be165e168fb 100644
--- a/modin/core/execution/client/io.py
+++ b/modin/core/execution/client/io.py
@@ -14,7 +14,7 @@
 """The module holds the factory which performs I/O using pandas on a Client."""

 from modin.core.io.io import BaseIO
-import fsspec
+import os
 import pandas

@@ -65,19 +65,17 @@ def read_csv(cls, filepath_or_buffer, **kwargs):
         self.query_compiler_cls
             Query compiler with CSV data read in.
         """
-        if isinstance(filepath_or_buffer, str):
-            filepath_or_buffer = fsspec.open(filepath_or_buffer).full_name
-            file_protocol = "file://"
-            if filepath_or_buffer.startswith(file_protocol):
-                # We will do this so that the backend can know whether this
-                # is a path or a URL.
-                filepath_or_buffer = filepath_or_buffer[len(file_protocol) :]
-        else:
-            raise NotImplementedError("Only filepaths are supported for read_csv")
         if cls._server_conn is None:
             raise ConnectionError(
                 "Missing server connection, did you initialize the connection?"
             )
+        if not isinstance(filepath_or_buffer, str):
+            raise NotImplementedError("Only filepaths are supported for read_csv")
+        if os.path.exists(filepath_or_buffer):
+            # In case this is a local path, we should use the absolute path
+            # because the service might be running in a different directory
+            # on the same machine.
+            filepath_or_buffer = os.path.abspath(filepath_or_buffer)
         server_result = cls._server_conn.read_csv(
             cls._data_conn, filepath_or_buffer, **kwargs
         )
diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py
index bb10ef73385..8764b369257 100644
--- a/modin/core/storage_formats/base/query_compiler.py
+++ b/modin/core/storage_formats/base/query_compiler.py
@@ -2162,6 +2162,8 @@ def getitem_row_array(self, key: List[Hashable], numeric: bool = False):
         key : list-like
             Numeric indices of the rows to pick.
         numeric : bool, default: False
+            Whether the key passed in represents the numeric row positions
+            or the possibly non-numeric row labels.

         Returns
         -------
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index b1609a35684..87271fd8053 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -3181,16 +3181,8 @@ def __getitem__(self, key):
         BasePandasDataset
             Located dataset.
         """
-        if not self._query_compiler.lazy_execution:
-            if len(self) == 0:
-                return self._default_to_pandas("__getitem__", key)
-            # fastpath for common case
-            if isinstance(key, str) and key in self._query_compiler.columns:
-                return self._getitem(key)
-            elif is_list_like(key) and all(
-                k in self._query_compiler.columns for k in key
-            ):
-                return self._getitem(key)
+        if not self._query_compiler.lazy_execution and len(self) == 0:
+            return self._default_to_pandas("__getitem__", key)
         # see if we can slice the rows
         # This lets us reuse code in pandas to error check
         indexer = None

From 187115403a0dd7d5dbb0c8cb0d7411584c523986 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Mon, 31 Oct 2022 23:51:50 -0500
Subject: [PATCH 61/77] Respond to comments.
Signed-off-by: mvashishtha --- modin/conftest.py | 2 +- modin/pandas/test/dataframe/test_map_metadata.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modin/conftest.py b/modin/conftest.py index da83c4c8a68..13571d45487 100644 --- a/modin/conftest.py +++ b/modin/conftest.py @@ -364,8 +364,8 @@ def pytest_configure(config): set_base_on_python_execution() config.addinivalue_line("filterwarnings", default_to_pandas_ignore_string) elif execution == "Client": - config.addinivalue_line("filterwarnings", default_to_pandas_ignore_string) set_client_execution() + config.addinivalue_line("filterwarnings", default_to_pandas_ignore_string) else: partition, engine = execution.split("On") modin.set_execution(engine=engine, storage_format=partition) diff --git a/modin/pandas/test/dataframe/test_map_metadata.py b/modin/pandas/test/dataframe/test_map_metadata.py index 37a3a1cd0d4..5a82bbd72e6 100644 --- a/modin/pandas/test/dataframe/test_map_metadata.py +++ b/modin/pandas/test/dataframe/test_map_metadata.py @@ -715,7 +715,7 @@ def test_drop(): # TODO(https://github.com/modin-project/modin/issues/5163): raise a # KeyError like pandas when the label is not found when lazy_execution is - # off. Also use df_equals instead of + # off. check_exception_type = modin_simple._query_compiler.lazy_execution eval_general( modin_simple, From 17073902b0c6d4d20afe3634064964a162feed7c Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 1 Nov 2022 00:12:02 -0500 Subject: [PATCH 62/77] Fix fuzzydata by making getitem_row_array use numeric=True everywhere. Signed-off-by: mvashishtha --- .../execution/ray/implementations/pandas_on_ray/io/io.py | 2 +- modin/core/storage_formats/base/query_compiler.py | 2 +- modin/core/storage_formats/pandas/query_compiler.py | 6 +++--- modin/pandas/base.py | 4 +++- modin/pandas/series.py | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py b/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py index 6922ef0406e..329ddea8075 100644 --- a/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py +++ b/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py @@ -91,7 +91,7 @@ def to_sql(cls, qc, **kwargs): # since the mapping operation is non-blocking, each partition will return an empty DF # so at the end, the blocking operation will be this empty DF to_pandas - empty_df = qc.getitem_row_array([0]).to_pandas().head(0) + empty_df = qc.getitem_row_array([0], numeric=True).to_pandas().head(0) empty_df.to_sql(**kwargs) # so each partition will append its respective DF kwargs["if_exists"] = "append" diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 8764b369257..52ad27dea8a 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2153,7 +2153,7 @@ def get_column(df, key): return DataFrameDefault.register(get_column)(self, key=key) - def getitem_row_array(self, key: List[Hashable], numeric: bool = False): + def getitem_row_array(self, key: List[Hashable], numeric: bool): """ Get row data for target indices. diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 1f10ff00f7c..a9e5d62c749 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -2221,7 +2221,7 @@ def getitem_array(self, key): # requested. 
key = pandas.RangeIndex(len(self.index))[key] if len(key): - return self.getitem_row_array(key) + return self.getitem_row_array(key, numeric=True) else: return self.from_pandas( pandas.DataFrame(columns=self.columns), type(self._modin_frame) @@ -2247,7 +2247,7 @@ def getitem_column_array(self, key, numeric=False): ) return self.__constructor__(new_modin_frame) - def getitem_row_array(self, key: List[Hashable], numeric: bool = False): + def getitem_row_array(self, key: List[Hashable], numeric: bool): if numeric: kwargs = {"row_positions": key} else: @@ -3302,7 +3302,7 @@ def sort_columns_by_row_values(self, rows, ascending=True, **kwargs): rows = [rows] ErrorMessage.default_to_pandas("sort_values") broadcast_value_list = [ - self.getitem_row_array([row]).to_pandas() for row in rows + self.getitem_row_array([row], numeric=True).to_pandas() for row in rows ] index_builder = list(zip(broadcast_value_list, rows)) broadcast_values = pandas.concat( diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 87271fd8053..7c776e07971 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -2499,7 +2499,9 @@ def _sample( query_compiler = self._query_compiler.getitem_column_array(samples) return self.__constructor__(query_compiler=query_compiler) else: - query_compiler = self._query_compiler.getitem_row_array(samples) + query_compiler = self._query_compiler.getitem_row_array( + samples, numeric=True + ) return self.__constructor__(query_compiler=query_compiler) def _sem( diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 17ffa05310f..ea83ab9f6d2 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -2443,7 +2443,7 @@ def _getitem(self, key): if is_bool_indexer(key): return self.__constructor__( query_compiler=self._query_compiler.getitem_row_array( - pandas.RangeIndex(len(self.index))[key] + pandas.RangeIndex(len(self.index))[key], numeric=True ) ) # TODO: More efficiently handle `tuple` case for `Series.__getitem__` From e7af275cedd31d023b7d3e00369ed2be51e60feb Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 1 Nov 2022 18:22:43 -0500 Subject: [PATCH 63/77] Pass errors through astype. Signed-off-by: mvashishtha --- modin/core/storage_formats/base/query_compiler.py | 6 ++++-- modin/core/storage_formats/pandas/query_compiler.py | 4 ++-- modin/pandas/base.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 52ad27dea8a..3258f57c6f8 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -1416,7 +1416,7 @@ def stack(self, level, dropna): ) # Abstract map partitions across select indices - def astype(self, col_dtypes, **kwargs): # noqa: PR02 + def astype(self, col_dtypes, errors: str): """ Convert columns dtypes to given dtypes. @@ -1424,6 +1424,8 @@ def astype(self, col_dtypes, **kwargs): # noqa: PR02 ---------- col_dtypes : dict Map for column names and new dtypes. + error : {"raise", "ignore"} + Control raising of exceptions on invalid data for provided dtype. **kwargs : dict Serves the compatibility purpose. Does not affect the result. @@ -1433,7 +1435,7 @@ def astype(self, col_dtypes, **kwargs): # noqa: PR02 New QueryCompiler with updated dtypes. 
""" return DataFrameDefault.register(pandas.DataFrame.astype)( - self, dtype=col_dtypes, **kwargs + self, dtype=col_dtypes, errors=errors ) def infer_objects(self): diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index a9e5d62c749..3ac654f85fc 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -1539,8 +1539,8 @@ def datetime_freq(df): # END Dt map partitions operations - def astype(self, col_dtypes, **kwargs): - return self.__constructor__(self._modin_frame.astype(col_dtypes)) + def astype(self, col_dtypes, errors: str): + return self.__constructor__(self._modin_frame.astype(col_dtypes, errors=errors)) def infer_objects(self): return self.__constructor__(self._modin_frame.infer_objects()) diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 7c776e07971..e021970558a 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -923,7 +923,7 @@ def astype(self, dtype, copy=True, errors="raise"): # noqa: PR01, RT01, D200 # Assume that the dtype is a scalar. col_dtypes = {column: dtype for column in self._query_compiler.columns} - new_query_compiler = self._query_compiler.astype(col_dtypes) + new_query_compiler = self._query_compiler.astype(col_dtypes, errors=errors) return self._create_or_update_from_compiler(new_query_compiler, not copy) @property From edf99f8960790801696fd7063024767fa0db90ff Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 1 Nov 2022 19:35:33 -0500 Subject: [PATCH 64/77] Fix astype errors. Signed-off-by: mvashishtha --- modin/core/storage_formats/pandas/query_compiler.py | 4 ++-- modin/experimental/core/storage_formats/hdk/query_compiler.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index 3ac654f85fc..d80323184bb 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -1540,7 +1540,7 @@ def datetime_freq(df): # END Dt map partitions operations def astype(self, col_dtypes, errors: str): - return self.__constructor__(self._modin_frame.astype(col_dtypes, errors=errors)) + return self.__constructor__(self._modin_frame.astype(col_dtypes)) def infer_objects(self): return self.__constructor__(self._modin_frame.infer_objects()) @@ -2638,7 +2638,7 @@ def groupby_mean(self, by, axis, groupby_kwargs, agg_args, agg_kwargs, drop=Fals ) qc_with_converted_datetime_cols = ( - self.astype({col: "int64" for col in datetime_cols.keys()}) + self.astype({col: "int64" for col in datetime_cols.keys()}, errors="raise") if len(datetime_cols) > 0 else self ) diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py index 711068621e9..02bc5187052 100644 --- a/modin/experimental/core/storage_formats/hdk/query_compiler.py +++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py @@ -691,9 +691,9 @@ def reset_index(self, **kwargs): self._modin_frame.reset_index(drop), shape_hint=shape_hint ) - def astype(self, col_dtypes, **kwargs): + def astype(self, col_dtypes, errors: str): return self.__constructor__( - self._modin_frame.astype(col_dtypes), self._shape_hint + self._modin_frame.astype(col_dtypes, errors), self._shape_hint ) def setitem(self, axis, key, value): From e74db7cecd12d88e9bd68dc8ce7a6400fdc94ac2 Mon Sep 17 00:00:00 2001 From: mvashishtha 
Date: Mon, 7 Nov 2022 17:51:37 -0600 Subject: [PATCH 65/77] Use new take_2d_labels for most insertion. test_indexing passes except one multiindexing case Signed-off-by: mvashishtha --- modin/core/execution/client/container.py | 2 + modin/core/execution/client/query_compiler.py | 2 + .../storage_formats/base/query_compiler.py | 36 +++++++ modin/pandas/indexing.py | 97 +++++++++++++------ 4 files changed, 107 insertions(+), 30 deletions(-) diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py index 8c998bd5c24..6e4c0f31268 100644 --- a/modin/core/execution/client/container.py +++ b/modin/core/execution/client/container.py @@ -639,7 +639,9 @@ def forwarding_method( "transpose", "take_2d", "getitem_column_array", + "get_columns_with_labels", "getitem_row_array", + "get_rows_with_labels", "pivot", "get_dummies", "drop", diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 546bc2685e9..dffcf750059 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -348,7 +348,9 @@ def forwarding_method( "transpose", "take_2d", "getitem_column_array", + "get_columns_with_labels", "getitem_row_array", + "get_rows_with_labels", "pivot", "get_dummies", "drop", diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 3258f57c6f8..de724bca17a 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -3126,6 +3126,42 @@ def applyer(df): return DataFrameDefault.register(applyer)(self) + def take_2d_labels( + self, + index, + columns, + index_is_series, + column_is_series, + ): + """ + Take the given labels. + + Parameters + ---------- + index : slice, scalar, or list-like + Labels of rows to grab. + columns : slice, scalar, or list-like + Labels of columns to grab. + + Returns + ------- + BaseQueryCompiler + Subset of this QueryCompiler. + """ + if isinstance(index, type(self)): + index = index.to_pandas() + if index_is_series: + index = index.iloc[:, 0] + if isinstance(columns, type(self)): + columns = columns.to_pandas() + if column_is_series: + columns = columns.iloc[:, 0] + + def applyer(df): + return df.loc[index, columns] + + return DataFrameDefault.register(applyer)(self) + def insert_item(self, axis, loc, value, how="inner", replace=False): """ Insert rows/columns defined by `value` at the specified position. diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 16b3003f0f9..7b0e07ca873 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -35,6 +35,9 @@ from pandas.api.types import is_list_like, is_bool from pandas.core.dtypes.common import is_integer, is_bool_dtype, is_integer_dtype from pandas.core.indexing import IndexingError +from typing import Union + +from modin.core.execution.client.query_compiler import ClientQueryCompiler from modin.error_message import ErrorMessage from modin.logging import ClassLogger @@ -319,10 +322,60 @@ def __setitem__(self, key, item): # pragma: no cover """ raise NotImplementedError("Implemented by subclasses") - def _getitem_positional( + def _take_2d_labels(self, row_lookup, col_lookup): + """ + Take 2D labels from the DataFrame. + + Parameters + ---------- + row_lookup : list-like + List of row labels to take. + col_lookup : list-like + List of column labels to take. + + Returns + ------- + modin.pandas.DataFrame + DataFrame with taken labels. 
+ """ + row_is_series = isinstance(row_lookup, Series) + col_is_series = isinstance(col_lookup, Series) + if is_scalar(row_lookup): + row_lookup = [row_lookup] + elif isinstance(row_lookup, (Series, DataFrame)): + row_lookup = row_lookup._query_compiler + if is_scalar(col_lookup): + col_lookup = [col_lookup] + elif isinstance(col_lookup, (Series, DataFrame)): + col_lookup = col_lookup._query_compiler + return self.qc.take_2d_labels( + row_lookup, col_lookup, row_is_series, col_is_series + ) + + def _take_2d_positional( + self, + row_lookup: Union[slice, range, np.ndarray], + col_lookup: Union[slice, range, np.ndarray], + ): + """ """ + if isinstance(row_lookup, slice): + ErrorMessage.catch_bugs_and_request_email( + failure_condition=row_lookup != slice(None), + extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}", + ) + row_lookup = None + if isinstance(col_lookup, slice): + ErrorMessage.catch_bugs_and_request_email( + failure_condition=col_lookup != slice(None), + extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {col_lookup}", + ) + col_lookup = None + + return self.qc.take_2d(row_lookup, col_lookup) + + def _get_pandas_object_from_qc_view( self, - row_lookup, - col_lookup, + qc_view, row_multiindex_full_lookup: bool, col_multiindex_full_lookup: bool, row_scalar: bool, @@ -334,10 +387,8 @@ def _getitem_positional( Parameters ---------- - row_lookup : slice(None), range or np.ndarray - The global row index to retrieve data from. - col_lookup : slice(None), range or np.ndarray - The global col index to retrieve data from. + qc_view : BaseQueryCompiler + Query compiler to operate on. row_multiindex_full_lookup : bool See _multiindex_possibly_contains_key.__doc__. col_multiindex_full_lookup : bool @@ -361,20 +412,6 @@ def _getitem_positional( Ideally, this API should get rid of using slices as indexers and either use a common ``Indexer`` object or range and ``np.ndarray`` only. """ - if isinstance(row_lookup, slice): - ErrorMessage.catch_bugs_and_request_email( - failure_condition=row_lookup != slice(None), - extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {row_lookup}", - ) - row_lookup = None - if isinstance(col_lookup, slice): - ErrorMessage.catch_bugs_and_request_email( - failure_condition=col_lookup != slice(None), - extra_log=f"Only None-slices are acceptable as a slice argument in masking, got: {col_lookup}", - ) - col_lookup = None - - qc_view = self.qc.take_2d(row_lookup, col_lookup) if ndim == 2: return self.df.__constructor__(query_compiler=qc_view) @@ -678,11 +715,12 @@ def __getitem__(self, key): if isinstance(row_loc, Series) and is_boolean_array(row_loc): return self._handle_boolean_masking(row_loc, col_loc) - row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc) - - result = self._getitem_positional( - row_lookup, - col_lookup, + if isinstance(self.qc, ClientQueryCompiler): + qc_view = self._take_2d_labels(row_loc, col_loc) + else: + qc_view = self._take_2d_positional(*self._compute_lookup(row_loc, col_loc)) + result = self._get_pandas_object_from_qc_view( + qc_view, row_multiindex_full_lookup, col_multiindex_full_lookup, row_scalar, @@ -731,7 +769,7 @@ def __getitem__(self, key): # This is done for cases where the index passed in has other state, like a # frequency in the case of DateTimeIndex. 
if ( - row_lookup is not None + row_loc is not None and isinstance(col_loc, slice) and col_loc == slice(None) and isinstance(key, pandas.Index) @@ -1064,9 +1102,8 @@ def __getitem__(self, key): row_lookup, col_lookup = self._compute_lookup(row_loc, col_loc) - result = self._getitem_positional( - row_lookup, - col_lookup, + result = self._get_pandas_object_from_qc_view( + self._take_2d_positional(row_lookup, col_lookup), row_multiindex_full_lookup=False, col_multiindex_full_lookup=False, row_scalar=row_scalar, From b616c28a1c0064d7cda6a6a09239f9429d8b1900 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Mon, 7 Nov 2022 18:07:56 -0600 Subject: [PATCH 66/77] Actually use client query compiler. Signed-off-by: mvashishtha --- modin/core/execution/client/container.py | 3 +-- modin/core/execution/client/query_compiler.py | 3 +-- modin/core/storage_formats/base/query_compiler.py | 8 ++++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/modin/core/execution/client/container.py b/modin/core/execution/client/container.py index 6e4c0f31268..559156c4487 100644 --- a/modin/core/execution/client/container.py +++ b/modin/core/execution/client/container.py @@ -639,9 +639,8 @@ def forwarding_method( "transpose", "take_2d", "getitem_column_array", - "get_columns_with_labels", "getitem_row_array", - "get_rows_with_labels", + "take_2d_labels", "pivot", "get_dummies", "drop", diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index dffcf750059..93776f57569 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -348,9 +348,8 @@ def forwarding_method( "transpose", "take_2d", "getitem_column_array", - "get_columns_with_labels", "getitem_row_array", - "get_rows_with_labels", + "take_2d_labels", "pivot", "get_dummies", "drop", diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index de724bca17a..72ffd2d24ab 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -3148,16 +3148,20 @@ def take_2d_labels( BaseQueryCompiler Subset of this QueryCompiler. """ - if isinstance(index, type(self)): + print( + f"call take_2d_labels with index {index} and columns {columns} and {isinstance(index, type(self))=} and {isinstance(columns, type(self))=}" + ) + if isinstance(index, BaseQueryCompiler): index = index.to_pandas() if index_is_series: index = index.iloc[:, 0] - if isinstance(columns, type(self)): + if isinstance(columns, BaseQueryCompiler): columns = columns.to_pandas() if column_is_series: columns = columns.iloc[:, 0] def applyer(df): + print(f"getting loc with index {index} and columns {columns}") return df.loc[index, columns] return DataFrameDefault.register(applyer)(self) From 832556d79cfd41543ff886ff05ca8879e68d6a41 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 8 Nov 2022 11:51:52 -0600 Subject: [PATCH 67/77] Fix multiindex and fix doc_checker. 
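The previous patch listed forwarding targets that don't exist on
BaseQueryCompiler (get_columns_with_labels, get_rows_with_labels); both lists
have to name take_2d_labels for the generated methods to line up with what the
service implements. Roughly the pattern in play (a simplified sketch;
_make_forwarding_method is a hypothetical stand-in for the real closure in
container.py and query_compiler.py):

    def _make_forwarding_method(name):
        def forwarding_method(self, *args, **kwargs):
            # Run the named operation on our ID at the service and wrap
            # the returned ID in a new client query compiler.
            return self.__constructor__(
                getattr(self._service, name)(self._id, *args, **kwargs)
            )
        return forwarding_method

    for _name in ("transpose", "take_2d", "take_2d_labels", "getitem_row_array"):
        setattr(ClientQueryCompiler, _name, _make_forwarding_method(_name))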
Signed-off-by: mvashishtha --- .../storage_formats/base/query_compiler.py | 22 +++----- modin/pandas/indexing.py | 53 +++++++++++++------ 2 files changed, 44 insertions(+), 31 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 72ffd2d24ab..4cc171e2ce9 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -1424,10 +1424,8 @@ def astype(self, col_dtypes, errors: str): ---------- col_dtypes : dict Map for column names and new dtypes. - error : {"raise", "ignore"} + errors : {"raise", "ignore"} Control raising of exceptions on invalid data for provided dtype. - **kwargs : dict - Serves the compatibility purpose. Does not affect the result. Returns ------- @@ -3130,17 +3128,15 @@ def take_2d_labels( self, index, columns, - index_is_series, - column_is_series, ): """ Take the given labels. Parameters ---------- - index : slice, scalar, or list-like + index : slice, scalar, list-like, or BaseQueryCompiler Labels of rows to grab. - columns : slice, scalar, or list-like + columns : slice, scalar, list-like, or BaseQueryCompiler Labels of columns to grab. Returns @@ -3148,20 +3144,16 @@ def take_2d_labels( BaseQueryCompiler Subset of this QueryCompiler. """ - print( - f"call take_2d_labels with index {index} and columns {columns} and {isinstance(index, type(self))=} and {isinstance(columns, type(self))=}" - ) if isinstance(index, BaseQueryCompiler): index = index.to_pandas() - if index_is_series: - index = index.iloc[:, 0] + assert len(index.columns) == 1 + index = index.iloc[:, 0] if isinstance(columns, BaseQueryCompiler): columns = columns.to_pandas() - if column_is_series: - columns = columns.iloc[:, 0] + assert len(columns.columns) == 1 + columns = columns.iloc[:, 0] def applyer(df): - print(f"getting loc with index {index} and columns {columns}") return df.loc[index, columns] return DataFrameDefault.register(applyer)(self) diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index 7b0e07ca873..d63e278c79b 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -322,7 +322,13 @@ def __setitem__(self, key, item): # pragma: no cover """ raise NotImplementedError("Implemented by subclasses") - def _take_2d_labels(self, row_lookup, col_lookup): + def _take_2d_labels( + self, + row_lookup, + col_lookup, + row_multiindex_full_lookup, + col_multiindex_full_lookup, + ): """ Take 2D labels from the DataFrame. @@ -332,32 +338,46 @@ def _take_2d_labels(self, row_lookup, col_lookup): List of row labels to take. col_lookup : list-like List of column labels to take. + row_multiindex_full_lookup : bool + See _multiindex_possibly_contains_key.__doc__. + col_multiindex_full_lookup : bool + See _multiindex_possibly_contains_key.__doc__. Returns ------- modin.pandas.DataFrame DataFrame with taken labels. 
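take_2d_labels may now receive another query compiler as a lookup, which is
how a boolean mask travels; the default implementation materializes it,
squeezes the single column back into a Series, and lets pandas .loc align it.
The round trip in plain pandas (runnable sketch):

    import pandas

    df = pandas.DataFrame({"a": [1, 2, 3]})
    mask_frame = (df["a"] > 1).to_frame()  # the mask as a one-column frame
    assert len(mask_frame.columns) == 1
    # Recover the Series so .loc aligns on the index as expected.
    print(df.loc[mask_frame.iloc[:, 0], :])

The multiindex part of the fix: a full-length key tuple counts as a scalar
lookup in _take_2d_labels and gets wrapped in a list like any other scalar.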
""" - row_is_series = isinstance(row_lookup, Series) - col_is_series = isinstance(col_lookup, Series) - if is_scalar(row_lookup): + if is_scalar(row_lookup) or row_multiindex_full_lookup: row_lookup = [row_lookup] elif isinstance(row_lookup, (Series, DataFrame)): row_lookup = row_lookup._query_compiler - if is_scalar(col_lookup): + if is_scalar(col_lookup) or col_multiindex_full_lookup: col_lookup = [col_lookup] elif isinstance(col_lookup, (Series, DataFrame)): col_lookup = col_lookup._query_compiler - return self.qc.take_2d_labels( - row_lookup, col_lookup, row_is_series, col_is_series - ) + return self.qc.take_2d_labels(row_lookup, col_lookup) def _take_2d_positional( self, row_lookup: Union[slice, range, np.ndarray], col_lookup: Union[slice, range, np.ndarray], ): - """ """ + """ + Take 2D positional data from the DataFrame. + + Parameters + ---------- + row_lookup : slice, range, or np.ndarray + Row positions to take. + col_lookup : slice, range, or np.ndarray + Column positions to take. + + Returns + ------- + BaseQueryCompiler + Query compiler with given positions. + """ if isinstance(row_lookup, slice): ErrorMessage.catch_bugs_and_request_email( failure_condition=row_lookup != slice(None), @@ -383,27 +403,27 @@ def _get_pandas_object_from_qc_view( ndim: int, ): """ - Retrieve dataset according to `row_lookup` and `col_lookup`. + Convert the query compiler view to the appropriate pandas object. Parameters ---------- qc_view : BaseQueryCompiler - Query compiler to operate on. + Query compiler to convert. row_multiindex_full_lookup : bool See _multiindex_possibly_contains_key.__doc__. col_multiindex_full_lookup : bool See _multiindex_possibly_contains_key.__doc__. row_scalar : bool - Whether indexer for rows is scalar or not. + Whether indexer for rows is scalar. col_scalar : bool - Whether indexer for columns is scalar or not. + Whether indexer for columns is scalar. ndim : {0, 1, 2} Number of dimensions in dataset to be retrieved. Returns ------- modin.pandas.DataFrame or modin.pandas.Series - Located dataset. + The pandas object with the data from the query compiler view. Notes ----- @@ -412,7 +432,6 @@ def _get_pandas_object_from_qc_view( Ideally, this API should get rid of using slices as indexers and either use a common ``Indexer`` object or range and ``np.ndarray`` only. """ - if ndim == 2: return self.df.__constructor__(query_compiler=qc_view) if isinstance(self.df, Series) and not row_scalar: @@ -716,7 +735,9 @@ def __getitem__(self, key): return self._handle_boolean_masking(row_loc, col_loc) if isinstance(self.qc, ClientQueryCompiler): - qc_view = self._take_2d_labels(row_loc, col_loc) + qc_view = self._take_2d_labels( + row_loc, col_loc, row_multiindex_full_lookup, col_multiindex_full_lookup + ) else: qc_view = self._take_2d_positional(*self._compute_lookup(row_loc, col_loc)) result = self._get_pandas_object_from_qc_view( From 51fd254d6cfda880280ad6b079056cb4a38d91ba Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 8 Nov 2022 13:19:36 -0600 Subject: [PATCH 68/77] Fix IO astype bug. 
Signed-off-by: mvashishtha --- modin/core/io/file_dispatcher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modin/core/io/file_dispatcher.py b/modin/core/io/file_dispatcher.py index b0247d90cd7..efda84f31ec 100644 --- a/modin/core/io/file_dispatcher.py +++ b/modin/core/io/file_dispatcher.py @@ -170,7 +170,8 @@ def read(cls, *args, **kwargs): t: dtypes[t] for t in dtypes.index if isinstance(dtypes[t], kernel_lib.CategoricalDtype) - } + }, + kwargs.get("errors", "raise"), ) return query_compiler From ebe2719dfe1ac672d37b2f225acf2ffd32a5504e Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 8 Nov 2022 14:38:48 -0600 Subject: [PATCH 69/77] Make ClientIO use ClientQueryCompiler by default. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index be165e168fb..52361e37ebc 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -13,6 +13,7 @@ """The module holds the factory which performs I/O using pandas on a Client.""" +from .query_compiler import ClientQueryCompiler from modin.core.io.io import BaseIO import os import pandas @@ -23,6 +24,7 @@ class ClientIO(BaseIO): _server_conn = None _data_conn = None + query_compiler_cls = ClientQueryCompiler @classmethod def set_server_connection(cls, conn): From f2058015884b3f481f62c6f03ed29fc28f77b7d2 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 8 Nov 2022 15:04:07 -0600 Subject: [PATCH 70/77] Debug read_sql. Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 1 + modin/core/execution/client/query_compiler.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 52361e37ebc..4a00e11f6e6 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -110,6 +110,7 @@ def read_sql(cls, sql, con, **kwargs): self.query_compiler_cls Query compiler with data read in from SQL connection. """ + print(f'called client io read_sql with {sql=} and {con=} and {kwargs=}') if isinstance(con, str) and con.lower() == "auto" and cls._data_conn is None: raise ConnectionError( "Cannot connect with parameter 'auto' because connection is not set. Did you initialize it?" diff --git a/modin/core/execution/client/query_compiler.py b/modin/core/execution/client/query_compiler.py index 93776f57569..cd670ce8ded 100644 --- a/modin/core/execution/client/query_compiler.py +++ b/modin/core/execution/client/query_compiler.py @@ -40,6 +40,12 @@ class ClientQueryCompiler(BaseQueryCompiler): lazy_execution: bool = True def __init__(self, id: UUID): + # The service can return an exception instead of the ID of a new query + # compiler. + if isinstance(id, Exception): + raise id + if not isinstance(id, UUID): + raise TypeError(f"Expected UUID, got {type(id)}") self._id = id @classmethod From 92ad6dd62e266116ff075fa57def8bed0daad7ec Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Tue, 8 Nov 2022 16:32:19 -0600 Subject: [PATCH 71/77] Fix getitem_row_array. 
Signed-off-by: mvashishtha --- .../ray/implementations/pandas_on_ray/io/io.py | 2 +- modin/core/storage_formats/base/query_compiler.py | 12 +++--------- modin/core/storage_formats/pandas/query_compiler.py | 12 ++++-------- modin/pandas/base.py | 2 +- modin/pandas/groupby.py | 4 ++-- modin/pandas/series.py | 4 ++-- 6 files changed, 13 insertions(+), 23 deletions(-) diff --git a/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py b/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py index 329ddea8075..6922ef0406e 100644 --- a/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py +++ b/modin/core/execution/ray/implementations/pandas_on_ray/io/io.py @@ -91,7 +91,7 @@ def to_sql(cls, qc, **kwargs): # since the mapping operation is non-blocking, each partition will return an empty DF # so at the end, the blocking operation will be this empty DF to_pandas - empty_df = qc.getitem_row_array([0], numeric=True).to_pandas().head(0) + empty_df = qc.getitem_row_array([0]).to_pandas().head(0) empty_df.to_sql(**kwargs) # so each partition will append its respective DF kwargs["if_exists"] = "append" diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 4cc171e2ce9..08eed980632 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2153,7 +2153,7 @@ def get_column(df, key): return DataFrameDefault.register(get_column)(self, key=key) - def getitem_row_array(self, key: List[Hashable], numeric: bool): + def getitem_row_array(self, key: List[Hashable]): """ Get row data for target indices. @@ -2161,9 +2161,6 @@ def getitem_row_array(self, key: List[Hashable], numeric: bool): ---------- key : list-like Numeric indices of the rows to pick. - numeric : bool, default: False - Whether the key passed in represents the numeric row positions or - or the possibly non-numeric row labels. Returns ------- @@ -2172,10 +2169,7 @@ def getitem_row_array(self, key: List[Hashable], numeric: bool): """ def get_row(df, key): - if numeric: - return df.iloc[key] - else: - return df.loc[key] + return df.loc[key] return DataFrameDefault.register(get_row)(self, key=key) @@ -3191,7 +3185,7 @@ def mask(idx): return ( self.getitem_column_array(idx, numeric=True) if axis - else self.getitem_row_array(idx, numeric=True) + else self.getitem_row_array(idx) ) if 0 <= loc < len(self.get_axis(axis)): diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py index d80323184bb..08cc058ca96 100644 --- a/modin/core/storage_formats/pandas/query_compiler.py +++ b/modin/core/storage_formats/pandas/query_compiler.py @@ -2221,7 +2221,7 @@ def getitem_array(self, key): # requested. 
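getitem_row_array drops the numeric flag and goes back to accepting row
positions only; label-based lookups belong to take_2d_labels now. Callers that
start from labels translate first, as Series._getitem does (sketch matching
the diff below):

    # Convert labels to positions, then fetch by position.
    row_positions = self.index.get_indexer_for(key)
    result = self._query_compiler.getitem_row_array(row_positions)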
key = pandas.RangeIndex(len(self.index))[key] if len(key): - return self.getitem_row_array(key, numeric=True) + return self.getitem_row_array(key) else: return self.from_pandas( pandas.DataFrame(columns=self.columns), type(self._modin_frame) @@ -2247,13 +2247,9 @@ def getitem_column_array(self, key, numeric=False): ) return self.__constructor__(new_modin_frame) - def getitem_row_array(self, key: List[Hashable], numeric: bool): - if numeric: - kwargs = {"row_positions": key} - else: - kwargs = {"row_labels": key} + def getitem_row_array(self, key: List[Hashable]): return self.__constructor__( - self._modin_frame.take_2d_labels_or_positional(**kwargs) + self._modin_frame.take_2d_labels_or_positional(row_labels=key) ) def setitem(self, axis, key, value): @@ -3302,7 +3298,7 @@ def sort_columns_by_row_values(self, rows, ascending=True, **kwargs): rows = [rows] ErrorMessage.default_to_pandas("sort_values") broadcast_value_list = [ - self.getitem_row_array([row], numeric=True).to_pandas() for row in rows + self.getitem_row_array([row]).to_pandas() for row in rows ] index_builder = list(zip(broadcast_value_list, rows)) broadcast_values = pandas.concat( diff --git a/modin/pandas/base.py b/modin/pandas/base.py index e021970558a..8672844acc9 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -2500,7 +2500,7 @@ def _sample( return self.__constructor__(query_compiler=query_compiler) else: query_compiler = self._query_compiler.getitem_row_array( - samples, numeric=True + samples ) return self.__constructor__(query_compiler=query_compiler) diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index fed97028282..dcd990f13b2 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -918,7 +918,7 @@ def _iter(self): k, DataFrame( query_compiler=self._query_compiler.getitem_row_array( - indices[k], numeric=True + indices[k] ) ), ) @@ -1228,7 +1228,7 @@ def _iter(self): k, Series( query_compiler=self._query_compiler.getitem_row_array( - indices[k], numeric=True + indices[k] ) ), ) diff --git a/modin/pandas/series.py b/modin/pandas/series.py index ea83ab9f6d2..0e8ff29a920 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -2443,7 +2443,7 @@ def _getitem(self, key): if is_bool_indexer(key): return self.__constructor__( query_compiler=self._query_compiler.getitem_row_array( - pandas.RangeIndex(len(self.index))[key], numeric=True + pandas.RangeIndex(len(self.index))[key] ) ) # TODO: More efficiently handle `tuple` case for `Series.__getitem__` @@ -2465,7 +2465,7 @@ def _getitem(self, key): row_positions = self.index.get_indexer_for(key) if is_indexer else key if not all(is_integer(x) for x in row_positions): raise KeyError(key[0] if reduce_dimension else key) - result = self._query_compiler.getitem_row_array(row_positions, numeric=True) + result = self._query_compiler.getitem_row_array(row_positions) if reduce_dimension: return self._reduce_dimension(result) From 1d7e4942728777a8c2c412629706099c2200bd9c Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 9 Nov 2022 10:14:24 -0600 Subject: [PATCH 72/77] Fix black and flake8, and add a comment. 
Signed-off-by: mvashishtha --- modin/core/execution/client/io.py | 1 - modin/pandas/base.py | 4 +--- modin/pandas/indexing.py | 6 ++++++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/modin/core/execution/client/io.py b/modin/core/execution/client/io.py index 4a00e11f6e6..52361e37ebc 100644 --- a/modin/core/execution/client/io.py +++ b/modin/core/execution/client/io.py @@ -110,7 +110,6 @@ def read_sql(cls, sql, con, **kwargs): self.query_compiler_cls Query compiler with data read in from SQL connection. """ - print(f'called client io read_sql with {sql=} and {con=} and {kwargs=}') if isinstance(con, str) and con.lower() == "auto" and cls._data_conn is None: raise ConnectionError( "Cannot connect with parameter 'auto' because connection is not set. Did you initialize it?" diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 8672844acc9..de4956206f3 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -2499,9 +2499,7 @@ def _sample( query_compiler = self._query_compiler.getitem_column_array(samples) return self.__constructor__(query_compiler=query_compiler) else: - query_compiler = self._query_compiler.getitem_row_array( - samples - ) + query_compiler = self._query_compiler.getitem_row_array(samples) return self.__constructor__(query_compiler=query_compiler) def _sem( diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py index d63e278c79b..369d1277c1f 100644 --- a/modin/pandas/indexing.py +++ b/modin/pandas/indexing.py @@ -735,6 +735,12 @@ def __getitem__(self, key): return self._handle_boolean_masking(row_loc, col_loc) if isinstance(self.qc, ClientQueryCompiler): + # TODO(https://github.com/modin-project/modin/issues/5202): + # currently only the client query compiler implements + # take_2d_labels without defaulting to pandas. Eventually we want + # the query compilers to use take_2d_labels to do loc indexing + # instead of always converting row and column labels to positions + # here and passing positions to the query compilers. qc_view = self._take_2d_labels( row_loc, col_loc, row_multiindex_full_lookup, col_multiindex_full_lookup ) From 8be834aa68b887515aae99f183aa68d2cd6b1019 Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 9 Nov 2022 10:28:56 -0600 Subject: [PATCH 73/77] Fix getitem_row_array. Signed-off-by: mvashishtha --- modin/core/storage_formats/base/query_compiler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modin/core/storage_formats/base/query_compiler.py b/modin/core/storage_formats/base/query_compiler.py index 08eed980632..b05e1229535 100644 --- a/modin/core/storage_formats/base/query_compiler.py +++ b/modin/core/storage_formats/base/query_compiler.py @@ -2153,7 +2153,7 @@ def get_column(df, key): return DataFrameDefault.register(get_column)(self, key=key) - def getitem_row_array(self, key: List[Hashable]): + def getitem_row_array(self, key): """ Get row data for target indices. @@ -2169,7 +2169,7 @@ def getitem_row_array(self, key: List[Hashable]): """ def get_row(df, key): - return df.loc[key] + return df.iloc[key] return DataFrameDefault.register(get_row)(self, key=key) From 61a7aadac6d58e1a0f9b33ed552b59b7d69841fc Mon Sep 17 00:00:00 2001 From: mvashishtha Date: Wed, 9 Nov 2022 10:47:47 -0600 Subject: [PATCH 74/77] Fix getitem_row_array again. 

Signed-off-by: mvashishtha
---
 modin/core/storage_formats/pandas/query_compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
index 08cc058ca96..2cfaad5513d 100644
--- a/modin/core/storage_formats/pandas/query_compiler.py
+++ b/modin/core/storage_formats/pandas/query_compiler.py
@@ -2247,9 +2247,9 @@ def getitem_column_array(self, key, numeric=False):
             )
         return self.__constructor__(new_modin_frame)
 
-    def getitem_row_array(self, key: List[Hashable]):
+    def getitem_row_array(self, key):
         return self.__constructor__(
-            self._modin_frame.take_2d_labels_or_positional(row_labels=key)
+            self._modin_frame.take_2d_labels_or_positional(row_positions=key)
         )
 
     def setitem(self, axis, key, value):

From 07ad5c5293dd8326d2e0f10844db6cbfa07ee929 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Wed, 9 Nov 2022 10:59:20 -0600
Subject: [PATCH 75/77] Fix bugs that showed up in CI.

Signed-off-by: mvashishtha
---
 modin/conftest.py        |  8 ++++++--
 modin/pandas/indexing.py | 12 ++++++++++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/modin/conftest.py b/modin/conftest.py
index 13571d45487..8115a94fd54 100644
--- a/modin/conftest.py
+++ b/modin/conftest.py
@@ -52,7 +52,6 @@ def _saving_make_api_url(token, _make_api_url=modin.utils._make_api_url):
     PandasQueryCompiler,
     BaseQueryCompiler,
 )
-from modin.core.execution.client.io import ClientIO  # noqa: E402
 from modin.core.execution.client.container import (  # noqa: E402
     ForwardingQueryCompilerContainer,
 )
@@ -282,14 +281,19 @@ def set_base_on_python_execution():
 class ClientFactory(factories.BaseFactory):
     @classmethod
     def prepare(cls):
+        # Can't always import ClientIO, because it uses NoDefault, which
+        # is not available on older pandas.
+        from modin.core.execution.client.io import ClientIO
+
         cls.io_cls = ClientIO
 
 
 def set_client_execution():
     # Can't always import ClientQueryCompiler, because it uses NoDefault, which
-    # is not available on older pandas.
+    # is not available on older pandas. ClientIO also uses ClientQueryCompiler.
     from modin.core.execution.client.query_compiler import ClientQueryCompiler
+    from modin.core.execution.client.io import ClientIO
 
     class TestClientQueryCompiler(ClientQueryCompiler):
         @classmethod
diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py
index 369d1277c1f..f5b26995594 100644
--- a/modin/pandas/indexing.py
+++ b/modin/pandas/indexing.py
@@ -37,7 +37,7 @@
 from pandas.core.indexing import IndexingError
 from typing import Union
 
-from modin.core.execution.client.query_compiler import ClientQueryCompiler
+from modin._compat import PandasCompatVersion
 from modin.error_message import ErrorMessage
 from modin.logging import ClassLogger
 
@@ -734,13 +734,21 @@ def __getitem__(self, key):
         if isinstance(row_loc, Series) and is_boolean_array(row_loc):
             return self._handle_boolean_masking(row_loc, col_loc)
 
-        if isinstance(self.qc, ClientQueryCompiler):
+        is_client_qc = False
+        if PandasCompatVersion.CURRENT == PandasCompatVersion.LATEST:
+            # Can't always import ClientQueryCompiler, because it uses NoDefault, which
+            # is not available on older pandas.
+
+            from modin.core.execution.client.query_compiler import ClientQueryCompiler
+
+            is_client_qc = isinstance(self.qc, ClientQueryCompiler)
             # TODO(https://github.com/modin-project/modin/issues/5202):
             # currently only the client query compiler implements
             # take_2d_labels without defaulting to pandas. Eventually we want
             # the query compilers to use take_2d_labels to do loc indexing
             # instead of always converting row and column labels to positions
             # here and passing positions to the query compilers.
+        if is_client_qc:
             qc_view = self._take_2d_labels(
                 row_loc, col_loc, row_multiindex_full_lookup, col_multiindex_full_lookup
             )

From 38ef127408149d259e1a8420e44d8a88403f1672 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Wed, 9 Nov 2022 12:11:25 -0600
Subject: [PATCH 76/77] Fix a multiindex Client bug, and fix an hdk astype bug.

Signed-off-by: mvashishtha
---
 .../core/storage_formats/hdk/query_compiler.py | 2 +-
 modin/pandas/indexing.py                       | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/modin/experimental/core/storage_formats/hdk/query_compiler.py b/modin/experimental/core/storage_formats/hdk/query_compiler.py
index 02bc5187052..5da3a54027e 100644
--- a/modin/experimental/core/storage_formats/hdk/query_compiler.py
+++ b/modin/experimental/core/storage_formats/hdk/query_compiler.py
@@ -693,7 +693,7 @@ def reset_index(self, **kwargs):
 
     def astype(self, col_dtypes, errors: str):
         return self.__constructor__(
-            self._modin_frame.astype(col_dtypes, errors), self._shape_hint
+            self._modin_frame.astype(col_dtypes), self._shape_hint
         )
 
     def setitem(self, axis, key, value):
diff --git a/modin/pandas/indexing.py b/modin/pandas/indexing.py
index f5b26995594..ae4571831e7 100644
--- a/modin/pandas/indexing.py
+++ b/modin/pandas/indexing.py
@@ -678,7 +678,14 @@ def _multiindex_possibly_contains_key(self, axis, key):
             return False
 
         multiindex = self.df.index if axis == 0 else self.df.columns
-        return isinstance(key, tuple) and len(key) == len(multiindex.levels)
+        # If not every element of the key is a scalar, e.g. the key is
+        # (slice(None), 0), then the key isn't a full key-lookup, and the
+        # entire key behaves more like a slice than like a scalar.
+        return (
+            isinstance(key, tuple)
+            and len(key) == len(multiindex.levels)
+            and all(is_scalar(k) for k in key)
+        )
 
 
 class _LocIndexer(_LocationIndexerBase):

From ead877ef8176c260ddb59e2fb305d101d92175f3 Mon Sep 17 00:00:00 2001
From: mvashishtha
Date: Thu, 10 Nov 2022 16:09:35 -0600
Subject: [PATCH 77/77] Respond to comments.

Signed-off-by: mvashishtha
---
 modin/pandas/__init__.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index c4f8b4dccbf..db976ca45ad 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -116,6 +116,11 @@ def _update_engine(publisher: Parameter):
     # Set this so that Pandas doesn't try to multithread by itself
     os.environ["OMP_NUM_THREADS"] = "1"
 
+    if Engine.get() == "Client":
+        if publisher.get_value_source() == ValueSource.DEFAULT:
+            StorageFormat.put("")
+        return
+
     sfmt = StorageFormat.get()
 
     if sfmt == "Hdk":
@@ -130,11 +135,6 @@ def _update_engine(publisher: Parameter):
     else:
         is_hdk = False
 
-    if Engine.get() == "Client":
-        if publisher.get_value_source() == ValueSource.DEFAULT:
-            StorageFormat.put("")
-        return
-
     if is_hdk and publisher.get_value_source() == ValueSource.DEFAULT:
         publisher.put("Native")
         IsExperimental.put(True)
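
The label-versus-position distinction that PATCH 73 and PATCH 74 settle is easy to reproduce in plain pandas once a frame's index is not the default RangeIndex. A minimal sketch (plain pandas with illustrative data, not Modin internals):

    import pandas as pd

    df = pd.DataFrame({"a": [10, 20, 30]}, index=[2, 0, 1])

    # Label-based lookup: the rows whose *labels* are 0 and 1.
    df.loc[[0, 1]]   # values 20 and 30

    # Position-based lookup: the first two *physical* rows.
    df.iloc[[0, 1]]  # values 10 and 20

Every call site converted earlier in the series passes positions (for example `pandas.RangeIndex(len(self.index))[key]` and `self.index.get_indexer_for(key)`), so the base implementation has to use `df.iloc` and the pandas storage format has to forward the key as `row_positions=key`; the `df.loc`/`row_labels=key` versions only agreed with that on a default index.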
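The deferred imports in PATCH 75 all follow one pattern: anything that transitively needs pandas' `NoDefault` is imported inside the function that uses it, behind a compat-version check, instead of at module scope. A generic sketch of that guard, reusing the `PandasCompatVersion` and `ClientQueryCompiler` names from the patch (the helper function itself is hypothetical, for illustration only):

    from modin._compat import PandasCompatVersion

    def _qc_is_client(qc):
        # On older pandas the client module cannot even be imported,
        # because it references NoDefault at import time; there, nothing
        # can be a client query compiler.
        if PandasCompatVersion.CURRENT != PandasCompatVersion.LATEST:
            return False
        from modin.core.execution.client.query_compiler import ClientQueryCompiler

        return isinstance(qc, ClientQueryCompiler)

After the first call on a new-enough pandas, the import is served from `sys.modules`, so the guard costs almost nothing, and older-pandas CI never evaluates the client module at all.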
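The `all(is_scalar(k) for k in key)` guard added in PATCH 76 separates a full key-lookup from a slice-like key of the same length, which the old length-only check conflated. A short sketch of the difference in plain pandas (illustrative data):

    import pandas as pd
    from pandas.api.types import is_scalar

    index = pd.MultiIndex.from_product([["a", "b"], [0, 1]])
    df = pd.DataFrame({"x": range(4)}, index=index)

    all(is_scalar(k) for k in ("a", 0))          # True: names exactly one row
    all(is_scalar(k) for k in (slice(None), 0))  # False: behaves like a slice

    df.loc[("a", 0), :]          # a single row, reduced to a Series
    df.loc[(slice(None), 0), :]  # a cross-section: rows ("a", 0) and ("b", 0)

Both keys are tuples whose length equals `len(multiindex.levels)`, so the pre-PATCH-76 check treated them identically.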