Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT-#5423: Add a NumPy API to Modin #5422

Merged
merged 43 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
6f2a6d7
FEAT-#5423: Begin implementing NumPy API Layer
RehanSD Dec 12, 2022
7a4fa99
Start
devin-petersohn Nov 12, 2022
2a08cf0
Next
devin-petersohn Nov 12, 2022
4b68f50
Added absolute, abs, add, all, subtract to modin.numpy
billiam-wang Nov 12, 2022
0b915b4
Add changes
devin-petersohn Nov 22, 2022
9c7a66b
Add shape + reshape
RehanSD Nov 12, 2022
1c6d708
Added additional math functions for numpy
billiam-wang Nov 12, 2022
30171d2
Add list constructor
RehanSD Nov 15, 2022
ab0ecdb
lint
RehanSD Dec 12, 2022
25510bc
Add dimension handling
RehanSD Jan 12, 2023
4ef400f
Merge remote-tracking branch 'upstream/master' into numpy/init
RehanSD Jan 12, 2023
43e3bb5
Fix partial broadcasting issues
RehanSD Jan 12, 2023
4301b9d
Add testing
RehanSD Jan 12, 2023
5ceca02
Add tests to CI
RehanSD Jan 12, 2023
ff9045c
Add __array_ufunc__, __array_function__, and clean up implementation …
RehanSD Feb 2, 2023
4b174de
Add where
RehanSD Feb 3, 2023
bd2fe98
Fix df conversion retaining index issue
RehanSD Feb 3, 2023
2a87a39
Add max and min and other numpy methods to namespace
RehanSD Feb 4, 2023
d513b03
Fix dtype handling
RehanSD Feb 5, 2023
d8d0d10
Fix keepdims
RehanSD Feb 5, 2023
7404cb3
Fix out and add
RehanSD Feb 5, 2023
0d3be93
Add support for where kwarg
RehanSD Feb 5, 2023
508ecb3
Fix lint
RehanSD Feb 5, 2023
90aaed7
Get tests to run
RehanSD Feb 5, 2023
88aa6b5
Add testing for array ufunc
RehanSD Feb 5, 2023
e0fb8ce
Add testing for array function
RehanSD Feb 5, 2023
db3db83
Add testing for where
RehanSD Feb 5, 2023
f176ac8
Add tests for everything but prod, mean, min, max, and sum
RehanSD Feb 5, 2023
c0a1ecc
Add tests
RehanSD Feb 6, 2023
e706796
Bypass overflow dtype issues
RehanSD Feb 6, 2023
23fe0c4
Cast to output dtype
RehanSD Feb 6, 2023
22b01e0
Fix lint
RehanSD Feb 6, 2023
52f0928
Add defensive dimension check
RehanSD Feb 6, 2023
cfaa066
Fix auto-cast issue
RehanSD Feb 6, 2023
f9be32d
Fix CI bug
RehanSD Feb 6, 2023
48967d8
Address review comments
RehanSD Feb 7, 2023
5b1da61
Fix type computation and add check for where
RehanSD Feb 7, 2023
74ed3a2
Fix auto broadcast of out variable
RehanSD Feb 7, 2023
3244b79
Address review comments (break up testing into multiple files, and fi…
RehanSD Feb 7, 2023
a3d57fe
Fix naming
RehanSD Feb 7, 2023
18588b8
Add to_numpy
RehanSD Feb 8, 2023
19acf64
Add warning about numpy api and fix lint
RehanSD Feb 8, 2023
1bf6c00
Fix lint
RehanSD Feb 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions modin/numpy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .arr import *
Fixed Show fixed Hide fixed
from .math import *
github-advanced-security[bot] marked this conversation as resolved.
Fixed
Show resolved Hide resolved
from .constants import *
Fixed Show fixed Hide fixed
214 changes: 214 additions & 0 deletions modin/numpy/arr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
import numpy


class array(object):
def __init__(
self,
object=None,
dtype=None,
*,
copy=True,
order="K",
subok=False,
ndmin=0,
like=None,
query_compiler=None,
):
if query_compiler is not None:
self._query_compiler = query_compiler
elif isinstance(object, list):
import modin.pandas as pd
RehanSD marked this conversation as resolved.
Show resolved Hide resolved

qc = pd.DataFrame(object)._query_compiler
self._query_compiler = qc
else:
arr = numpy.array(
Fixed Show fixed Hide fixed
object,
dtype=dtype,
copy=copy,
order=order,
subok=subok,
ndmin=ndmin,
like=like,
)
pass
Fixed Show fixed Hide fixed

def _absolute(
    self,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding the element-wise absolute value.

    Calls ``abs()`` on this array's query compiler and wraps the result.
    The keyword parameters are accepted but not used by this implementation.
    """
    return array(query_compiler=self._query_compiler.abs())

def _add(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self + x2``.

    Calls ``add`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.add(rhs))

def _divide(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding the true division ``self / x2``.

    Calls ``truediv`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.truediv(rhs))

def _float_power(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self`` raised element-wise to the power `x2`.

    Calls ``pow`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    # The previous body called ``add`` here (copy-paste slip), so the
    # function returned a sum rather than a power.
    # NOTE(review): numpy.float_power also promotes to at least float64;
    # that promotion is not applied here -- confirm whether it is needed.
    result = self._query_compiler.pow(x2._query_compiler)
    return array(query_compiler=result)

def _floor_divide(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding the floor division ``self // x2``.

    Calls ``floordiv`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.floordiv(rhs))

def _power(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self ** x2``.

    Calls ``pow`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.pow(rhs))

def _prod(self, axis=None, out=None, keepdims=None, where=None):
    """
    Return a new array holding the product of the elements.

    With ``axis=None`` the query compiler is reduced along axis 0 and then
    along axis 1; otherwise it is reduced along `axis` only. `out`,
    `keepdims` and `where` are accepted but not used by this implementation.
    """
    # The leftover debugging ``print`` was removed: library code should not
    # write to stdout on every call.
    qc = self._query_compiler
    if axis is None:
        result = qc.prod(axis=0).prod(axis=1)
    else:
        result = qc.prod(axis=axis)
    return array(query_compiler=result)

def _multiply(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self * x2``.

    Calls ``mul`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.mul(rhs))

def _remainder(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self % x2``.

    Calls ``mod`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.mod(rhs))

def _subtract(
    self,
    x2,
    out=None,
    where=True,
    casting="same_kind",
    order="K",
    dtype=None,
    subok=True,
):
    """
    Return a new array holding ``self - x2``.

    Calls ``sub`` on this array's query compiler with `x2`'s query compiler.
    The remaining keyword parameters are accepted but not used here.
    """
    lhs = self._query_compiler
    rhs = x2._query_compiler
    return array(query_compiler=lhs.sub(rhs))

def _sum(
    self, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=None
):
    """
    Sum the elements along `axis`.

    Parameters
    ----------
    axis : optional
        Passed to the query compiler's ``sum``.
    dtype : optional
        When given, the result is cast with ``astype(dtype)``.
    out : array, optional
        When given, its query compiler is replaced by the result.
    keepdims, initial, where
        Accepted but not used by this implementation.

    Returns
    -------
    array
        `out` when it was supplied (as NumPy does), otherwise a new array.
    """
    result = self._query_compiler.sum(axis=axis)
    if dtype is not None:
        result = result.astype(dtype)
    if out is None:
        return array(query_compiler=result)
    out._query_compiler = result
    # Return the output object instead of None so every path yields an array.
    return out

def _get_shape(self):
    """Return ``(row count, column count)`` taken from the query compiler's index and columns."""
    qc = self._query_compiler
    n_rows = len(qc.index)
    n_cols = len(qc.columns)
    return (n_rows, n_cols)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be an assert self._ndim == 2 here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also needs a case for _ndim == 0:

>>> np.array(1).ndim  # passing a scalar into the constructor
0

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, we're only focusing on 1D and 2D cases @noloerino. @vnlitvinov why would we need this assert?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To immediately know when something is not behaving the way we expected. That scalar is an awesome example - a user can feed in a scalar and receive some cryptic error message instead of a reasonably clear assertion (which also pinpoints where the error occurred).

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me - I'll go ahead and add it!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you put this assert in? I don't see it in this version of the code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the assert in the init. Does that work?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, that's fine


def _set_shape(self, new_shape):
    """
    Reshape the array in place.

    Only flattening to a single integer length is implemented: the rows of
    the query compiler are concatenated along axis 1 into one row.

    Parameters
    ----------
    new_shape : int or tuple of int
        Requested shape; its total size must equal the current size.

    Raises
    ------
    TypeError
        If `new_shape` is neither an int nor a tuple of ints.
    ValueError
        If the requested size differs from the current size.
    NotImplementedError
        If `new_shape` is a tuple (2D to 2D reshape is unsupported).
    """
    from math import prod

    if not isinstance(new_shape, (int, tuple)):
        raise TypeError(
            f"expected a sequence of integers or a single integer, got '{new_shape}'"
        )
    if isinstance(new_shape, tuple):
        for dim in new_shape:
            if not isinstance(dim, int):
                raise TypeError(
                    f"'{type(dim)}' object cannot be interpreted as an integer"
                )

    current_size = prod(self._get_shape())
    new_dimensions = new_shape if isinstance(new_shape, int) else prod(new_shape)
    if new_dimensions != current_size:
        # The old message passed the bound method itself to ``prod`` and so
        # raised TypeError instead of this ValueError.
        requested = new_shape if isinstance(new_shape, tuple) else (new_shape,)
        raise ValueError(
            f"cannot reshape array of size {current_size} into {requested}"
        )
    if not isinstance(new_shape, int):
        raise NotImplementedError(
            "Reshaping from a 2D object to a 2D object is not currently supported!"
        )

    qc = self._query_compiler
    # Every row after the first becomes a block appended to the first row.
    remaining_rows = [
        qc.getitem_row_array([index_val]).reset_index(drop=True)
        for index_val in qc.index[1:]
    ]
    self._query_compiler = (
        qc.getitem_row_array([qc.index[0]])
        .reset_index(drop=True)
        .concat(1, remaining_rows, ignore_index=True)
    )

# ``shape`` reads through _get_shape and assigns through _set_shape.
shape = property(fget=_get_shape, fset=_set_shape)

def __repr__(self):
    """Return the ``repr`` of the query compiler's ``to_numpy()`` result."""
    materialized = self._query_compiler.to_numpy()
    return repr(materialized)
17 changes: 17 additions & 0 deletions modin/numpy/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from numpy import (
Inf,
Infinity,
NAN,
NINF,
NZERO,
NaN,
PINF,
PZERO,
e,
euler_gamma,
inf,
infty,
nan,
newaxis,
pi,
)
RehanSD marked this conversation as resolved.
Show resolved Hide resolved
Loading