Skip to content

Commit

Permalink
Set Ray dependency to 0.5.2 (modin-project#78)
Browse files Browse the repository at this point in the history
* Set ray dependency to 0.5.2

* Let travis install from pip

* Update install-dependencies.sh

* Update install-dependencies.sh

* Update install-dependencies.sh

* Update install-dependencies.sh

* Update install-dependencies.sh

* Update install-dependencies.sh

* Ran yapf
  • Loading branch information
simon-mo authored and devin-petersohn committed Sep 17, 2018
1 parent ec7a429 commit cd965ae
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 109 deletions.
60 changes: 7 additions & 53 deletions .travis/install-dependencies.sh
Original file line number Diff line number Diff line change
@@ -1,62 +1,16 @@
#!/usr/bin/env bash

ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)

echo "PYTHON is $PYTHON"

platform="unknown"
unamestr="$(uname)"
if [[ "$unamestr" == "Linux" ]]; then
echo "Platform is linux."
platform="linux"
elif [[ "$unamestr" == "Darwin" ]]; then
echo "Platform is macosx."
platform="macosx"
else
echo "Unrecognized platform."
exit 1
fi

if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
# Install miniconda.
if [[ "$PYTHON" == "2.7" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27mu-manylinux1_x86_64.whl
elif [[ "$PYTHON" == "3.6" ]] && [[ "$platform" == "linux" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-manylinux1_x86_64.whl
elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27m-macosx_10_6_intel.whl
elif [[ "$PYTHON" == "3.6" ]] && [[ "$platform" == "macosx" ]]; then
# Install miniconda.
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
pip install -q pandas==0.22 feather-format lxml openpyxl xlrd
# Install ray from its latest wheels
pip install -q -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-macosx_10_6_intel.whl
elif [[ "$LINT" == "1" ]]; then
# Install miniconda.

elif [[ "$PYTHON" == "3.6" ]] || [[ "$LINT" == "1" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh -nv
bash miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
# Install Python linting tools.
pip install -q flake8 flake8-comprehensions yapf
else
echo "Unrecognized environment."
exit 1
conda install -y python==3.6.5
fi

pip install -r requirements.txt
pip install -q pytest flake8 flake8-comprehensions yapf feather-format lxml openpyxl xlrd numpy
4 changes: 2 additions & 2 deletions modin/pandas/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ def name_incrementer(i):
# Put all of the DataFrames into Ray format
# TODO just partition the DataFrames instead of building a new Ray DF.
objs = [
DataFrame(obj)
if isinstance(obj, (pandas.DataFrame, pandas.Series)) else obj
DataFrame(obj) if isinstance(obj, (pandas.DataFrame,
pandas.Series)) else obj
for obj in objs
]

Expand Down
29 changes: 16 additions & 13 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,9 @@ def col_dots_builder(full_front, full_back):
col_dots = pandas.Series(["..." for _ in range(len(full_front))])
col_dots.index = index_of_head
col_dots.name = "..."
return pandas.concat(
[full_front, col_dots, full_back], axis=1, copy=False)
return pandas.concat([full_front, col_dots, full_back],
axis=1,
copy=False)

# If we don't exceed the maximum number of values on either dimension
if len(self.index) <= 60 and len(self.columns) <= 20:
Expand Down Expand Up @@ -524,8 +525,8 @@ def _arithmetic_helper(self, remote_func, axis, level=None):

oid_series = ray.get(
_map_partitions(
remote_func, self._col_partitions
if axis == 0 else self._row_partitions))
remote_func,
self._col_partitions if axis == 0 else self._row_partitions))

if axis == 0:
# We use the index to get the internal index.
Expand All @@ -538,8 +539,9 @@ def _arithmetic_helper(self, remote_func, axis, level=None):
df.index = \
this_partition[this_partition.isin(df.index)].index

result_series = pandas.concat(
[obj[0] for obj in oid_series], axis=0, copy=False)
result_series = pandas.concat([obj[0] for obj in oid_series],
axis=0,
copy=False)
else:
result_series = pandas.concat(oid_series, axis=0, copy=False)
result_series.index = self.index
Expand Down Expand Up @@ -1507,9 +1509,9 @@ def astype(self, dtype, copy=True, errors='raise', **kwargs):
raise KeyError("Only a column name can be used for the key in"
"a dtype mappings argument.")
columns = list(dtype.keys())
col_idx = [(self.columns.get_loc(columns[i]), columns[i]) if
columns[i] in self.columns else (columns[i], columns[i])
for i in range(len(columns))]
col_idx = [(self.columns.get_loc(columns[i]),
columns[i]) if columns[i] in self.columns else
(columns[i], columns[i]) for i in range(len(columns))]
new_dict = {}
for idx, key in col_idx:
new_dict[idx] = dtype[key]
Expand Down Expand Up @@ -3667,8 +3669,9 @@ def check_bad_dtype(t):
if next((True for t in self.dtypes if check_bad_dtype(t)),
False):
dtype = next(t for t in self.dtypes if check_bad_dtype(t))
raise ValueError("Cannot compare type '{}' with type '{}'"
.format(type(dtype), float))
raise ValueError(
"Cannot compare type '{}' with type '{}'".format(
type(dtype), float))
else:
# Normally pandas returns this near the end of the quantile, but we
# can't afford the overhead of running the entire operation before
Expand Down Expand Up @@ -5896,8 +5899,8 @@ def __neg__(self):
for t in self.dtypes:
if not (is_bool_dtype(t) or is_numeric_dtype(t)
or is_timedelta64_dtype(t)):
raise TypeError("Unary negative expects numeric dtype, not {}"
.format(t))
raise TypeError(
"Unary negative expects numeric dtype, not {}".format(t))

new_block_partitions = np.array([
_map_partitions(lambda df: df.__neg__(), block)
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,8 @@ def _apply_df_function(self, f, concat_axis=None):
new_df._block_partitions = np.array([
_reindex_helper._submit(
args=tuple([
new_df.columns, self._columns, 0,
new_df._block_partitions.shape[1]
new_df.columns, self._columns, 0, new_df.
_block_partitions.shape[1]
] + block.tolist()),
num_return_vals=new_df._block_partitions.shape[1])
for block in new_df._block_partitions
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/index_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@ def groupby(self,
return assignments_df

def partition_series(self, partition):
return self[self._coord_df['partition'] == partition,
'index_within_partition']
return self[self._coord_df['partition'] ==
partition, 'index_within_partition']

def __len__(self):
return int(sum(self._lengths))
Expand Down
13 changes: 7 additions & 6 deletions modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,9 @@ def _write_items(self, row_lookup, col_lookup, item):
row_idx = row_data['index_within_partition']
col_idx = col_data['index_within_partition']

item_to_write = item[row_item_index:row_item_index + row_len,
col_item_index:col_item_index + col_len]
item_to_write = item[row_item_index:row_item_index +
row_len, col_item_index:col_item_index +
col_len]

result_oid = writer.remote(block_oid, row_idx, col_idx,
item_to_write)
Expand Down Expand Up @@ -396,15 +397,15 @@ def _enlarge_axis(self, locator, axis):
]])
nan_blks = nan_blks.T if not row_based_bool else nan_blks

self.block_oids = np.concatenate(
[self.block_oids, nan_blks], axis=0 if row_based_bool else 1)
self.block_oids = np.concatenate([self.block_oids, nan_blks],
axis=0 if row_based_bool else 1)

# 3. Prepare metadata to return
nan_coord_df = pandas.DataFrame(data=[
{
'': name,
'partition': blk_part_n_row
if row_based_bool else blk_part_n_col,
'partition':
blk_part_n_row if row_based_bool else blk_part_n_col,
'index_within_partition': i
} for name, i in zip(nan_labels, np.arange(num_nan_labels))
]).set_index('')
Expand Down
20 changes: 10 additions & 10 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1870,13 +1870,12 @@ def test_fillna_dataframe():
ray_df = pd.DataFrame(frame_data, index=list('VWXYZ'))

# df2 may have different index and columns
df2 = pandas.DataFrame(
{
'a': [np.nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar'] * 5
},
index=list('VWXuZ'))
df2 = pandas.DataFrame({
'a': [np.nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar'] * 5
},
index=list('VWXuZ'))

# only those columns and indices which are shared get filled
assert ray_df_equals_pandas(ray_df.fillna(df2), df.fillna(df2))
Expand Down Expand Up @@ -2567,7 +2566,8 @@ def test_rename_sanity():
}).index)

tm.assert_index_equal(
ray_df.rename(index=str.upper).index, df.rename(index=str.upper).index)
ray_df.rename(index=str.upper).index,
df.rename(index=str.upper).index)

# have to pass something
pytest.raises(TypeError, ray_df.rename)
Expand Down Expand Up @@ -3145,8 +3145,8 @@ def test_update():
df = pd.DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3],
[1.5, np.nan, 3]])

other = pd.DataFrame(
[[3.6, 2., np.nan], [np.nan, np.nan, 7]], index=[1, 3])
other = pd.DataFrame([[3.6, 2., np.nan], [np.nan, np.nan, 7]],
index=[1, 3])

df.update(other)

Expand Down
20 changes: 9 additions & 11 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,8 @@ def _repartition_coord_df(old_coord_df, npartition):
passed in.
"""
length = len(old_coord_df)
chunksize = (len(old_coord_df) // npartition
if len(old_coord_df) % npartition == 0 else
len(old_coord_df) // npartition + 1)
chunksize = (len(old_coord_df) // npartition if len(old_coord_df) %
npartition == 0 else len(old_coord_df) // npartition + 1)

# generate array([0, 0, 0, 1, 1, 1, 2])
partitions = np.repeat(np.arange(npartition), chunksize)[:length]
Expand All @@ -314,12 +313,11 @@ def _repartition_coord_df(old_coord_df, npartition):
final_n_partition = np.max(partitions)
idx_in_part = np.tile(np.arange(chunksize), final_n_partition + 1)[:length]

final_df = pandas.DataFrame(
{
'partition': partitions,
'index_within_partition': idx_in_part
},
index=old_coord_df.index)
final_df = pandas.DataFrame({
'partition': partitions,
'index_within_partition': idx_in_part
},
index=old_coord_df.index)

return final_df

Expand Down Expand Up @@ -767,8 +765,8 @@ def apply_suffix(s):
block_df_oid = blk_partitions[row_idx, col_idx]
block_df = ray.get(block_df_oid)
chunk = block_df.iloc[row_df[apply_suffix(
'index_within_partition')], col_df[apply_suffix(
'index_within_partition')]]
'index_within_partition'
)], col_df[apply_suffix('index_within_partition')]]
this_column.append(chunk)
df_columns.append(pandas.concat(this_column, axis=1))
final_df = pandas.concat(df_columns)
Expand Down
11 changes: 1 addition & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,2 @@
pandas==0.22
# Install Ray from latest wheels
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27mu-manylinux1_x86_64.whl ; sys_platform == "linux2" and python_version == "2.7"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp33-cp33m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.3"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp34-cp34m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.4"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp35-cp35m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.5"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-manylinux1_x86_64.whl ; sys_platform == "linux" and python_version == "3.6"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp27-cp27m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "2.7"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp34-cp34m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.4"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp35-cp35m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.5"
https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.4.0-cp36-cp36m-macosx_10_6_intel.whl ; sys_platform == "darwin" and python_version == "3.6"
ray==0.5.2

0 comments on commit cd965ae

Please sign in to comment.