diff --git a/.github/workflows/fuzzydata-test.yml b/.github/workflows/fuzzydata-test.yml
new file mode 100644
index 00000000000..3fc716c4b79
--- /dev/null
+++ b/.github/workflows/fuzzydata-test.yml
@@ -0,0 +1,36 @@
+name: fuzzy
+on: pull_request
+jobs:
+ test-fuzzydata:
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ shell: bash -l {0}
+ strategy:
+ matrix:
+ python-version: ["3.8"]
+ engine: ["ray", "dask"]
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ fetch-depth: 1
+ - uses: conda-incubator/setup-miniconda@v2
+ with:
+ activate-environment: modin
+ environment-file: environment-dev.yml
python-version: ${{matrix.python-version}}
+ channel-priority: strict
+ use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+ - name: Conda environment
+ run: |
+ conda info
+ conda list
+ - name: test-fuzzydata (engine ${{matrix.engine}}, python ${{matrix.python-version}})
+ run: python -m pytest modin/experimental/fuzzydata/test/test_fuzzydata.py -Wignore::UserWarning
+ env:
+ MODIN_ENGINE: ${{matrix.engine}}
+ - uses: actions/upload-artifact@v3
+ with:
+ name: fuzzydata-test-workflow-${{matrix.engine}}
+ path: /tmp/fuzzydata-test-wf-${{matrix.engine}}/* # Must match output dir in test_fuzzydata.py
+ if-no-files-found: error
diff --git a/README.md b/README.md
index 24431748e80..bcfdf3c7261 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,18 @@
Scale your pandas workflows by changing one line of code
+
+
+|
Dev Community & Support
|
Forums
|
Socials
|
Docs
|
+|:---: | :---: | :---: | :---: |
+| [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/modin-project/shared_invite/zt-yvk5hr3b-f08p_ulbuRWsAfg9rMY3uA) | [![Stack Overflow](https://img.shields.io/badge/-Stackoverflow-FE7A16?style=for-the-badge&logo=stack-overflow&logoColor=white)](https://stackoverflow.com/questions/tagged/modin) |
|
|
+
+
+
-
-
+
-
diff --git a/docs/release_notes/release_notes-0.15.0.rst b/docs/release_notes/release_notes-0.15.0.rst
index 9135a41de5d..2505e7330b9 100644
--- a/docs/release_notes/release_notes-0.15.0.rst
+++ b/docs/release_notes/release_notes-0.15.0.rst
@@ -65,6 +65,7 @@ Key Features and Updates
* DOCS-#4469: Say that commit messages can start with PERF (#4470).
* DOCS-#4466: Recommend GitHub issues over bug_reports@modin.org (#4474).
* DOCS-#4487: Recommend GitHub issues over feature_requests@modin.org (#4489).
+ * DOCS-#4545: Add socials to README (#4555).
* Dependencies
* FIX-#4327: Update min pin for xgboost version (#4328)
* FIX-#4383: Remove `pathlib` from deps (#4384)
diff --git a/docs/release_notes/release_notes-0.16.0.rst b/docs/release_notes/release_notes-0.16.0.rst
index 1931719b41a..574632ddf09 100644
--- a/docs/release_notes/release_notes-0.16.0.rst
+++ b/docs/release_notes/release_notes-0.16.0.rst
@@ -9,6 +9,9 @@ Key Features and Updates
* FIX-#4570: Replace ``np.bool`` -> ``np.bool_`` (#4571)
* FIX-#4543: Fix `read_csv` in case skiprows=<0, []> (#4544)
* FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
+ * FIX-#4589: Pin protobuf<4.0.0 to fix ray (#4590)
+ * FIX-#4577: Set attribute of Modin dataframe to updated value (#4588)
+ * FIX-#4411: Fix binary_op between datetime64 Series and pandas timedelta (#4592)
* FIX-#4582: Inherit custom log layer (#4583)
* Performance enhancements
* PERF-#4182: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
@@ -31,8 +34,9 @@ Key Features and Updates
* Documentation improvements
* DOCS-#4552: Change default sphinx language to en to fix sphinx >= 5.0.0 build (#4553)
* Dependencies
- *
+ * FEAT-#4598: Add support for pandas 1.4.3 (#4599)
* New Features
+ * FEAT-#4463: Add experimental fuzzydata integration for testing against a randomized dataframe workflow (#4556)
Contributors
------------
@@ -40,3 +44,6 @@ Contributors
@NickCrews
@prutskov
@vnlitvinov
+@pyrito
+@suhailrehman
+@RehanSD
diff --git a/docs/requirements-doc.txt b/docs/requirements-doc.txt
index 60eb5fe301b..6028ad30302 100644
--- a/docs/requirements-doc.txt
+++ b/docs/requirements-doc.txt
@@ -12,6 +12,9 @@ sphinx-click
# Pin ray to < 1.13.0 to work around GH#4564
# TODO(https://github.com/modin-project/modin/issues/4564): let ray go past 1.13.0.
ray[default]>=1.4.0,<1.13.0
+# Following https://github.com/ray-project/ray/pull/25648, pin protobuf < 4,
+# because versions >= 4.0.0 are incompatible with ray<1.13.0.
+protobuf<4.0.0
git+https://github.com/modin-project/modin.git@master#egg=modin[all]
sphinxcontrib_plantuml
sphinx-issues
diff --git a/docs/usage_guide/advanced_usage/index.rst b/docs/usage_guide/advanced_usage/index.rst
index 0287bb37f09..d0d3daa6fdb 100644
--- a/docs/usage_guide/advanced_usage/index.rst
+++ b/docs/usage_guide/advanced_usage/index.rst
@@ -89,9 +89,9 @@ internal execution flow.
Logging with Modin
------------------
-Modin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,
-and system memory. Logging is disabled by default, but when it is enabled, log files are written to a local `.modin` directory
-at the same directory level as the notebook/script used to run Modin. See our :doc:`Logging with Modin documentation `
+Modin logging offers users greater insight into their queries by logging internal Modin API calls, partition metadata,
+and system memory. Logging is disabled by default, but when it is enabled, log files are written to a local `.modin` directory
+at the same directory level as the notebook/script used to run Modin. See our :doc:`Logging with Modin documentation `
for usage information.
Batch Pipeline API
@@ -100,6 +100,13 @@ Modin provides an experimental batched API that pipelines row parallel queries.
for a walkthrough on how to use this feature, as well as :doc:`Batch Pipeline API documentation `
for more information about the API.
+Fuzzydata Testing
+-----------------
+
+An experimental GitHub Action on pull request has been added to Modin, which automatically runs the Modin codebase against
+`fuzzydata`, a random dataframe workflow generator. The resulting workflow that was used to test the Modin codebase can be
+downloaded as an artifact from the GitHub Actions tab for further inspection. See `fuzzydata`_ for more details.
+
.. _`blog post`: https://medium.com/riselab/why-every-data-scientist-using-pandas-needs-modin-bringing-sql-to-dataframes-3b216b29a7c0
.. _`Modin SQL documentation`: modin_sql.html
.. _`Modin Spreadsheet API documentation`: spreadsheets_api.html
@@ -109,3 +116,4 @@ for more information about the API.
.. _`Slack`: https://modin.org/slack.html
.. _`tqdm`: https://github.com/tqdm/tqdm
.. _`distributed XGBoost`: https://medium.com/intel-analytics-software/distributed-xgboost-with-modin-on-ray-fc17edef7720
+.. _`fuzzydata`: https://github.com/suhailrehman/fuzzydata
diff --git a/environment-dev.yml b/environment-dev.yml
index a635b3f002a..4f565ea2b69 100644
--- a/environment-dev.yml
+++ b/environment-dev.yml
@@ -2,7 +2,7 @@ name: modin
channels:
- conda-forge
dependencies:
- - pandas==1.4.2
+ - pandas==1.4.3
- numpy>=1.18.5
- pyarrow>=4.0.1
- dask[complete]>=2.22.0,<2022.2.0
@@ -42,8 +42,11 @@ dependencies:
- tqdm
- git+https://github.com/airspeed-velocity/asv.git@ef016e233cb9a0b19d517135104f49e0a3c380e9
# Pin ray to < 1.13.0 to work around GH#4564
- # TODO(https://github.com/modin-project/modin/issues/4564): let ray go past 1.13.0.
+ # TODO(https://github.com/modin-project/modin/issues/4564): let ray go past 1.13.0.
- ray[default]>=1.4.0,<1.13.0
+ # Following https://github.com/ray-project/ray/pull/25648, pin protobuf < 4,
+ # because versions >= 4.0.0 are incompatible with ray<1.13.0.
+ - protobuf<4.0.0
- connectorx>=0.2.6a4
# TODO: remove when resolving GH#4398
- redis>=3.5.0,<4.0.0
@@ -51,3 +54,5 @@ dependencies:
- flake8
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
- numpydoc==1.1.0
+ # experimental version of fuzzydata requires at least 0.0.6 to successfully resolve all dependencies
+ - fuzzydata>=0.0.6
diff --git a/modin/experimental/fuzzydata/__init__.py b/modin/experimental/fuzzydata/__init__.py
new file mode 100644
index 00000000000..153148d879b
--- /dev/null
+++ b/modin/experimental/fuzzydata/__init__.py
@@ -0,0 +1,14 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+
+"""Module holds experimental fuzzydata specific functionality for Modin."""
diff --git a/modin/experimental/fuzzydata/test/__init__.py b/modin/experimental/fuzzydata/test/__init__.py
new file mode 100644
index 00000000000..cae6413e559
--- /dev/null
+++ b/modin/experimental/fuzzydata/test/__init__.py
@@ -0,0 +1,12 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
diff --git a/modin/experimental/fuzzydata/test/test_fuzzydata.py b/modin/experimental/fuzzydata/test/test_fuzzydata.py
new file mode 100644
index 00000000000..d110f940a1c
--- /dev/null
+++ b/modin/experimental/fuzzydata/test/test_fuzzydata.py
@@ -0,0 +1,65 @@
+# Licensed to Modin Development Team under one or more contributor license agreements.
+# See the NOTICE file distributed with this work for additional information regarding
+# copyright ownership. The Modin Development Team licenses this file to you under the
+# Apache License, Version 2.0 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under
+# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific language
+# governing permissions and limitations under the License.
+
+import os
+import glob
+import uuid
+import shutil
+from fuzzydata.core.generator import generate_workflow
+from fuzzydata.clients.modin import ModinWorkflow
+
+from modin.config import Engine
+
+
+def test_fuzzydata_sample_workflow():
+ # Workflow Generation Options
+ wf_name = str(uuid.uuid4())[:8] # Unique name for the generated workflow
+ num_versions = 10 # Number of unique CSV files to generate
+ cols = 33 # Columns in Base Artifact
+ rows = 1000 # Rows in Base Artifact
+ bfactor = 1.0 # Branching Factor - 0.1 is linear, 10.0 is star-like
+ exclude_ops = ["groupby"] # In-Memory groupby operations cause issue #4287
+ matfreq = 2 # How many operations to chain before materialization
+
+ engine = Engine.get().lower()
+
+ # Create Output Directory for Workflow Data
+ base_out_directory = (
+ f"/tmp/fuzzydata-test-wf-{engine}/" # Must match corresponding github-action
+ )
+ if os.path.exists(base_out_directory):
+ shutil.rmtree(base_out_directory)
+ output_directory = f"{base_out_directory}/{wf_name}/"
+ os.makedirs(output_directory, exist_ok=True)
+
+ # Start Workflow Generation
+ workflow = generate_workflow(
+ workflow_class=ModinWorkflow,
+ name=wf_name,
+ num_versions=num_versions,
+ base_shape=(cols, rows),
+ out_directory=output_directory,
+ bfactor=bfactor,
+ exclude_ops=exclude_ops,
+ matfreq=matfreq,
+ wf_options={"modin_engine": engine},
+ )
+
+ # Assertions that the workflow generation worked correctly
+ assert len(workflow) == num_versions
+ assert len(list(glob.glob(f"{output_directory}/artifacts/*.csv"))) == len(
+ workflow.artifact_dict
+ )
+ assert os.path.exists(f"{output_directory}/{workflow.name}_operations.json")
+ assert os.path.getsize(f"{output_directory}/{workflow.name}_operations.json") > 0
+ assert os.path.exists(f"{output_directory}/{workflow.name}_gt_graph.csv")
diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py
index dfc6d3aea01..a30871a42a6 100644
--- a/modin/pandas/__init__.py
+++ b/modin/pandas/__init__.py
@@ -14,7 +14,7 @@
import pandas
import warnings
-__pandas_version__ = "1.4.2"
+__pandas_version__ = "1.4.3"
if pandas.__version__ != __pandas_version__:
warnings.warn(
diff --git a/modin/pandas/base.py b/modin/pandas/base.py
index 943c97e2652..4e14333f7b4 100644
--- a/modin/pandas/base.py
+++ b/modin/pandas/base.py
@@ -222,10 +222,7 @@ def _validate_other(
self,
other,
axis,
- numeric_only=False,
- numeric_or_time_only=False,
- numeric_or_object_only=False,
- comparison_dtypes_only=False,
+ dtype_check=False,
compare_index=False,
):
"""
@@ -239,14 +236,8 @@ def _validate_other(
Specifies axis along which to do validation. When `1` or `None`
is specified, validation is done along `index`, if `0` is specified
validation is done along `columns` of `other` frame.
- numeric_only : bool, default: False
- Validates that both frames have only numeric dtypes.
- numeric_or_time_only : bool, default: False
- Validates that both frames have either numeric or time dtypes.
- numeric_or_object_only : bool, default: False
- Validates that both frames have either numeric or object dtypes.
- comparison_dtypes_only : bool, default: False
- Validates that both frames have either numeric or time or equal dtypes.
+ dtype_check : bool, default: False
+ Validates that both frames have compatible dtypes.
compare_index : bool, default: False
Compare Index if True.
@@ -300,22 +291,10 @@ def _validate_other(
if not self.index.equals(other.index):
raise TypeError("Cannot perform operation with non-equal index")
# Do dtype checking.
- if numeric_only:
- if not all(
- is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype)
- for self_dtype, other_dtype in zip(self._get_dtypes(), other_dtypes)
- ):
- raise TypeError("Cannot do operation on non-numeric dtypes")
- elif numeric_or_object_only:
+ if dtype_check:
if not all(
(is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))
or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype))
- for self_dtype, other_dtype in zip(self._get_dtypes(), other_dtypes)
- ):
- raise TypeError("Cannot do operation non-numeric dtypes")
- elif comparison_dtypes_only:
- if not all(
- (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))
or (
is_datetime_or_timedelta_dtype(self_dtype)
and is_datetime_or_timedelta_dtype(other_dtype)
@@ -323,21 +302,7 @@ def _validate_other(
or is_dtype_equal(self_dtype, other_dtype)
for self_dtype, other_dtype in zip(self._get_dtypes(), other_dtypes)
):
- raise TypeError(
- "Cannot do operation non-numeric objects with numeric objects"
- )
- elif numeric_or_time_only:
- if not all(
- (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype))
- or (
- is_datetime_or_timedelta_dtype(self_dtype)
- and is_datetime_or_timedelta_dtype(other_dtype)
- )
- for self_dtype, other_dtype in zip(self._get_dtypes(), other_dtypes)
- ):
- raise TypeError(
- "Cannot do operation non-numeric objects with numeric objects"
- )
+ raise TypeError("Cannot do operation with improper dtypes")
return result
def _validate_function(self, func, on_invalid=None):
@@ -416,7 +381,7 @@ def _binary_op(self, op, other, **kwargs):
return self._default_to_pandas(
getattr(self._pandas_class, op), other, **kwargs
)
- other = self._validate_other(other, axis, numeric_or_object_only=True)
+ other = self._validate_other(other, axis, dtype_check=True)
exclude_list = [
"__add__",
"__radd__",
diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py
index 6abeda7d50b..fa3bcb07026 100644
--- a/modin/pandas/dataframe.py
+++ b/modin/pandas/dataframe.py
@@ -2498,6 +2498,10 @@ def __setattr__(self, key, value):
pass
elif key in self and key not in dir(self):
self.__setitem__(key, value)
+ # Note: return immediately so we don't keep this `key` as dataframe state.
+ # `__getattr__` will return the columns not present in `dir(self)`, so we do not need
+ # to manually track this state in the `dir`.
+ return
elif isinstance(value, pandas.Series):
warnings.warn(
"Modin doesn't allow columns to be created via a new attribute name - see "
diff --git a/modin/pandas/test/dataframe/test_iter.py b/modin/pandas/test/dataframe/test_iter.py
index e54d1af6619..87d612b780d 100644
--- a/modin/pandas/test/dataframe/test_iter.py
+++ b/modin/pandas/test/dataframe/test_iter.py
@@ -248,7 +248,8 @@ def test_inplace_series_ops(data):
df_equals(modin_df, pandas_df)
-def test___setattr__():
+# Note: Tests setting an attribute that is not an existing column label
+def test___setattr__not_column():
pandas_df = pandas.DataFrame([1, 2, 3])
modin_df = pd.DataFrame([1, 2, 3])
@@ -257,6 +258,37 @@ def test___setattr__():
df_equals(modin_df, pandas_df)
+ # While `new_col` is not a column of the dataframe,
+ # it should be accessible with __getattr__.
+ assert modin_df.new_col == pandas_df.new_col
+
+
+def test___setattr__mutating_column():
+ # Use case from issue #4577
+ pandas_df = pandas.DataFrame([[1]], columns=["col0"])
+ modin_df = pd.DataFrame([[1]], columns=["col0"])
+
+ # Replacing a column with a list should mutate the column in place.
+ pandas_df.col0 = [3]
+ modin_df.col0 = [3]
+
+ df_equals(modin_df, pandas_df)
+ # Check that the col0 attribute reflects the value update.
+ df_equals(modin_df.col0, pandas_df.col0)
+
+ pandas_df.col0 = pd.Series([5])
+ modin_df.col0 = pd.Series([5])
+
+ # Check that the col0 attribute reflects this update
+ df_equals(modin_df, pandas_df)
+
+ pandas_df.loc[0, "col0"] = 4
+ modin_df.loc[0, "col0"] = 4
+
+ # Check that the col0 attribute reflects update via loc
+ df_equals(modin_df, pandas_df)
+ assert modin_df.col0.equals(modin_df["col0"])
+
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_isin(data):
diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
index 04c7142f7b6..d7e6822a40c 100644
--- a/modin/pandas/test/test_series.py
+++ b/modin/pandas/test/test_series.py
@@ -1141,6 +1141,14 @@ def test_between_time():
)
+def test_add_series_to_timedeltaindex():
+ # Make a pandas.core.indexes.timedeltas.TimedeltaIndex
+ deltas = pd.to_timedelta([1], unit="h")
+ test_series = create_test_series(np.datetime64("2000-12-12"))
+ eval_general(*test_series, lambda s: s + deltas)
+ eval_general(*test_series, lambda s: s - deltas)
+
+
@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
def test_bfill(data):
modin_series, pandas_series = create_test_series(data)
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5777f4eb0a2..76b6c29656b 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,4 +1,4 @@
-pandas==1.4.2
+pandas==1.4.3
numpy>=1.18.5
pyarrow>=4.0.1
dask[complete]>=2.22.0,<2022.2.0
@@ -6,6 +6,9 @@ distributed>=2.22.0,<2022.2.0
# Pin ray to < 1.13.0 to work around GH#4564
# TODO(https://github.com/modin-project/modin/issues/4564): let ray go past 1.13.0.
ray[default]>=1.4.0,<1.13.0
+# Following https://github.com/ray-project/ray/pull/25648, pin protobuf < 4,
+# because versions >= 4.0.0 are incompatible with ray<1.13.0.
+protobuf<4.0.0
redis>=3.5.0,<4.0.0
psutil
fsspec
@@ -40,4 +43,6 @@ connectorx>=0.2.6a4
black
flake8
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
-numpydoc==1.1.0
\ No newline at end of file
+numpydoc==1.1.0
+# experimental version of fuzzydata requires at least 0.0.6 to successfully resolve all dependencies
+fuzzydata>=0.0.6
diff --git a/requirements/env_omnisci.yml b/requirements/env_omnisci.yml
index 4478242d2b5..aa4de7a1d04 100644
--- a/requirements/env_omnisci.yml
+++ b/requirements/env_omnisci.yml
@@ -2,7 +2,7 @@ name: modin_on_omnisci
channels:
- conda-forge
dependencies:
- - pandas==1.4.2
+ - pandas==1.4.3
- pyarrow=6
- numpy>=1.18.5
- fsspec
diff --git a/requirements/requirements-no-engine.yml b/requirements/requirements-no-engine.yml
index 93240e89788..b6746552c66 100644
--- a/requirements/requirements-no-engine.yml
+++ b/requirements/requirements-no-engine.yml
@@ -1,7 +1,7 @@
channels:
- conda-forge
dependencies:
- - pandas==1.4.2
+ - pandas==1.4.3
- numpy>=1.18.5
- pyarrow>=4.0.1
- fsspec
diff --git a/setup.py b/setup.py
index 193980b4292..2a709c5e8e9 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,14 @@
# TODO: remove redis dependency when resolving GH#4398
# Pin ray to < 1.13.0 to work around GH#4564
# TODO(https://github.com/modin-project/modin/issues/4564): let ray go past 1.13.0.
-ray_deps = ["ray[default]>=1.4.0,<1.13.0", "pyarrow>=4.0.1", "redis>=3.5.0,<4.0.0"]
+# Following https://github.com/ray-project/ray/pull/25648, pin protobuf < 4,
+# because versions >= 4.0.0 are incompatible with ray<1.13.0.
+ray_deps = [
+ "ray[default]>=1.4.0,<1.13.0",
+ "pyarrow>=4.0.1",
+ "redis>=3.5.0,<4.0.0",
+ "protobuf<4.0.0",
+]
remote_deps = ["rpyc==4.1.5", "cloudpickle", "boto3"]
spreadsheet_deps = ["modin-spreadsheet>=0.1.0"]
sql_deps = ["dfsql>=0.4.2", "pyparsing<=2.4.7"]
@@ -25,7 +32,7 @@
url="https://github.com/modin-project/modin",
long_description=long_description,
long_description_content_type="text/markdown",
- install_requires=["pandas==1.4.2", "packaging", "numpy>=1.18.5", "fsspec", "psutil"],
+ install_requires=["pandas==1.4.3", "packaging", "numpy>=1.18.5", "fsspec", "psutil"],
extras_require={
# can be installed by pip install modin[dask]
"dask": dask_deps,