Update cudf Java bindings to 21.06.1 #2740

Merged: 3 commits, Jun 21, 2021
2 changes: 1 addition & 1 deletion docs/configs.md
@@ -10,7 +10,7 @@ The following is the list of options that `rapids-plugin-4-spark` supports.
On startup use: `--conf [conf key]=[conf value]`. For example:

```
${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-21.06.0.jar,cudf-21.06.0-cuda11.jar' \
${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-21.06.0.jar,cudf-21.06.1-cuda11.jar' \
--conf spark.plugins=com.nvidia.spark.SQLPlugin \
--conf spark.rapids.sql.incompatibleOps.enabled=true
```
2 changes: 1 addition & 1 deletion docs/demo/Databricks/generate-init-script-cuda11.ipynb
@@ -1 +1 @@
{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-21.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar\nsudo wget -O /databricks/jars/cudf-21.06.0-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.0/cudf-21.06.0-cuda11.jar\n\nsudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin\nsudo wget -O ~/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb\nsudo dpkg -i ~/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb\nsudo apt-key add /var/cuda-repo-ubuntu1804-11-0-local/7fa2af80.pub\nsudo apt-get update\nsudo apt -y install cuda-toolkit-11-0\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-21.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar\nsudo wget -O /databricks/jars/cudf-21.06.1-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.1/cudf-21.06.1-cuda11.jar\n\nsudo wget -O /etc/apt/preferences.d/cuda-repository-pin-600 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin\nsudo wget -O ~/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb\nsudo dpkg -i ~/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb\nsudo apt-key add /var/cuda-repo-ubuntu1804-11-0-local/7fa2af80.pub\nsudo apt-get update\nsudo apt -y install cuda-toolkit-11-0\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
2 changes: 1 addition & 1 deletion docs/demo/Databricks/generate-init-script.ipynb
@@ -1 +1 @@
{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-21.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar\nsudo wget -O /databricks/jars/cudf-21.06-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.0/cudf-21.06.0-cuda11.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
{"cells":[{"cell_type":"code","source":["dbutils.fs.mkdirs(\"dbfs:/databricks/init_scripts/\")\n \ndbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n#!/bin/bash\nsudo wget -O /databricks/jars/rapids-4-spark_2.12-21.06.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar\nsudo wget -O /databricks/jars/cudf-21.06.1-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.1/cudf-21.06.1-cuda11.jar\"\"\", True)"],"metadata":{},"outputs":[],"execution_count":1},{"cell_type":"code","source":["%sh\ncd ../../dbfs/databricks/init_scripts\npwd\nls -ltr\ncat init.sh"],"metadata":{},"outputs":[],"execution_count":2},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":3}],"metadata":{"name":"generate-init-script","notebookId":2645746662301564},"nbformat":4,"nbformat_minor":0}
2 changes: 1 addition & 1 deletion docs/download.md
@@ -44,7 +44,7 @@ Software Requirements:
### Download v21.06.0
* Download the [RAPIDS
Accelerator for Apache Spark 21.06.0 jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar)
* Download the [RAPIDS cuDF 21.06.0 jar](https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.0/cudf-21.06.0-cuda11.jar)
* Download the [RAPIDS cuDF 21.06.1 jar](https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.1/cudf-21.06.1-cuda11.jar)

This package is built against CUDA 11.2 and has [CUDA forward
compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html) enabled. It is tested
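For example, the two jars referenced above can be fetched straight from Maven Central (a minimal sketch; the download directory is up to you):

```shell
# Download the RAPIDS Accelerator plugin jar and the matching cuDF 21.06.1 CUDA 11 jar
wget https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/21.06.0/rapids-4-spark_2.12-21.06.0.jar
wget https://repo1.maven.org/maven2/ai/rapids/cudf/21.06.1/cudf-21.06.1-cuda11.jar
```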
2 changes: 1 addition & 1 deletion docs/get-started/Dockerfile.cuda
@@ -50,7 +50,7 @@ COPY spark-3.0.2-bin-hadoop3.2/examples /opt/spark/examples
COPY spark-3.0.2-bin-hadoop3.2/kubernetes/tests /opt/spark/tests
COPY spark-3.0.2-bin-hadoop3.2/data /opt/spark/data

COPY cudf-21.06.0-cuda11.jar /opt/sparkRapidsPlugin
COPY cudf-21.06.1-cuda11.jar /opt/sparkRapidsPlugin
COPY rapids-4-spark_2.12-21.06.0.jar /opt/sparkRapidsPlugin
COPY getGpusResources.sh /opt/sparkRapidsPlugin

4 changes: 2 additions & 2 deletions docs/get-started/getting-started-on-prem.md
@@ -55,7 +55,7 @@ CUDA and will not run on other versions. The jars use a maven classifier to keep
- CUDA 11.0/11.1/11.2 => classifier cuda11

For example, here is a sample version of the jars and cudf with CUDA 11.0 support:
- cudf-21.06.0-cuda11.jar
- cudf-21.06.1-cuda11.jar
- rapids-4-spark_2.12-21.06.0.jar
jar that your version of the accelerator depends on.

@@ -64,7 +64,7 @@ For simplicity export the location to these jars. This example assumes the sampl
been placed in the `/opt/sparkRapidsPlugin` directory:
```shell
export SPARK_RAPIDS_DIR=/opt/sparkRapidsPlugin
export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-21.06.0-cuda11.jar
export SPARK_CUDF_JAR=${SPARK_RAPIDS_DIR}/cudf-21.06.1-cuda11.jar
export SPARK_RAPIDS_PLUGIN_JAR=${SPARK_RAPIDS_DIR}/rapids-4-spark_2.12-21.06.0.jar
```

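For context, these exports are what later get passed to `spark-shell`/`spark-submit`; a minimal sketch, assuming the exports above and the plugin class shown in `docs/configs.md`:

```shell
# Minimal sketch: launch Spark with the cuDF and RAPIDS Accelerator jars on the classpath
${SPARK_HOME}/bin/spark-shell \
  --jars ${SPARK_CUDF_JAR},${SPARK_RAPIDS_PLUGIN_JAR} \
  --conf spark.plugins=com.nvidia.spark.SQLPlugin
```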
4 changes: 2 additions & 2 deletions integration_tests/README.md
@@ -158,7 +158,7 @@ If you just want to verify the SQL replacement is working you will need to add t
example assumes CUDA 11.0 is being used.

```
$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-21.06.0.jar,rapids-4-spark-udf-examples_2.12-21.06.0.jar,cudf-21.06.0-cuda11.jar" ./runtests.py
$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-21.06.0.jar,rapids-4-spark-udf-examples_2.12-21.06.0.jar,cudf-21.06.1-cuda11.jar" ./runtests.py
```

You don't have to enable the plugin for this to work, the test framework will do that for you.
@@ -249,7 +249,7 @@ To run cudf_udf tests, need following configuration changes:
As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 11.0:

```
$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-21.06.0.jar,rapids-4-spark-udf-examples_2.12-21.06.0.jar,cudf-21.06.0-cuda11.jar,rapids-4-spark-tests_2.12-21.06.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-21.06.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-21.06.0.jar" ./runtests.py --cudf_udf
$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-21.06.0.jar,rapids-4-spark-udf-examples_2.12-21.06.0.jar,cudf-21.06.1-cuda11.jar,rapids-4-spark-tests_2.12-21.06.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-21.06.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-21.06.0.jar" ./runtests.py --cudf_udf
```

## Writing tests
2 changes: 1 addition & 1 deletion jenkins/Dockerfile-blossom.integration.centos
@@ -19,7 +19,7 @@
# Arguments:
# CUDA_VER=11.0, 11.1 or 11.2.x
# CENTOS_VER=7 or 8
# CUDF_VER=21.06
# CUDF_VER=<cudf-py version>
# URM_URL=<maven repo url>
###

2 changes: 1 addition & 1 deletion jenkins/Dockerfile-blossom.integration.ubuntu
@@ -21,7 +21,7 @@
# Arguments:
# CUDA_VER=11.0, 11.1 or 11.2.x
# UBUNTU_VER=18.04 or 20.04
# CUDF_VER=21.06
# CUDF_VER=<cudf-py version>
###

ARG CUDA_VER=11.0
2 changes: 1 addition & 1 deletion jenkins/databricks/init_cudf_udf.sh
@@ -18,7 +18,7 @@
# The initscript to set up environment for the cudf_udf tests on Databricks
# Will be automatically pushed into the dbfs:/databricks/init_scripts once it is updated.

CUDF_VER=${CUDF_VER:-21.06}
CUDF_VER=${CUDF_VER:-21.06.01}

# Use mamba to install cudf-udf packages to speed up conda resolve time
base=$(conda info --base)
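Note that the conda package version string here (`21.06.01`) differs from the jar version string (`21.06.1`) used elsewhere in this PR. The rest of the script is cut off by the diff; purely as a hedged sketch (the channel list and Python pin below are assumptions, not taken from this repository), a mamba-based install of the pinned cudf-py package could look like:

```shell
# Sketch only: install mamba into the base conda env, then install the cudf
# package pinned to $CUDF_VER (channels and python version are assumptions)
base=$(conda info --base)
conda install -y -c conda-forge mamba
${base}/bin/mamba install -y -c rapidsai -c nvidia -c conda-forge \
    cudf=${CUDF_VER} python=3.8
```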
2 changes: 1 addition & 1 deletion jenkins/printJarVersion.sh
@@ -24,7 +24,7 @@ function print_ver(){
SERVER_ID=$5

# Collect snapshot dependency info only in Jenkins build
# In dev build, print 'SNAPSHOT' tag without time stamp, e.g.: cudf-21.06.0-SNAPSHOT.jar
# In dev build, print 'SNAPSHOT' tag without time stamp, e.g.: cudf-<version>-SNAPSHOT.jar
if [[ "$VERSION" == *"-SNAPSHOT" && -n "$JENKINS_URL" ]]; then
PREFIX=${VERSION%-SNAPSHOT}
# List the latest SNAPSHOT jar file in the maven repo
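Reading the visible logic above: in a Jenkins build the `-SNAPSHOT` suffix is stripped so the latest timestamped snapshot jar can be located in the Maven repo, while a dev build just prints the plain `-SNAPSHOT` name. A small illustration (the file names below are hypothetical, not from the script):

```shell
# Illustrative only: how the PREFIX computation behaves for a snapshot version
VERSION=21.06.1-SNAPSHOT
PREFIX=${VERSION%-SNAPSHOT}   # -> 21.06.1
echo "$PREFIX"
# Jenkins build: resolves a timestamped jar such as cudf-21.06.1-20210620.031500-1-cuda11.jar (hypothetical)
# Dev build:     prints the plain tag, e.g. cudf-21.06.1-SNAPSHOT.jar
```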
2 changes: 1 addition & 1 deletion jenkins/version-def.sh
@@ -26,7 +26,7 @@ for VAR in $OVERWRITE_PARAMS;do
done
IFS=$PRE_IFS

CUDF_VER=${CUDF_VER:-"21.06.0"}
CUDF_VER=${CUDF_VER:-"21.06.1"}
CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-"cuda11"}
PROJECT_VER=${PROJECT_VER:-"21.06.0-SNAPSHOT"}
PROJECT_TEST_VER=${PROJECT_TEST_VER:-"21.06.0-SNAPSHOT"}
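Each of these defaults uses the shell's `${VAR:-default}` expansion, so a CI job can override them from the environment without editing the script; a small self-contained illustration:

```shell
# ${VAR:-default}: use the exported value when present, otherwise the default
CUDF_VER=${CUDF_VER:-"21.06.1"}
echo "$CUDF_VER"   # prints 21.06.1 unless CUDF_VER was exported beforehand
# e.g. (script name illustrative):  CUDF_VER=21.08.0-SNAPSHOT bash show-ver.sh  -> prints 21.08.0-SNAPSHOT
```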
2 changes: 1 addition & 1 deletion pom.xml
@@ -228,7 +228,7 @@
<spark.version>${spark301.version}</spark.version>
<spark.test.version>${spark301.version}</spark.test.version>
<cuda.version>cuda11</cuda.version>
<cudf.version>21.06.0</cudf.version>
<cudf.version>21.06.1</cudf.version>
<scala.binary.version>2.12</scala.binary.version>
<scala.version>2.12.8</scala.version>
<orc.version>1.5.8</orc.version>
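For orientation (a sketch, not a quote from this `pom.xml`; the scope is an assumption), these properties are typically consumed by a dependency of roughly this shape, matching the `ai.rapids:cudf` coordinates and `cuda11` classifier visible in the Maven URLs earlier in this PR:

```xml
<!-- Sketch only: how cudf.version and cuda.version would be referenced -->
<dependency>
  <groupId>ai.rapids</groupId>
  <artifactId>cudf</artifactId>
  <version>${cudf.version}</version>
  <classifier>${cuda.version}</classifier>
  <scope>provided</scope>
</dependency>
```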
@@ -1241,7 +1241,7 @@ object RapidsConf {
|On startup use: `--conf [conf key]=[conf value]`. For example:
|
|```
|${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-21.06.0.jar,cudf-21.06.0-cuda11.jar' \
|${SPARK_HOME}/bin/spark --jars 'rapids-4-spark_2.12-21.06.0.jar,cudf-21.06.1-cuda11.jar' \
|--conf spark.plugins=com.nvidia.spark.SQLPlugin \
|--conf spark.rapids.sql.incompatibleOps.enabled=true
|```
2 changes: 1 addition & 1 deletion udf-examples/src/main/cpp/CMakeLists.txt
@@ -90,7 +90,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w --expt-extended-lambda --expt-relax
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF)

CPMAddPackage(NAME cudf
VERSION "21.06.00"
VERSION "21.06.01"
GIT_REPOSITORY https://github.com/rapidsai/cudf.git
GIT_TAG branch-21.06
GIT_SHALLOW TRUE