Skip to content

Commit

Permalink
Make it so applying and removing patches are repeatable without errors (
Browse files Browse the repository at this point in the history
NVIDIA#2502)

* Make it so applying and removing patches are repeatable without errors

Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>

* Adjust config for skipping a patch

* More fixes

Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>

---------

Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>
  • Loading branch information
revans2 authored and pxLi committed Oct 15, 2024
1 parent 282e0d0 commit 3a9d5fe
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 36 deletions.
49 changes: 42 additions & 7 deletions build/apply-patches
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
# limitations under the License.
#

# Run a command in a Docker container with devtoolset

set -e

BASE_DIR=$( git rev-parse --show-toplevel )
Expand All @@ -26,14 +24,51 @@ PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}

CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}

# Apply pattches to CUDF is problematic in a number of ways. But ultimately it comes down to
# making sure that a user can do development work in spark-rapids-jni without the patches
# getting in the way
# The operations I really want to support no matter what state CUDF is in are
# 1) Build the repo from scratch
# 2) Rebuild the repo without having to clean and start over
# 3) upmerge to a new version of the plugin including updating the cudf submodule
#
# Building from scratch is simple. We want clean to unapply any patches and
# build to apply them. But if we want to rebuild without a clean we need to know what
# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
# is to save some state files about what happened. But a user could mess with CUDF directly
# so we want to have ways to double check that they are indeed correct.

FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"

pushd "$CUDF_DIR"
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
echo "Error: CUDF repository has uncommitted changes. No patches will be applied..."
exit 1

PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)

if [ -z "$PATCH_FILES" ] ; then
echo "No patches to apply"
exit 0
fi

CHANGED_FILES=$(git status --porcelain --untracked-files=no)

if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
if git apply -R --check "$FULLY_PATCHED_FILE" ; then
echo "Patches appear to have been applied already"
exit 0
fi
fi

if [ -n "$CHANGED_FILES" ] ; then
echo "Error: CUDF repository has uncommitted changes. No patches will be applied. Please clean the repository so we can try and add the needed patches"
echo "$CHANGED_FILE"
exit 1
fi

find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do
echo "patching with: $file"
patch --no-backup-if-mismatch -f -t --reject-file=- -p1 -i "$file"
echo "patching with: $file"
git apply -v "$file"
done

git diff > "$FULLY_PATCHED_FILE"

popd
66 changes: 52 additions & 14 deletions build/unapply-patches
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,67 @@
# limitations under the License.
#

# Run a command in a Docker container with devtoolset

set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
BASE_DIR=$( git rev-parse --show-toplevel )

PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}

PATCH_DIR=${PATCH_DIR:-$(realpath "$SCRIPT_DIR/../patches/")}
CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}

CUDF_DIR=${CUDF_DIR:-$(realpath "$SCRIPT_DIR/../thirdparty/cudf/")}
# Apply pattches to CUDF is problematic in a number of ways. But ultimately it comes down to
# making sure that a user can do development work in spark-rapids-jni without the patches
# getting in the way
# The operations I really want to support no matter what state CUDF is in are
# 1) Build the repo from scratch
# 2) Rebuild the repo without having to clean and start over
# 3) upmerge to a new version of the plugin including updating the cudf submodule
#
# Building from scratch is simple. We want clean to unapply any patches and
# build to apply them. But if we want to rebuild without a clean we need to know what
# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
# is to save some state files about what happened. But a user could mess with CUDF directly
# so we want to have ways to double check that they are indeed correct.

FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"

pushd "$CUDF_DIR"
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
#only try to remove patches if it looks like something was changed
find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV -r | while IFS= read -r -d '' file; do
echo "patching with: $file"
patch -R --no-backup-if-mismatch --reject-file=- -f -t -p1 -i "$file"
done

PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)

if [ -z "$PATCH_FILES" ] ; then
echo "No patches to remove"
exit 0
fi

# Check for modifications
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
CHANGED_FILES=$(git status --porcelain --untracked-files=no)

if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
if git apply --check -R "$FULLY_PATCHED_FILE"; then
echo "Patches appear to have been applied, so going to remove them"
git apply -R -v "$FULLY_PATCHED_FILE"
rm -f "$FULLY_PATCHED_FILE"

# Check for modifications, again
if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
git status --porcelain --untracked-files=no
exit 1
fi

exit 0
else
echo "Files are changed, but in a way where the full path file does not apply to remove them $FULL_PATCHED_FILE"
exit 1
fi
fi

if [ -n "$CHANGED_FILES" ] ; then
echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again."
echo "$CHANGED_FILE"
exit 1
else
echo "No changes in CUDF repository to remove"
fi

popd
30 changes: 21 additions & 9 deletions ci/submodule-sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,26 +57,30 @@ if [ -n "$CUDF_TAG" ]; then
else
git submodule update --remote --merge
fi

cudf_pins_only=false
cudf_sha=$(git -C thirdparty/cudf rev-parse HEAD)
if [[ "${cudf_sha}" == "${cudf_prev_sha}" ]]; then
echo "Submodule is up to date."
exit 0
echo "cuDF submodule is up to date. Try update cudf-pins..."
cudf_pins_only=true
else
echo "Try update cudf submodule to ${cudf_sha}..."
git add .
git commit -s -m "Update submodule cudf to ${cudf_sha}"
fi

echo "Try update cudf submodule to ${cudf_sha}..."
git add .

echo "Test against ${cudf_sha}..."
echo "Build libcudf only to update pinned versions..."

MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
set +e
# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
# calling the antrun directly skips applying patches and also only builds
# libcudf
${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
-Dsubmodule.patch.skip \
-DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
-DUSE_GDS=ON \
-DBUILD_TESTS=ON \
-DUSE_SANITIZER=ON
validate_status=$?
Expand All @@ -100,9 +104,17 @@ sed -i -e 's/4\.0\.1\.0/3.0.6/' \
# the updated versions.json generated by the build
echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
git add .
git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}"
if ! git diff-index --quiet HEAD; then
# AS we perform a squash merge for submodule-sync, the commit message should match the expectation.
git commit -s -m "Update pinned versions for cudf ${cudf_sha}"
elif ${cudf_pins_only}; then
echo "No changes to commit. Exit early..."
exit 0
fi

sha=$(git rev-parse HEAD)

echo "Test against ${cudf_sha}..."
set +e
# now build and test everything with the patches in place
${MVN} clean verify ${MVN_MIRROR} \
Expand Down
12 changes: 6 additions & 6 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<slf4j.version>1.7.30</slf4j.version>
<submodule.check.skip>false</submodule.check.skip>
<!-- This skips applying and unapplying patchs to CUDF. Be aware that if you
try to build the main jar without patching CUDF it could result in build
failures because the two often need to be in sync with each other. So use
at your own risk. -->
<submodule.patch.skip>false</submodule.patch.skip>
<antrun.version>3.0.0</antrun.version>
<hilbert.version>0.2.2</hilbert.version>
Expand Down Expand Up @@ -429,7 +433,7 @@
<id>build-libcudf</id>
<phase>validate</phase>
<configuration>
<target xmlns:if="ant:if" xmlns:unless="ant:unless">
<target xmlns:if="ant:if">
<condition property="needConfigure">
<or>
<istrue value="${libcudf.build.configure}"/>
Expand Down Expand Up @@ -466,8 +470,7 @@
</exec>
<exec dir="${libcudf.build.path}"
failonerror="true"
executable="cmake"
unless:true="${submodule.patch.skip}">
executable="cmake">
<arg value="--build"/>
<arg value="${libcudf.build.path}"/>
<arg value="--target"/>
Expand All @@ -484,7 +487,6 @@
<id>build-libcudfjni</id>
<phase>validate</phase>
<configuration>
<skip>${submodule.patch.skip}</skip>
<target>
<mkdir dir="${libcudfjni.build.path}"/>
<exec dir="${libcudfjni.build.path}"
Expand Down Expand Up @@ -520,7 +522,6 @@
<id>build-sparkrapidsjni</id>
<phase>validate</phase>
<configuration>
<skip>${submodule.patch.skip}</skip>
<target>
<mkdir dir="${native.build.path}"/>
<exec dir="${native.build.path}"
Expand Down Expand Up @@ -559,7 +560,6 @@
<id>build-info</id>
<phase>generate-resources</phase>
<configuration>
<skip>${submodule.patch.skip}</skip>
<target>
<mkdir dir="${project.build.directory}/extra-resources"/>
<exec executable="bash"
Expand Down

0 comments on commit 3a9d5fe

Please sign in to comment.