From b4c078c92e0a2627edda7d1bc0c24b3894328b30 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 8 Dec 2021 15:02:07 +0800 Subject: [PATCH 1/4] Print meaningful message when call scripts in maven Signed-off-by: Chong Gao --- dist/maven-antrun/build-parallel-worlds.xml | 31 +++++++++++++++++---- dist/scripts/binary-dedupe.sh | 20 ++++++------- pom.xml | 16 +++++++++-- shims/spark301/pom.xml | 15 ++++++++-- shims/spark301db/pom.xml | 15 ++++++++-- shims/spark302/pom.xml | 15 ++++++++-- shims/spark303/pom.xml | 15 ++++++++-- shims/spark304/pom.xml | 15 ++++++++-- shims/spark311/pom.xml | 15 ++++++++-- shims/spark311cdh/pom.xml | 15 ++++++++-- shims/spark312/pom.xml | 15 ++++++++-- shims/spark312db/pom.xml | 16 +++++++++-- shims/spark313/pom.xml | 15 ++++++++-- shims/spark320/pom.xml | 16 +++++++++-- shims/spark321/pom.xml | 16 +++++++++-- shims/spark330/pom.xml | 16 +++++++++-- 16 files changed, 218 insertions(+), 48 deletions(-) diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml index 32b77f69480..f63194f05d6 100644 --- a/dist/maven-antrun/build-parallel-worlds.xml +++ b/dist/maven-antrun/build-parallel-worlds.xml @@ -123,14 +123,33 @@ - + - - - + + + + + + + + + + + + + + + + Generating dependency-reduced-pom.xml diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index 88672e15983..9a910dac8da 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -59,27 +59,27 @@ echo "$((++STEP))/ SHA1 of all classes > tmp-sha1-class.txt" time ( find ./parallel-world/spark3* -type f -name '*.class' | \ xargs $SHASUM > tmp-sha1-class.txt -) 2>&1 +) echo "$((++STEP))/ make shim column 1 > tmp-shim-sha-package-class.txt" time ( < tmp-sha1-class.txt awk -F/ '$1=$1' | \ awk '{checksum=$1; shim=$4; $1=shim; $2=$3=""; $4=checksum; print $0}' | \ tr -s ' ' > tmp-shim-sha-package-class.txt -) 2>&1 +) echo "$((++STEP))/ sort by path, sha1; output first from each group > tmp-count-shim-sha-package-class.txt" time ( sort -k3 -k2,2 -u tmp-shim-sha-package-class.txt | \ uniq -f 2 -c > tmp-count-shim-sha-package-class.txt -) 2>&1 +) echo "$((++STEP))/ class files with unique sha1 > $SPARK3XX_COMMON_TXT" time ( grep '^\s\+1 .*' tmp-count-shim-sha-package-class.txt | \ awk '{$1=""; $3=""; print $0 }' | \ tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" -) 2>&1 +) function retain_single_copy() { set -e @@ -123,7 +123,7 @@ time ( while read spark_common_class; do retain_single_copy "$spark_common_class" done < "$SPARK3XX_COMMON_TXT" -) 2>&1 +) echo "$((++STEP))/ rsyncing common classes to $SPARK3XX_COMMON_DIR" time ( @@ -135,7 +135,7 @@ time ( # use rsync to reduce process forking rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" done -) 2>&1 +) mv "$SPARK3XX_COMMON_DIR" parallel-world/ @@ -172,7 +172,7 @@ echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_T time ( find . -name '*.class' -not -path './parallel-world/spark3*' | \ cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" -) 2>&1 +) function verify_same_sha_for_unshimmed() { @@ -202,7 +202,7 @@ time ( while read unshimmed_class; do verify_same_sha_for_unshimmed "$unshimmed_class" done < "$UNSHIMMED_LIST_TXT" -) 2>&1 +) # Remove unshimmed classes from parallel worlds # TODO rework with low priority, only a few classes. @@ -215,10 +215,10 @@ time ( [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true done >> "$DELETE_DUPLICATES_TXT" done < "$UNSHIMMED_LIST_TXT" -) 2>&1 +) echo "$((++STEP))/ deleting all class files listed in $DELETE_DUPLICATES_TXT" -time (< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm) 2>&1 +time (< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm) end_time=$(date +%s) echo "binary-dedupe completed in $((end_time - start_time)) seconds" diff --git a/pom.xml b/pom.xml index 13aac964269..16bef167794 100644 --- a/pom.xml +++ b/pom.xml @@ -970,16 +970,28 @@ generate-resources - + - + + + + + + + + + run diff --git a/shims/spark301/pom.xml b/shims/spark301/pom.xml index a8a22de7a24..5a9f6f02831 100644 --- a/shims/spark301/pom.xml +++ b/shims/spark301/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark301db/pom.xml b/shims/spark301db/pom.xml index 0567c462f86..fb540d06178 100644 --- a/shims/spark301db/pom.xml +++ b/shims/spark301db/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml index 31bd34f9262..bceedb1b372 100644 --- a/shims/spark302/pom.xml +++ b/shims/spark302/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark303/pom.xml b/shims/spark303/pom.xml index 82254cc367c..ee65c047e9c 100644 --- a/shims/spark303/pom.xml +++ b/shims/spark303/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark304/pom.xml b/shims/spark304/pom.xml index 07e8b027f9e..b0b3eb3b203 100644 --- a/shims/spark304/pom.xml +++ b/shims/spark304/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark311/pom.xml b/shims/spark311/pom.xml index 471d317b6ac..6bef2e2d089 100644 --- a/shims/spark311/pom.xml +++ b/shims/spark311/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark311cdh/pom.xml b/shims/spark311cdh/pom.xml index 64a135928b3..a565edd8b64 100644 --- a/shims/spark311cdh/pom.xml +++ b/shims/spark311cdh/pom.xml @@ -45,15 +45,26 @@ dependency generate-resources - + - + + + + + + + + run diff --git a/shims/spark312/pom.xml b/shims/spark312/pom.xml index 7c7a5f95929..e853b711101 100644 --- a/shims/spark312/pom.xml +++ b/shims/spark312/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark312db/pom.xml b/shims/spark312db/pom.xml index a9bf1e982c7..2fe9f64622b 100644 --- a/shims/spark312db/pom.xml +++ b/shims/spark312db/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark313/pom.xml b/shims/spark313/pom.xml index e0d5a220525..5084ceaa5ad 100644 --- a/shims/spark313/pom.xml +++ b/shims/spark313/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark320/pom.xml b/shims/spark320/pom.xml index 136cc26f70f..eec4027c1c7 100644 --- a/shims/spark320/pom.xml +++ b/shims/spark320/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark321/pom.xml b/shims/spark321/pom.xml index 991f7830cc3..adba6b4d126 100644 --- a/shims/spark321/pom.xml +++ b/shims/spark321/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark330/pom.xml b/shims/spark330/pom.xml index 58f8af3ae4c..1e31f279607 100644 --- a/shims/spark330/pom.xml +++ b/shims/spark330/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + From 416ee95ab507643edaa438f70b69d485357bb820 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 9 Dec 2021 11:51:23 +0800 Subject: [PATCH 2/4] Update Signed-off-by: Chong Gao --- dist/maven-antrun/build-parallel-worlds.xml | 28 ++++++++++----------- pom.xml | 2 +- shims/spark301/pom.xml | 2 +- shims/spark301db/pom.xml | 2 +- shims/spark302/pom.xml | 2 +- shims/spark303/pom.xml | 2 +- shims/spark304/pom.xml | 2 +- shims/spark311/pom.xml | 2 +- shims/spark311cdh/pom.xml | 2 +- shims/spark312/pom.xml | 2 +- shims/spark312db/pom.xml | 2 +- shims/spark313/pom.xml | 2 +- shims/spark320/pom.xml | 2 +- shims/spark321/pom.xml | 2 +- shims/spark330/pom.xml | 2 +- 15 files changed, 28 insertions(+), 28 deletions(-) diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml index f63194f05d6..b4025fdb555 100644 --- a/dist/maven-antrun/build-parallel-worlds.xml +++ b/dist/maven-antrun/build-parallel-worlds.xml @@ -122,6 +122,19 @@ + + + + + + + + + - + @@ -138,19 +151,6 @@ - - - - - - - - - Generating dependency-reduced-pom.xml <dependency> diff --git a/pom.xml b/pom.xml index 16bef167794..431ab15ec6a 100644 --- a/pom.xml +++ b/pom.xml @@ -982,7 +982,7 @@ - + diff --git a/shims/spark301/pom.xml b/shims/spark301/pom.xml index 5a9f6f02831..d29930fc44b 100644 --- a/shims/spark301/pom.xml +++ b/shims/spark301/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark301db/pom.xml b/shims/spark301db/pom.xml index fb540d06178..a8de161c41b 100644 --- a/shims/spark301db/pom.xml +++ b/shims/spark301db/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml index bceedb1b372..48da472b549 100644 --- a/shims/spark302/pom.xml +++ b/shims/spark302/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark303/pom.xml b/shims/spark303/pom.xml index ee65c047e9c..39338681a3a 100644 --- a/shims/spark303/pom.xml +++ b/shims/spark303/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark304/pom.xml b/shims/spark304/pom.xml index b0b3eb3b203..4fdf6e325b6 100644 --- a/shims/spark304/pom.xml +++ b/shims/spark304/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark311/pom.xml b/shims/spark311/pom.xml index 6bef2e2d089..7b896b92eee 100644 --- a/shims/spark311/pom.xml +++ b/shims/spark311/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark311cdh/pom.xml b/shims/spark311cdh/pom.xml index a565edd8b64..350aea2e820 100644 --- a/shims/spark311cdh/pom.xml +++ b/shims/spark311cdh/pom.xml @@ -58,7 +58,7 @@ - + diff --git a/shims/spark312/pom.xml b/shims/spark312/pom.xml index e853b711101..bbde64f9319 100644 --- a/shims/spark312/pom.xml +++ b/shims/spark312/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark312db/pom.xml b/shims/spark312db/pom.xml index 2fe9f64622b..f7ce3e66bf6 100644 --- a/shims/spark312db/pom.xml +++ b/shims/spark312db/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark313/pom.xml b/shims/spark313/pom.xml index 5084ceaa5ad..e6005ccc5e8 100644 --- a/shims/spark313/pom.xml +++ b/shims/spark313/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark320/pom.xml b/shims/spark320/pom.xml index eec4027c1c7..50cf19a533d 100644 --- a/shims/spark320/pom.xml +++ b/shims/spark320/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark321/pom.xml b/shims/spark321/pom.xml index adba6b4d126..312dba6e83a 100644 --- a/shims/spark321/pom.xml +++ b/shims/spark321/pom.xml @@ -57,7 +57,7 @@ - + diff --git a/shims/spark330/pom.xml b/shims/spark330/pom.xml index 1e31f279607..dce1c07afa3 100644 --- a/shims/spark330/pom.xml +++ b/shims/spark330/pom.xml @@ -57,7 +57,7 @@ - + From 688faa5fb70019338ca38ea698eb5e32cb3d2e8b Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 9 Dec 2021 12:02:37 +0800 Subject: [PATCH 3/4] Remove diagnostic info Signed-off-by: Chong Gao --- dist/scripts/binary-dedupe.sh | 85 ++++++++++++++--------------------- 1 file changed, 33 insertions(+), 52 deletions(-) diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index 9a910dac8da..1b3230a277c 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -56,30 +56,22 @@ echo "Retrieving class files hashing to a single value ..." echo "$((++STEP))/ SHA1 of all classes > tmp-sha1-class.txt" -time ( - find ./parallel-world/spark3* -type f -name '*.class' | \ - xargs $SHASUM > tmp-sha1-class.txt -) +find ./parallel-world/spark3* -type f -name '*.class' | \ + xargs $SHASUM > tmp-sha1-class.txt echo "$((++STEP))/ make shim column 1 > tmp-shim-sha-package-class.txt" -time ( - < tmp-sha1-class.txt awk -F/ '$1=$1' | \ - awk '{checksum=$1; shim=$4; $1=shim; $2=$3=""; $4=checksum; print $0}' | \ - tr -s ' ' > tmp-shim-sha-package-class.txt -) +< tmp-sha1-class.txt awk -F/ '$1=$1' | \ + awk '{checksum=$1; shim=$4; $1=shim; $2=$3=""; $4=checksum; print $0}' | \ + tr -s ' ' > tmp-shim-sha-package-class.txt echo "$((++STEP))/ sort by path, sha1; output first from each group > tmp-count-shim-sha-package-class.txt" -time ( - sort -k3 -k2,2 -u tmp-shim-sha-package-class.txt | \ - uniq -f 2 -c > tmp-count-shim-sha-package-class.txt -) +sort -k3 -k2,2 -u tmp-shim-sha-package-class.txt | \ + uniq -f 2 -c > tmp-count-shim-sha-package-class.txt echo "$((++STEP))/ class files with unique sha1 > $SPARK3XX_COMMON_TXT" -time ( - grep '^\s\+1 .*' tmp-count-shim-sha-package-class.txt | \ - awk '{$1=""; $3=""; print $0 }' | \ - tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" -) +grep '^\s\+1 .*' tmp-count-shim-sha-package-class.txt | \ + awk '{$1=""; $3=""; print $0 }' | \ + tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" function retain_single_copy() { set -e @@ -119,23 +111,19 @@ rm -rf "$SPARK3XX_COMMON_DIR" mkdir -p "$SPARK3XX_COMMON_DIR" echo "$((++STEP))/ retaining a single copy of spark3xx-common classes" -time ( - while read spark_common_class; do - retain_single_copy "$spark_common_class" - done < "$SPARK3XX_COMMON_TXT" -) +while read spark_common_class; do + retain_single_copy "$spark_common_class" +done < "$SPARK3XX_COMMON_TXT" echo "$((++STEP))/ rsyncing common classes to $SPARK3XX_COMMON_DIR" -time ( - for copy_list in from-spark3*-to-spark3xx-common.txt; do - echo Initializing rsync of "$copy_list" - IFS='-' <<< "$copy_list" read -ra copy_list_parts - # declare -p copy_list_parts - shim="${copy_list_parts[1]}" - # use rsync to reduce process forking - rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" - done -) +for copy_list in from-spark3*-to-spark3xx-common.txt; do + echo Initializing rsync of "$copy_list" + IFS='-' <<< "$copy_list" read -ra copy_list_parts + # declare -p copy_list_parts + shim="${copy_list_parts[1]}" + # use rsync to reduce process forking + rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" +done mv "$SPARK3XX_COMMON_DIR" parallel-world/ @@ -169,11 +157,8 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/ # Determine the list of unshimmed class files UNSHIMMED_LIST_TXT=unshimmed-result.txt echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_TXT" -time ( - find . -name '*.class' -not -path './parallel-world/spark3*' | \ - cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" -) - +find . -name '*.class' -not -path './parallel-world/spark3*' | \ + cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" function verify_same_sha_for_unshimmed() { set -e @@ -198,27 +183,23 @@ function verify_same_sha_for_unshimmed() { } echo "$((++STEP))/ verifying unshimmed classes have unique sha1 across shims" -time ( - while read unshimmed_class; do - verify_same_sha_for_unshimmed "$unshimmed_class" - done < "$UNSHIMMED_LIST_TXT" -) +while read unshimmed_class; do + verify_same_sha_for_unshimmed "$unshimmed_class" +done < "$UNSHIMMED_LIST_TXT" # Remove unshimmed classes from parallel worlds # TODO rework with low priority, only a few classes. echo "$((++STEP))/ removing duplicates of unshimmed classes" -time ( - while read unshimmed_class; do - for pw in ./parallel-world/spark3* ; do - unshimmed_path="$pw/$unshimmed_class" - [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true - done >> "$DELETE_DUPLICATES_TXT" - done < "$UNSHIMMED_LIST_TXT" -) +while read unshimmed_class; do + for pw in ./parallel-world/spark3* ; do + unshimmed_path="$pw/$unshimmed_class" + [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true + done >> "$DELETE_DUPLICATES_TXT" +done < "$UNSHIMMED_LIST_TXT" echo "$((++STEP))/ deleting all class files listed in $DELETE_DUPLICATES_TXT" -time (< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm) +< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm end_time=$(date +%s) echo "binary-dedupe completed in $((end_time - start_time)) seconds" From 101963ad961245aa4266aef085ad39278dfd88bf Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Mon, 13 Dec 2021 15:27:49 +0800 Subject: [PATCH 4/4] Update Signed-off-by: Chong Gao --- dist/maven-antrun/build-parallel-worlds.xml | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml index b4025fdb555..145802f71e3 100644 --- a/dist/maven-antrun/build-parallel-worlds.xml +++ b/dist/maven-antrun/build-parallel-worlds.xml @@ -122,19 +122,6 @@ - - - - - - - - - + + + + + + + + + Generating dependency-reduced-pom.xml <dependency>