diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml index 32b77f69480..145802f71e3 100644 --- a/dist/maven-antrun/build-parallel-worlds.xml +++ b/dist/maven-antrun/build-parallel-worlds.xml @@ -123,14 +123,33 @@ - + - - - + + + + + + + + + + + + + + + + Generating dependency-reduced-pom.xml diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index 88672e15983..1b3230a277c 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -56,30 +56,22 @@ echo "Retrieving class files hashing to a single value ..." echo "$((++STEP))/ SHA1 of all classes > tmp-sha1-class.txt" -time ( - find ./parallel-world/spark3* -type f -name '*.class' | \ - xargs $SHASUM > tmp-sha1-class.txt -) 2>&1 +find ./parallel-world/spark3* -type f -name '*.class' | \ + xargs $SHASUM > tmp-sha1-class.txt echo "$((++STEP))/ make shim column 1 > tmp-shim-sha-package-class.txt" -time ( - < tmp-sha1-class.txt awk -F/ '$1=$1' | \ - awk '{checksum=$1; shim=$4; $1=shim; $2=$3=""; $4=checksum; print $0}' | \ - tr -s ' ' > tmp-shim-sha-package-class.txt -) 2>&1 +< tmp-sha1-class.txt awk -F/ '$1=$1' | \ + awk '{checksum=$1; shim=$4; $1=shim; $2=$3=""; $4=checksum; print $0}' | \ + tr -s ' ' > tmp-shim-sha-package-class.txt echo "$((++STEP))/ sort by path, sha1; output first from each group > tmp-count-shim-sha-package-class.txt" -time ( - sort -k3 -k2,2 -u tmp-shim-sha-package-class.txt | \ - uniq -f 2 -c > tmp-count-shim-sha-package-class.txt -) 2>&1 +sort -k3 -k2,2 -u tmp-shim-sha-package-class.txt | \ + uniq -f 2 -c > tmp-count-shim-sha-package-class.txt echo "$((++STEP))/ class files with unique sha1 > $SPARK3XX_COMMON_TXT" -time ( - grep '^\s\+1 .*' tmp-count-shim-sha-package-class.txt | \ - awk '{$1=""; $3=""; print $0 }' | \ - tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" -) 2>&1 +grep '^\s\+1 .*' tmp-count-shim-sha-package-class.txt | \ + awk '{$1=""; $3=""; print $0 }' | \ + tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" function retain_single_copy() { set -e @@ -119,23 +111,19 @@ rm -rf "$SPARK3XX_COMMON_DIR" mkdir -p "$SPARK3XX_COMMON_DIR" echo "$((++STEP))/ retaining a single copy of spark3xx-common classes" -time ( - while read spark_common_class; do - retain_single_copy "$spark_common_class" - done < "$SPARK3XX_COMMON_TXT" -) 2>&1 +while read spark_common_class; do + retain_single_copy "$spark_common_class" +done < "$SPARK3XX_COMMON_TXT" echo "$((++STEP))/ rsyncing common classes to $SPARK3XX_COMMON_DIR" -time ( - for copy_list in from-spark3*-to-spark3xx-common.txt; do - echo Initializing rsync of "$copy_list" - IFS='-' <<< "$copy_list" read -ra copy_list_parts - # declare -p copy_list_parts - shim="${copy_list_parts[1]}" - # use rsync to reduce process forking - rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" - done -) 2>&1 +for copy_list in from-spark3*-to-spark3xx-common.txt; do + echo Initializing rsync of "$copy_list" + IFS='-' <<< "$copy_list" read -ra copy_list_parts + # declare -p copy_list_parts + shim="${copy_list_parts[1]}" + # use rsync to reduce process forking + rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" +done mv "$SPARK3XX_COMMON_DIR" parallel-world/ @@ -169,11 +157,8 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/ # Determine the list of unshimmed class files UNSHIMMED_LIST_TXT=unshimmed-result.txt echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_TXT" -time ( - find . -name '*.class' -not -path './parallel-world/spark3*' | \ - cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" -) 2>&1 - +find . -name '*.class' -not -path './parallel-world/spark3*' | \ + cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" function verify_same_sha_for_unshimmed() { set -e @@ -198,27 +183,23 @@ function verify_same_sha_for_unshimmed() { } echo "$((++STEP))/ verifying unshimmed classes have unique sha1 across shims" -time ( - while read unshimmed_class; do - verify_same_sha_for_unshimmed "$unshimmed_class" - done < "$UNSHIMMED_LIST_TXT" -) 2>&1 +while read unshimmed_class; do + verify_same_sha_for_unshimmed "$unshimmed_class" +done < "$UNSHIMMED_LIST_TXT" # Remove unshimmed classes from parallel worlds # TODO rework with low priority, only a few classes. echo "$((++STEP))/ removing duplicates of unshimmed classes" -time ( - while read unshimmed_class; do - for pw in ./parallel-world/spark3* ; do - unshimmed_path="$pw/$unshimmed_class" - [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true - done >> "$DELETE_DUPLICATES_TXT" - done < "$UNSHIMMED_LIST_TXT" -) 2>&1 +while read unshimmed_class; do + for pw in ./parallel-world/spark3* ; do + unshimmed_path="$pw/$unshimmed_class" + [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true + done >> "$DELETE_DUPLICATES_TXT" +done < "$UNSHIMMED_LIST_TXT" echo "$((++STEP))/ deleting all class files listed in $DELETE_DUPLICATES_TXT" -time (< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm) 2>&1 +< "$DELETE_DUPLICATES_TXT" sort -u | xargs rm end_time=$(date +%s) echo "binary-dedupe completed in $((end_time - start_time)) seconds" diff --git a/pom.xml b/pom.xml index 13aac964269..431ab15ec6a 100644 --- a/pom.xml +++ b/pom.xml @@ -970,16 +970,28 @@ generate-resources - + - + + + + + + + + + run diff --git a/shims/spark301/pom.xml b/shims/spark301/pom.xml index a8a22de7a24..d29930fc44b 100644 --- a/shims/spark301/pom.xml +++ b/shims/spark301/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark301db/pom.xml b/shims/spark301db/pom.xml index 0567c462f86..a8de161c41b 100644 --- a/shims/spark301db/pom.xml +++ b/shims/spark301db/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml index 31bd34f9262..48da472b549 100644 --- a/shims/spark302/pom.xml +++ b/shims/spark302/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark303/pom.xml b/shims/spark303/pom.xml index 82254cc367c..39338681a3a 100644 --- a/shims/spark303/pom.xml +++ b/shims/spark303/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark304/pom.xml b/shims/spark304/pom.xml index 07e8b027f9e..4fdf6e325b6 100644 --- a/shims/spark304/pom.xml +++ b/shims/spark304/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark311/pom.xml b/shims/spark311/pom.xml index 471d317b6ac..7b896b92eee 100644 --- a/shims/spark311/pom.xml +++ b/shims/spark311/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark311cdh/pom.xml b/shims/spark311cdh/pom.xml index 64a135928b3..350aea2e820 100644 --- a/shims/spark311cdh/pom.xml +++ b/shims/spark311cdh/pom.xml @@ -45,15 +45,26 @@ dependency generate-resources - + - + + + + + + + + run diff --git a/shims/spark312/pom.xml b/shims/spark312/pom.xml index 7c7a5f95929..bbde64f9319 100644 --- a/shims/spark312/pom.xml +++ b/shims/spark312/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark312db/pom.xml b/shims/spark312db/pom.xml index a9bf1e982c7..f7ce3e66bf6 100644 --- a/shims/spark312db/pom.xml +++ b/shims/spark312db/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark313/pom.xml b/shims/spark313/pom.xml index e0d5a220525..e6005ccc5e8 100644 --- a/shims/spark313/pom.xml +++ b/shims/spark313/pom.xml @@ -45,14 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark320/pom.xml b/shims/spark320/pom.xml index 136cc26f70f..50cf19a533d 100644 --- a/shims/spark320/pom.xml +++ b/shims/spark320/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark321/pom.xml b/shims/spark321/pom.xml index 991f7830cc3..312dba6e83a 100644 --- a/shims/spark321/pom.xml +++ b/shims/spark321/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + + diff --git a/shims/spark330/pom.xml b/shims/spark330/pom.xml index 58f8af3ae4c..dce1c07afa3 100644 --- a/shims/spark330/pom.xml +++ b/shims/spark330/pom.xml @@ -45,15 +45,25 @@ dependency generate-resources - + - + + + + + + + +