From 95dca2dccc15047d3cca317c8264e22025f6f51b Mon Sep 17 00:00:00 2001 From: Sebastian Achilles Date: Tue, 28 Nov 2023 12:08:11 +0100 Subject: [PATCH] add patch to fix [cz]asums in OpenBLAS 0.3.24 --- .../o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb | 3 + .../OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch | 76 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb index eb7737a76aa..be80c46e97c 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb @@ -20,6 +20,7 @@ patches = [ 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.23_fix-lapack-test.patch', 'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch', + 'OpenBLAS-0.3.24_fix-czasum.patch', ] checksums = [ {'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'}, @@ -34,6 +35,8 @@ checksums = [ {'OpenBLAS-0.3.23_fix-lapack-test.patch': 'f6b3d81061f136e34aaf5359bb80fb9d2bba28825cc1dd26179b8dd01a9a0054'}, {'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch': '36a16b4d3b867897413b43b774f8b57d641ad9a1b452e9de33ced198ab25e461'}, + {'OpenBLAS-0.3.24_fix-czasum.patch': + '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'}, ] builddependencies = [ diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch new file mode 100644 index 00000000000..ae926d20167 --- /dev/null +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch @@ -0,0 +1,76 @@ +From f8ad5344c210960fc399ca5b0ad8559ab5ca253e Mon Sep 17 00:00:00 2001 +From: Bart Oldeman +Date: Fri, 17 Nov 2023 23:49:34 +0000 +Subject: [PATCH] Fix casum fallback kernel. + +This kernel is only used on Skylake+ if the kernel with AVX512 +intrinsics can't be used, but used the variable x1 incorrectly +in the tail end of the loop, as it is still at the initial +value instead of where x points to. + +This caused 55 "other error"s in the LAPACK tests +(https://github.com/OpenMathLib/OpenBLAS/issues/4282) + +This change makes casum.c as similar as possible as zasum.c, +because zasum.c does this correctly. +--- + kernel/x86_64/casum.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c +index e4d0543114..28d78d2797 100644 +--- a/kernel/x86_64/casum.c ++++ b/kernel/x86_64/casum.c +@@ -9,12 +9,12 @@ + #endif + + #ifndef HAVE_CASUM_KERNEL +-static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) ++static FLOAT casum_kernel(BLASLONG n, FLOAT *x) + { + + BLASLONG i=0; + BLASLONG n_8 = n & -8; +- FLOAT *x = x1; ++ FLOAT *x1 = x; + FLOAT temp0, temp1, temp2, temp3; + FLOAT temp4, temp5, temp6, temp7; + FLOAT sum0 = 0.0; +@@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) + FLOAT sum4 = 0.0; + + while (i < n_8) { +- temp0 = ABS_K(x[0]); +- temp1 = ABS_K(x[1]); +- temp2 = ABS_K(x[2]); +- temp3 = ABS_K(x[3]); +- temp4 = ABS_K(x[4]); +- temp5 = ABS_K(x[5]); +- temp6 = ABS_K(x[6]); +- temp7 = ABS_K(x[7]); ++ temp0 = ABS_K(x1[0]); ++ temp1 = ABS_K(x1[1]); ++ temp2 = ABS_K(x1[2]); ++ temp3 = ABS_K(x1[3]); ++ temp4 = ABS_K(x1[4]); ++ temp5 = ABS_K(x1[5]); ++ temp6 = ABS_K(x1[6]); ++ temp7 = ABS_K(x1[7]); + + sum0 += temp0; + sum1 += temp1; +@@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) + sum2 += temp6; + sum3 += temp7; + +- x+=8; ++ x1+=8; + i+=4; + } + + while (i < n) { +- sum4 += (ABS_K(x1[0]) + ABS_K(x1[1])); ++ sum4 += ABS_K(x1[0]) + ABS_K(x1[1]); + x1 += 2; + i++; + }