Skip to content

Commit

Permalink
Merge pull request #19331 from SebastianAchilles/20231128120809_new_pr_OpenBLAS0324
Browse files Browse the repository at this point in the history

add patch to fix [cz]asums in OpenBLAS 0.3.24
  • Loading branch information
boegel authored Dec 6, 2023
2 parents c5003c0 + 95dca2d commit b4964d0
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ patches = [
'OpenBLAS-0.3.21_fix-order-vectorization.patch',
'OpenBLAS-0.3.23_fix-lapack-test.patch',
'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch',
'OpenBLAS-0.3.24_fix-czasum.patch',
]
checksums = [
{'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'},
Expand All @@ -34,6 +35,8 @@ checksums = [
{'OpenBLAS-0.3.23_fix-lapack-test.patch': 'f6b3d81061f136e34aaf5359bb80fb9d2bba28825cc1dd26179b8dd01a9a0054'},
{'OpenBLAS-0.3.23_disable-DDRGES3-LAPACK-test.patch':
'36a16b4d3b867897413b43b774f8b57d641ad9a1b452e9de33ced198ab25e461'},
{'OpenBLAS-0.3.24_fix-czasum.patch':
'8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'},
]

builddependencies = [
Expand Down
76 changes: 76 additions & 0 deletions easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
From f8ad5344c210960fc399ca5b0ad8559ab5ca253e Mon Sep 17 00:00:00 2001
From: Bart Oldeman <bart.oldeman@calculquebec.ca>
Date: Fri, 17 Nov 2023 23:49:34 +0000
Subject: [PATCH] Fix casum fallback kernel.

This kernel is only used on Skylake+ if the kernel with AVX512
intrinsics can't be used, but it used the variable x1 incorrectly
in the tail end of the loop: x1 was still at its initial value
instead of where x points to.

This caused 55 "other error"s in the LAPACK tests
(https://github.com/OpenMathLib/OpenBLAS/issues/4282)

This change makes casum.c as similar as possible to zasum.c,
because zasum.c does this correctly.
---
kernel/x86_64/casum.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c
index e4d0543114..28d78d2797 100644
--- a/kernel/x86_64/casum.c
+++ b/kernel/x86_64/casum.c
@@ -9,12 +9,12 @@
#endif

#ifndef HAVE_CASUM_KERNEL
-static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
+static FLOAT casum_kernel(BLASLONG n, FLOAT *x)
{

BLASLONG i=0;
BLASLONG n_8 = n & -8;
- FLOAT *x = x1;
+ FLOAT *x1 = x;
FLOAT temp0, temp1, temp2, temp3;
FLOAT temp4, temp5, temp6, temp7;
FLOAT sum0 = 0.0;
@@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
FLOAT sum4 = 0.0;

while (i < n_8) {
- temp0 = ABS_K(x[0]);
- temp1 = ABS_K(x[1]);
- temp2 = ABS_K(x[2]);
- temp3 = ABS_K(x[3]);
- temp4 = ABS_K(x[4]);
- temp5 = ABS_K(x[5]);
- temp6 = ABS_K(x[6]);
- temp7 = ABS_K(x[7]);
+ temp0 = ABS_K(x1[0]);
+ temp1 = ABS_K(x1[1]);
+ temp2 = ABS_K(x1[2]);
+ temp3 = ABS_K(x1[3]);
+ temp4 = ABS_K(x1[4]);
+ temp5 = ABS_K(x1[5]);
+ temp6 = ABS_K(x1[6]);
+ temp7 = ABS_K(x1[7]);

sum0 += temp0;
sum1 += temp1;
@@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1)
sum2 += temp6;
sum3 += temp7;

- x+=8;
+ x1+=8;
i+=4;
}

while (i < n) {
- sum4 += (ABS_K(x1[0]) + ABS_K(x1[1]));
+ sum4 += ABS_K(x1[0]) + ABS_K(x1[1]);
x1 += 2;
i++;
}

0 comments on commit b4964d0

Please sign in to comment.