-
Notifications
You must be signed in to change notification settings - Fork 705
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #19331 from SebastianAchilles/20231128120809_new_pr_OpenBLAS0324
add patch to fix [cz]asums in OpenBLAS 0.3.24
- Loading branch information
Showing
2 changed files
with
79 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-czasum.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
From f8ad5344c210960fc399ca5b0ad8559ab5ca253e Mon Sep 17 00:00:00 2001 | ||
From: Bart Oldeman <bart.oldeman@calculquebec.ca> | ||
Date: Fri, 17 Nov 2023 23:49:34 +0000 | ||
Subject: [PATCH] Fix casum fallback kernel. | ||
|
||
This kernel is only used on Skylake+ if the kernel with AVX512 | ||
intrinsics can't be used, but it used the variable x1 incorrectly | ||
in the tail loop: x1 was still at its initial value instead of | ||
where x points to after the main loop. | ||
|
||
This caused 55 "other error"s in the LAPACK tests | ||
(https://github.com/OpenMathLib/OpenBLAS/issues/4282) | ||
|
||
This change makes casum.c as similar as possible to zasum.c, | ||
because zasum.c does this correctly. | ||
--- | ||
kernel/x86_64/casum.c | 24 ++++++++++++------------ | ||
1 file changed, 12 insertions(+), 12 deletions(-) | ||
|
||
diff --git a/kernel/x86_64/casum.c b/kernel/x86_64/casum.c | ||
index e4d0543114..28d78d2797 100644 | ||
--- a/kernel/x86_64/casum.c | ||
+++ b/kernel/x86_64/casum.c | ||
@@ -9,12 +9,12 @@ | ||
#endif | ||
|
||
#ifndef HAVE_CASUM_KERNEL | ||
-static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) | ||
+static FLOAT casum_kernel(BLASLONG n, FLOAT *x) | ||
{ | ||
|
||
BLASLONG i=0; | ||
BLASLONG n_8 = n & -8; | ||
- FLOAT *x = x1; | ||
+ FLOAT *x1 = x; | ||
FLOAT temp0, temp1, temp2, temp3; | ||
FLOAT temp4, temp5, temp6, temp7; | ||
FLOAT sum0 = 0.0; | ||
@@ -24,14 +24,14 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) | ||
FLOAT sum4 = 0.0; | ||
|
||
while (i < n_8) { | ||
- temp0 = ABS_K(x[0]); | ||
- temp1 = ABS_K(x[1]); | ||
- temp2 = ABS_K(x[2]); | ||
- temp3 = ABS_K(x[3]); | ||
- temp4 = ABS_K(x[4]); | ||
- temp5 = ABS_K(x[5]); | ||
- temp6 = ABS_K(x[6]); | ||
- temp7 = ABS_K(x[7]); | ||
+ temp0 = ABS_K(x1[0]); | ||
+ temp1 = ABS_K(x1[1]); | ||
+ temp2 = ABS_K(x1[2]); | ||
+ temp3 = ABS_K(x1[3]); | ||
+ temp4 = ABS_K(x1[4]); | ||
+ temp5 = ABS_K(x1[5]); | ||
+ temp6 = ABS_K(x1[6]); | ||
+ temp7 = ABS_K(x1[7]); | ||
|
||
sum0 += temp0; | ||
sum1 += temp1; | ||
@@ -43,12 +43,12 @@ static FLOAT casum_kernel(BLASLONG n, FLOAT *x1) | ||
sum2 += temp6; | ||
sum3 += temp7; | ||
|
||
- x+=8; | ||
+ x1+=8; | ||
i+=4; | ||
} | ||
|
||
while (i < n) { | ||
- sum4 += (ABS_K(x1[0]) + ABS_K(x1[1])); | ||
+ sum4 += ABS_K(x1[0]) + ABS_K(x1[1]); | ||
x1 += 2; | ||
i++; | ||
} |