-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add complex return style detection and
cblas_*_sub
workaround
When using `libmkl_rt`, it tends to provide the intel interface rather than the gfortran interface, which causes `{c,z}dot{u,c}` to expect an implicit argument to store the return value in. We provide some autodetection of this, and build some wrappers to work around this for the eight functions affected. In MKL v2022.0, special ILP64-suffixed symbols are provided for the FORTRAN symbols, but not for the CBLAS ones. While we tend to use the FORTRAN symbols for most tasks, there are a few CBLAS symbols we use in the Julia world, presumably because of the above ABI problem. Now that that is fixed, we can likely stop using the CBLAS methods completely and use only the FORTRAN symbols, but for completeness, we detect missing ILP64-suffixed versions as well and forward them to the appropriate FORTRAN symbols. Note that a fully complete mapping of CBLAS -> FORTRAN symbols would be ideal here, however that is a large undertaking, and so we choose here to be selective in which functions we provide mappings for.
- Loading branch information
1 parent
f64f859
commit 0829248
Showing
18 changed files
with
689 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#include <complex.h> | ||
#include "libblastrampoline_internal.h" | ||
|
||
/* | ||
* Some libraries provide ILP64-suffixed FORTRAN symbols, but forgot the CBLAS ones. | ||
* To allow Julia to still use `cblas_{c,z}dot{c,u}_sub` when linking against the | ||
* explicitly ILP64-suffixed MKL libraries, we map the CBLAS forwards to the FORTRAN | ||
 * symbols, where appropriate. This affects MKL v2022.0, x-ref: | ||
* - https://github.com/JuliaLinearAlgebra/libblastrampoline/issues/56 | ||
*/ | ||
|
||
/* FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern double complex zdotc_(const int32_t *n,
                             const double complex *x, const int32_t *incx,
                             const double complex *y, const int32_t *incy);

/*
 * CBLAS-style `zdotc_sub` forwarder built on top of the FORTRAN symbol `zdotc_`.
 *
 * N, incX and incY arrive by value and are passed on to `zdotc_` by address;
 * X and Y are forwarded untouched. The value `zdotc_` returns is written to `*z`.
 */
void lbt_cblas_zdotc_sub(const int32_t N,
                         const double complex *X, const int32_t incX,
                         const double complex *Y, const int32_t incY,
                         double complex * z)
{
    const double complex dot = zdotc_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
/* ILP64-suffixed FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern double complex zdotc_64_(const int64_t *n,
                                const double complex *x, const int64_t *incx,
                                const double complex *y, const int64_t *incy);

/*
 * CBLAS-style `zdotc_sub` forwarder (64-bit index variant) built on top of
 * the FORTRAN symbol `zdotc_64_`.
 *
 * N, incX and incY arrive by value and are passed on to `zdotc_64_` by address;
 * X and Y are forwarded untouched. The value `zdotc_64_` returns is written to `*z`.
 */
void lbt_cblas_zdotc_sub64_(const int64_t N,
                            const double complex *X, const int64_t incX,
                            const double complex *Y, const int64_t incY,
                            double complex * z)
{
    const double complex dot = zdotc_64_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
|
||
/* FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern double complex zdotu_(const int32_t *n,
                             const double complex *x, const int32_t *incx,
                             const double complex *y, const int32_t *incy);

/*
 * CBLAS-style `zdotu_sub` forwarder built on top of the FORTRAN symbol `zdotu_`.
 *
 * N, incX and incY arrive by value and are passed on to `zdotu_` by address;
 * X and Y are forwarded untouched. The value `zdotu_` returns is written to `*z`.
 */
void lbt_cblas_zdotu_sub(const int32_t N,
                         const double complex *X, const int32_t incX,
                         const double complex *Y, const int32_t incY,
                         double complex * z)
{
    const double complex dot = zdotu_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
/* ILP64-suffixed FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern double complex zdotu_64_(const int64_t *n,
                                const double complex *x, const int64_t *incx,
                                const double complex *y, const int64_t *incy);

/*
 * CBLAS-style `zdotu_sub` forwarder (64-bit index variant) built on top of
 * the FORTRAN symbol `zdotu_64_`.
 *
 * N, incX and incY arrive by value and are passed on to `zdotu_64_` by address;
 * X and Y are forwarded untouched. The value `zdotu_64_` returns is written to `*z`.
 */
void lbt_cblas_zdotu_sub64_(const int64_t N,
                            const double complex *X, const int64_t incX,
                            const double complex *Y, const int64_t incY,
                            double complex * z)
{
    const double complex dot = zdotu_64_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
/* FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern float complex cdotc_(const int32_t *n,
                            const float complex *x, const int32_t *incx,
                            const float complex *y, const int32_t *incy);

/*
 * CBLAS-style `cdotc_sub` forwarder built on top of the FORTRAN symbol `cdotc_`.
 *
 * N, incX and incY arrive by value and are passed on to `cdotc_` by address;
 * X and Y are forwarded untouched. The value `cdotc_` returns is written to `*z`.
 */
void lbt_cblas_cdotc_sub(const int32_t N,
                         const float complex *X, const int32_t incX,
                         const float complex *Y, const int32_t incY,
                         float complex * z)
{
    const float complex dot = cdotc_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
/* ILP64-suffixed FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern float complex cdotc_64_(const int64_t *n,
                               const float complex *x, const int64_t *incx,
                               const float complex *y, const int64_t *incy);

/*
 * CBLAS-style `cdotc_sub` forwarder (64-bit index variant) built on top of
 * the FORTRAN symbol `cdotc_64_`.
 *
 * N, incX and incY arrive by value and are passed on to `cdotc_64_` by address;
 * X and Y are forwarded untouched. The value `cdotc_64_` returns is written to `*z`.
 */
void lbt_cblas_cdotc_sub64_(const int64_t N,
                            const float complex *X, const int64_t incX,
                            const float complex *Y, const int64_t incY,
                            float complex * z)
{
    const float complex dot = cdotc_64_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
|
||
/* FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern float complex cdotu_(const int32_t *n,
                            const float complex *x, const int32_t *incx,
                            const float complex *y, const int32_t *incy);

/*
 * CBLAS-style `cdotu_sub` forwarder built on top of the FORTRAN symbol `cdotu_`.
 *
 * N, incX and incY arrive by value and are passed on to `cdotu_` by address;
 * X and Y are forwarded untouched. The value `cdotu_` returns is written to `*z`.
 */
void lbt_cblas_cdotu_sub(const int32_t N,
                         const float complex *X, const int32_t incX,
                         const float complex *Y, const int32_t incY,
                         float complex * z)
{
    const float complex dot = cdotu_(&N, X, &incX, Y, &incY);
    *z = dot;
}
|
||
/* ILP64-suffixed FORTRAN-interface symbol, resolved elsewhere; hands its result back by value. */
extern float complex cdotu_64_(const int64_t *n,
                               const float complex *x, const int64_t *incx,
                               const float complex *y, const int64_t *incy);

/*
 * CBLAS-style `cdotu_sub` forwarder (64-bit index variant) built on top of
 * the FORTRAN symbol `cdotu_64_`.
 *
 * N, incX and incY arrive by value and are passed on to `cdotu_64_` by address;
 * X and Y are forwarded untouched. The value `cdotu_64_` returns is written to `*z`.
 */
void lbt_cblas_cdotu_sub64_(const int64_t N,
                            const float complex *X, const int64_t incX,
                            const float complex *Y, const int64_t incY,
                            float complex * z)
{
    const float complex dot = cdotu_64_(&N, X, &incX, Y, &incY);
    *z = dot;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#include <complex.h> | ||
#include "libblastrampoline_internal.h" | ||
|
||
/* | ||
* Some libraries use an argument-passing convention for returning complex numbers. | ||
* We create wrappers to work around this behavior and provide a consistent ABI | ||
* across all libraries. An example of this style of library is MKL, x-ref: | ||
* - https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/ARPACK-with-MKL-crashes-when-calling-zdotc/td-p/1054316 | ||
* - https://scicomp.stackexchange.com/questions/5380/intel-mkl-difference-between-mkl-intel-lp64-and-mkl-gf-lp64 | ||
*/ | ||
|
||
|
||
// zdotc | ||
/*
 * Function pointer (defined elsewhere) to a `zdotc_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 */
extern void (*cmplxret_zdotc__addr)(double complex *result,
                                    const int32_t *n,
                                    const double complex *x, const int32_t *incx,
                                    const double complex *y, const int32_t *incy);

/*
 * Return-by-value adapter for `zdotc_`.
 *
 * Calls through `cmplxret_zdotc__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
double complex cmplxret_zdotc_(const int32_t * N,
                               const double complex *X, const int32_t * incX,
                               const double complex *Y, const int32_t * incY)
{
    double complex result;
    (*cmplxret_zdotc__addr)(&result, N, X, incX, Y, incY);
    return result;
}
|
||
/*
 * Function pointer (defined elsewhere) to a `zdotc_64_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 */
extern void (*cmplxret_zdotc_64__addr)(double complex *result,
                                       const int64_t *n,
                                       const double complex *x, const int64_t *incx,
                                       const double complex *y, const int64_t *incy);

/*
 * Return-by-value adapter for `zdotc_64_`.
 *
 * Calls through `cmplxret_zdotc_64__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
double complex cmplxret_zdotc_64_(const int64_t * N,
                                  const double complex *X, const int64_t * incX,
                                  const double complex *Y, const int64_t * incY)
{
    double complex result;
    (*cmplxret_zdotc_64__addr)(&result, N, X, incX, Y, incY);
    return result;
}
|
||
|
||
// zdotu | ||
/*
 * Function pointer (defined elsewhere) to a `zdotu_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters in this file. Calling through the un-prefixed `zdotu__addr` would
 * recurse into this adapter if that pointer is the public forwarding slot.
 * NOTE(review): assumes `cmplxret_zdotu__addr` is defined alongside
 * `cmplxret_zdotc__addr` — confirm.
 */
extern void (*cmplxret_zdotu__addr)(double complex * z,
                                    const int32_t *,
                                    const double complex *, const int32_t *,
                                    const double complex *, const int32_t *);

/*
 * Return-by-value adapter for `zdotu_`.
 *
 * Calls through `cmplxret_zdotu__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
double complex cmplxret_zdotu_(const int32_t * N,
                               const double complex *X, const int32_t * incX,
                               const double complex *Y, const int32_t * incY)
{
    double complex c;
    cmplxret_zdotu__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
||
/*
 * Function pointer (defined elsewhere) to a `zdotu_64_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters in this file. Calling through the un-prefixed `zdotu_64__addr` would
 * recurse into this adapter if that pointer is the public forwarding slot.
 * NOTE(review): assumes `cmplxret_zdotu_64__addr` is defined alongside
 * `cmplxret_zdotc_64__addr` — confirm.
 */
extern void (*cmplxret_zdotu_64__addr)(double complex * z,
                                       const int64_t *,
                                       const double complex *, const int64_t *,
                                       const double complex *, const int64_t *);

/*
 * Return-by-value adapter for `zdotu_64_`.
 *
 * Calls through `cmplxret_zdotu_64__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
double complex cmplxret_zdotu_64_(const int64_t * N,
                                  const double complex *X, const int64_t * incX,
                                  const double complex *Y, const int64_t * incY)
{
    double complex c;
    cmplxret_zdotu_64__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
||
|
||
// cdotc | ||
/*
 * Function pointer (defined elsewhere) to a `cdotc_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters in this file. Calling through the un-prefixed `cdotc__addr` would
 * recurse into this adapter if that pointer is the public forwarding slot.
 * NOTE(review): assumes `cmplxret_cdotc__addr` is defined alongside
 * `cmplxret_zdotc__addr` — confirm.
 */
extern void (*cmplxret_cdotc__addr)(float complex * z,
                                    const int32_t *,
                                    const float complex *, const int32_t *,
                                    const float complex *, const int32_t *);

/*
 * Return-by-value adapter for `cdotc_`.
 *
 * Calls through `cmplxret_cdotc__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
float complex cmplxret_cdotc_(const int32_t * N,
                              const float complex *X, const int32_t * incX,
                              const float complex *Y, const int32_t * incY)
{
    float complex c;
    cmplxret_cdotc__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
||
/*
 * Function pointer (defined elsewhere) to a `cdotc_64_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * This must be declared as a pointer-to-function, like the other adapters in
 * this file. Declaring it as a plain function would make the call below jump
 * to the pointer variable's storage rather than to the function it points at.
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters.
 * NOTE(review): assumes `cmplxret_cdotc_64__addr` is defined alongside
 * `cmplxret_zdotc_64__addr` — confirm.
 */
extern void (*cmplxret_cdotc_64__addr)(float complex * z,
                                       const int64_t *,
                                       const float complex *, const int64_t *,
                                       const float complex *, const int64_t *);

/*
 * Return-by-value adapter for `cdotc_64_`.
 *
 * Calls through `cmplxret_cdotc_64__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
float complex cmplxret_cdotc_64_(const int64_t * N,
                                 const float complex *X, const int64_t * incX,
                                 const float complex *Y, const int64_t * incY)
{
    float complex c;
    cmplxret_cdotc_64__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
||
|
||
// cdotu | ||
/*
 * Function pointer (defined elsewhere) to a `cdotu_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters in this file. Calling through the un-prefixed `cdotu__addr` would
 * recurse into this adapter if that pointer is the public forwarding slot.
 * NOTE(review): assumes `cmplxret_cdotu__addr` is defined alongside
 * `cmplxret_zdotc__addr` — confirm.
 */
extern void (*cmplxret_cdotu__addr)(float complex * z,
                                    const int32_t *,
                                    const float complex *, const int32_t *,
                                    const float complex *, const int32_t *);

/*
 * Return-by-value adapter for `cdotu_`.
 *
 * Calls through `cmplxret_cdotu__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
float complex cmplxret_cdotu_(const int32_t * N,
                              const float complex *X, const int32_t * incX,
                              const float complex *Y, const int32_t * incY)
{
    float complex c;
    cmplxret_cdotu__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
||
/*
 * Function pointer (defined elsewhere) to a `cdotu_64_` implementation that
 * writes its result through a leading pointer argument instead of returning it.
 *
 * The name follows the `cmplxret_<symbol>_addr` pattern used by the zdotc
 * adapters in this file. Calling through the un-prefixed `cdotu_64__addr` would
 * recurse into this adapter if that pointer is the public forwarding slot.
 * NOTE(review): assumes `cmplxret_cdotu_64__addr` is defined alongside
 * `cmplxret_zdotc_64__addr` — confirm.
 */
extern void (*cmplxret_cdotu_64__addr)(float complex * z,
                                       const int64_t *,
                                       const float complex *, const int64_t *,
                                       const float complex *, const int64_t *);

/*
 * Return-by-value adapter for `cdotu_64_`.
 *
 * Calls through `cmplxret_cdotu_64__addr`, giving it a local slot to store the
 * result in ahead of the regular arguments, then returns that slot's contents.
 */
float complex cmplxret_cdotu_64_(const int64_t * N,
                                 const float complex *X, const int64_t * incX,
                                 const float complex *Y, const int64_t * incY)
{
    float complex c;
    cmplxret_cdotu_64__addr(&c, N, X, incX, Y, incY);
    return c;
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.