Skip to content

Commit

Permalink
Allow for overriding workarounds even when autodetection is unavailable
Browse files Browse the repository at this point in the history
If a user wants to use MKL on i686, we unfortunately need some
workarounds for certain function calls.  Until we come up with an
autodetection strategy that works on i686 as well as x86_64, we can make
use of the new environment variable-based overrides to make things work.
Let's test that on i686 on CI.
  • Loading branch information
staticfloat committed Jun 13, 2024
1 parent a59ca22 commit 77227d2
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 49 deletions.
8 changes: 0 additions & 8 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,9 @@ MAIN_OBJS += win_utils.o
endif

# If we're on an architecture that supports f2c autodetection, compile that in!
ifeq ($(F2C_AUTODETECTION),1)
MAIN_OBJS += f2c_adapters.o
endif

ifeq ($(CBLAS_DIVERGENCE_AUTODETECTION),1)
MAIN_OBJS += cblas_adapters.o
endif

ifeq ($(COMPLEX_RETSTYLE_AUTODETECTION),1)
MAIN_OBJS += complex_return_style_adapters.o
endif

# Place the `.o` files into `$(builddir)`
MAIN_OBJS := $(addprefix $(builddir)/,$(MAIN_OBJS))
Expand Down
16 changes: 9 additions & 7 deletions src/autodetection.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ int32_t autodetect_interface(void * handle, const char * suffix) {
return LBT_INTERFACE_UNKNOWN;
}

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_RETSTYLE", "normal")) {
return LBT_COMPLEX_RETSTYLE_NORMAL;
Expand All @@ -217,6 +216,8 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_RETSTYLE", "fnda")) {
return LBT_COMPLEX_RETSTYLE_FNDA;
}

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

build_symbol_name(symbol_name, "zdotc_", suffix);
Expand Down Expand Up @@ -300,22 +301,21 @@ int32_t autodetect_complex_return_style(void * handle, const char * suffix) {
(creal(retval_float) == 0.0f && cimag(retval_float) == 0.0f)) {
return LBT_COMPLEX_RETSTYLE_NORMAL;
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

// If we get here, zdotc and cdotc are being uncooperative and we
// do not appreciate it at all, no we don't, my precious.
return LBT_COMPLEX_RETSTYLE_UNKNOWN;
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

#ifdef F2C_AUTODETECTION
int32_t autodetect_f2c(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_F2C", "plain")) {
return LBT_F2C_PLAIN;
}
if (env_lowercase_match("LBT_FORCE_F2C", "required")) {
return LBT_F2C_REQUIRED;
}

#ifdef F2C_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

// Attempt BLAS `sdot()` test
Expand Down Expand Up @@ -346,12 +346,12 @@ int32_t autodetect_f2c(void * handle, const char * suffix) {
// It's an f2c style calling convention
return LBT_F2C_REQUIRED;
}
#endif // F2C_AUTODETECTION

// We have no idea what happened; nothing works and everything is broken
return LBT_F2C_UNKNOWN;
}
#endif // F2C_AUTODETECTION

#ifdef CBLAS_DIVERGENCE_AUTODETECTION
int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
if (env_lowercase_match("LBT_FORCE_CBLAS", "conformant")) {
return LBT_CBLAS_CONFORMANT;
Expand All @@ -360,6 +360,7 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
return LBT_CBLAS_DIVERGENT;
}

#ifdef CBLAS_DIVERGENCE_AUTODETECTION
char symbol_name[MAX_SYMBOL_LEN];

build_symbol_name(symbol_name, "zdotc_", suffix);
Expand All @@ -382,7 +383,8 @@ int32_t autodetect_cblas_divergence(void * handle, const char * suffix) {
return LBT_CBLAS_DIVERGENT;
}
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION

// If we can't even find `zdotc_64`, we don't know what this is.
return LBT_CBLAS_UNKNOWN;
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION
33 changes: 11 additions & 22 deletions src/libblastrampoline.c
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
#include "libblastrampoline_internal.h"
#include "libblastrampoline_trampdata.h"

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
#include "libblastrampoline_complex_retdata.h"
#endif
#ifdef F2C_AUTODETECTION
#include "libblastrampoline_f2cdata.h"
#endif
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
#include "libblastrampoline_cblasdata.h"
#endif

// Sentinel to tell us if we've got a deepbindless workaround active or not
#define DEEPBINDLESS_INTERFACE_LP64_LOADED 0x01
Expand Down Expand Up @@ -69,7 +62,6 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
}
}

#ifdef COMPLEX_RETSTYLE_AUTODETECTION
for (int array_idx=0; array_idx < sizeof(cmplxret_func_idxs)/sizeof(int *); ++array_idx) {
if ((complex_retstyle == LBT_COMPLEX_RETSTYLE_ARGUMENT) ||
((complex_retstyle == LBT_COMPLEX_RETSTYLE_FNDA) && array_idx == 1)) {
Expand All @@ -96,7 +88,6 @@ int32_t set_forward_by_index(int32_t symbol_idx, const void * addr, int32_t inte
}
}
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

#ifdef F2C_AUTODETECTION
if (f2c == LBT_F2C_REQUIRED) {
Expand Down Expand Up @@ -224,11 +215,12 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v

// Next, let's figure out what the complex return style is:
int complex_retstyle = LBT_COMPLEX_RETSTYLE_UNKNOWN;
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
complex_retstyle = autodetect_complex_return_style(handle, lib_suffix);
if (complex_retstyle == LBT_COMPLEX_RETSTYLE_UNKNOWN) {
fprintf(stderr, "Unable to autodetect complex return style of \"%s\"\n", libname);
return 0;
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
fprintf(stderr, "Unable to autodetect complex return style of \"%s\"\n", libname);
return 0;
#endif // COMPLEX_RETSTYLE_AUTODETECTION
}
if (verbose) {
if (complex_retstyle == LBT_COMPLEX_RETSTYLE_NORMAL) {
Expand All @@ -238,16 +230,16 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
printf(" -> Autodetected argument-passing complex return style\n");
}
}
#endif // COMPLEX_RETSTYLE_AUTODETECTION

int f2c = LBT_F2C_PLAIN;
#ifdef F2C_AUTODETECTION
// Next, we need to probe to see if this is an f2c-style calling convention library
// The only major example of this that we know of is Accelerate on macOS
f2c = autodetect_f2c(handle, lib_suffix);
if (f2c == LBT_F2C_UNKNOWN) {
fprintf(stderr, "Unable to autodetect calling convention of \"%s\"\n", libname);
return 0;
#ifdef F2C_AUTODETECTION
fprintf(stderr, "Unable to autodetect f2c calling convention of \"%s\"\n", libname);
return 0;
#endif // F2C_AUTODETECTION
}
if (verbose) {
if (f2c == LBT_F2C_REQUIRED) {
Expand All @@ -257,10 +249,8 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
printf(" -> Autodetected gfortran calling convention\n");
}
}
#endif // F2C_AUTODETECTION

int cblas = LBT_CBLAS_UNKNOWN;
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
// Next, we need to probe to see if this is MKL v2022 with missing ILP64-suffixed
// CBLAS symbols, but only if it's an ILP64 library.
if (interface == LBT_INTERFACE_ILP64) {
Expand All @@ -274,7 +264,9 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
printf(" -> Autodetected CBLAS-divergent library!\n");
break;
case LBT_CBLAS_UNKNOWN:
printf(" -> CBLAS not found\n");
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
printf(" -> CBLAS not found/autodetection unavailable\n");
#endif // CBLAS_DIVERGENCE_AUTODETECTION
break;
default:
printf(" -> ERROR: Impossible CBLAS detection result: %d\n", cblas);
Expand All @@ -283,7 +275,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
}
}
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION

/*
* Now, if we are opening a 64-bit library with 32-bit names (e.g. suffix == ""),
Expand Down Expand Up @@ -367,7 +358,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
}
}

#ifdef CBLAS_DIVERGENCE_AUTODETECTION
// If we're loading a divergent CBLAS library, we need to scan through all
// CBLAS symbols, and forward them to wrappers which will convert them to
// the FORTRAN equivalents.
Expand All @@ -390,7 +380,6 @@ LBT_DLLEXPORT int32_t lbt_forward(const char * libname, int32_t clear, int32_t v
}
}
}
#endif // CBLAS_DIVERGENCE_AUTODETECTION

record_library_load(libname, handle, lib_suffix, &forwards[0], interface, complex_retstyle, f2c, cblas);
if (verbose) {
Expand Down
7 changes: 0 additions & 7 deletions src/libblastrampoline_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,9 @@ const char * autodetect_symbol_suffix(void * handle, const char * suffix_hint);
int32_t autodetect_blas_interface(void * isamax_addr);
int32_t autodetect_lapack_interface(void * dpotrf_addr);
int32_t autodetect_interface(void * handle, const char * suffix);
#ifdef COMPLEX_RETSTYLE_AUTODETECTION
int32_t autodetect_complex_return_style(void * handle, const char * suffix);
#endif

#ifdef F2C_AUTODETECTION
int32_t autodetect_f2c(void * handle, const char * suffix);
#endif
#ifdef CBLAS_DIVERGENCE_AUTODETECTION
int32_t autodetect_cblas_divergence(void * handle, const char * suffix);
#endif

// Functions in deepbindless_surrogates.c
uint8_t push_fake_lsame();
Expand Down
17 changes: 12 additions & 5 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using Pkg, Artifacts, Base.BinaryPlatforms, Libdl, Test
include("utils.jl")

# Compile `dgemm_test.c` and `sgesv_test.c` against the given BLAS/LAPACK
function run_test((test_name, test_expected_outputs, expect_success), libblas_name, libdirs, interface, backing_libs)
function run_test((test_name, test_expected_outputs, expect_success), libblas_name, libdirs, interface, backing_libs; extra_env = Dict())
# We need to configure this C build a bit
cflags = String[
"-g",
Expand Down Expand Up @@ -50,6 +50,7 @@ function run_test((test_name, test_expected_outputs, expect_success), libblas_na
"LBT_DEFAULT_LIBS" => backing_libs,
"LBT_STRICT" => 1,
"LBT_VERBOSE" => 1,
pairs(extra_env)...,
)
cmd = `$(dir)/$(test_name)`
p, output = capture_output(addenv(cmd, env))
Expand Down Expand Up @@ -101,9 +102,9 @@ cdotc = ("cdotc_test", (

# Helper function to run all the tests with the given arguments
# Does not include `dgemmt` because that's MKL-only
function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, cdotc])
function run_all_tests(args...; tests = [dgemm, dpstrf, sgesv, sdot, cdotc], kwargs...)
for test in tests
run_test(test, args...)
run_test(test, args...; kwargs...)
end
end

Expand Down Expand Up @@ -158,17 +159,23 @@ end

# Test against MKL_jll using `libmkl_rt`, which is :LP64 by default
if MKL_jll.is_available()
# On i686, we can't do complex return style autodetection, so we manually set it,
# knowing that MKL is argument-style.
extra_env = Dict{String,String}()
if Sys.ARCH == :i686
extra_env["LBT_FORCE_RETSTYLE"] = "ARGUMENT"
end
@testset "LBT -> MKL_jll (LP64)" begin
libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...))
run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc])
run_all_tests(blastrampoline_link_name(), libdirs, :LP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc], extra_env)
end

# Test that we can set MKL's interface via an environment variable to select ILP64, and LBT detects it properly
if Sys.WORD_SIZE == 64
@testset "LBT -> MKL_jll (ILP64, via env)" begin
withenv("MKL_INTERFACE_LAYER" => "ILP64") do
libdirs = unique(vcat(lbt_dir, MKL_jll.LIBPATH_list..., CompilerSupportLibraries_jll.LIBPATH_list...))
run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc])
run_all_tests(blastrampoline_link_name(), libdirs, :ILP64, MKL_jll.libmkl_rt_path; tests = [dgemm, dgemmt, dpstrf, sgesv, sdot, cdotc], extra_env)
end
end
end
Expand Down

0 comments on commit 77227d2

Please sign in to comment.