-
-
Notifications
You must be signed in to change notification settings - Fork 240
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sniff for libclang-version-correct headers (#1325)
Bindgen's strategy of autodetecting headers attempts to use a hacky route used by clang-sys, however, it doesn't take into account that the strategy of clang-sys uses `$CLANG_PATH` instead of `$LIBCLANG_PATH`, and then falls back to searching the entire directory structure. This means it's fairly easy for it to pull the headers for a different version, trying to compile them with the wrong clang! This replaces their series of unfortunate hacks with our own, using the collected information from pg_config, clang-sys, and rust-bindgen to try to locate the correct compiler headers. If it can't find anything, it falls back to doing what currently happens.
- Loading branch information
1 parent
6bbe8ff
commit b992f55
Showing
6 changed files
with
166 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
use crate::target_env_tracked; | ||
use bindgen::ClangVersion; | ||
use clang_sys::support::Clang as ClangSys; | ||
use std::{ffi::OsStr, path::PathBuf}; | ||
use walkdir::{DirEntry, WalkDir}; | ||
|
||
/// pgrx's bindgen needs to detect include paths, to keep code building, | ||
/// but the way rust-bindgen does it breaks on Postgres 16 due to code like | ||
/// ```c | ||
/// #include <emmintrin.h> | ||
/// ``` | ||
/// This will pull in builtin headers, but rust-bindgen uses a $CLANG_PATH lookup from clang-sys | ||
/// which is not guaranteed to find the clang that uses the $LIBCLANG_PATH that bindgen intends. | ||
/// | ||
/// Returns the set of paths to include. | ||
pub(crate) fn detect_include_paths_for( | ||
preferred_clang: Option<&std::path::Path>, | ||
) -> (bool, Vec<PathBuf>) { | ||
if target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_some() { | ||
return (false, vec![]); | ||
} | ||
|
||
// By asking bindgen for the version, we force it to pull an appropriate libclang, | ||
// allowing users to override it however they would usually override bindgen. | ||
let clang_major = match bindgen::clang_version() { | ||
ClangVersion { parsed: Some((major, _)), full } => { | ||
eprintln!("Bindgen found {full}"); | ||
major | ||
} | ||
ClangVersion { full, .. } => { | ||
// If bindgen doesn't know what version it has, bail and hope for the best. | ||
eprintln!("Bindgen failed to parse clang version: {full}"); | ||
return (true, vec![]); | ||
} | ||
}; | ||
|
||
// If Postgres is configured --with-llvm, then it may have recorded a CLANG to use | ||
// Ask if there's a clang at the path that Postgres would use for JIT purposes. | ||
// Unfortunately, the responses from clang-sys include clangs from far-off paths, | ||
// so we can only use clangs that match bindgen's libclang major version. | ||
if let Some(ClangSys { path, version: Some(v), c_search_paths, .. }) = | ||
ClangSys::find(preferred_clang, &[]) | ||
{ | ||
if Some(&*path) == preferred_clang && v.Major as u32 == clang_major { | ||
return (false, c_search_paths.unwrap_or_default()); | ||
} | ||
} | ||
|
||
// Oh no, still here? | ||
// Let's go behind bindgen's back to get libclang's path | ||
let libclang_path = | ||
clang_sys::get_library().expect("libclang should have been loaded?").path().to_owned(); | ||
eprintln!("found libclang at {}", libclang_path.display()); | ||
// libclang will probably be in a dynamic library directory, | ||
// which means it will probably be adjacent to its headers, e.g. | ||
// - "/usr/lib/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}" | ||
// - "/usr/lib/clang/${CLANG_MAJOR}/include" | ||
let clang_major_fmt = clang_major.to_string(); | ||
let mut paths = vec![]; | ||
// by adjacent, that does not mean it is always immediately so, e.g. | ||
// - "/usr/lib/x86_64-linux-gnu/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}" | ||
// - "/usr/lib/clang/${CLANG_MAJOR}/include" | ||
// or | ||
// - "/usr/lib64/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}" | ||
// - "/usr/lib/clang/${CLANG_MAJOR}/include" | ||
// so, crawl back up the ancestral tree | ||
for ancestor in libclang_path.ancestors() { | ||
paths = WalkDir::new(ancestor) | ||
.min_depth(1) | ||
.max_depth(6) | ||
.sort_by_file_name() | ||
.into_iter() | ||
// On Unix-y systems this will be like "/usr/lib/clang/$CLANG_MAJOR/include" | ||
// so don't even descend if the directory doesn't have one of those parts | ||
.filter_entry(|entry| { | ||
!is_hidden(entry) && { | ||
entry_contains(entry, "clang") | ||
|| entry_contains(entry, "include") | ||
|| entry_contains(entry, &*clang_major_fmt) | ||
// we always want to descend from a lib dir, but only one step | ||
// as we don't really want to search all of /usr/lib's subdirs | ||
|| os_str_contains(entry.file_name(), "lib") | ||
} | ||
}) | ||
.filter_map(|e| e.ok()) // be discreet | ||
// We now need something that looks like it actually satisfies all our constraints | ||
.filter(|entry| { | ||
entry_contains(entry, &*clang_major_fmt) | ||
&& entry_contains(entry, "clang") | ||
&& entry_contains(entry, "include") | ||
}) | ||
// we need to pull the actual directories that include the SIMD headers | ||
.filter(|entry| { | ||
os_str_contains(entry.file_name(), "emmintrin.h") | ||
|| os_str_contains(entry.file_name(), "arm_neon.h") | ||
}) | ||
.filter_map(|entry| { | ||
let mut pbuf = entry.into_path(); | ||
if pbuf.pop() && pbuf.is_dir() && os_str_contains(&*pbuf.file_name()?, "include") { | ||
Some(pbuf) | ||
} else { | ||
None | ||
} | ||
}) | ||
.collect::<Vec<_>>(); | ||
|
||
if paths.len() > 0 { | ||
paths.sort(); | ||
paths.dedup(); | ||
break; | ||
} | ||
} | ||
// If we have anything better to recommend, don't autodetect! | ||
let autodetect = paths.len() == 0; | ||
eprintln!("Found include dirs {:?}", paths); | ||
(autodetect, paths) | ||
} | ||
|
||
fn is_hidden(entry: &DirEntry) -> bool { | ||
entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) | ||
} | ||
|
||
fn entry_contains(entry: &DirEntry, needle: &str) -> bool { | ||
entry.path().components().any(|part| os_str_contains(part.as_os_str(), needle)) | ||
} | ||
|
||
fn os_str_contains(os_s: &OsStr, needle: &str) -> bool { | ||
os_s.to_str().filter(|part| part.contains(needle)).is_some() | ||
} |