diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b0500d843..315d6f3cf 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -98,6 +98,11 @@ jobs:
           postgresql-$PG_VER \
           postgresql-server-dev-$PG_VER
 
+        echo ""
+        echo "----- pg_config -----"
+        pg_config
+        echo ""
+
     - name: Set up PostgreSQL permissions
       run: sudo chmod a+rwx `/usr/lib/postgresql/$PG_VER/bin/pg_config --pkglibdir` `/usr/lib/postgresql/$PG_VER/bin/pg_config --sharedir`/extension /var/run/postgresql/
 
diff --git a/Cargo.lock b/Cargo.lock
index 19b7a4c6a..bad96f935 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1575,6 +1575,7 @@ name = "pgrx-pg-sys"
 version = "0.11.0"
 dependencies = [
  "bindgen",
+ "clang-sys",
  "eyre",
  "libc",
  "memoffset",
@@ -1588,6 +1589,7 @@ dependencies = [
  "shlex",
  "sptr",
  "syn 1.0.109",
+ "walkdir",
 ]
 
 [[package]]
diff --git a/pgrx-pg-config/src/lib.rs b/pgrx-pg-config/src/lib.rs
index 9e1bd3c97..71554d39d 100644
--- a/pgrx-pg-config/src/lib.rs
+++ b/pgrx-pg-config/src/lib.rs
@@ -181,12 +181,10 @@ impl PgConfig {
         const PREFIX: &str = "PGRX_PG_CONFIG_";
 
         let mut known_props = BTreeMap::new();
-        for (k, v) in std::env::vars() {
-            if k.starts_with(PREFIX) {
-                // reformat the key to look like an argument option to `pg_config`
-                let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
-                known_props.insert(prop, v);
-            }
+        for (k, v) in std::env::vars().filter(|(k, _)| k.starts_with(PREFIX)) {
+            // reformat the key to look like an argument option to `pg_config`
+            let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
+            known_props.insert(prop, v);
         }
 
         Ok(Self {
@@ -368,6 +366,20 @@ impl PgConfig {
         Ok(path)
     }
 
+    /// a vaguely-parsed "--configure"
+    pub fn configure(&self) -> eyre::Result<BTreeMap<String, String>> {
+        let stdout = self.run("--configure")?;
+        Ok(stdout
+            .split('\'')
+            .filter(|s| s != &"" && s != &" ")
+            .map(|entry| match entry.split_once('=') {
+                Some((k, v)) => (k.to_owned(), v.to_owned()),
+                // some keys are about mere presence
+                None => (entry.to_owned(), String::from("")),
+            })
+            .collect())
+    }
+
     pub fn includedir_server(&self) -> eyre::Result<PathBuf> {
         Ok(self.run("--includedir-server")?.into())
     }
diff --git a/pgrx-pg-sys/Cargo.toml b/pgrx-pg-sys/Cargo.toml
index 52771464a..0e48132f5 100644
--- a/pgrx-pg-sys/Cargo.toml
+++ b/pgrx-pg-sys/Cargo.toml
@@ -49,6 +49,7 @@ libc = "0.2"
 
 [build-dependencies]
 bindgen = { version = "0.68.1", default-features = false, features = ["runtime"] }
+clang-sys = { version = "1", features = ["clang_6_0", "runtime"] }
 pgrx-pg-config= { path = "../pgrx-pg-config/", version = "=0.11.0" }
 proc-macro2 = "1.0.69"
 quote = "1.0.33"
@@ -56,3 +57,4 @@ syn = { version = "1.0.109", features = [ "extra-traits", "full", "fold", "parsi
 eyre = "0.6.8"
 shlex = "1.2.0" # shell lexing, also used by many of our deps
 once_cell = "1.18.0"
+walkdir = "2"
diff --git a/pgrx-pg-sys/build.rs b/pgrx-pg-sys/build.rs
index b8a9e5eae..0ef00d07d 100644
--- a/pgrx-pg-sys/build.rs
+++ b/pgrx-pg-sys/build.rs
@@ -24,6 +24,7 @@ use syn::{ForeignItem, Item, ItemConst};
 const BLOCKLISTED_TYPES: [&str; 3] = ["Datum", "NullableDatum", "Oid"];
 
 mod build {
+    pub(super) mod clang;
     pub(super) mod sym_blocklist;
 }
 
@@ -708,14 +709,22 @@ fn run_bindgen(
     include_h: &PathBuf,
 ) -> eyre::Result<syn::File> {
     eprintln!("Generating bindings for pg{major_version}");
+    let configure = pg_config.configure()?;
+    let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
+    eprintln!("pg_config --configure CLANG = {:?}", preferred_clang);
+    let (autodetect, includes) = build::clang::detect_include_paths_for(preferred_clang);
     let mut binder = bindgen::Builder::default();
     binder = add_blocklists(binder);
     binder = add_derives(binder);
+    if !autodetect {
+        let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
+        binder = binder.clang_args(builtin_includes);
+    };
     let bindings = binder
         .header(include_h.display().to_string())
         .clang_args(&extra_bindgen_clang_args(pg_config)?)
         .clang_args(pg_target_include_flags(major_version, pg_config)?)
-        .detect_include_paths(target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_none())
+        .detect_include_paths(autodetect)
         .parse_callbacks(Box::new(PgrxOverrides::default()))
         // The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
         .rustified_non_exhaustive_enum("NodeTag")
diff --git a/pgrx-pg-sys/build/clang.rs b/pgrx-pg-sys/build/clang.rs
new file mode 100644
index 000000000..ec191518e
--- /dev/null
+++ b/pgrx-pg-sys/build/clang.rs
@@ -0,0 +1,129 @@
+use crate::target_env_tracked;
+use bindgen::ClangVersion;
+use clang_sys::support::Clang as ClangSys;
+use std::{ffi::OsStr, path::PathBuf};
+use walkdir::{DirEntry, WalkDir};
+
+/// pgrx's bindgen needs to detect include paths, to keep code building,
+/// but the way rust-bindgen does it breaks on Postgres 16 due to code like
+/// ```c
+/// #include <emmintrin.h>
+/// ```
+/// This will pull in builtin headers, but rust-bindgen uses a $CLANG_PATH lookup from clang-sys
+/// which is not guaranteed to find the clang that uses the $LIBCLANG_PATH that bindgen intends.
+///
+/// Returns the set of paths to include.
+pub(crate) fn detect_include_paths_for(
+    preferred_clang: Option<&std::path::Path>,
+) -> (bool, Vec<PathBuf>) {
+    if target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_some() {
+        return (false, vec![]);
+    }
+
+    // By asking bindgen for the version, we force it to pull an appropriate libclang,
+    // allowing users to override it however they would usually override bindgen.
+    let clang_major = match bindgen::clang_version() {
+        ClangVersion { parsed: Some((major, _)), full } => {
+            eprintln!("Bindgen found {full}");
+            major
+        }
+        ClangVersion { full, .. } => {
+            // If bindgen doesn't know what version it has, bail and hope for the best.
+            eprintln!("Bindgen failed to parse clang version: {full}");
+            return (true, vec![]);
+        }
+    };
+
+    // If Postgres is configured --with-llvm, then it may have recorded a CLANG to use
+    // Ask if there's a clang at the path that Postgres would use for JIT purposes.
+    // Unfortunately, the responses from clang-sys include clangs from far-off paths,
+    // so we can only use clangs that match bindgen's libclang major version.
+    if let Some(ClangSys { path, version: Some(v), c_search_paths, .. }) =
+        ClangSys::find(preferred_clang, &[])
+    {
+        if Some(&*path) == preferred_clang && v.Major as u32 == clang_major {
+            return (false, c_search_paths.unwrap_or_default());
+        }
+    }
+
+    // Oh no, still here?
+    // Let's go behind bindgen's back to get libclang's path
+    let libclang_path =
+        clang_sys::get_library().expect("libclang should have been loaded?").path().to_owned();
+    eprintln!("found libclang at {}", libclang_path.display());
+    // libclang will probably be in a dynamic library directory,
+    // which means it will probably be adjacent to its headers, e.g.
+    // - "/usr/lib/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}"
+    // - "/usr/lib/clang/${CLANG_MAJOR}/include"
+    let clang_major_fmt = clang_major.to_string();
+    let mut paths = vec![];
+    // by adjacent, that does not mean it is always immediately so, e.g.
+    // - "/usr/lib/x86_64-linux-gnu/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
+    // - "/usr/lib/clang/${CLANG_MAJOR}/include"
+    // or
+    // - "/usr/lib64/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
+    // - "/usr/lib/clang/${CLANG_MAJOR}/include"
+    // so, crawl back up the ancestral tree
+    for ancestor in libclang_path.ancestors() {
+        paths = WalkDir::new(ancestor)
+            .min_depth(1)
+            .max_depth(6)
+            .sort_by_file_name()
+            .into_iter()
+            // On Unix-y systems this will be like "/usr/lib/clang/$CLANG_MAJOR/include"
+            // so don't even descend if the directory doesn't have one of those parts
+            .filter_entry(|entry| {
+                !is_hidden(entry) && {
+                    entry_contains(entry, "clang")
+                        || entry_contains(entry, "include")
+                        || entry_contains(entry, &*clang_major_fmt)
+                        // we always want to descend from a lib dir, but only one step
+                        // as we don't really want to search all of /usr/lib's subdirs
+                        || os_str_contains(entry.file_name(), "lib")
+                }
+            })
+            .filter_map(|e| e.ok()) // be discreet
+            // We now need something that looks like it actually satisfies all our constraints
+            .filter(|entry| {
+                entry_contains(entry, &*clang_major_fmt)
+                    && entry_contains(entry, "clang")
+                    && entry_contains(entry, "include")
+            })
+            // we need to pull the actual directories that include the SIMD headers
+            .filter(|entry| {
+                os_str_contains(entry.file_name(), "emmintrin.h")
+                    || os_str_contains(entry.file_name(), "arm_neon.h")
+            })
+            .filter_map(|entry| {
+                let mut pbuf = entry.into_path();
+                if pbuf.pop() && pbuf.is_dir() && os_str_contains(&*pbuf.file_name()?, "include") {
+                    Some(pbuf)
+                } else {
+                    None
+                }
+            })
+            .collect::<Vec<_>>();
+
+        if paths.len() > 0 {
+            paths.sort();
+            paths.dedup();
+            break;
+        }
+    }
+    // If we have anything better to recommend, don't autodetect!
+    let autodetect = paths.len() == 0;
+    eprintln!("Found include dirs {:?}", paths);
+    (autodetect, paths)
+}
+
+fn is_hidden(entry: &DirEntry) -> bool {
+    entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+}
+
+fn entry_contains(entry: &DirEntry, needle: &str) -> bool {
+    entry.path().components().any(|part| os_str_contains(part.as_os_str(), needle))
+}
+
+fn os_str_contains(os_s: &OsStr, needle: &str) -> bool {
+    os_s.to_str().filter(|part| part.contains(needle)).is_some()
+}