Skip to content

Commit

Permalink
Sniff for libclang-version-correct headers (#1325)
Browse files Browse the repository at this point in the history
Bindgen's strategy of autodetecting headers attempts to use a hacky
route used by clang-sys, however, it doesn't take into account that the
strategy of clang-sys uses `$CLANG_PATH` instead of `$LIBCLANG_PATH`,
and then falls back to searching the entire directory structure. This
means it's fairly easy for it to pull the headers for a different
version, trying to compile them with the wrong clang!

This replaces their series of unfortunate hacks with our own, using
the collected information from pg_config, clang-sys, and rust-bindgen
to try to locate the correct compiler headers. If it can't find anything,
it falls back to doing what currently happens.
  • Loading branch information
workingjubilee committed Nov 27, 2023
1 parent 6bbe8ff commit b992f55
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ jobs:
postgresql-$PG_VER \
postgresql-server-dev-$PG_VER
echo ""
echo "----- pg_config -----"
pg_config
echo ""
- name: Set up PostgreSQL permissions
run: sudo chmod a+rwx `/usr/lib/postgresql/$PG_VER/bin/pg_config --pkglibdir` `/usr/lib/postgresql/$PG_VER/bin/pg_config --sharedir`/extension /var/run/postgresql/
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 18 additions & 6 deletions pgrx-pg-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,10 @@ impl PgConfig {
const PREFIX: &str = "PGRX_PG_CONFIG_";

let mut known_props = BTreeMap::new();
for (k, v) in std::env::vars() {
if k.starts_with(PREFIX) {
// reformat the key to look like an argument option to `pg_config`
let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
known_props.insert(prop, v);
}
for (k, v) in std::env::vars().filter(|(k, _)| k.starts_with(PREFIX)) {
// reformat the key to look like an argument option to `pg_config`
let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
known_props.insert(prop, v);
}

Ok(Self {
Expand Down Expand Up @@ -368,6 +366,20 @@ impl PgConfig {
Ok(path)
}

/// a vaguely-parsed "--configure"
pub fn configure(&self) -> eyre::Result<BTreeMap<String, String>> {
let stdout = self.run("--configure")?;
Ok(stdout
.split('\'')
.filter(|s| s != &"" && s != &" ")
.map(|entry| match entry.split_once('=') {
Some((k, v)) => (k.to_owned(), v.to_owned()),
// some keys are about mere presence
None => (entry.to_owned(), String::from("")),
})
.collect())
}

pub fn includedir_server(&self) -> eyre::Result<PathBuf> {
Ok(self.run("--includedir-server")?.into())
}
Expand Down
2 changes: 2 additions & 0 deletions pgrx-pg-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ libc = "0.2"

[build-dependencies]
bindgen = { version = "0.68.1", default-features = false, features = ["runtime"] }
clang-sys = { version = "1", features = ["clang_6_0", "runtime"] }
pgrx-pg-config= { path = "../pgrx-pg-config/", version = "=0.11.0" }
proc-macro2 = "1.0.69"
quote = "1.0.33"
syn = { version = "1.0.109", features = [ "extra-traits", "full", "fold", "parsing" ] }
eyre = "0.6.8"
shlex = "1.2.0" # shell lexing, also used by many of our deps
once_cell = "1.18.0"
walkdir = "2"
11 changes: 10 additions & 1 deletion pgrx-pg-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use syn::{ForeignItem, Item, ItemConst};
const BLOCKLISTED_TYPES: [&str; 3] = ["Datum", "NullableDatum", "Oid"];

mod build {
pub(super) mod clang;
pub(super) mod sym_blocklist;
}

Expand Down Expand Up @@ -708,14 +709,22 @@ fn run_bindgen(
include_h: &PathBuf,
) -> eyre::Result<String> {
eprintln!("Generating bindings for pg{major_version}");
let configure = pg_config.configure()?;
let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
eprintln!("pg_config --configure CLANG = {:?}", preferred_clang);
let (autodetect, includes) = build::clang::detect_include_paths_for(preferred_clang);
let mut binder = bindgen::Builder::default();
binder = add_blocklists(binder);
binder = add_derives(binder);
if !autodetect {
let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
binder = binder.clang_args(builtin_includes);
};
let bindings = binder
.header(include_h.display().to_string())
.clang_args(&extra_bindgen_clang_args(pg_config)?)
.clang_args(pg_target_include_flags(major_version, pg_config)?)
.detect_include_paths(target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_none())
.detect_include_paths(autodetect)
.parse_callbacks(Box::new(PgrxOverrides::default()))
// The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
.rustified_non_exhaustive_enum("NodeTag")
Expand Down
129 changes: 129 additions & 0 deletions pgrx-pg-sys/build/clang.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use crate::target_env_tracked;
use bindgen::ClangVersion;
use clang_sys::support::Clang as ClangSys;
use std::{ffi::OsStr, path::PathBuf};
use walkdir::{DirEntry, WalkDir};

/// pgrx's bindgen needs to detect include paths, to keep code building,
/// but the way rust-bindgen does it breaks on Postgres 16 due to code like
/// ```c
/// #include <emmintrin.h>
/// ```
/// This will pull in builtin headers, but rust-bindgen uses a $CLANG_PATH lookup from clang-sys
/// which is not guaranteed to find the clang that uses the $LIBCLANG_PATH that bindgen intends.
///
/// Returns the set of paths to include.
pub(crate) fn detect_include_paths_for(
preferred_clang: Option<&std::path::Path>,
) -> (bool, Vec<PathBuf>) {
if target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_some() {
return (false, vec![]);
}

// By asking bindgen for the version, we force it to pull an appropriate libclang,
// allowing users to override it however they would usually override bindgen.
let clang_major = match bindgen::clang_version() {
ClangVersion { parsed: Some((major, _)), full } => {
eprintln!("Bindgen found {full}");
major
}
ClangVersion { full, .. } => {
// If bindgen doesn't know what version it has, bail and hope for the best.
eprintln!("Bindgen failed to parse clang version: {full}");
return (true, vec![]);
}
};

// If Postgres is configured --with-llvm, then it may have recorded a CLANG to use
// Ask if there's a clang at the path that Postgres would use for JIT purposes.
// Unfortunately, the responses from clang-sys include clangs from far-off paths,
// so we can only use clangs that match bindgen's libclang major version.
if let Some(ClangSys { path, version: Some(v), c_search_paths, .. }) =
ClangSys::find(preferred_clang, &[])
{
if Some(&*path) == preferred_clang && v.Major as u32 == clang_major {
return (false, c_search_paths.unwrap_or_default());
}
}

// Oh no, still here?
// Let's go behind bindgen's back to get libclang's path
let libclang_path =
clang_sys::get_library().expect("libclang should have been loaded?").path().to_owned();
eprintln!("found libclang at {}", libclang_path.display());
// libclang will probably be in a dynamic library directory,
// which means it will probably be adjacent to its headers, e.g.
// - "/usr/lib/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
let clang_major_fmt = clang_major.to_string();
let mut paths = vec![];
// by adjacent, that does not mean it is always immediately so, e.g.
// - "/usr/lib/x86_64-linux-gnu/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
// or
// - "/usr/lib64/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
// so, crawl back up the ancestral tree
for ancestor in libclang_path.ancestors() {
paths = WalkDir::new(ancestor)
.min_depth(1)
.max_depth(6)
.sort_by_file_name()
.into_iter()
// On Unix-y systems this will be like "/usr/lib/clang/$CLANG_MAJOR/include"
// so don't even descend if the directory doesn't have one of those parts
.filter_entry(|entry| {
!is_hidden(entry) && {
entry_contains(entry, "clang")
|| entry_contains(entry, "include")
|| entry_contains(entry, &*clang_major_fmt)
// we always want to descend from a lib dir, but only one step
// as we don't really want to search all of /usr/lib's subdirs
|| os_str_contains(entry.file_name(), "lib")
}
})
.filter_map(|e| e.ok()) // be discreet
// We now need something that looks like it actually satisfies all our constraints
.filter(|entry| {
entry_contains(entry, &*clang_major_fmt)
&& entry_contains(entry, "clang")
&& entry_contains(entry, "include")
})
// we need to pull the actual directories that include the SIMD headers
.filter(|entry| {
os_str_contains(entry.file_name(), "emmintrin.h")
|| os_str_contains(entry.file_name(), "arm_neon.h")
})
.filter_map(|entry| {
let mut pbuf = entry.into_path();
if pbuf.pop() && pbuf.is_dir() && os_str_contains(&*pbuf.file_name()?, "include") {
Some(pbuf)
} else {
None
}
})
.collect::<Vec<_>>();

if paths.len() > 0 {
paths.sort();
paths.dedup();
break;
}
}
// If we have anything better to recommend, don't autodetect!
let autodetect = paths.len() == 0;
eprintln!("Found include dirs {:?}", paths);
(autodetect, paths)
}

fn is_hidden(entry: &DirEntry) -> bool {
entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
}

fn entry_contains(entry: &DirEntry, needle: &str) -> bool {
entry.path().components().any(|part| os_str_contains(part.as_os_str(), needle))
}

fn os_str_contains(os_s: &OsStr, needle: &str) -> bool {
os_s.to_str().filter(|part| part.contains(needle)).is_some()
}

0 comments on commit b992f55

Please sign in to comment.