From a9ebbab8b04e843f5d72c5ebe2f52995bc6d45f9 Mon Sep 17 00:00:00 2001 From: Gregory LEOCADIE Date: Wed, 18 Sep 2024 12:36:54 +0200 Subject: [PATCH] Fix dlsym issue (#6048) This PR addresses the issue https://github.com/DataDog/dd-trace-dotnet/issues/6045 When using the `dlsym` function, the compiler adds in the import symbols table that we need the `dlsym` symbol. Before being a universal binary (same binary used for glibc-based linux and musl-libc-based linux) and the compiler added in a `DT_NEEDED` section the library `libdl.so` (the library containing `dlsym`). When the wrapper is loaded, it will look through all the `DT_NEEDED` sections to find a library that contains the `dlsym` symbol. Since being a universal binary, the `DT_NEEDED` sections are removed (part of being universal) and we have to resolve by hand needed symbols (`dlsym`, `pthread_once` ..). If we use `dlsym` (or other symbol), we will hit this issue. - use `__dd_dlsym` instead Added a snapshot test using `nm` that verifies that the undefined symbols in the universal binary haven't changed. It's equivalent to running ```bash nm -D Datadog.Linux.ApiWrapper.x64.so | grep ' U ' | awk '{print $2}' | sed 's/@.*//' | sort ``` but done using Nuke instead. It would probably make sense for this to be a "normal" test in the native tests, but given it has a dependency on `nm`, which is _definitely_ available in the universal build dockerfile it was quicker and easier to get this up and running directly. When it fails, it prints the diff and throws an exception, e.g. ```bash System.Exception: Found differences in undefined symbols (dlsym) in the Native Wrapper library. Verify that these changes are expected, and will not cause problems. Removing symbols is generally a safe operation, but adding them could cause crashes. If the new symbols are safe to add, update the snapshot file at C:\repos\dd-trace-dotnet\tracer\test\snapshots\native-wrapper-symbols-x64.verified.txt with the new values ``` This will be hotfixed onto 3.3.1 and 2.59.1 --------- Co-authored-by: Andrew Lock --- .nuke/build.schema.json | 2 + .../functions_to_wrap.c | 4 +- tracer/build/_build/Build.Profiler.Steps.cs | 79 +++++++++++++++++++ tracer/build/_build/Build.Steps.cs | 1 + tracer/build/_build/Build.Utilities.cs | 72 +++++++++-------- tracer/build/_build/Build.cs | 1 + .../native-wrapper-symbols-arm64.verified.txt | 14 ++++ .../native-wrapper-symbols-x64.verified.txt | 14 ++++ 8 files changed, 152 insertions(+), 35 deletions(-) create mode 100644 tracer/test/snapshots/native-wrapper-symbols-arm64.verified.txt create mode 100644 tracer/test/snapshots/native-wrapper-symbols-x64.verified.txt diff --git a/.nuke/build.schema.json b/.nuke/build.schema.json index 0afd062edc79..78b762c86401 100644 --- a/.nuke/build.schema.json +++ b/.nuke/build.schema.json @@ -464,6 +464,7 @@ "SignDlls", "SignMsi", "SummaryOfSnapshotChanges", + "TestNativeWrapper", "UpdateChangeLog", "UpdateSnapshots", "UpdateSnapshotsFromBuild", @@ -680,6 +681,7 @@ "SignDlls", "SignMsi", "SummaryOfSnapshotChanges", + "TestNativeWrapper", "UpdateChangeLog", "UpdateSnapshots", "UpdateSnapshotsFromBuild", diff --git a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c index 10a924e61fe9..370628f97647 100644 --- a/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c +++ b/profiler/src/ProfilerEngine/Datadog.Linux.ApiWrapper/functions_to_wrap.c @@ -201,8 +201,8 @@ void initLibrary(void) // Bash provides its own version of the getenv/setenv functions // Fetch the original ones and use those instead - char *(*real_getenv)(const char *) = (char *(*)(const char *))dlsym(RTLD_NEXT, "getenv"); - int (*real_setenv)(const char *, const char *, int) = (int (*)(const char *, const char *, int))dlsym(RTLD_NEXT, "setenv"); + char *(*real_getenv)(const char *) = __dd_dlsym(RTLD_NEXT, "getenv"); + int (*real_setenv)(const char *, const char *, int) = __dd_dlsym(RTLD_NEXT, "setenv"); if (real_getenv == NULL || real_setenv == NULL) { diff --git a/tracer/build/_build/Build.Profiler.Steps.cs b/tracer/build/_build/Build.Profiler.Steps.cs index 403c87b3cb79..862ab3bd9596 100644 --- a/tracer/build/_build/Build.Profiler.Steps.cs +++ b/tracer/build/_build/Build.Profiler.Steps.cs @@ -17,6 +17,7 @@ using Nuke.Common.Utilities; using System.Collections; using System.Threading.Tasks; +using DiffMatchPatch; using Logger = Serilog.Log; partial class Build @@ -115,6 +116,84 @@ partial class Build } }); + Target TestNativeWrapper => _ => _ + .Unlisted() + .Description("Test that the Native wrapper symbols haven't changed") + .After(CompileProfilerNativeSrc) + .OnlyWhenStatic(() => IsLinux) + .Executes(() => + { + var (arch, _) = GetUnixArchitectureAndExtension(); + var libraryPath = ProfilerDeployDirectory / arch / LinuxApiWrapperLibrary; + + var output = Nm.Value($"-D {libraryPath}").Select(x => x.Text).ToList(); + + // Gives output similar to this: + // 0000000000006bc8 D DdDotnetFolder + // 0000000000006bd0 D DdDotnetMuslFolder + // w _ITM_deregisterTMCloneTable + // w _ITM_registerTMCloneTable + // w __cxa_finalize + // w __deregister_frame_info + // U __errno_location + // U __tls_get_addr + // 0000000000002d1b T _fini + // 0000000000002d18 T _init + // 0000000000003d70 T accept + // 0000000000003e30 T accept4 + // U access + // + // The types of symbols are: + // D: Data section symbol. These symbols are initialized global variables. + // w: Weak symbol. These symbols are weakly referenced and can be overridden by other symbols. + // U: Undefined symbol. These symbols are referenced in the file but defined elsewhere. + // T: Text section symbol. These symbols are functions or executable code. + // B: BSS (Block Started by Symbol) section symbol. These symbols are uninitialized global variables. + // + // We only care about the Undefined symbols - we don't want to accidentally add more of them + + Logger.Debug("NM output: {Output}", string.Join(Environment.NewLine, output)); + + var symbols = output + .Select(x => x.Trim()) + .Where(x => x.StartsWith("U ")) + .Select(x => x.TrimStart("U ")) + .OrderBy(x => x) + .ToList(); + + + var received = string.Join(Environment.NewLine, symbols); + var verifiedPath = TestsDirectory / "snapshots" / $"native-wrapper-symbols-{UnixArchitectureIdentifier}.verified.txt"; + var verified = File.Exists(verifiedPath) + ? File.ReadAllText(verifiedPath) + : string.Empty; + + Logger.Information("Comparing snapshot of Undefined symbols in the Native Wrapper library using {Path}...", verifiedPath); + + var dmp = new diff_match_patch(); + var diff = dmp.diff_main(verified, received); + dmp.diff_cleanupSemantic(diff); + + var changedSymbols = diff + .Where(x => x.operation != Operation.EQUAL) + .Select(x => x.text.Trim()) + .ToList(); + + if (changedSymbols.Count == 0) + { + Logger.Information("No changes found in Undefined symbols in the Native Wrapper library"); + return; + } + + PrintDiff(diff); + + throw new Exception($"Found differences in undefined symbols ({string.Join(",", changedSymbols)}) in the Native Wrapper library. " + + "Verify that these changes are expected, and will not cause problems. " + + "Removing symbols is generally a safe operation, but adding them could cause crashes. " + + $"If the new symbols are safe to add, update the snapshot file at {verifiedPath} with the " + + "new values"); + }); + Target RunProfilerNativeUnitTestsLinux => _ => _ .Unlisted() .Description("Run profiler native unit tests") diff --git a/tracer/build/_build/Build.Steps.cs b/tracer/build/_build/Build.Steps.cs index c9be494ade5d..c4008a9c0358 100644 --- a/tracer/build/_build/Build.Steps.cs +++ b/tracer/build/_build/Build.Steps.cs @@ -97,6 +97,7 @@ partial class Build [LazyPathExecutable(name: "ln")] readonly Lazy HardLinkUtil; [LazyPathExecutable(name: "cppcheck")] readonly Lazy CppCheck; [LazyPathExecutable(name: "run-clang-tidy")] readonly Lazy RunClangTidy; + [LazyPathExecutable(name: "nm")] readonly Lazy Nm; //OSX Tools readonly string[] OsxArchs = { "arm64", "x86_64" }; diff --git a/tracer/build/_build/Build.Utilities.cs b/tracer/build/_build/Build.Utilities.cs index 937f17905840..71f092655ec6 100644 --- a/tracer/build/_build/Build.Utilities.cs +++ b/tracer/build/_build/Build.Utilities.cs @@ -338,39 +338,7 @@ partial class Build var diff = dmp.diff_main(File.ReadAllText(source.ToString().Replace("received", "verified")), File.ReadAllText(source)); dmp.diff_cleanupSemantic(diff); - foreach (var t in diff) - { - if (t.operation != Operation.EQUAL) - { - var str = DiffToString(t); - if (str.Contains(value: '\n')) - { - // if the diff is multiline, start with a newline so that all changes are aligned - // otherwise it's easy to miss the first line of the diff - str = "\n" + str; - } - - Logger.Information(str); - } - } - } - - string DiffToString(Diff diff) - { - if (diff.operation == Operation.EQUAL) - { - return string.Empty; - } - - var symbol = diff.operation switch - { - Operation.DELETE => '-', - Operation.INSERT => '+', - _ => throw new Exception("Unknown value of the Option enum.") - }; - // put the symbol at the beginning of each line to make diff clearer when whole blocks of text are missing - var lines = diff.text.TrimEnd(trimChar: '\n').Split(Environment.NewLine); - return string.Join(Environment.NewLine, lines.Select(l => symbol + l)); + PrintDiff(diff); } }); @@ -527,4 +495,42 @@ private static string GetDefaultRuntimeIdentifier(bool isAlpine) private static MSBuildTargetPlatform ARM64TargetPlatform = (MSBuildTargetPlatform)"ARM64"; private static MSBuildTargetPlatform ARM64ECTargetPlatform = (MSBuildTargetPlatform)"ARM64EC"; + + private static void PrintDiff(List diff, bool printEqual = false) + { + foreach (var t in diff) + { + if (printEqual || t.operation != Operation.EQUAL) + { + var str = DiffToString(t); + if (str.Contains(value: '\n')) + { + // if the diff is multiline, start with a newline so that all changes are aligned + // otherwise it's easy to miss the first line of the diff + str = "\n" + str; + } + + Logger.Information(str); + } + } + + string DiffToString(Diff diff) + { + if (diff.operation == Operation.EQUAL) + { + return string.Empty; + } + + var symbol = diff.operation switch + { + Operation.DELETE => '-', + Operation.INSERT => '+', + _ => throw new Exception("Unknown value of the Option enum.") + }; + // put the symbol at the beginning of each line to make diff clearer when whole blocks of text are missing + var lines = diff.text.TrimEnd(trimChar: '\n').Split(Environment.NewLine); + return string.Join(Environment.NewLine, lines.Select(l => symbol + l)); + } + + } } diff --git a/tracer/build/_build/Build.cs b/tracer/build/_build/Build.cs index 986bfc277521..4b1f3d7c6bbc 100644 --- a/tracer/build/_build/Build.cs +++ b/tracer/build/_build/Build.cs @@ -211,6 +211,7 @@ void DeleteReparsePoints(string path) .Description("Builds the Profiler native and managed src, and publishes the profiler home directory") .After(Clean) .DependsOn(CompileProfilerNativeSrc) + .DependsOn(TestNativeWrapper) .DependsOn(PublishProfiler); Target BuildNativeLoader => _ => _ diff --git a/tracer/test/snapshots/native-wrapper-symbols-arm64.verified.txt b/tracer/test/snapshots/native-wrapper-symbols-arm64.verified.txt new file mode 100644 index 000000000000..78ccd23e4a19 --- /dev/null +++ b/tracer/test/snapshots/native-wrapper-symbols-arm64.verified.txt @@ -0,0 +1,14 @@ +__errno_location +access +asprintf +free +getauxval +getenv +malloc +strcasecmp +strcmp +strcpy +strlen +strncmp +strncpy +strrchr \ No newline at end of file diff --git a/tracer/test/snapshots/native-wrapper-symbols-x64.verified.txt b/tracer/test/snapshots/native-wrapper-symbols-x64.verified.txt new file mode 100644 index 000000000000..dbe81a1f77d1 --- /dev/null +++ b/tracer/test/snapshots/native-wrapper-symbols-x64.verified.txt @@ -0,0 +1,14 @@ +__errno_location +__tls_get_addr +access +asprintf +free +getenv +malloc +strcasecmp +strcmp +strcpy +strlen +strncmp +strncpy +strrchr \ No newline at end of file