use GCC with "-fsanitize=address" options, detected memory leaks #20803

EricGeng · 2024-05-24T07:37:16Z

Describe the issue

This is my code

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include "../include/onnxruntime_c_api.h"

// Wraps an ORT API call: forwards the returned status to InternalORTErrorCheck
// along with the call's source text (#val) and the file/line of the call site.
#define CheckORTError(val) (InternalORTErrorCheck((val), #val, __FILE__, __LINE__))

// A global pointer to the OrtApi. Starts out NULL; main() assigns it from
// OrtGetApiBase()->GetApi() before any other ORT call is made.
const OrtApi *ort_api = NULL;

/*
 * Inspects the status returned by an ORT API call.
 *
 * A NULL status means there is nothing to report and the function returns.
 * Otherwise the ORT error message is printed together with the text of the
 * failing expression and its source location, the status object is released,
 * and the process exits with code 1.
 *
 *   status - status returned by the ORT call (may be NULL)
 *   text   - source text of the checked expression
 *   file   - source file of the call site
 *   line   - line number of the call site
 */
static void InternalORTErrorCheck(OrtStatus *status, const char *text, const char *file, int line) {
    if (status != NULL) {
        const char *message = ort_api->GetErrorMessage(status);
        printf("Got onnxruntime error %s, (%s at line %d in %s)\n", message, text, line, file);
        ort_api->ReleaseStatus(status);
        exit(1);
    }
}

/*
 * Minimal ONNX Runtime C API example: loads the model at model_path, feeds it
 * two float tensors filled with 1.0f, runs inference once, prints the first
 * output value and releases every ORT object it created.
 *
 * Returns 0 on success, or 1 if the ORT API cannot be obtained or the output
 * tensor holds no elements. Any failing ORT call terminates the process
 * through CheckORTError.
 */
int main(int argc, char **argv)
{
    // Command-line arguments are not used by this example.
    (void)argc;
    (void)argv;

    const char *model_path = "../model/test.onnx";
    int exit_code = 0;
    OrtEnv *ort_env = NULL;
    OrtSessionOptions *options = NULL;
    OrtSession *session = NULL;
    OrtMemoryInfo *memory_info = NULL;
    OrtValue *input_tensor1 = NULL;
    OrtValue *input_tensor2 = NULL;
    OrtValue *output_tensor = NULL;
    OrtTensorTypeAndShapeInfo *output_info = NULL;

    // Example inputs: every element is 1.0f. The array lengths must equal the
    // product of the matching shape (1*1*7*7 = 49 and 1*1*1*34 = 34).
    float input_data1[49];
    for (int i = 0; i < 49; i++) {
        input_data1[i] = 1.0f;
    }
    int64_t input1_shape[] = {1, 1, 7, 7};
    const size_t input1_shape_len = sizeof(input1_shape) / sizeof(input1_shape[0]);

    float input_data2[34];
    for (int i = 0; i < 34; i++) {
        input_data2[i] = 1.0f;
    }
    int64_t input2_shape[] = {1, 1, 1, 34};
    const size_t input2_shape_len = sizeof(input2_shape) / sizeof(input2_shape[0]);

    const char *input_names[] = {"input_matrix", "input_ft"};
    const char *output_names[] = {"output"};
    const size_t input_count = sizeof(input_names) / sizeof(input_names[0]);
    const size_t output_count = sizeof(output_names) / sizeof(output_names[0]);
    float *output_values = NULL;
    size_t output_element_count = 0;

    ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
    if (!ort_api) {
        printf("Failed getting the ORT API.\n");
        return 1;
    }
    // %p requires a pointer to void, so convert explicitly.
    printf("ORT API @ %p\n", (const void *)ort_api);

    // Create the environment.
    CheckORTError(ort_api->CreateEnv(ORT_LOGGING_LEVEL_VERBOSE, "Example", &ort_env));

    // Create the session and load the model.
    printf("About to load %s\n", model_path);
    CheckORTError(ort_api->CreateSessionOptions(&options));
    CheckORTError(ort_api->CreateSession(ort_env, model_path, options, &session));
    printf("Loaded %s OK.\n", model_path);

    // Wrap the stack buffers in tensors; the byte length passed is the full
    // size of each buffer.
    CheckORTError(ort_api->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
    CheckORTError(ort_api->CreateTensorWithDataAsOrtValue(memory_info, input_data1, sizeof(input_data1), input1_shape,
        input1_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor1));
    CheckORTError(ort_api->CreateTensorWithDataAsOrtValue(memory_info, input_data2, sizeof(input_data2), input2_shape,
        input2_shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor2));

    // Declared with the element type Run() expects, so the array can be passed
    // directly without casting a pointer-to-array.
    const OrtValue *const input_tensors[] = { input_tensor1, input_tensor2 };

    // Actually run the inference.
    CheckORTError(ort_api->Run(session, NULL, input_names, input_tensors, input_count, output_names, output_count,
        &output_tensor));

    // Get the output data from its tensor.
    CheckORTError(ort_api->GetTensorTypeAndShape(output_tensor, &output_info));
    CheckORTError(ort_api->GetTensorShapeElementCount(output_info, &output_element_count));
    CheckORTError(ort_api->GetTensorMutableData(output_tensor, (void **) (&output_values)));

    // Only read output_values[0] when the tensor actually holds an element.
    if (output_element_count == 0) {
        printf("The network produced an empty output tensor.\n");
        exit_code = 1;
    } else {
        printf("The network produced the expected results %f.\n", output_values[0]);
    }

    // Release everything on both the success and the empty-output path.
    ort_api->ReleaseTensorTypeAndShapeInfo(output_info);
    ort_api->ReleaseValue(output_tensor);
    ort_api->ReleaseValue(input_tensor1);
    ort_api->ReleaseValue(input_tensor2);
    ort_api->ReleaseMemoryInfo(memory_info);
    ort_api->ReleaseSession(session);
    ort_api->ReleaseSessionOptions(options);
    ort_api->ReleaseEnv(ort_env);
    ort_env = NULL;
    printf("Cleanup complete.\n");
    return exit_code;
}

Urgency

No response

Target platform

Ubuntu 22.04

Build script

gcc -g test.c -o inference -fsanitize=address -I../include -L../lib -lonnxruntime -Wl,-rpath=../lib

Error / output

2024-05-24 15:22:33.330881806 [I:onnxruntime:, inference_session.cc:1985 Initialize] Session successfully initialized.
Loaded ../model/test.onnx OK.
The network produced the expected results 0.587069.
Cleanup complete.

=================================================================
==3861543==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7f1f753a4a57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7f1f74e53549  (../lib/libonnxruntime.so.1.17.3+0xa4a549)

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7f1f753a4a57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7f1f74e5352d  (../lib/libonnxruntime.so.1.17.3+0xa4a52d)

SUMMARY: AddressSanitizer: 3072 byte(s) leaked in 2 allocation(s).

Even with the newer version (1.18.0), the problem still occurs:

=================================================================
==4025715==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7f19b77c6a57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7f19b72a5df9  (../lib/libonnxruntime.so.1.18.0+0xa2cdf9)

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7f19b77c6a57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7f19b72a5ddd  (../lib/libonnxruntime.so.1.18.0+0xa2cddd)

SUMMARY: AddressSanitizer: 3072 byte(s) leaked in 2 allocation(s).

According to the call stack of the function, it seems that the memory leakage occurs in the ort_api->Run() function.

    CheckORTError(ort_api->Run(session, NULL, input_names, (const OrtValue* const*) &input_tensors, 2, output_names, 1,
        &output_tensor));

What confuses me is that I can't see the exact location of the memory leak from the error output.

Visual Studio Version

No response

GCC / Compiler Version

gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0

snnn · 2024-05-26T03:50:29Z

Could you try this example? https://github.com/microsoft/onnxruntime-inference-examples/tree/main/c_cxx/fns_candy_style_transfer
Does it have memory leak?

EricGeng · 2024-05-27T02:15:01Z

I tried the fns_candy_style_transfer example with the -fsanitize=address compile option. This example reports three memory leaks.

Build script

gcc -o fns_candy_style_transfer fns_candy_style_transfer.c image_file_libpng.c -fsanitize=address -Iinclude -Llib -lonnxruntime -lpng -Wl,-rpath=lib

Error / Output

./fns_candy_style_transfer candy.onnx test.png test2.png cpu

=================================================================
==3953065==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 1555200 byte(s) in 1 object(s) allocated from:
    #0 0x7fc3bf15b887 in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145
    #1 0x556fe87576fb in chw_to_hwc (/home/gh/workspace/test/fns_candy_style_transfer/fns_candy_style_transfer+0x26fb)
    #2 0x556fe8758712 in run_inference (/home/gh/workspace/test/fns_candy_style_transfer/fns_candy_style_transfer+0x3712)
    #3 0x556fe875981e in main (/home/gh/workspace/test/fns_candy_style_transfer/fns_candy_style_transfer+0x481e)
    #4 0x7fc3bdfd3d8f in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7fc3bf15ba57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7fc3bec3adf9  (lib/libonnxruntime.so.1.18.0+0xa2cdf9)

Direct leak of 1536 byte(s) in 1 object(s) allocated from:
    #0 0x7fc3bf15ba57 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154
    #1 0x7fc3bec3addd  (lib/libonnxruntime.so.1.18.0+0xa2cddd)

SUMMARY: AddressSanitizer: 1558272 byte(s) leaked in 3 allocation(s).

The memory leaks at 0xa2cdf9 and 0xa2cddd still exist.
The memory leak in the chw_to_hwc function occurs because the variable output_data is allocated using malloc but is not freed after its use. The suggested solution is to manually release the allocated memory when it is no longer needed by freeing the memory pointed to by output_data.

  float* output_tensor_data = NULL;
  ORT_ABORT_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data));
  uint8_t* output_image_data = NULL;
  chw_to_hwc(output_tensor_data, 720, 720, &output_image_data);
  if (write_image_file(output_image_data, 720, 720, output_file) != 0) {
    ret = -1;
  }
  g_ort->ReleaseValue(output_tensor);
  g_ort->ReleaseValue(input_tensor);
  free(model_input);
  free(output_image_data); // Manually release the memory when it is no longer needed

snnn · 2024-05-28T18:03:16Z

It might be a false alarm. I rebuilt the code with address sanitizer, then the alert is gone. You may also try it locally. The command I used was:

python3 tools/ci_build/build.py --config Release --build_dir /tmp/cpubuild2 --parallel --skip_submodule_sync --enable_address_sanitizer  --build_shared_lib

Then use the newly built libonnxruntime.so to replace the one you got from our release page.

snnn · 2024-05-28T18:04:14Z

The code change you suggested for fns_candy_style_transfer is good. We should take it.

EricGeng · 2024-05-29T03:10:59Z

It might be a false alarm. I rebuilt the code with address sanitizer, then the alert is gone. You may also try it locally. The command I used was:
python3 tools/ci_build/build.py --config Release --build_dir /tmp/cpubuild2 --parallel --skip_submodule_sync --enable_address_sanitizer  --build_shared_lib
Then use the newly built libonnxruntime.so to replace the one you got from our release page.

Thanks a lot. I tried what you suggested locally, now I'm sure it was a false alarm.

EricGeng · 2024-06-06T07:28:00Z

It might be a false alarm. I rebuilt the code with address sanitizer, then the alert is gone. You may also try it locally. The command I used was:
python3 tools/ci_build/build.py --config Release --build_dir /tmp/cpubuild2 --parallel --skip_submodule_sync --enable_address_sanitizer  --build_shared_lib
Then use the newly built libonnxruntime.so to replace the one you got from our release page.

I rebuilt the code with address sanitizer on Ubuntu 22.04, and the alert is gone. But when I copied the newly built libonnxruntime.so to Ubuntu 20.04, the library failed to link; it seems that the newly built library depends on a newer glibc version.

lib/libonnxruntime.so: undefined reference to `stat64@GLIBC_2.33'
lib/libonnxruntime.so: undefined reference to `dlclose@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `fstat@GLIBC_2.33'
lib/libonnxruntime.so: undefined reference to `dlerror@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_key_create@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_rwlock_unlock@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_setspecific@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `__libc_single_threaded@GLIBC_2.32'
lib/libonnxruntime.so: undefined reference to `pthread_join@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_rwlock_destroy@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_rwlock_wrlock@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `dlsym@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `std::__throw_bad_array_new_length()@GLIBCXX_3.4.29'
lib/libonnxruntime.so: undefined reference to `pthread_attr_setstacksize@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_rwlock_init@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `stat@GLIBC_2.33'
lib/libonnxruntime.so: undefined reference to `dlopen@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_setaffinity_np@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_once@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_rwlock_rdlock@GLIBC_2.34'
lib/libonnxruntime.so: undefined reference to `pthread_create@GLIBC_2.34'

The libonnxruntime.so.1.18.0 I got from the release page does not have this problem and works on multiple versions of Ubuntu. How can I remove the dependency on the newer glibc?

snnn · 2024-06-06T16:16:37Z

This is normal. You may build it on Ubuntu 20.04 then ship it to Ubuntu 22.04, but not vice versa.

EricGeng added the build build issues; typically submitted using template label May 24, 2024

EricGeng changed the title ~~[Build] use GCC with "-fsanitize=address" options, detected memory leaks~~ use GCC with "-fsanitize=address" options, detected memory leaks May 24, 2024

snnn added core runtime issues related to core runtime and removed build build issues; typically submitted using template labels May 28, 2024

snnn self-assigned this May 28, 2024

snnn closed this as not planned Won't fix, can't repro, duplicate, stale Jun 6, 2024

snnn mentioned this issue Aug 30, 2024

Memory leak in examples for C++ in your project. microsoft/onnxruntime-inference-examples#457

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

use GCC with "-fsanitize=address" options, detected memory leaks #20803

use GCC with "-fsanitize=address" options, detected memory leaks #20803

EricGeng commented May 24, 2024 •

edited

Loading

snnn commented May 26, 2024

EricGeng commented May 27, 2024 •

edited

Loading

snnn commented May 28, 2024

snnn commented May 28, 2024

EricGeng commented May 29, 2024

EricGeng commented Jun 6, 2024

snnn commented Jun 6, 2024

use GCC with "-fsanitize=address" options, detected memory leaks #20803

use GCC with "-fsanitize=address" options, detected memory leaks #20803

Comments

EricGeng commented May 24, 2024 • edited Loading

Describe the issue

Urgency

Target platform

Build script

Error / output

Visual Studio Version

GCC / Compiler Version

snnn commented May 26, 2024

EricGeng commented May 27, 2024 • edited Loading

Build script

Error / Output

snnn commented May 28, 2024

snnn commented May 28, 2024

EricGeng commented May 29, 2024

EricGeng commented Jun 6, 2024

snnn commented Jun 6, 2024

EricGeng commented May 24, 2024 •

edited

Loading

EricGeng commented May 27, 2024 •

edited

Loading