
[Mobile] Please advise: how can I use the ONNX Runtime C/C++ API to determine whether the QNN EP is actually in effect?

Panda-Young opened this issue 9 months ago

Describe the issue

Main problem: I found that even if I write an obviously invalid value for `backend_type`, the program still executes successfully.

```c
const char *provider_options_keys[] = {
    "backend_type",
    "qnn_context_file_path"};

const char *provider_options_values[] = {
    "aaaaaaaaaa",
    "/data/local/tmp/qnn_context"};

CHECK_STATUS(ort->SessionOptionsAppendExecutionProvider(
    session_options,
    "QNN",
    provider_options_keys,
    provider_options_values,
    sizeof(provider_options_keys) / sizeof(provider_options_keys[0])));
printf("Successfully appended QNN execution provider. Backend type: %s\n", provider_options_values[0]);
```

Program output: `Successfully appended QNN execution provider. Backend type: aaaaaaaaaa`
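
(Side note: a minimal sketch of listing the execution providers compiled into the build, reusing `ort` and `CHECK_STATUS` from the full program below; this only confirms that the QNN EP is present in the build, not that it actually executes any nodes.)

```c
/* List every execution provider compiled into this onnxruntime build.
 * "QNNExecutionProvider" appearing here does not mean it ran anything. */
char **providers = NULL;
int num_providers = 0;
CHECK_STATUS(ort->GetAvailableProviders(&providers, &num_providers));
for (int i = 0; i < num_providers; i++) {
    printf("Available EP: %s\n", providers[i]);
}
CHECK_STATUS(ort->ReleaseAvailableProviders(providers, num_providers));
```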

To reproduce

The complete C code is as follows:

```c
#include <onnxruntime_c_api.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CHECK_STATUS(expr)                                      \
    do {                                                        \
        OrtStatus *__status = (expr);                           \
        if (__status != NULL) {                                 \
            const char *msg = ort->GetErrorMessage(__status);   \
            fprintf(stderr, "ERROR: %s\n", msg);                \
            ort->ReleaseStatus(__status);                       \
            exit(1);                                            \
        }                                                       \
    } while (0)

static inline long long current_time_ns() {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main() {
const OrtApi *ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
printf("ONNX Runtime API initialized\n");

OrtEnv *env;
CHECK_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_INFO, "test", &env));
printf("Environment created successfully\n");

OrtSessionOptions *session_options;
CHECK_STATUS(ort->CreateSessionOptions(&session_options));
printf("Session options created\n");

const char *provider_options_keys[] = {
    "backend_type",
    "qnn_context_file_path"};

const char *provider_options_values[] = {
    "HTP",
    "/data/local/tmp/qnn_context"};

CHECK_STATUS(ort->SessionOptionsAppendExecutionProvider(
    session_options,
    "QNN",
    provider_options_keys,
    provider_options_values,
    sizeof(provider_options_keys) / sizeof(provider_options_keys[0])));
printf("Successfully appended QNN execution provider. Backend type: %s\n", provider_options_values[0]);

OrtSession *session;
const char *model_path = "sample_model.onnx";
printf("Loading model from: %s\n", model_path);
CHECK_STATUS(ort->CreateSession(env, model_path, session_options, &session));
printf("Model loaded successfully\n");

const int64_t input_shape[] = {1, 10};
float input_data[10];
srand((unsigned int)time(NULL));

for (int i = 0; i < 10; i++) {
    input_data[i] = (float)rand() / (float)RAND_MAX;
}

printf("Input data: [");
for (int i = 0; i < 5; i++) {
    printf("%f, ", input_data[i]);
}
printf("...]\n");

OrtMemoryInfo *mem_info;
CHECK_STATUS(ort->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &mem_info));
printf("Memory info created\n");

OrtValue *input_tensor;
CHECK_STATUS(ort->CreateTensorWithDataAsOrtValue(
    mem_info,
    input_data,
    sizeof(input_data),
    input_shape,
    2,
    ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT,
    &input_tensor));
printf("Input tensor created\n");

const char *input_name = "input";
const char *output_name = "output";
OrtValue *output_tensor;
printf("Starting inference...\n");
long long start_ns = current_time_ns();
const OrtValue *input_tensor_ptr = input_tensor;
CHECK_STATUS(ort->Run(
    session,
    NULL,
    &input_name,
    &input_tensor_ptr,
    1,
    &output_name,
    1,
    &output_tensor));
long long end_ns = current_time_ns();
printf("Inference completed successfully\n");

double infer_time_ms = (end_ns - start_ns) / 1e6;
printf("Inference time: %.3f ms\n", infer_time_ms);

float *output_data;
CHECK_STATUS(ort->GetTensorMutableData(output_tensor, (void **)&output_data));

printf("First 5 output values: [");
for (int i = 0; i < 5; i++) {
    printf("%f, ", output_data[i]);
}
printf("...]\n");

ort->ReleaseValue(output_tensor);
ort->ReleaseValue(input_tensor);
ort->ReleaseMemoryInfo(mem_info);
ort->ReleaseSession(session);
ort->ReleaseSessionOptions(session_options);
ort->ReleaseEnv(env);

printf("Resources released successfully\n");
return 0;

}
```

The results of running the complete program are as follows:

```
ONNX Runtime API initialized
Environment created successfully
Session options created
Successfully appended QNN execution provider. Backend type: HTP
Loading model from: sample_model.onnx
Model loaded successfully
Input data: [0.286723, 0.192322, 0.439166, 0.737144, 0.218138, ...]
Memory info created
Input tensor created
Starting inference...
Inference completed successfully
Inference time: 0.261 ms
First 5 output values: [0.728386, 0.328756, 0.229289, 0.102968, -0.019134, ...]
Resources released successfully
```

The model comes from the following Python code:

```python
def export_onnx_model():
    model = SimpleModel()
    model.eval()

    # Create dummy input
    dummy_input = torch.randn(1, 10)

    # Export to ONNX
    torch.onnx.export(
        model,
        dummy_input,
        "sample_model.onnx",
        export_params=True,
        opset_version=13,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={
            'input': {0: 'batch_size'},
            'output': {0: 'batch_size'}
        }
    )
    print("✅ ONNX model exported to sample_model.onnx")
```

I built the ONNX Runtime QNN EP shared library locally. The build command is:

```sh
./build.sh --build_shared_lib --android --config Release --parallel \
    --use_qnn static_lib --qnn_home $QNN_SDK_ROOT \
    --android_sdk_path $ANDROID_SDK_ROOT \
    --android_ndk_path $ANDROID_NDK_ROOT \
    --android_abi arm64-v8a --android_api [api-version] \
    --cmake_generator Ninja --build_dir build/Android
```

I also tried `--config Debug`. I have the corresponding environment on a physical Ubuntu 22.04 machine, so the build is quick. Then I collect the build products with

```sh
cd build/Android
cmake --install . --prefix ./install
```

to get the include and lib directories for easy use.

Urgency

No response

Platform

Android

OS Version

12/15

ONNX Runtime Installation

Built from Source

Compiler Version (if 'Built from Source')

cmake version 4.0.3
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0

Package Name (if 'Released Package')

None

ONNX Runtime Version or Commit ID

1.23.0

ONNX Runtime API

C++/C

Architecture

X64

Execution Provider

Other / Unknown

Execution Provider Library Version

qcom/aistack/qairt/2.31.0.250130

Panda-Young avatar Jun 17 '25 09:06 Panda-Young

Could you run the program with verbose flag and paste the output? AFAIK, if the program is unable to determine the QNN backend path, it should fall back to the CPU EP.

Specifically, look for the verbose logs: "Unable to determine backend path from provider options. Using default", or "Using backend path: ". The verbose logs should also show which nodes are being assigned to which EPs.
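
For reference, a minimal sketch of turning the logging up through the C API (reusing the `ort` pointer and `CHECK_STATUS` macro from the code above; the session-level call is optional, the environment-level severity is usually enough):

```c
/* Verbose environment logger: EP registration and node-placement
 * messages are emitted at this level. */
OrtEnv *env;
CHECK_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_VERBOSE, "test", &env));

OrtSessionOptions *session_options;
CHECK_STATUS(ort->CreateSessionOptions(&session_options));
/* 0 corresponds to ORT_LOGGING_LEVEL_VERBOSE for the session logger. */
CHECK_STATUS(ort->SetSessionLogSeverityLevel(session_options, 0));
```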

Also, if you know that all the nodes in the model are supported on the QNN EP, you can also try setting the disable_cpu_ep_fallback flag in session options, which should cause your program to throw if it's unable to resolve the QNN backend.
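
For example, a sketch of setting that flag via the C API (again reusing `ort` and `CHECK_STATUS`; `session.disable_cpu_ep_fallback` is the config key declared in onnxruntime_session_options_config_keys.h):

```c
/* Make session creation fail instead of silently falling back to the
 * CPU EP when nodes cannot be placed on the QNN EP. */
CHECK_STATUS(ort->AddSessionConfigEntry(
    session_options, "session.disable_cpu_ep_fallback", "1"));
```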

carzh avatar Jun 18 '25 18:06 carzh

Thank you for your reply. I changed the log level to verbose, and the output did not change. I also found that even if these keys and values are irrelevant strings, the SessionOptionsAppendExecutionProvider call still succeeds. The C code is as follows:

```c
const OrtApi *ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);
printf("ONNX Runtime API initialized\n");

OrtEnv *env;
CHECK_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_VERBOSE, "test", &env));
printf("Environment created successfully\n");

OrtSessionOptions *session_options;
CHECK_STATUS(ort->CreateSessionOptions(&session_options));
printf("Session options created\n");

const char *provider_options_keys[] = {
    "backend_typesssss",
    "qnn_context_file_path"};

const char *provider_options_values[] = {
    "HTPsssssssss",
    "/data/local/tmp/qnn_context"};

CHECK_STATUS(ort->SessionOptionsAppendExecutionProvider(
    session_options,
    "QNN",
    provider_options_keys,
    provider_options_values,
    sizeof(provider_options_keys) / sizeof(provider_options_keys[0])));
printf("Successfully appended QNN execution provider. Backend type: %s\n", provider_options_values[0]);`

Program output:

```
ONNX Runtime API initialized
Environment created successfully
Session options created
Successfully appended QNN execution provider. Backend type: HTPsssssssss
Loading model from: sample_model.onnx
Model loaded successfully
Input data: [0.189516, 0.335345, 0.141616, 0.688885, 0.072668, ...]
Memory info created
Input tensor created
Starting inference...
Inference completed successfully
Inference time: 0.147 ms
First 5 output values: [0.322046, 0.174284, 0.229289, 0.102968, -0.019134, ...]
Resources released successfully
```

Panda-Young avatar Jun 20 '25 06:06 Panda-Young

I'm keeping an eye on this, but nothing has changed so far

Panda-Young avatar Aug 11 '25 06:08 Panda-Young

I'm keeping an eye on this, but nothing has changed so far

Panda-Young avatar Oct 31 '25 11:10 Panda-Young

By setting ORT_LOGGING_LEVEL_VERBOSE and monitoring the logcat output (`adb logcat -c && adb logcat | grep -i -e onnxruntime -e qnn`), I found that the above code actually runs on the CPU: QNN fails during device creation due to missing dependency libraries.
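
For anyone who hits the same thing: pointing the EP at an explicit backend library makes this kind of failure easier to spot. A minimal sketch (`backend_path` is a documented QNN EP provider option; the library location below is only an assumption for illustration, and the matching HTP Skel/Stub libraries from the QNN SDK still need to be present on the device):

```c
/* Sketch: hand the QNN EP an explicit backend library path instead of
 * relying on backend_type resolution. Combined with
 * session.disable_cpu_ep_fallback, a missing backend then fails loudly
 * at session creation. */
const char *keys[] = {"backend_path"};
const char *values[] = {"/data/local/tmp/libQnnHtp.so"};
CHECK_STATUS(ort->SessionOptionsAppendExecutionProvider(
    session_options, "QNN", keys, values,
    sizeof(keys) / sizeof(keys[0])));
```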

Panda-Young avatar Nov 07 '25 21:11 Panda-Young