cudnn-frontend
cudnn-frontend copied to clipboard
cudnn-frontend crashes in case of MAX_OPGRAPH_OPS violation
Describe the bug Consider a graph with more than MAX_OPGRAPH_OPS nodes, for example in this code
#include "cudnn-frontend/include/cudnn_frontend.h"
namespace fe = cudnn_frontend;
int main()
{
cudnnHandle_t handle;
assert(cudnnCreate(&handle) == CUDNN_STATUS_SUCCESS);
auto graph = std::make_shared<fe::graph::Graph>();
auto x = graph->tensor(
fe::graph::Tensor_attributes()
.set_name("x")
.set_dim({1, 1, 1})
.set_stride({1, 1, 1})
.set_data_type(fe::DataType_t::FLOAT));
auto y = graph->tensor(
fe::graph::Tensor_attributes()
.set_name("y")
.set_dim({1, 1, 1})
.set_stride({1, 1, 1})
.set_data_type(fe::DataType_t::FLOAT));
auto inX = x;
auto inY = y;
for (int i = 0; i < 60; ++i) {
auto sum = graph->pointwise(
x,
y,
fe::graph::Pointwise_attributes()
.set_mode(fe::PointwiseMode_t::ADD)
.set_compute_data_type(fe::DataType_t::FLOAT));
sum->set_data_type(fe::DataType_t::FLOAT);
x = y;
y = sum;
}
y->set_output(true);
assert(graph->validate().is_good());
auto r = graph->build_operation_graph(handle);
if (!r.is_good()) {
std::cerr << r.get_message() << std::endl;
return 0;
}
assert(graph->create_execution_plans({fe::HeurMode_t::A}).is_good());
assert(graph->build_plans(handle, fe::BuildPlanPolicy_t::ALL).is_good());
float One = 1;
void* inXPtr;
assert(cudaMalloc(&inXPtr, sizeof(float)) == cudaSuccess);
assert(cudaMemcpy(inXPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
void* inYPtr;
assert(cudaMalloc(&inYPtr, sizeof(float)) == cudaSuccess);
assert(cudaMemcpy(inYPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
void* outPtr;
assert(cudaMalloc(&outPtr, sizeof(float)) == cudaSuccess);
void* workspacePtr;
assert(cudaMalloc(&workspacePtr, graph->get_workspace_size()) == cudaSuccess);
std::unordered_map<std::shared_ptr<fe::graph::Tensor_attributes>, void*> tensorMap;
tensorMap[inX] = inXPtr;
tensorMap[inY] = inYPtr;
tensorMap[y] = outPtr;
r = graph->execute(handle, tensorMap, workspacePtr);
if (!r.is_good()) {
std::cerr << r.get_message() << std::endl;
return 0;
}
assert(cudaDeviceSynchronize() == cudaSuccess);
float outData;
assert(cudaMemcpy(&outData, outPtr, sizeof(float), cudaMemcpyDeviceToHost) == cudaSuccess);
std::cout << outData << std::endl;
assert(cudaFree(inXPtr) == cudaSuccess);
assert(cudaFree(inYPtr) == cudaSuccess);
assert(cudaFree(outPtr) == cudaSuccess);
assert(cudaFree(workspacePtr) == cudaSuccess);
assert(cudnnDestroy(handle) == CUDNN_STATUS_SUCCESS);
}
Instead of giving an error during compilation it crashes with the following trace because of the out-of-bounds access to m_operationGraph.ops.
(gdb) bt
#0 0x00005555555a56d5 in __gnu_cxx::__exchange_and_add (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:66
#1 __gnu_cxx::__exchange_and_add_dispatch (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:101
#2 std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0xf0) at /usr/include/c++/11/bits/shared_ptr_base.h:165
#3 0x0000555555596de3 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=0x7fffffffcfe8,
__in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:705
#4 0x000055555556decc in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (
this=0x7fffffffcfe0, __in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:1154
#5 0x00005555555a75e6 in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::operator= (
this=0x7fffffffd4e8, __r=...) at /usr/include/c++/11/bits/shared_ptr_base.h:1250
#6 0x000055555559883e in std::shared_ptr<cudnn_frontend::OpaqueBackendPointer>::operator= (this=0x7fffffffd4e8, __r=...)
at /usr/include/c++/11/bits/shared_ptr.h:385
#7 0x0000555555578ef1 in cudnn_frontend::OperationGraphBuilder_v8::setOperationGraph (this=0x7fffffffd170, numOps_=60,
ops_=0x555555ab2f80) at cudnn-frontend/include/cudnn_frontend_OperationGraph.h:157
#8 0x0000555555586552 in cudnn_frontend::ICudnn::create_cudnn_operation_graph (this=0x5555562310f8, handle=0x5555557379c0)
at cudnn-frontend/include/cudnn_frontend/node/../cudnn_interface.h:123
#9 0x0000555555592b85 in cudnn_frontend::graph::Graph::build_operation_graph (this=0x5555562310f0, handle=0x5555557379c0)
at cudnn-frontend/include/cudnn_frontend/graph_interface.h:265
#10 0x00005555555681d4 in main () at example.cpp:43
Expected behavior
I expect to get an error in this case, so my code can fallback to another computational engine.
Also, I expect this restriction to be mentioned in the documentation.