paleo
paleo copied to clipboard
CuDNN error
Dear developers,
I installed paleo on a machine that does not have cuDNN (the install instructions say it is optional). Then I ran the following script:
#! /bin/sh
NET_FILE=nets/inception_v3.json OUT_FILE=results/case_inception
PPP_COMP=0.62 PPP_COMM=0.72
./paleo.sh simulate $NET_FILE
--use_only_gemm
--batch_size=256
--network_name=ethernet
--device_name=K40
--num_workers=1,2,4,8,16,50,100
--ppp_comp=$PPP_COMP
--ppp_comm=$PPP_COMM
--scaling=weak >> $OUT_FILE
but I got the following error.
File "paleo/profiler.py", line 427, in <module>
cli()
File "/Users/pbalapra/anaconda2/envs/paleo/lib/python2.7/site-packages/click/core.py", line 722, in __call__
return self.main(*args, **kwargs)
File "/Users/pbalapra/anaconda2/envs/paleo/lib/python2.7/site-packages/click/core.py", line 697, in main
rv = self.invoke(ctx)
File "/Users/pbalapra/anaconda2/envs/paleo/lib/python2.7/site-packages/click/core.py", line 1066, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/Users/pbalapra/anaconda2/envs/paleo/lib/python2.7/site-packages/click/core.py", line 895, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/Users/pbalapra/anaconda2/envs/paleo/lib/python2.7/site-packages/click/core.py", line 535, in invoke
return callback(*args, **kwargs)
File "paleo/profiler.py", line 297, in simulate
ppp_comm, parallel, hybrid_workers)
File "paleo/profiler.py", line 149, in simulate
ppp_comm)
File "/Users/pbalapra/Projects/repos/2018/paleo/paleo/simulation.py", line 250, in simulate_scaling
device)
File "/Users/pbalapra/Projects/repos/2018/paleo/paleo/simulation.py", line 59, in _profile_for_apply_updates
device)
File "/Users/pbalapra/Projects/repos/2018/paleo/paleo/profilers/flops_profiler.py", line 24, in __init__
from paleo.profilers import cudnn_profiler as cudnn
File "/Users/pbalapra/Projects/repos/2018/paleo/paleo/profilers/cudnn_profiler.py", line 10, in <module>
from paleo.third_party import libcudnn
File "/Users/pbalapra/Projects/repos/2018/paleo/paleo/third_party/libcudnn.py", line 52, in <module>
raise OSError('cuDNN library not found')
OSError: cuDNN library not found
Same error with a cleaned-up backtrace:
$ ./paleo.sh simulate --use_only_gemm nets/alex_v2_1gpu.json
nets/alex_v2_1gpu.json
==========
Traceback (most recent call last):
File "paleo/profiler.py", line 427, in <module>
cli()
File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 716, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 696, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 1060, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 889, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python2.7/dist-packages/click/core.py", line 534, in invoke
return callback(*args, **kwargs)
File "paleo/profiler.py", line 297, in simulate
ppp_comm, parallel, hybrid_workers)
File "paleo/profiler.py", line 149, in simulate
ppp_comm)
File "build/bdist.linux-x86_64/egg/paleo/simulation.py", line 250, in simulate_scaling
device)
File "build/bdist.linux-x86_64/egg/paleo/simulation.py", line 59, in _profile_for_apply_updates
device)
File "build/bdist.linux-x86_64/egg/paleo/profilers/flops_profiler.py", line 24, in __init__
File "build/bdist.linux-x86_64/egg/paleo/profilers/cudnn_profiler.py", line 10, in <module>
File "build/bdist.linux-x86_64/egg/paleo/third_party/libcudnn.py", line 52, in <module>
class cudnnMappingError(cudnnError):
OSError: cuDNN library not found
@gaul, @pbalapra are you able to solve this issue? I am also getting a similar issue.