torchdrug icon indicating copy to clipboard operation
torchdrug copied to clipboard

ImportError: No module named 'torch_ext'

Open YiZhang025 opened this issue 3 years ago • 14 comments

Hi, I was running the "quickstart" code on Windows 10. I used torch = 1.8.0 and python = 3.7 with cuda = 10.2.

The problem occurred when I tried to train the model in Jupyter:

optimizer = torch.optim.Adam(task.parameters(), lr=1e-3)
solver = core.Engine(task, train_set, valid_set, test_set, optimizer, gpus=[0],
                     batch_size=512)
solver.train(num_epoch=100)

And this resulted in:

---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_19744/1406504193.py in <module>
----> 1 solver.train(num_epoch=100)

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\core\engine.py in train(self, num_epoch, batch_per_epoch)
    141                     batch = utils.cuda(batch, device=self.device)
    142 
--> 143                 loss, metric = model(batch)
    144                 if not loss.requires_grad:
    145                     raise RuntimeError("Loss doesn't require grad. Did you define any loss in the task?")

d:\conda\envs\torchdrug\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\tasks\property_prediction.py in forward(self, batch)
     72         metric = {}
     73 
---> 74         pred = self.predict(batch, all_loss, metric)
     75 
     76         if all([t not in batch for t in self.task]):

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\tasks\property_prediction.py in predict(self, batch, all_loss, metric)
    103     def predict(self, batch, all_loss=None, metric=None):
    104         graph = batch["graph"]
--> 105         output = self.model(graph, graph.node_feature.float(), all_loss=all_loss, metric=metric)
    106         pred = self.linear(output["graph_feature"])
    107         return pred

d:\conda\envs\torchdrug\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\models\gin.py in forward(self, graph, input, all_loss, metric)
     74 
     75         for layer in self.layers:
---> 76             hidden = layer(graph, layer_input)
     77             if self.short_cut and hidden.shape == layer_input.shape:
     78                 hidden = hidden + layer_input

d:\conda\envs\torchdrug\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\layers\conv.py in forward(self, graph, input)
     89             update = checkpoint.checkpoint(self._message_and_aggregate, *graph.to_tensors(), input)
     90         else:
---> 91             update = self.message_and_aggregate(graph, input)
     92         output = self.combine(input, update)
     93         return output

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\layers\conv.py in message_and_aggregate(self, graph, input)
    339     def message_and_aggregate(self, graph, input):
    340         adjacency = utils.sparse_coo_tensor(graph.edge_list.t()[:2], graph.edge_weight,
--> 341                                             (graph.num_node, graph.num_node))
    342         update = torch.sparse.mm(adjacency.t(), input)
    343         if self.edge_linear:

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\utils\torch.py in sparse_coo_tensor(indices, values, size)
    160         size (list): size of the tensor
    161     """
--> 162     return torch_ext.sparse_coo_tensor_unsafe(indices, values, size)
    163 
    164 

d:\conda\envs\torchdrug\lib\site-packages\torchdrug-0.1.0-py3.7.egg\torchdrug\utils\torch.py in __getattr__(self, key)
     28             self.module = cpp_extension.load(self.name, self.sources, self.extra_cflags, self.extra_cuda_cflags,
     29                                              self.extra_ldflags, self.extra_include_paths, self.build_directory,
---> 30                                              self.verbose, **self.kwargs)
     31         return getattr(self.module, key)
     32 

d:\conda\envs\torchdrug\lib\site-packages\torch\utils\cpp_extension.py in load(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)
   1089         is_python_module,
   1090         is_standalone,
-> 1091         keep_intermediates=keep_intermediates)
   1092 
   1093 

d:\conda\envs\torchdrug\lib\site-packages\torch\utils\cpp_extension.py in _jit_compile(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, is_standalone, keep_intermediates)
   1315         return _get_exec_path(name, build_directory)
   1316 
-> 1317     return _import_module_from_library(name, build_directory, is_python_module)
   1318 
   1319 

d:\conda\envs\torchdrug\lib\site-packages\torch\utils\cpp_extension.py in _import_module_from_library(module_name, path, is_python_module)
   1697 def _import_module_from_library(module_name, path, is_python_module):
   1698     # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
-> 1699     file, path, description = imp.find_module(module_name, [path])
   1700     # Close the .so file after load.
   1701     with file:

d:\conda\envs\torchdrug\lib\imp.py in find_module(name, path)
    294         break  # Break out of outer loop when breaking out of inner loop.
    295     else:
--> 296         raise ImportError(_ERR_MSG.format(name), name=name)
    297 
    298     encoding = None

ImportError: No module named 'torch_ext'

The same code works well in Colab and I suspect this is because I couldn't install rdkit-pypi and installed rdkit on conda instead.

YiZhang025 avatar Aug 14 '21 19:08 YiZhang025

Thanks for the observation. TorchDrug is mainly developed and tested on Linux, and should work with Colab. Currently there is no guarantee for installing TorchDrug on Windows, but we will come up with Windows support soon.

KiddoZhu avatar Aug 20 '21 03:08 KiddoZhu

Hi, I ran into the same problem when running the "quickstart" code on Ubuntu 20.04. I used torch = 1.8.0 and python = 3.8 with cuda = 11.1.

yrq3027 avatar Aug 26 '21 02:08 yrq3027

Got same problem on CentOS8, with PyTorch 1.8.0, CUDA 11.0 and python 3.7

nuistcz avatar Sep 10 '21 05:09 nuistcz

Did you install from conda or from source? I just checked the conda package and torch_ext.cpp lies correctly under torchdrug/utils/extension. torch_ext should be automatically imported and JIT compiled by PyTorch when it is used.

One issue I've encountered with PyTorch JIT is that if you kill the process during JIT compilation, it sometimes leaves a broken compiled binary, and JIT can neither load it nor recompile it. You need to manually delete the folder torch_extensions, which lies under either /home/your_user_name/.cache or /tmp by default.

KiddoZhu avatar Sep 10 '21 07:09 KiddoZhu

Had the same issue, and was able to fix it by:

rm -rf /home/your_user_name/.cache/torch_extensions

chaitjo avatar Oct 04 '21 04:10 chaitjo

Had the same issue, and was able to fix it by:

rm -rf /home/your_user_name/.cache/torch_extensions

If this solves the problem, I believe it was caused by a broken JIT cache. This is typically caused by a Ctrl-C interrupt during compilation or by compiling multiple instances of the same extension at the same time.

If you want to import torchdrug in several processes on the same machine, a safe solution is to manually specify a separate extension directory for each process like

TORCH_EXTENSIONS_DIR=/path/to/your/extensions python script.py

KiddoZhu avatar Oct 04 '21 06:10 KiddoZhu

I'm using Anaconda on Windows and installed torchdrug and the other required packages in a dedicated environment. I got the message: "_ImportError: DLL load failed while importing torch_ext: The specified module could not be found._" I don't know if it is the same problem as mentioned above. If so, there is no solution for now until a Windows-compatible version is released, right?

Is it compatible with conda in the Windows Subsystem for Linux (WSL)?

natnondesu avatar Oct 04 '21 12:10 natnondesu

@natchanon-me Hi! I am not sure if they are the same problem, but your problem must also be caused by JIT. We didn't test torchdrug on Windows. I think TorchDrug might be compatible with Windows if the JIT utilities from PyTorch (e.g. g++ on Linux; on Windows I guess it's MSVC) work well, since the library doesn't rely on any pre-compiled binaries.

We will test the installation on Windows soon.

KiddoZhu avatar Oct 04 '21 15:10 KiddoZhu

@natchanon-me Windows is now supported! Please check https://torchdrug.ai/docs/installation.html#windows-powershell

KiddoZhu avatar Oct 27 '21 08:10 KiddoZhu

@natchanon-me Windows is now supported! Please check https://torchdrug.ai/docs/installation.html#windows-powershell

Yeah, I'll try it soon. Thanks for the great work!!

natnondesu avatar Oct 27 '21 11:10 natnondesu

I am running into a similar issue, CentOS7, python 3.8, pytorch 1.8, cuda 10.1.

PyTorch itself works, so I'm not sure where the compiler warning comes from. Another strange point -- I only have issues running torchdrug from the command line; I am able to train a network in a Jupyter notebook....

14:20:53   >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
14:20:53   Epoch 0 begin
/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py:283: UserWarning: 

                               !! WARNING !!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Your compiler (c++) is not compatible with the compiler Pytorch was
built with for this platform, which is g++ on linux. Please
use g++ to to compile your extension. Alternatively, you may
compile PyTorch from source using c++, and then you can also use
c++ to compile your extension.

See https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md for help
with compiling PyTorch from source.
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                              !! WARNING !!

  warnings.warn(WRONG_COMPILER_WARNING.format(
Traceback (most recent call last):
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1667, in _run_ninja_build
    subprocess.run(
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/subprocess.py", line 516, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "pretrain_chem.py", line 82, in <module>
    sys.exit(main())
  File "pretrain_chem.py", line 78, in main
    process(args)
  File "pretrain_chem.py", line 43, in process
    solver.train(num_epoch=5)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/core/engine.py", line 143, in train
    loss, metric = model(batch)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/tasks/pretrain.py", line 139, in forward
    pred, target = self.predict_and_target(batch, all_loss, metric)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/tasks/pretrain.py", line 118, in predict_and_target
    output = self.model(graph, node_feature.float(), all_loss, metric)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/models/gin.py", line 76, in forward
    hidden = layer(graph, layer_input)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/layers/conv.py", line 91, in forward
    update = self.message_and_aggregate(graph, input)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/layers/conv.py", line 340, in message_and_aggregate
    adjacency = utils.sparse_coo_tensor(graph.edge_list.t()[:2], graph.edge_weight,
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/torch.py", line 182, in sparse_coo_tensor
    return torch_ext.sparse_coo_tensor_unsafe(indices, values, size)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/torch.py", line 27, in __getattr__
    return getattr(self.module, key)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/decorator.py", line 21, in __get__
    result = self.func(obj)
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/torch.py", line 31, in module
    return cpp_extension.load(self.name, self.sources, self.extra_cflags, self.extra_cuda_cflags,
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1079, in load
    return _jit_compile(
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1292, in _jit_compile
    _write_ninja_file_and_build_library(
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1404, in _write_ninja_file_and_build_library
    _run_ninja_build(
  File "/srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1683, in _run_ninja_build
    raise RuntimeError(message) from e
RuntimeError: Error building extension 'torch_ext': [1/2] c++ -MMD -MF torch_ext.o.d -DTORCH_EXTENSION_NAME=torch_ext -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/TH -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/THC -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -Ofast -fopenmp -DAT_PARALLEL_OPENMP -DCUDA_OP -c /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/extension/torch_ext.cpp -o torch_ext.o 
FAILED: torch_ext.o 
c++ -MMD -MF torch_ext.o.d -DTORCH_EXTENSION_NAME=torch_ext -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/TH -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torch/include/THC -isystem /srv/home/wconnell/anaconda3/envs/10-torchdrug/include/python3.8 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++14 -Ofast -fopenmp -DAT_PARALLEL_OPENMP -DCUDA_OP -c /srv/home/wconnell/anaconda3/envs/10-torchdrug/lib/python3.8/site-packages/torchdrug/utils/extension/torch_ext.cpp -o torch_ext.o 
c++: error: unrecognized command line option ‘-std=c++14’
ninja: build stopped: subcommand failed.

wconnell avatar Nov 24 '21 22:11 wconnell

I ran into the same problem. Could you tell me how you solved it?

drumzhang avatar Mar 10 '22 18:03 drumzhang

I ran into the same problem. Could you tell me how you solved it?

drumzhang avatar Mar 10 '22 18:03 drumzhang

@natchanon-me Windows is now supported! Please check https://torchdrug.ai/docs/installation.html#windows-powershell

What if I am running VSCode and my project is in a conda environment? Which steps are required to get it up and running? I run Python 3.8 rather than the specified 3.7.

bobseboy avatar Sep 20 '22 19:09 bobseboy