UniRes icon indicating copy to clipboard operation
UniRes copied to clipboard

Error when trying to run

Open SophieOstmeier opened this issue 1 year ago • 10 comments

I encounter this problem no matter how I install the environment. Any idea what could cause it?

15/09/2023 12:58:30 | GPU: NVIDIA RTX A6000, CUDA: True, PyTorch: 2.0.1+cu117

Input c=0, n=0 | fname=data/pd_icbm_normal_1mm_pn0_rf0.nii.gz c=1, n=0 | fname=data/t1_icbm_normal_1mm_pn0_rf0.nii.gz

Estimating model hyper-parameters... completed in 0.78400 seconds: c=0 | tau= 0.000452 | sd= 47.04 | mu= 4314 | ct=False c=1 | tau= 0.1775 | sd= 2.373 | mu= 427.9 | ct=False

Performing multi-channel (N=2) alignment...<class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'> Traceback (most recent call last): File "/home/sophie/miniconda3/envs/UniRes_cpu/bin/unires", line 8, in sys.exit(run()) ^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/unires/_cli.py", line 289, in run _preproc(**vars(args)) File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/unires/_cli.py", line 75, in _preproc dat_y, mat_y, pth_y = preproc(pth, s) ^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/unires/run.py", line 313, in preproc x, y, sett = init(data, sett) ^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/unires/run.py", line 265, in init x, sett = _init_reg(x, sett) ^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/unires/_core.py", line 330, in _init_reg mat_a = affine_align(imgs, **sett.coreg_params, fix=fix, device=sett.device)[1] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/tools/preproc.py", line 166, in affine_align mat_a, mat_fix, dim_fix, _ = _affine_align(dat, mat, ^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/tools/affine_reg/_align.py", line 150, in _affine_align q, args = _fit_q(q, dat_fix, grid, mat_fix, dat, mat, mov, ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/tools/affine_reg/_core.py", line 199, in _fit_q q[m, ...] 
= _do_optimisation(q[m, ...], args, s, opt, dim) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/tools/affine_reg/_core.py", line 139, in _do_optimisation res = minimize(_compute_cost, q, args, method='Powell', ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_minimize.py", line 701, in minimize res = _minimize_powell(fun, x0, args, callback, bounds, **options) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_optimize.py", line 3507, in _minimize_powell fval, x, direc1 = _linesearch_powell(func, x, direc1, ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_optimize.py", line 3195, in _linesearch_powell res = _minimize_scalar_bounded(myfunc, bound, xatol=tol / 100) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_optimize.py", line 2285, in _minimize_scalar_bounded fx = func(x, args) ^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_optimize.py", line 3176, in myfunc return func(p + alphaxi) ^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/scipy/optimize/_optimize.py", line 620, in function_wrapper fx = function(np.copy(x), *(wrapper_args + args)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/tools/affine_reg/_costs.py", line 86, in _compute_cost dat_new = grid_pull(dat[m], grid, bound='dft', extrapolate=True, interpolation=1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File 
"/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/spatial/_grid.py", line 201, in grid_pull out = GridPull.apply(input, grid, interpolation, bound, extrapolate) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/torch/autograd/function.py", line 506, in apply return super().apply(*args, **kwargs) # type: ignore[misc] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/torch/cuda/amp/autocast_mode.py", line 106, in decorate_fwd return fwd(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/_C/grid.py", line 252, in forward output = grid_pull(input, grid, *opt) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/sophie/miniconda3/envs/UniRes_cpu/lib/python3.11/site-packages/nitorch/_C/_ts/pushpull.py", line 44, in grid_pull return iso1.pull3d(inp, grid, bound_fn, extrapolate) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RuntimeError: r INTERNAL ASSERT FAILED at "../aten/src/ATen/core/jit_type_base.h":549, please report a bug to PyTorch.

SophieOstmeier avatar Sep 15 '23 21:09 SophieOstmeier

Hi Sophie,

Would you be able to try with an older version of pytorch (1.x instead of 2.x)? That would be the fastest way to get it to work I think.

Best Yael

balbasty avatar Sep 16 '23 12:09 balbasty

Thanks for your response. I will try!

SophieOstmeier avatar Sep 16 '23 17:09 SophieOstmeier

I get the same error with pytorch 1.13.1 py3.10_cpu_0 pytorch cuda-cudart 11.8.89 0 nvidia cuda-cupti 11.8.87 0 nvidia cuda-libraries 11.8.0 0 nvidia cuda-nvrtc 11.8.89 0 nvidia cuda-nvtx 11.8.86 0 nvidia cuda-runtime 11.8.0 0 nvidia

and nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2022 NVIDIA Corporation Built on Wed_Sep_21_10:33:58_PDT_2022 Cuda compilation tools, release 11.8, V11.8.89 Build cuda_11.8.r11.8/compiler.31833905_0

SophieOstmeier avatar Sep 16 '23 18:09 SophieOstmeier

Unfortunately also with another server, pytorch 1.13.0 and an older cuda version: nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Sun_Jul_28_19:07:16_PDT_2019 Cuda compilation tools, release 10.1, V10.1.243

SophieOstmeier avatar Sep 16 '23 18:09 SophieOstmeier

Hi @SophieOstmeier, Have you ensured that your local CUDA version is the same as the one that was used to build the pytorch version you are using?

brudfors avatar Sep 18 '23 09:09 brudfors

I checked that very closely and tried multiple pytorch, python versions and different servers with the respective cuda version. I still get the same issue every time. Is there something else to try/fix?

SophieOstmeier avatar Sep 18 '23 19:09 SophieOstmeier

Are you familiar with Docker? If so, you could try to build a Docker image from a Dockerfile with the following content:

FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04

RUN apt-get update --fix-missing && DEBIAN_FRONTEND=noninteractive apt-get install --assume-yes --no-install-recommends \
   build-essential \
   python3 \
   python3-dev \
   python3-pip \
   git

RUN pip install torch==1.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip install numpy nibabel==4.0.2

# Install nitorch
WORKDIR /workspace
RUN git clone -n https://github.com/balbasty/nitorch.git
WORKDIR /workspace/nitorch
ARG MKL_SERVICE_FORCE_INTEL=1
ARG NI_COMPILED_BACKEND="C"
RUN pip install .

# Install unires
WORKDIR /workspace
RUN git clone -n https://github.com/brudfors/UniRes
WORKDIR /workspace/UniRes
RUN pip install .

WORKDIR /workspace

Build with something like:

docker build --rm --tag unires:latest .

and then try to run unires with a command like this.

brudfors avatar Sep 19 '23 07:09 brudfors

Thanks!

I was not able to get it working locally (GPU is not compatible) but I was able to get docker to work on runpod.

If I run unires with many images, does it coregister all of them to the first one?

SophieOstmeier avatar Sep 25 '23 05:09 SophieOstmeier

Glad to hear you got it working.

Yes, for multiple input images, it will use the first input image as the fixed reference.

brudfors avatar Sep 25 '23 10:09 brudfors

WARNING:root:nitorch uses its non-compiled backend (TS). Some algorithms may be slow.


| | | |_ __ () _ \ ___ ___ | | | | ' | | |) / _ / __| | || | | | | | _ < /_
_
/|| |||| __||__/

11/07/2024 21:09:04 | GPU: NVIDIA GeForce RTX 4090, CUDA: True, PyTorch: 1.9.0+cu111

Input c=0, n=0 | fname=/root/autodl-tmp/UniRes-master/data/IXI069-Guys-0769-T1.nii.gz

Estimating model hyper-parameters... Traceback (most recent call last): File "/root/miniconda3/bin/unires", line 8, in sys.exit(run()) File "/root/miniconda3/lib/python3.8/site-packages/unires/_cli.py", line 243, in run _preproc(**vars(args)) File "/root/miniconda3/lib/python3.8/site-packages/unires/_cli.py", line 52, in _preproc dat_y, mat_y, pth_y = preproc(pth, s) File "/root/miniconda3/lib/python3.8/site-packages/unires/run.py", line 313, in preproc x, y, sett = init(data, sett) File "/root/miniconda3/lib/python3.8/site-packages/unires/run.py", line 253, in init x = _estimate_hyperpar(x, sett) File "/root/miniconda3/lib/python3.8/site-packages/unires/_core.py", line 125, in _estimate_hyperpar prm_noise, prm_not_noise = estimate_noise( File "/root/miniconda3/lib/python3.8/site-packages/nitorch/tools/img_statistics.py", line 196, in estimate_noise model.fit(x, W=dat, verbose=verbose, max_iter=max_iter, File "/root/miniconda3/lib/python3.8/site-packages/nitorch/vb/mixtures.py", line 104, in fit Z, lb = self._em(X, max_iter=max_iter, tol=tol, verbose=verbose, W=W) File "/root/miniconda3/lib/python3.8/site-packages/nitorch/vb/mixtures.py", line 162, in _em Z[:, k] = torch.log(self.mp[k]) + self._log_likelihood(X, k) File "/root/miniconda3/lib/python3.8/site-packages/nitorch/vb/mixtures.py", line 506, in _log_likelihood log_pdf = log_pdf + besseli(0, X * (nu / sig2), 'log') File "/root/miniconda3/lib/python3.8/site-packages/nitorch/core/math.py", line 1255, in besseli z = besseli0(z, code) RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)

nvrtc compilation failed:

#define NAN __int_as_float(0x7fffffff) #define POS_INFINITY __int_as_float(0x7f800000) #define NEG_INFINITY __int_as_float(0xff800000)

template<typename T> __device__ T maximum(T a, T b) { return isnan(a) ? a : (a > b ? a : b); }

template<typename T> __device__ T minimum(T a, T b) { return isnan(a) ? a : (a < b ? a : b); }

extern "C" __global__ void fused_mul_pow_mul_a_11258522613074717678(double* tzm_1, double* aten_add) { { double tzm_1_1 = __ldg(tzm_1 + 512 * blockIdx.x + threadIdx.x); aten_add[512 * blockIdx.x + threadIdx.x] = ((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * (((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * (((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * (((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * (((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * (((tzm_1_1 * 0.2666666666666667) * (tzm_1_1 * 0.2666666666666667)) * 0.0045813 + 0.0360768) + 0.2659732) + 1.2067492) + 3.0899424) + 3.5156229) + 1.0; } } Could you help me solve this problem? @brudfors

06Liz avatar Jul 11 '24 13:07 06Liz