ZLUDA icon indicating copy to clipboard operation
ZLUDA copied to clipboard

ZLUDA not working with PyTorch on Ubuntu 22.04

Open radna0 opened this issue 9 months ago • 0 comments

I'm trying to set up PyTorch and ZLUDA to run CUDA on AMD GPUs, but to no avail.

(base) r4-0@r40-desktop:~/pytorch$ LD_LIBRARY_PATH="$HOME/zluda:$LD_LIBRARY_PATH" python3 
Python 3.12.3 | packaged by Anaconda, Inc. | (main, May  6 2024, 19:46:43) [GCC 11.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import torch
>>> print(torch.cuda.is_available())
False
>>> torch.cuda.device_count()
0

These are the setup scripts I use for setting up the OS and building PyTorch:

OS

#!/bin/bash

# Refresh the package index, then upgrade installed packages; the upgrade only
# runs when the index refresh succeeded.
sudo apt-get update -y && sudo apt-get upgrade -y

# Baseline command-line, build and Python tooling, one package per entry so
# the list is easy to edit.
base_packages=(
    ca-certificates
    nano
    wget
    curl
    gnupg
    ripgrep
    ltrace
    file
    python3-minimal
    build-essential
    git
    cmake
    ninja-build
    python3-pip
)

# DEBIAN_FRONTEND=noninteractive keeps apt from prompting during the install.
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${base_packages[@]}"

# Environment setup.
# Append the ROCm, ROCm LLVM and CUDA tool directories after the existing
# PATH entries, so anything already on PATH keeps precedence.
PATH="${PATH}:/opt/rocm/bin"
PATH="${PATH}:/opt/rocm/llvm/bin"
PATH="${PATH}:/usr/local/cuda/bin/"
export PATH

# NOTE(review): these look like the NVIDIA container-runtime switches —
# confirm they are needed on a bare-metal AMD machine.
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
export NVIDIA_VISIBLE_DEVICES NVIDIA_DRIVER_CAPABILITIES

# Install CUDA
# CUDA_VERSION is the dash-separated version suffix used in the apt package
# names below.
CUDA_VERSION="11-8"
cuda_keyring_deb="cuda-keyring_1.0-1_all.deb"

# Download NVIDIA's cuda-keyring package into the current directory, install
# it, and refresh the package index so the CUDA packages become visible.
sudo wget "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${cuda_keyring_deb}"
sudo dpkg -i "${cuda_keyring_deb}"
sudo apt-get update

# Driver userspace (no DKMS kernel module), CUDA runtime/compiler/toolkit and
# the math / cuDNN development libraries.
cuda_packages=(
    nvidia-headless-no-dkms-515
    nvidia-utils-515
    "cuda-cudart-${CUDA_VERSION}"
    "cuda-compiler-${CUDA_VERSION}"
    "libcufft-dev-${CUDA_VERSION}"
    "libcusparse-dev-${CUDA_VERSION}"
    "libcublas-dev-${CUDA_VERSION}"
    "cuda-nvml-dev-${CUDA_VERSION}"
    libcudnn8-dev
    "cuda-toolkit-${CUDA_VERSION}"
    "cudnn9-cuda-${CUDA_VERSION}"
)
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "${cuda_packages[@]}"

# Point the environment at the CUDA 11.8 toolkit directory.
export CUDA_HOME=/usr/local/cuda-11.8
# Append the CUDA and CUPTI library directories to the loader search path.
# ${VAR:+...} emits the previous value plus its ':' separator only when
# LD_LIBRARY_PATH is already non-empty; a plain "$LD_LIBRARY_PATH:..." would
# start with an empty entry when the variable is unset, and the dynamic loader
# treats an empty entry as the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_HOME}/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# Make the toolkit binaries reachable, after anything already on PATH.
export PATH="${PATH}:${CUDA_HOME}/bin"


# Install Rust
# rustup is a per-user installer, so it is run as the invoking user rather
# than through sudo. Under sudo the toolchain is installed for root (Ubuntu's
# sudo resets HOME to root's home), which leaves no $HOME/.cargo/env for the
# current user and makes the `source` below fail.
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
# Put cargo/rustc on PATH for the remainder of this script.
source "$HOME/.cargo/env"

# Install ROCm
#* hiplibsdk 5.7 can be installed separately; it cannot be installed through
#* amdgpu-install because of a DKMS error.

sudo apt update
# Fetch the installer package over HTTPS: it is installed as root on the next
# line, so the download should not be open to tampering in transit.
wget https://repo.radeon.com/amdgpu-install/23.40.2/ubuntu/jammy/amdgpu-install_6.0.60002-1_all.deb
sudo apt install -y ./amdgpu-install_6.0.60002-1_all.deb
sudo amdgpu-install -y --usecase=graphics,rocm,hip,hiplibsdk
# Add the current user to the render and video groups (-a appends, keeping
# existing group memberships).
sudo usermod -a -G render,video "$LOGNAME"



# Install Miniconda
# Work from the home directory; abort if it is unreachable so the installer is
# not downloaded and executed from an unexpected location.
cd "$HOME" || exit 1
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
chmod +x Miniconda3-latest-Linux-x86_64.sh
# -b: batch mode (no prompts); -p: installation prefix.
./Miniconda3-latest-Linux-x86_64.sh -b -p "$HOME/miniconda3"

# The conda and python executables live in miniconda3/bin, not in the prefix
# root, so that is the directory that has to be on PATH. The line is kept
# literal (single quotes) so $HOME and $PATH expand when .bashrc is read, and
# it is appended only if it is not already present, so re-running the script
# does not pile up duplicates.
conda_path_line='export PATH="$HOME/miniconda3/bin:$PATH"'
grep -qxF -- "$conda_path_line" "$HOME/.bashrc" 2>/dev/null \
    || printf '%s\n' "$conda_path_line" >> "$HOME/.bashrc"

# Define the `conda` shell function in this shell, then let conda add its own
# initialisation to the shell start-up files.
source "$HOME/miniconda3/etc/profile.d/conda.sh"
# NOTE(review): Ubuntu's stock ~/.bashrc returns early in non-interactive
# shells, so the two .bashrc reloads here may be no-ops — confirm.
source "$HOME/.bashrc"
conda init
conda --version
source "$HOME/.bashrc"



# Cleanup: drop the downloaded installers from the home directory and let apt
# discard cached and no-longer-needed packages.
cd "$HOME"
leftover_installers=(
    cuda-keyring_1.0-1_all.deb
    Miniconda3-latest-Linux-x86_64.sh
)
sudo rm -rf "${leftover_installers[@]}"
for apt_action in autoclean autoremove; do
    sudo apt-get "$apt_action" -y
done

# Re-read the user's bashrc as the final step.
source "$HOME/.bashrc"

ZLUDA and PyTorch

*Normally I would compile the main ZLUDA repo, but I am currently trying out a fork from @lshqqytiger.

#!/bin/bash

cd "$HOME"

# Download the prebuilt ZLUDA release archive from the lshqqytiger fork into
# the destination directory and unpack it there.
destination="$HOME"
zluda_archive="ZLUDA-linux-amd64.tar.gz"
zluda_release_url="https://github.com/lshqqytiger/ZLUDA/releases/download/rel.11cc5844514f93161e0e74387f04e2c537705a82/${zluda_archive}"
wget "$zluda_release_url" -P "$destination"
tar -xzf "$destination/$zluda_archive" -C "$destination"


# Disabled alternative: build upstream ZLUDA from source instead of using the
# prebuilt archive.
#git clone --recurse-submodules https://github.com/vosen/ZLUDA.git $HOME/ZLUDA
#cd $HOME/ZLUDA
#cargo xtask --release


# Install PyTorch (source build against CUDA, with the ZLUDA libraries on the
# loader path during the build).
git clone --recursive https://github.com/pytorch/pytorch "$HOME/pytorch"
# Stop if the checkout is missing; otherwise the git, pip and setup.py commands
# below would run in whatever directory the script was started from.
cd "$HOME/pytorch" || exit 1
git submodule sync
git submodule update --init --recursive
conda install -y cmake ninja
pip install -r requirements.txt

# Use the active conda environment as the CMake prefix; if none is active,
# fall back to the parent of the directory that holds the conda executable.
# Both substitutions are quoted so a path containing spaces stays one word.
export CMAKE_PREFIX_PATH="${CONDA_PREFIX:-$(dirname "$(which conda)")/../}"

# Target CUDA architecture for the build: 6.1, plus PTX in the torch arch list.
export TORCH_CUDA_ARCH_LIST="6.1+PTX"
export CUDAARCHS=61
export CMAKE_CUDA_ARCHITECTURES=61

# Build feature switches.
# NOTE(review): USE_SYSTEM_NCCL=1 together with USE_NCCL=0 looks
# contradictory — confirm which one is intended.
export USE_SYSTEM_NCCL=1
export USE_NCCL=0
export USE_EXPERIMENTAL_CUDNN_V8_API=OFF
export DISABLE_ADDMM_CUDA_LT=1
export USE_ROCM=OFF

# Prepend the ZLUDA directory for this one command only. ${VAR:+...} adds the
# ':' separator and the previous value only when LD_LIBRARY_PATH is non-empty,
# which avoids a trailing empty entry (treated by the dynamic loader as the
# current working directory).
LD_LIBRARY_PATH="$HOME/zluda${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" python3 setup.py develop


# Cleanup
cd "$HOME"

radna0 avatar May 23 '24 01:05 radna0