djl
djl copied to clipboard
DJL Error on simple Pytorch binding code
Description
import ai.djl.Device;
import ai.djl.MalformedModelException;
import ai.djl.Model;
import ai.djl.engine.Engine;
import ai.djl.inference.Predictor;
import ai.djl.ndarray.NDArray;
import ai.djl.ndarray.NDList;
import ai.djl.ndarray.NDManager;
import ai.djl.translate.Batchifier;
import ai.djl.translate.TranslateException;
import ai.djl.translate.Translator;
import ai.djl.translate.TranslatorContext;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
public class Main {
public static void main(String[] args) throws MalformedModelException, IOException, TranslateException {
System.out.printf("There are %d engines\n", Engine.getAllEngines().size());
Engine.getAllEngines().forEach(engineName ->{
System.out.printf(" Engine name: %s\n", engineName);
});
try(NDManager manager = NDManager.newBaseManager( Device.gpu() , "PyTorch" )) {
NDArray arange = manager.arange(0, 100, 1 );
NDArray max = arange.max();
System.out.println(max.getInt());
}
}
}
I've tried simple example from DJL Library
Expected Behavior
I wanted simple hello world max result from array example
Error Message
Exception in thread "main" ai.djl.engine.EngineException: Cannot download jni files: https://publish.djl.ai/pytorch/2.4.0/jnilib/0.29.0/linux-x86_64/cu124/libdjl_torch.so at ai.djl.pytorch.jni.LibUtils.downloadJniLib(LibUtils.java:542) at ai.djl.pytorch.jni.LibUtils.findJniLibrary(LibUtils.java:280) at ai.djl.pytorch.jni.LibUtils.loadLibrary(LibUtils.java:84) at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:53) at ai.djl.pytorch.engine.PtEngineProvider.getEngine(PtEngineProvider.java:41) at ai.djl.engine.Engine.getEngine(Engine.java:190) at ai.djl.ndarray.NDManager.newBaseManager(NDManager.java:151) at main.Main.main(Main.java:48) Caused by: java.io.FileNotFoundException: https://publish.djl.ai/pytorch/2.4.0/jnilib/0.29.0/linux-x86_64/cu124/libdjl_torch.so at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1898) at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1500) at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:268) at ai.djl.util.Utils.openUrl(Utils.java:519) at ai.djl.util.Utils.openUrl(Utils.java:498) at ai.djl.util.Utils.openUrl(Utils.java:487) at ai.djl.pytorch.jni.LibUtils.downloadJniLib(LibUtils.java:536) ... 7 more
How to Reproduce?
My maven deps:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>groupId</groupId>
<artifactId>PyTorchDJL</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/ai.djl.pytorch/pytorch-engine -->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-engine</artifactId>
<version>0.29.0</version>
</dependency>
<!--
%maven ai.djl.pytorch:pytorch-engine:0.8.0
%maven ai.djl.pytorch:pytorch-native-auto:1.6.0
-->
<!-- <dependency>-->
<!-- <groupId>ai.djl.pytorch</groupId>-->
<!-- <artifactId>pytorch-native-cu121</artifactId>-->
<!-- <classifier>linux-x86_64</classifier>-->
<!-- <version>2.3.1</version>-->
<!-- </dependency>-->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cu124</artifactId>
<classifier>linux-x86_64</classifier>
<version>2.4.0</version>
</dependency>
<!-- FOR CPU
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-native-cpu</artifactId>
<classifier>linux-x86_64</classifier>
<scope>runtime</scope>
<version>2.3.1</version>
</dependency>
-->
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-jni</artifactId>
<version>2.3.1-0.29.0</version>
</dependency>
<dependency>
<groupId>ai.djl</groupId>
<artifactId>model-zoo</artifactId>
<version>0.29.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>
main.Main
</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
</project>
Steps to reproduce
(Paste the commands you ran that produced the error.) java main.Main
What have you tried to solve it?
i have tried to find "libdjl_torch.so" but where to get it.
Environment Info
./gradlew debugEnv
for maven ?
My full output:
/usr/lib/jvm/java-1.8.0-openjdk-amd64/bin/java -agentlib:jdwp=transport=dt_socket,address=127.0.0.1:41405,suspend=y,server=n -javaagent:/home/kadirbasol/Apps/idea-IU-233.11799.300/plugins/java/lib/rt/debugger-agent.jar -Dfile.encoding=UTF-8 -classpath /usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/charsets.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/cldrdata.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/dnsns.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/icedtea-sound.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/jaccess.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/java-atk-wrapper.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/localedata.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/nashorn.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/sunec.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/sunjce_provider.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/sunpkcs11.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/ext/zipfs.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/jce.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/jfr.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/jsse.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/management-agent.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/resources.jar:/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/rt.jar:/media/kadirbasol/KadirBASOL/Files/Works/PyTorchDJL/target/classes:/home/kadirbasol/.m2/repository/ai/djl/pytorch/pytorch-engine/0.29.0/pytorch-engine-0.29.0.jar:/home/kadirbasol/.m2/repository/ai/djl/api/0.29.0/api-0.29.0.jar:/home/kadirbasol/.m2/repository/com/google/code/gson/gson/2.11.0/gson-2.11.0.jar:/home/kadirbasol/.m2/repository/com/google/errorprone/error_prone_annotations/2.27.0/error_prone_annotations-2.27.0.jar:/home/kadirbasol/.m2/repository/net/java/dev/jna/jna/5.14.0/jna-5.14.0.jar:/home/kadirbasol/.m2/repository/org/apache/commons/commons-compress/1.26.2/commons-compress-1.26.2.jar:/home/kadirbasol/.m2/repository/commons-codec/commons-codec/1.17.0/commons-codec-1.17.0.jar:/home/kadirbasol/.m2/repository/commons-io/commons-io/2.16.1/commons-io-2.16.1.jar:/home/kadirbasol/.m2/repository/org/slf4j/slf4j-api/2.0.13/slf4j-api-2.0.13.jar:/home/kadirbasol/.m2/repository/ai/djl/pytorch/pytorch-native-cu124/2.4.0/pytorch-native-cu124-2.4.0-linux-x86_64.jar:/home/kadirbasol/.m2/repository/ai/djl/pytorch/pytorch-jni/2.3.1-0.29.0/pytorch-jni-2.3.1-0.29.0.jar:/home/kadirbasol/.m2/repository/ai/djl/model-zoo/0.29.0/model-zoo-0.29.0.jar:/home/kadirbasol/Apps/idea-IU-233.11799.300/lib/idea_rt.jar main.Main
Connected to the target VM, address: '127.0.0.1:41405', transport: 'socket'
SLF4J(W): No SLF4J providers were found.
SLF4J(W): Defaulting to no-operation (NOP) logger implementation
SLF4J(W): See https://www.slf4j.org/codes.html#noProviders for further details.
There are 1 engines
Engine name: PyTorch
Exception in thread "main" ai.djl.engine.EngineException: Cannot download jni files: https://publish.djl.ai/pytorch/2.4.0/jnilib/0.29.0/linux-x86_64/cu124/libdjl_torch.so
at ai.djl.pytorch.jni.LibUtils.downloadJniLib(LibUtils.java:542)
at ai.djl.pytorch.jni.LibUtils.findJniLibrary(LibUtils.java:280)
at ai.djl.pytorch.jni.LibUtils.loadLibrary(LibUtils.java:84)
at ai.djl.pytorch.engine.PtEngine.newInstance(PtEngine.java:53)
at ai.djl.pytorch.engine.PtEngineProvider.getEngine(PtEngineProvider.java:41)
at ai.djl.engine.Engine.getEngine(Engine.java:190)
at ai.djl.ndarray.NDManager.newBaseManager(NDManager.java:151)
at main.Main.main(Main.java:48)
Caused by: java.io.FileNotFoundException: https://publish.djl.ai/pytorch/2.4.0/jnilib/0.29.0/linux-x86_64/cu124/libdjl_torch.so
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1898)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1500)
at sun.net.www.protocol.https.HttpsURLConnectionImpl.getInputStream(HttpsURLConnectionImpl.java:268)
at ai.djl.util.Utils.openUrl(Utils.java:519)
at ai.djl.util.Utils.openUrl(Utils.java:498)
at ai.djl.util.Utils.openUrl(Utils.java:487)
at ai.djl.pytorch.jni.LibUtils.downloadJniLib(LibUtils.java:536)
... 7 more
Disconnected from the target VM, address: '127.0.0.1:41405', transport: 'socket'
Process finished with exit code 1
I am using Zorin OS 17.1 which is based on ubuntu 23.04
also
kadirbasol@kadirbasol-HP-Z8-G5-Workstation-Desktop-PC:~/Apps/idea-IU-242.20224.387/bin$ nvidia-smi
Thu Aug 15 11:54:03 2024
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.107.02 Driver Version: 550.107.02 CUDA Version: 12.4 |
|-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA RTX A5000 Off | 00000000:52:00.0 On | Off |
| 30% 36C P8 28W / 230W | 553MiB / 24564MiB | 13% Default |
| | | N/A |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| | 0 N/A N/A 2494 G /usr/lib/xorg/Xorg 162MiB | | 0 N/A N/A 2646 G /usr/bin/gnome-shell 216MiB | | 0 N/A N/A 61524 G ...b9d6b37082f5442bb4491d1ea68c4c6e1f7 103MiB | +-----------------------------------------------------------------------------------------+
@eix128
PyTorch 2.4.0 requires DJL 0.30.0 (which is not released yet, you can try 0.30.0-SNAPSHOT version if you want to). See: https://docs.djl.ai/master/engines/pytorch/pytorch-engine/index.html#supported-pytorch-versions
@eix128
We strongly recommend you to use BOM to ensure you are using correct version for each module. See: https://docs.djl.ai/master/bom/index.html
Well this makes code more error prone. maybe it will be better to make your DJL pack as single dependency.
when will you release 0.30 ? where can i get it ?
@eix128 We cannot package PyTorch dependency in single dependency:
- DJL support 11 engines (PyTorch, OnnxRuntime, MXNet, TensorFlow, FastText ...), user may only need one at a time
- Even PyTorch has more than 10 different flavors (macos, win, linux, aarch64, precxx11, cuda-118, cuda-124 etc), GPU dependency are very big (more than 2G each), the total jar file size is more 20 G.
- The same DJL version supports multiple version of PyTorch (2.4.0, 2.3.1, 1.13.1)
- DJL has a lot extension jars and each jar has their own dependencies. Using BOM is common practice.
When 0.3.0 will be released? See: https://docs.djl.ai/master/index.html#release-notes How to get nightly release: See: https://docs.djl.ai/master/docs/get.html#nightly-snapshots