javacpp-presets
javacpp-presets copied to clipboard
how to free my cuda memory using nvcomp
I have follow the example nvcompLZ4Example to compress my yuv data using 3090TI. but I have 2 questions about this.
- when I go throuth all the code , there are still some gpu memory used
- I print out the compress and decompress time, while the decompress time is much longer than the comress time,this doesn't make sense Here is my code:
public class nvcompLZ4Example {
private static void decomp_compressed_with_manager_factory_example(BytePointer device_input_ptrs, long input_buffer_len) {
long start=System.currentTimeMillis();
CUstream_st stream = new CUstream_st();
int cuda_error = cudaStreamCreate(stream);
long chunk_size = 1 << 16;
nvcompBatchedZstdOpts_t format_opts = new nvcompBatchedZstdOpts_t();
// format_opts.data_type(NVCOMP_TYPE_CHAR);
PimplManager nvcomp_manager = new ZstdManager(chunk_size, format_opts, stream, 0, nvcomp.NoComputeNoVerify);
CompressionConfig comp_config = nvcomp_manager.configure_compression(input_buffer_len);
BytePointer comp_buffer = new BytePointer();
cuda_error = cudaMalloc(comp_buffer, comp_config.max_compressed_buffer_size());
nvcomp_manager.compress(device_input_ptrs, comp_buffer, comp_config);
long compress=System.currentTimeMillis();
System.out.println(compress-start);
long compressedOutputSize = nvcomp_manager.get_compressed_output_size(comp_buffer);
System.out.println(Double.valueOf(compressedOutputSize)/Double.valueOf(input_buffer_len));
// Construct a new nvcomp manager from the compressed buffer.
// Note we could use the nvcomp_manager from above, but here we demonstrate how to create a manager
// for the use case where a buffer is received and the user doesn't know how it was compressed
// Also note, creating the manager in this way synchronizes the stream, as the compressed buffer must be read to
// construct the manager
nvcompManagerBase decomp_nvcomp_manager = create_manager(comp_buffer, stream, 0, NoComputeNoVerify);
DecompressionConfig decomp_config = decomp_nvcomp_manager.configure_decompression(comp_buffer);
BytePointer res_decomp_buffer = new BytePointer();
cuda_error = cudaMalloc(res_decomp_buffer, decomp_config.decomp_data_size());
decomp_nvcomp_manager.decompress(res_decomp_buffer, comp_buffer, decomp_config);
long decompress=System.currentTimeMillis();
System.out.println(decompress-compress);
cuda_error = cudaFree(comp_buffer);
cuda_error = cudaFree(res_decomp_buffer);
cuda_error = cudaStreamSynchronize(stream);
cuda_error = cudaStreamDestroy(stream);
}
public static void main(String[] args) throws FileNotFoundException {
Loader.load(nvcomp.class);
String file_path="/home/yijinsheng/output.yuv";
byte[] uncompressed_data=IoUtil.readBytes(new FileInputStream(file_path));
// Initialize a random array of chars
// int input_buffer_len = 1000000;
// byte[] uncompressed_data = new byte[input_buffer_len];
//
// for (int i = 0; i < input_buffer_len; i++) {
// uncompressed_data[i] = (byte) (Math.random() * 26 + 'a');
// }
long input_buffer_len=uncompressed_data.length;
BytePointer uncompressed_data_ptr = new BytePointer(uncompressed_data);
BytePointer device_input_ptrs = new BytePointer();
int cuda_error = cudaMalloc(device_input_ptrs, input_buffer_len);
cuda_error = cudaMemcpy(device_input_ptrs, uncompressed_data_ptr, input_buffer_len, cudaMemcpyDefault);
decomp_compressed_with_manager_factory_example(device_input_ptrs, input_buffer_len);
cudaFree(device_input_ptrs);
System.out.println("done");
}
}