javacpp-presets icon indicating copy to clipboard operation
javacpp-presets copied to clipboard

how to free my cuda memory using nvcomp

Open yijinsheng opened this issue 5 months ago • 2 comments

I have follow the example nvcompLZ4Example to compress my yuv data using 3090TI. but I have 2 questions about this.

  1. when I go throuth all the code , there are still some gpu memory used
  2. I print out the compress and decompress time, while the decompress time is much longer than the comress time,this doesn't make sense Here is my code:
public class nvcompLZ4Example {
    private static void decomp_compressed_with_manager_factory_example(BytePointer device_input_ptrs, long input_buffer_len) {
        long start=System.currentTimeMillis();
        CUstream_st stream = new CUstream_st();
        int cuda_error = cudaStreamCreate(stream);

        long chunk_size = 1 << 16;

        nvcompBatchedZstdOpts_t format_opts = new nvcompBatchedZstdOpts_t();
//        format_opts.data_type(NVCOMP_TYPE_CHAR);
        PimplManager nvcomp_manager = new ZstdManager(chunk_size, format_opts, stream, 0, nvcomp.NoComputeNoVerify);
        CompressionConfig comp_config = nvcomp_manager.configure_compression(input_buffer_len);

        BytePointer comp_buffer = new BytePointer();
        cuda_error = cudaMalloc(comp_buffer, comp_config.max_compressed_buffer_size());

        nvcomp_manager.compress(device_input_ptrs, comp_buffer, comp_config);
        long compress=System.currentTimeMillis();
        System.out.println(compress-start);
        long compressedOutputSize = nvcomp_manager.get_compressed_output_size(comp_buffer);
        System.out.println(Double.valueOf(compressedOutputSize)/Double.valueOf(input_buffer_len));

        // Construct a new nvcomp manager from the compressed buffer.
        // Note we could use the nvcomp_manager from above, but here we demonstrate how to create a manager
        // for the use case where a buffer is received and the user doesn't know how it was compressed
        // Also note, creating the manager in this way synchronizes the stream, as the compressed buffer must be read to
        // construct the manager
        nvcompManagerBase decomp_nvcomp_manager = create_manager(comp_buffer, stream, 0, NoComputeNoVerify);

        DecompressionConfig decomp_config = decomp_nvcomp_manager.configure_decompression(comp_buffer);
        BytePointer res_decomp_buffer = new BytePointer();
        cuda_error = cudaMalloc(res_decomp_buffer, decomp_config.decomp_data_size());

        decomp_nvcomp_manager.decompress(res_decomp_buffer, comp_buffer, decomp_config);
        long decompress=System.currentTimeMillis();
        System.out.println(decompress-compress);

        cuda_error = cudaFree(comp_buffer);
        cuda_error = cudaFree(res_decomp_buffer);
        cuda_error = cudaStreamSynchronize(stream);
        cuda_error = cudaStreamDestroy(stream);
    }

    public static void main(String[] args) throws FileNotFoundException {
        Loader.load(nvcomp.class);
        String file_path="/home/yijinsheng/output.yuv";
        byte[]  uncompressed_data=IoUtil.readBytes(new FileInputStream(file_path));
        // Initialize a random array of chars
//        int input_buffer_len = 1000000;
//        byte[] uncompressed_data = new byte[input_buffer_len];
//
//        for (int i = 0; i < input_buffer_len; i++) {
//            uncompressed_data[i] = (byte) (Math.random() * 26 + 'a');
//        }

        long input_buffer_len=uncompressed_data.length;

        BytePointer uncompressed_data_ptr = new BytePointer(uncompressed_data);

        BytePointer device_input_ptrs = new BytePointer();

        int cuda_error = cudaMalloc(device_input_ptrs, input_buffer_len);
        cuda_error = cudaMemcpy(device_input_ptrs, uncompressed_data_ptr, input_buffer_len, cudaMemcpyDefault);

        decomp_compressed_with_manager_factory_example(device_input_ptrs, input_buffer_len);
        cudaFree(device_input_ptrs);
        System.out.println("done");
    }
}
image

yijinsheng avatar Sep 20 '24 10:09 yijinsheng