DALI icon indicating copy to clipboard operation
DALI copied to clipboard

GPU memory leak?

Open Hughen opened this issue 3 years ago • 1 comments

More and more GPU memory is consumed as training runs (see the attached memory-usage graph).

My Code:

import glob
import numpy as np
from nvidia.dali import pipeline_def
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import time

class ExternalSource:
    """Per-sample callable source for DALI ``fn.external_source``.

    Recursively scans *path* for ``*.jpg`` images. Each sample is the raw
    (undecoded) byte stream of an image and of its segmentation mask, which
    is assumed to sit next to the image with a ``.png`` extension.
    """

    def __init__(self, path: str) -> None:
        self.path = path
        # NOTE: relies on *path* ending with a separator (e.g. "/data/").
        self.files = glob.glob(path + "**/*.jpg", recursive=True)

    def __getstate__(self):
        # Plain dict copy so the object pickles cleanly for the
        # 'spawn'-started parallel external_source worker processes.
        state = self.__dict__.copy()
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __call__(self, sample_info):
        idx = sample_info.idx_in_epoch
        # BUG FIX: the original compared ``sample_info.iteration`` against
        # ``self._iter_num``, which was never defined (AttributeError), and
        # indexed ``self.data``, which does not exist either. End the epoch
        # once every discovered file has been consumed, and read from
        # ``self.files``.
        if idx >= len(self.files):
            # Indicate end of the epoch
            raise StopIteration()
        img_file = self.files[idx]
        seg_file = img_file.replace(".jpg", ".png")
        with open(img_file, "rb") as fd:
            image = np.frombuffer(fd.read(), np.uint8)
        with open(seg_file, "rb") as fd:
            seg = np.frombuffer(fd.read(), np.uint8)
        return image, seg

@pipeline_def(num_threads=2, py_num_workers=4, device_id=0, py_start_method='spawn')
def build_pipe():
    """DALI training pipeline.

    decode (mixed) -> random scale resize -> random 500x500 crop (padded)
    -> random horizontal flip -> color jitter (image only) -> normalize.

    Returns:
        (image, seg, shape): augmented image, augmented mask, and the shape
        of the decoded image *before* any augmentation.
    """
    image, seg = fn.external_source(
        source=ExternalSource("/data/"),
        num_outputs=2, batch=False,
        parallel=True, dtype=[types.UINT8, types.UINT8],
    )
    # "mixed" = host-side parsing + GPU-accelerated decoding.
    # NOTE(review): the mask is decoded with the same (default/color) decoder
    # as the image — confirm the PNG label maps survive this unchanged.
    image, seg = fn.decoders.image([image, seg], device="mixed")
    shape = fn.shapes(image)

    # Random scale factor in [0.5, 2.0); target size is (1280, 500) * scale.
    scale = fn.random.uniform(range=[0.5, 2.0], shape=[1])
    resized_output = fn.cat(1280 * scale, 500 * scale, axis=0)
    # BUG FIX: the original computed ``resized_output`` but never passed it to
    # fn.resize, so the resize operator received no target size. Feed it in
    # via ``size=`` so "not_larger" mode has something to fit to.
    # NOTE(review): default (linear) interpolation blends label values in the
    # mask; nearest-neighbor is usually wanted for segmentation — confirm.
    image, seg = fn.resize(
        [image, seg], size=resized_output, mode="not_larger", device="gpu"
    )
    # Random crop anchor (normalized coords); out-of-bounds area is zero-padded.
    pos_x = fn.random.uniform(range=[0, 1.0], shape=[1])
    pos_y = fn.random.uniform(range=[0, 1.0], shape=[1])
    image, seg = fn.crop(
        [image, seg], crop_pos_x=pos_x, crop_pos_y=pos_y, crop=(500, 500),
        out_of_bounds_policy="pad", fill_values=[0], device="gpu"
    )
    # Same flip decision applied to both image and mask to keep them aligned.
    should_flip = fn.random.coin_flip(probability=0.55)
    image, seg = fn.flip(
        [image, seg],
        horizontal=should_flip,
    )
    # Photometric jitter on the image only — the mask must keep its labels.
    image = fn.color_twist(
        image,
        brightness=fn.random.uniform(range=[-32, 32]),
        contrast=fn.random.uniform(range=[0.5, 1.5]),
        saturation=fn.random.uniform(range=[0.5, 1.5]),
        hue=fn.random.uniform(values=np.arange(-18, 18).tolist()),
    )
    # Map uint8 [0, 255] roughly to [-1, 1].
    image = fn.normalize(image, mean=128, stddev=128)
    return image, seg, shape

def main():
    """Build the pipeline and time a single pass over the dataset."""
    pipe = build_pipe(batch_size=24)
    train_iter = DALIGenericIterator(pipe, ["image", "seg", "shape"])
    start = time.perf_counter()
    for step, batch in enumerate(train_iter):
        # Log progress every 1000 iterations.
        if step % 1000 == 0:
            print(
                "{} iter {}, real batch size: {}, data size {}".format(
                    time.time(), step, len(batch[0]["image"]), len(batch)
                )
            )
    print("epoch 1 cost:", time.perf_counter() - start)


if __name__ == "__main__":
    main()

Hughen avatar Aug 04 '22 09:08 Hughen

Hello @Hughen The plot that you show doesn't indicate a significant leak - it looks like it asymptotically converges to some peak value. If you provide a dataset (or point to a publicly available one), we can try to replicate your results and investigate whether this constitutes a leak or a normal behavior.

mzient avatar Aug 04 '22 19:08 mzient

Closing as stalled.

JanuszL avatar Jan 24 '24 14:01 JanuszL