DALI
DALI copied to clipboard
Is GPU memory leaking?
More and more GPU memory is consumed as iteration proceeds.

My Code:
import glob
import numpy as np
from nvidia.dali import pipeline_def
import nvidia.dali.fn as fn
import nvidia.dali.types as types
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import time
class ExternalSource:
    """Per-sample callable source for DALI's ``fn.external_source``.

    Recursively scans *path* for ``.jpg`` images; each sample is the raw
    (undecoded) bytes of an image and of its matching ``.png`` segmentation
    mask (same filename, different extension).
    """

    def __init__(self, path: str) -> None:
        self.path = path
        # NOTE(review): the pattern assumes `path` ends with a separator
        # (e.g. "/data/"); without it the "**" glob roots one level up.
        self.files = glob.glob(path + "**/*.jpg", recursive=True)

    def __getstate__(self):
        # Plain dict copy so the object pickles cleanly into the spawned
        # DALI worker processes (py_start_method='spawn').
        state = self.__dict__.copy()
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __call__(self, sample_info):
        """Return (image_bytes, seg_bytes) for one sample, or end the epoch.

        Raises:
            StopIteration: once every discovered file has been served,
                which tells DALI the epoch is over.
        """
        idx = sample_info.idx_in_epoch
        # BUG FIX: the original referenced `self._iter_num` and `self.data`,
        # neither of which exists (only `self.path` / `self.files` are set),
        # so every call raised AttributeError. Bound the epoch by the number
        # of discovered files and read from `self.files` instead.
        if idx >= len(self.files):
            raise StopIteration()
        img_file = self.files[idx]
        seg_file = img_file.replace(".jpg", ".png")
        with open(img_file, "rb") as fd:
            image = np.frombuffer(fd.read(), np.uint8)
        with open(seg_file, "rb") as fd:
            seg = np.frombuffer(fd.read(), np.uint8)
        return image, seg
@pipeline_def(num_threads=2, py_num_workers=4, device_id=0, py_start_method='spawn')
def build_pipe():
    """DALI pipeline: decode, randomly rescale, crop, flip, jitter.

    Returns:
        (normalized image, segmentation mask, decoded image shape).
    """
    image, seg = fn.external_source(
        source=ExternalSource("/data/"),
        num_outputs=2, batch=False,
        parallel=True, dtype=[types.UINT8, types.UINT8],
    )
    # "mixed" = host input, GPU (nvJPEG) decode; outputs live on the GPU.
    image, seg = fn.decoders.image([image, seg], device="mixed")
    shape = fn.shapes(image)

    # Random scale factor in [0.5, 2.0); target size = scale * (1280, 500).
    scale = fn.random.uniform(range=[0.5, 2.0], shape=[1])
    resized_output = fn.cat(1280 * scale, 500 * scale, axis=0)
    # BUG FIX: `resized_output` was computed but never passed to fn.resize,
    # so the random rescale was a no-op. Feed it as the resize target; with
    # mode="not_larger" the aspect ratio is kept and the result fits inside it.
    image, seg = fn.resize([image, seg], size=resized_output,
                           mode="not_larger", device="gpu")

    # Random 500x500 crop; anchors in [0, 1) are relative positions, and
    # out-of-bounds regions are zero-padded so small images still crop.
    pos_x = fn.random.uniform(range=[0, 1.0], shape=[1])
    pos_y = fn.random.uniform(range=[0, 1.0], shape=[1])
    image, seg = fn.crop(
        [image, seg], crop_pos_x=pos_x, crop_pos_y=pos_y, crop=(500, 500),
        out_of_bounds_policy="pad", fill_values=[0], device="gpu",
    )

    # Same random flip decision applied to both image and mask so they
    # stay aligned.
    should_flip = fn.random.coin_flip(probability=0.55)
    image, seg = fn.flip(
        [image, seg],
        horizontal=should_flip,
    )

    # Photometric jitter on the image only — the mask carries labels and
    # must not be color-modified.
    image = fn.color_twist(
        image,
        brightness=fn.random.uniform(range=[-32, 32]),
        contrast=fn.random.uniform(range=[0.5, 1.5]),
        saturation=fn.random.uniform(range=[0.5, 1.5]),
        hue=fn.random.uniform(values=np.arange(-18, 18).tolist()),
    )
    image = fn.normalize(image, mean=128, stddev=128)
    return image, seg, shape
def main():
    """Build the pipeline, iterate one epoch through the PyTorch iterator,
    and report wall-clock timing (progress line every 1000 iterations)."""
    pipe = build_pipe(batch_size=24)
    train_iter = DALIGenericIterator(pipe, ["image", "seg", "shape"])
    start = time.perf_counter()
    for step, batch in enumerate(train_iter):
        if step % 1000 != 0:
            continue
        msg = "{} iter {}, real batch size: {}, data size {}"
        print(msg.format(time.time(), step, len(batch[0]["image"]), len(batch)))
    print("epoch 1 cost:", time.perf_counter() - start)
# Script entry point: run the one-epoch timing loop when executed directly.
if __name__ == "__main__":
    main()
Hello @Hughen, the plot that you show doesn't indicate a significant leak — it looks like memory usage asymptotically converges to some peak value. If you provide a dataset (or point to a publicly available one), we can try to replicate your results and investigate whether this constitutes a leak or normal behavior.
Closing as stalled.