How to select a single parameter for a whole batch of images?
Hello!
Thanks for providing such an excellent library. While experimenting with it, I found one feature missing: the ability to select the final image shape for the whole batch. In PyTorch there is a collate_fn which can handle such use cases.
For example, the first batch of 32 images is resized to shape [256, 256, 3] while the next batch is resized to [128, 128, 3]. Is it possible to do so?
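For reference, a PyTorch collate_fn for this use case might look like the following sketch (the dataset and names are illustrative, not from any particular codebase):

import random
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader

SIZES = [128, 256]  # candidate per-batch target sizes

def random_size_collate(batch):
    # Pick ONE target size for the whole batch, then resize every sample to it.
    imgs, labels = zip(*batch)  # assumes the dataset yields (CHW tensor, int label)
    side = random.choice(SIZES)
    imgs = [F.interpolate(img.unsqueeze(0), size=(side, side),
                          mode='bilinear', align_corners=False)
            for img in imgs]
    return torch.cat(imgs, dim=0), torch.tensor(labels)

# loader = DataLoader(dataset, batch_size=32, collate_fn=random_size_collate)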
Hi @zakajd,
DALI processes the whole batch at a time, so there is little sense in having a collate_fn itself, as the batch is already there.
If you want to resize all images in the batch, you can use the resize, crop, slice, or pad operators.
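For example, applying one constant shape to every image in every batch could look like this minimal sketch (the path and sizes are placeholders):

import nvidia.dali.fn as fn
from nvidia.dali import pipeline_def

@pipeline_def
def fixed_resize_pipeline():
    jpegs, labels = fn.readers.file(file_root='/dataset/dog/')  # placeholder path
    images = fn.decoders.image(jpegs)
    # Constant size: every sample in every batch comes out as [256, 256, 3]
    images = fn.resize(images, resize_x=256, resize_y=256)
    return images, labels

pipe = fixed_resize_pipeline(batch_size=32, num_threads=2, device_id=0)
pipe.build()
pipe.run()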
Hi @JanuszL ,
I think the following simple code will describe the problem:
import numpy as np
from nvidia.dali.pipeline import Pipeline
from nvidia.dali import fn

class A(Pipeline):
    def __init__(self):
        super(A, self).__init__(batch_size=2, num_threads=1, device_id=0)

    def define_graph(self):
        imgs, labels = fn.readers.file(file_root='/dataset/dog/')
        imgs = fn.decoders.image(imgs)
        # fn.random.uniform draws a new value for every sample, not once per batch
        size = fn.random.uniform(values=[200, 300, 400, 500, 600, 700, 800])
        imgs = fn.resize(imgs, size=size)
        return imgs, size

a = A()
a.build()
o = a.run()
print('img', o[0].at(0).shape, o[0].at(1).shape)
print('size', np.array(o[1].as_tensor()))
The output is:
img (800, 800, 3) (600, 600, 3)
size [800. 600.]
The pipeline batch size is 2, but you can see that the output image sizes within the same batch are different.
How can I make the resize operator use the same size for all images in the same batch, and use another random size for the next batch?
(I also tried feeding random sizes to the resize operator using ExternalSource, so that every 2 consecutive numbers are the same, e.g. [200, 200, 300, 300, ...], but it complained that a DataNode cannot be used for the size parameter of the resize operator.)
Thanks!
Hi @gbstack,
If you want to use external_source, you can try something like this:
import numpy as np
import nvidia.dali.fn as fn
from nvidia.dali import pipeline_def

batch_size = 4

def get_data():
    # Draw one random (H, W) size per batch and repeat it for every sample,
    # so all samples in the batch share the same target shape
    size = (np.random.ranf(size=[2]).astype(dtype=np.float32) * 60 + 30)
    out = [size for _ in range(batch_size)]
    return out

@pipeline_def
def simple_pipeline():
    jpegs, _ = fn.readers.file(files=["DALI_extra/db/single/jpeg/100/swan-3584559_640.jpg"])
    images = fn.decoders.image(jpegs)
    size = fn.external_source(source=get_data)
    images = fn.resize(images, size=size)
    return images

pipe = simple_pipeline(batch_size=batch_size, num_threads=4, prefetch_queue_depth=2, device_id=0)
pipe.build()
pipe.run()
out = pipe.run()[0]
print(np.array(out[0]).shape)
print(np.array(out.as_tensor()).shape)
out = pipe.run()[0]
print(np.array(out[0]).shape)
print(np.array(out.as_tensor()).shape)
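This works because get_data is called once per batch and returns the same size array for every sample, so all images within a batch share one target shape, while a fresh random size is drawn for each new batch. That is also why out.as_tensor() succeeds: it requires uniform shapes across the whole batch.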
Or you can still use a random generator and the permute_batch operator to duplicate a single value across all samples in the batch:
import numpy as np
import nvidia.dali.fn as fn
from nvidia.dali import pipeline_def

batch_size = 4

@pipeline_def
def simple_pipeline():
    jpegs, _ = fn.readers.file(files=["DALI_extra/db/single/jpeg/100/swan-3584559_640.jpg"])
    images = fn.decoders.image(jpegs)
    size = fn.random.uniform(values=[200, 300, 400, 500, 600, 700, 800])
    # Broadcast the value drawn for sample 0 to every sample in the batch
    size = fn.permute_batch(size, indices=[0] * batch_size)
    images = fn.resize(images, size=size)
    return images

pipe = simple_pipeline(batch_size=batch_size, num_threads=4, prefetch_queue_depth=2, device_id=0)
pipe.build()
pipe.run()
out = pipe.run()[0]
print(np.array(out[0]).shape)
print(np.array(out.as_tensor()).shape)
out = pipe.run()[0]
print(np.array(out[0]).shape)
print(np.array(out.as_tensor()).shape)
out = pipe.run()[0]
print(np.array(out[0]).shape)
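permute_batch copies the sample at the given index into each output position, so indices=[0]*batch_size replicates the size drawn for sample 0 across the whole batch, while a new random value is still drawn on every iteration.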
@JanuszL Thanks, your solution works!