issue when using spawn

Open quantumiracle opened this issue 1 year ago • 1 comments

The code:

from decord import VideoReader, cpu, gpu

class DecordInit(object):
    """Using Decord(https://github.com/dmlc/decord) to initialize the video_reader."""

    def __init__(self, device_type='cpu', device_id=0, num_threads=1):
        self.num_threads = num_threads
        if device_type == 'cpu':
            self.ctx = cpu(device_id)
        elif device_type == 'gpu':
            self.ctx = gpu(device_id)
        else:
            raise ValueError(f"Unsupported device_type: {device_type}. Use 'cpu' or 'gpu'.")

    def __call__(self, filename):
        """Perform the Decord initialization.
        Args:
            filename (str): Path to the video file.
        """
        reader = VideoReader(filename,
                             ctx=self.ctx,
                             num_threads=self.num_threads)
        return reader

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'num_threads={self.num_threads})')
        return repr_str
        
def getdataset(args):
    temporal_sample = TemporalRandomCrop(args.num_frames * args.sample_rate)  # 16 x
    norm_fun = ae_norm[args.ae]

    if args.dataset == 't2v':
        if args.multi_scale:
            resize = [
                LongSideResizeVideo(args.max_image_size, skip_low_resolution=True),
                SpatialStrideCropVideo(args.stride)
            ]
        else:
            resize = [CenterCropResizeVideo(args.max_image_size), ]
        
        # Initialize DecordInit with the correct device type and id
        decord_init = DecordInit(device_type=args.device_type, device_id=args.device_id, num_threads=args.dataloader_num_workers)

        transform = transforms.Compose([
            ToTensorVideo(),
            *resize, 
            norm_fun
        ])
        tokenizer = AutoTokenizer.from_pretrained(args.text_encoder_name, cache_dir=args.cache_dir)
        return T2V_dataset(args, transform=transform, temporal_sample=temporal_sample, tokenizer=tokenizer, video_loader=decord_init)
    
    raise NotImplementedError(args.dataset)

def main_func(rank, args):
    real_dataset = getdataset(args)
    real_dataloader = DataLoader(
        real_dataset,
        shuffle=True,
        collate_fn=Collate(args),  # Use Collate class
        batch_size=args.train_batch_size,
        num_workers=args.dataloader_num_workers,
    )
    real_dataloader = cycle(real_dataloader)
    
    for _ in range(10):  # Example loop
        batch = next(real_dataloader)
        # Your processing logic here
        print(batch)

if __name__ == "__main__":
    mp.spawn(main_func, args=(args,), nprocs=8, join=True)

the error is:

  File "/usr/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
TypeError: DECORDContext.__new__() missing 2 required positional arguments: 'device_type' and 'device_id'

Jun 14 '24 01:06 quantumiracle

please ask you if solved?

Oct 12 '24 06:10 Juvenilecris