Hi :)
I ran the transformers version successfully, but I'm having difficulties with the diffusers version.
I changed "assets/concept_list.json" according to my data, but I got the following error. Why?
KeyError: Parameter containing:
tensor([[ 0.0538, 0.0331, -0.0274, ..., -0.0369, -0.0265, -0.1301],
[-0.0218, -0.0355, 0.0451, ..., -0.0113, 0.1196, 0.0851],
[-0.0017, 0.0157, 0.0086, ..., 0.0153, 0.0031, -0.0807],
...,
[-0.0325, 0.0046, 0.0012, ..., -0.0154, -0.0103, -0.0002],
[ 0.0052, 0.0671, -0.0349, ..., -0.0433, 0.0659, -0.0225],
[ 0.0109, -0.0112, 0.0980, ..., 0.0068, 0.0108, -0.0460]],
device='cuda:1', requires_grad=True)
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /cortex/users/ohadr/projects/custom-diffusion/src/diffusers_training.py:1105 in <module>          │
│ │
│ 1102 │
│   1103 if __name__ == "__main__":                                                                 │
│ 1104 │ args = parse_args() │
│ ❱ 1105 │ main(args) │
│ 1106 │
│ │
│ /cortex/users/ohadr/projects/custom-diffusion/src/diffusers_training.py:967 in main │
│ │
│ 964 │ │ │ │ encoder_hidden_states = text_encoder(batch["input_ids"])[0] │
│ 965 │ │ │ │ │
│ 966 │ │ │ │ # Predict the noise residual │
│ ❱ 967 │ │ │ │ model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sampl │
│ 968 │ │ │ │ │
│ 969 │ │ │ │ # Get the target for loss depending on the prediction type │
│ 970 │ │ │ │ if noise_scheduler.config.prediction_type == "epsilon": │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in │
│ _call_impl │
│ │
│ 1107 │ │ # this function, and just call forward. │
│ 1108 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1109 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1110 │ │ │ return forward_call(*input, **kwargs) │
│ 1111 │ │ # Do not call functions when jit is used │
│ 1112 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1113 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/deepspeed/utils/nvtx.py:15 in │
│ wrapped_fn │
│ │
│ 12 │ │
│ 13 │ def wrapped_fn(*args, **kwargs): │
│   14 │   │   get_accelerator().range_push(func.__qualname__)                                      │
│ ❱ 15 │ │ ret_val = func(*args, **kwargs) │
│ 16 │ │ get_accelerator().range_pop() │
│ 17 │ │ return ret_val │
│ 18 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/deepspeed/runtime/engine.py:1769 │
│ in forward │
│ │
│ 1766 │ │ if self.fp16_auto_cast(): │
│ 1767 │ │ │ inputs = self._cast_inputs_half(inputs) │
│ 1768 │ │ │
│ ❱ 1769 │ │ loss = self.module(*inputs, **kwargs) │
│ 1770 │ │ │
│ 1771 │ │ if self.zero_optimization_partition_weights(): │
│ 1772 │ │ │ # Disable automated discovery of external parameters │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in │
│ _call_impl │
│ │
│ 1107 │ │ # this function, and just call forward. │
│ 1108 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1109 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1110 │ │ │ return forward_call(*input, **kwargs) │
│ 1111 │ │ # Do not call functions when jit is used │
│ 1112 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│   1113 │   │   if self._backward_hooks or _global_backward_hooks:                                 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/transformers/models/clip/modeling_ │
│ clip.py:811 in forward │
│ │
│ 808 │ │ ```""" │
│ 809 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 810 │ │ │
│ ❱ 811 │ │ return self.text_model( │
│ 812 │ │ │ input_ids=input_ids, │
│ 813 │ │ │ attention_mask=attention_mask, │
│ 814 │ │ │ position_ids=position_ids, │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in │
│ _call_impl │
│ │
│ 1107 │ │ # this function, and just call forward. │
│ 1108 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1109 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1110 │ │ │ return forward_call(*input, **kwargs) │
│ 1111 │ │ # Do not call functions when jit is used │
│ 1112 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│   1113 │   │   if self._backward_hooks or _global_backward_hooks:                                 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/transformers/models/clip/modeling_ │
│ clip.py:708 in forward │
│ │
│ 705 │ │ input_shape = input_ids.size() │
│ 706 │ │ input_ids = input_ids.view(-1, input_shape[-1]) │
│ 707 │ │ │
│ ❱ 708 │ │ hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids) │
│ 709 │ │ │
│ 710 │ │ bsz, seq_len = input_shape │
│ 711 │ │ # CLIP's text model uses causal mask, prepare it here. │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in │
│ _call_impl │
│ │
│ 1107 │ │ # this function, and just call forward. │
│ 1108 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1109 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1110 │ │ │ return forward_call(*input, **kwargs) │
│ 1111 │ │ # Do not call functions when jit is used │
│ 1112 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│   1113 │   │   if self._backward_hooks or _global_backward_hooks:                                 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/transformers/models/clip/modeling_ │
│ clip.py:223 in forward │
│ │
│ 220 │ │ │ position_ids = self.position_ids[:, :seq_length] │
│ 221 │ │ │
│ 222 │ │ if inputs_embeds is None: │
│ ❱ 223 │ │ │ inputs_embeds = self.token_embedding(input_ids) │
│ 224 │ │ │
│ 225 │ │ position_embeddings = self.position_embedding(position_ids) │
│ 226 │ │ embeddings = inputs_embeds + position_embeddings │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:1110 in │
│ _call_impl │
│ │
│ 1107 │ │ # this function, and just call forward. │
│ 1108 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1109 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ ❱ 1110 │ │ │ return forward_call(*input, **kwargs) │
│ 1111 │ │ # Do not call functions when jit is used │
│ 1112 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│   1113 │   │   if self._backward_hooks or _global_backward_hooks:                                 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/sparse.py:158 in │
│ forward │
│ │
│    155 │   │   │   │   self.weight[self.padding_idx].fill_(0)                                     │
│ 156 │ │
│ 157 │ def forward(self, input: Tensor) -> Tensor: │
│ ❱ 158 │ │ return F.embedding( │
│ 159 │ │ │ input, self.weight, self.padding_idx, self.max_norm, │
│ 160 │ │ │ self.norm_type, self.scale_grad_by_freq, self.sparse) │
│ 161 │
│ │
│ /cortex/users/ohadr/envs/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py:2183 in │
│ embedding │
│ │
│   2180 │   │   # torch.embedding_renorm_                                                          │
│ 2181 │ │ # remove once script supports set_grad_enabled │
│   2182 │   │   _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)                     │
│ ❱ 2183 │ return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) │
│ 2184 │
│ 2185 │
│ 2186 def embedding_bag( │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
Steps: 0%| | 0/500 [00:01<?, ?it/s]
[23:00:13] ERROR failed (exitcode: 1) local_rank: 0 (pid: 1533480) of binary: /cortex/users/ohadr/envs/envs/ldm/bin/python
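From the last frames, the failure is inside F.embedding, and the DeepSpeed frame above (engine.py:1769) runs fp16 auto-cast on the inputs, so batch["input_ids"] presumably reaches the CLIP text encoder as a torch.cuda.FloatTensor instead of an integer tensor. Here is a minimal standalone sketch of the dtype mismatch and the cast that avoids it (the embedding sizes below are just CLIP-like placeholders, not taken from the training script):

import torch
import torch.nn as nn

# CLIP-sized token embedding; both sizes are illustrative placeholders.
token_embedding = nn.Embedding(49408, 768)

# What an fp16 auto-cast can produce: token IDs stored as a float tensor.
input_ids = torch.tensor([[0, 320, 1125, 2]], dtype=torch.float16)

# token_embedding(input_ids) raises the RuntimeError above, because
# embedding lookups require Long/Int indices; casting back avoids it.
hidden = token_embedding(input_ids.long())
print(hidden.shape)  # torch.Size([1, 4, 768])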
Hi, I updated our code to the latest diffusers version (0.21.4). Can you retry and check whether you are still facing this issue?
Thanks!
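A quick way to confirm the environment actually picked up the new release (assuming a pip-managed setup, installed via pip install diffusers==0.21.4):

import diffusers
print(diffusers.__version__)  # expect '0.21.4'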