Tensor size not matching
I keep hitting this error when running the script from the model card:

Traceback (most recent call last):
  File "test_starvec.py", line 22, in <module>
    raw_svg = starvector.generate_im2svg(batch, max_length=4000)[0]
  File "starvector_arch.py", line 193, in generate_im2svg
    return self.model.generate_im2svg(batch, **kwargs)
  File "starvector_base.py", line 245, in generate_im2svg
    inputs_embeds, attention_mask, prompt_tokens = self._prepare_generation_inputs(
  File "starvector_base.py", line 208, in _prepare_generation_inputs
    embedded_image = self.image_encoder(image)
  File "torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "torch/nn/modules/module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
  File "image_encoder.py", line 96, in forward
    embeds = self.visual_encoder(image)
  File "torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "torch/nn/modules/module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
  File "clip_model.py", line 188, in forward
    x = x + self.positional_embedding.to(x.dtype)
RuntimeError: The size of tensor a (16) must match the size of tensor b (1024) at non-singleton dimension 2
My model is: starvector/starvector-1b-im2svg
The code snippet:
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoProcessor
from starvector.data.util import process_and_rasterize_svg
import torch
# Reproduction script: load a StarVector checkpoint, preprocess one example
# image, generate SVG code for it, and rasterize the result.
model_name = "starvector/starvector-1b-im2svg"

# trust_remote_code=True lets transformers load the model code published with
# the checkpoint; the weights are loaded in half precision.
starvector = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
processor = starvector.model.processor
# Not used further down in this snippet; kept so the script still exposes it.
tokenizer = starvector.model.svg_transformer.tokenizer

starvector.cuda()
starvector.eval()

image_pil = Image.open('assets/examples/sample-18.png')
image = processor(image_pil, return_tensors="pt")['pixel_values'].cuda()

# The previous guard (`if not image.shape[0] == 1: image = image.squeeze(0)`)
# could never change the tensor: squeeze(0) only drops a dimension of size 1,
# and it was only called when dim 0 was NOT 1. An unbatched 3-D tensor was
# therefore handed to the image encoder without a batch dimension.
# NOTE(review): presumably the processor returns (C, H, W) for a single PIL
# image and the encoder expects (N, C, H, W) -- confirm against the processor;
# the reported "16 vs 1024 at dimension 2" mismatch is consistent with that.
if image.dim() == 3:
    image = image.unsqueeze(0)

batch = {"image": image}

# generate_im2svg returns one result per batch item; take the first (only) one.
raw_svg = starvector.generate_im2svg(batch, max_length=4000)[0]
svg, raster_image = process_and_rasterize_svg(raw_svg)