from VLE import VLEForITM, VLEProcessor, VLEForITMPipeline
from PIL import Image

# Demo: score image-text matching (ITM) for one image against two captions.
model_dir = "./pretrained/vle-base"
captions = ["a photo of a cat.", "a photo of dogs."]
query_image = Image.open("pics/dogs.png")

print("Init ITM model")
itm_model = VLEForITM.from_pretrained(model_dir)
processor = VLEProcessor.from_pretrained(model_dir)

print("init ITM pipeline")
# NOTE(review): the pipeline's preprocess passes padding=True to the processor,
# which internally also sets padding="max_length" — this duplicate kwarg is what
# raises the TypeError in the traceback below; the fix belongs in pipeline_vle.py.
pipeline = VLEForITMPipeline(model=itm_model, device='cpu', vle_processor=processor)

# Pair the same image with every caption and run the batch through the pipeline.
batch = [{"image": query_image, "text": caption} for caption in captions]
predictions = pipeline(batch)

for caption, prediction in zip(captions, predictions):
    print(caption, prediction)
====================================
Init ITM model
init ITM pipeline
Traceback (most recent call last):
File "D:\PycharmProjects\vle_attack\test.py", line 16, in
itm_pred = itm_pipeline([{"image": itm_images, "text": itm_text[0]},
File "D:\PycharmProjects\vle_attack\VLE\pipeline_vle.py", line 206, in call
results = super().call(inputs, **kwargs)
File "D:\anaconda3\envs\tinyllava\lib\site-packages\transformers\pipelines\base.py", line 1143, in call
outputs = list(final_iterator)
File "D:\anaconda3\envs\tinyllava\lib\site-packages\transformers\pipelines\pt_utils.py", line 124, in next
item = next(self.iterator)
File "D:\anaconda3\envs\tinyllava\lib\site-packages\transformers\pipelines\pt_utils.py", line 124, in next
item = next(self.iterator)
File "D:\anaconda3\envs\tinyllava\lib\site-packages\torch\utils\data\dataloader.py", line 631, in next
data = self._next_data()
File "D:\anaconda3\envs\tinyllava\lib\site-packages\torch\utils\data\dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "D:\anaconda3\envs\tinyllava\lib\site-packages\torch\utils\data_utils\fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "D:\anaconda3\envs\tinyllava\lib\site-packages\torch\utils\data_utils\fetch.py", line 51, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "D:\anaconda3\envs\tinyllava\lib\site-packages\transformers\pipelines\pt_utils.py", line 19, in getitem
processed = self.process(item, **self.params)
File "D:\PycharmProjects\vle_attack\VLE\pipeline_vle.py", line 210, in preprocess
model_inputs = self.vle_processor(text=inputs['text'], images=inputs['image'], return_tensors="pt", padding=True)
File "D:\PycharmProjects\vle_attack\VLE\processing_vle.py", line 102, in call
encoding = self.tokenizer(text, max_length=25, return_tensors=return_tensors, padding="max_length", truncation=True, **kwargs)
TypeError: DebertaV2Tokenizer(name_or_path='./pretrained/vle-large', vocab_size=128000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '[CLS]', 'eos_token': '[SEP]', 'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True), added_tokens_decoder={
0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
1: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
2: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
3: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
128000: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
} got multiple values for keyword argument 'padding'
1. 请问 demo 的输入是不是不对?测试 Patch 分类 (PBC) 的 demo 可以正常执行。
2. 图文匹配模型是否可以执行 NLVR 任务或者 visual spatial reasoning 任务?谢谢!