stable-diffusion-webui
Interrogate CLIP on M1 just brings up errors in the console
Load an image and press Interrogate: it doesn't work.

```
Running on local URL: http://127.0.0.1:7860
To create a public link, set `share=True` in `launch()`.
load checkpoint from https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth
Error interrogating
Traceback (most recent call last):
  File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 137, in interrogate
    caption = self.generate_caption(pil_image)
  File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 122, in generate_caption
    caption = self.blip_model.generate(gpu_image, sample=False, num_beams=shared.opts.interrogate_clip_num_beams, min_length=shared.opts.interrogate_clip_min_length, max_length=shared.opts.interrogate_clip_max_length)
  File "/Users/user/SD_Test/stable-diffusion-webui/repositories/BLIP/models/blip.py", line 156, in generate
    outputs = self.text_decoder.generate(input_ids=input_ids,
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/transformers/generation_utils.py", line 1146, in generate
    self._validate_model_kwargs(model_kwargs.copy())
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/transformers/generation_utils.py", line 861, in _validate_model_kwargs
    raise ValueError(
ValueError: The following `model_kwargs` are not used by the model: ['encoder_hidden_states', 'encoder_attention_mask'] (note: typos in the generate arguments will also show up in this list)

Traceback (most recent call last):
  [... the same ValueError traceback repeats here ...]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/gradio/routes.py", line 275, in run_predict
    output = await app.blocks.process_api(
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/gradio/blocks.py", line 785, in process_api
    result = await self.call_function(fn_index, inputs, iterator)
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/gradio/blocks.py", line 694, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/Users/user/miniconda/envs/web-ui/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "/Users/user/SD_Test/stable-diffusion-webui/modules/ui.py", line 351, in interrogate
    prompt = shared.interrogator.interrogate(image)
  File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 167, in interrogate
    res += "<error>"
TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
```
Desktop (please complete the following information):
- OS: macOS (Apple M1 Pro)
- Browser: Safari
That's not just M1; it's happening with the exact same error on Windows 11 via WSL Ubuntu.
Exact same error on Windows 10:
```
During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\gradio\routes.py", line 275, in run_predict
    output = await app.blocks.process_api(
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\gradio\blocks.py", line 787, in process_api
    result = await self.call_function(fn_index, inputs, iterator)
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\gradio\blocks.py", line 694, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\anyio\to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\anyio\_backends\_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "C:\Users\gradd\.conda\envs\automatic1\lib\site-packages\anyio\_backends\_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "D:\Stable-Diffusion\AUTOMATIC1111\stable-diffusion-webui\modules\ui.py", line 351, in interrogate
    prompt = shared.interrogator.interrogate(image)
  File "D:\Stable-Diffusion\AUTOMATIC1111\stable-diffusion-webui\modules\interrogate.py", line 167, in interrogate
    res += "<error>"
TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
```
```
pip install transformers==4.19.2
```
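For anyone wondering why the downgrade helps: judging from the traceback, the ValueError is raised by `_validate_model_kwargs`, a check that newer transformers releases run inside `generate()` and that rejects the extra `encoder_hidden_states`/`encoder_attention_mask` arguments the bundled BLIP code passes. My reading (not verified against the changelog) is that 4.19.2 simply predates that check. A quick way to confirm which version your environment actually picked up:

```python
# sanity check: run this with the same Python the webui uses
import transformers
print(transformers.__version__)  # expect "4.19.2" after the downgrade
```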
Awesome, thanks, now it's working!
Still not working for me. Transformers is already installed:
```
D:\Project\stable-diffusion-webui-main\venv\Scripts>pip install transformers==4.19.2
Requirement already satisfied: transformers==4.19.2 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (4.19.2)
Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (0.12.1)
Requirement already satisfied: numpy>=1.17 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (1.23.3)
Requirement already satisfied: filelock in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (3.8.0)
Requirement already satisfied: regex!=2019.12.17 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (2022.9.13)
Requirement already satisfied: pyyaml>=5.1 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (6.0)
Requirement already satisfied: packaging>=20.0 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (21.3)
Requirement already satisfied: tqdm>=4.27 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (4.64.1)
Requirement already satisfied: requests in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (2.25.1)
Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from transformers==4.19.2) (0.10.1)
Requirement already satisfied: typing-extensions>=3.7.4.3 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from huggingface-hub<1.0,>=0.1.0->transformers==4.19.2) (4.4.0)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from packaging>=20.0->transformers==4.19.2) (3.0.9)
Requirement already satisfied: colorama in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from tqdm>=4.27->transformers==4.19.2) (0.4.5)
Requirement already satisfied: chardet<5,>=3.0.2 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from requests->transformers==4.19.2) (4.0.0)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from requests->transformers==4.19.2) (1.26.12)
Requirement already satisfied: certifi>=2017.4.17 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from requests->transformers==4.19.2) (2022.9.24)
Requirement already satisfied: idna<3,>=2.5 in d:\project\stable-diffusion-webui-main\venv\lib\site-packages (from requests->transformers==4.19.2) (2.10)
```
> pip install transformers==4.19.2
Hi, I have the same problem. Please elaborate on where exactly to put this command (I'm a nooooooooob).
If you don't know how to install it, just put it into your .bat/.sh file after `conda activate webui`, run it once, then you can delete the line again.
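For example, the relevant lines of the launch script would look something like this (a sketch; the env name `webui` follows the comment above and may differ in your setup):

```
conda activate webui
pip install transformers==4.19.2   # run once, then delete this line
```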
I did the installation, but it's still not working; it's still showing the same error:

```
File "C:\AI\stable-diffusion-webui\stable-diffusion-webui\modules\interrogate.py", line 173, in interrogate
    res += "<error>"
```
I'm having the same problem. Upgraded to Windows 11, reinstalled everything, and now when I do Interrogate I get the same message. Tried the suggested `pip install transformers==4.19.2`; it does not resolve the issue. Would appreciate some help on this.


It could also depend on your region. The first time you use "Interrogate CLIP", it needs to download some files. In my case, the download link was blocked in my region, so I used a VPN and it worked! You only need the VPN the first time you use CLIP, though.
It did download the files, but I think the downloaded files might be corrupted. How can I re-download the files it needs?
So, this is probably wrong in some way and I may be yelled at by other users who know more about this stuff, but here's what I did to make it work:

1. Open `modules/interrogate.py` with Notepad++ or whatever you prefer.
2. On line 17, there's this code: `blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth'`
3. Copy the link in quotes and open it in your browser (in my case, the link was blocked in my region, so I had to use a VPN).
4. A file named `model_base_caption_capfilt_large.pth` will start downloading; it may take some time.
5. Move it anywhere you want in your `stable-diffusion-webui` folder. I chose to move it to `repositories/BLIP/models/`.
6. Go back to `modules/interrogate.py`.
7. Go to line 50. At the beginning of the line it says `blip_model = models.blip.blip_decoder(pretrained=blip_model_url,` (and some other stuff that's not important now).
8. Change `blip_model_url` to the path to your `model_base_caption_capfilt_large.pth` file. For me, it looked like this: `pretrained="C:/stable-diffusion/stable-diffusion-webui/repositories/BLIP/models/model_base_caption_capfilt_large.pth"`. WARNING!!! If you copy a path from Windows File Explorer, the folders will be separated by backslashes `\`. Make sure to change these to forward slashes `/` or else you will get an error.
9. Save the `interrogate.py` file and you're good to go!
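If the browser download keeps failing or the file comes out corrupted, you could also fetch it from a terminal. A minimal sketch using the `requests` package (typically already present in the webui environment; run it from the `stable-diffusion-webui` folder, and adjust `dest` to wherever you want the file):

```python
# hypothetical re-download helper for the BLIP checkpoint; run with the webui's Python
import os
import requests

url = "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth"
dest = "repositories/BLIP/models/model_base_caption_capfilt_large.pth"  # example location from the steps above

os.makedirs(os.path.dirname(dest), exist_ok=True)
with requests.get(url, stream=True, timeout=60) as r:
    r.raise_for_status()  # fail loudly if the link is blocked in your region
    with open(dest, "wb") as f:
        for chunk in r.iter_content(chunk_size=1 << 20):  # write in 1 MiB chunks
            f.write(chunk)
print("saved", dest)
```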
Thank You. This worked for me.
You're welcome. Just don't forget to change the path if you'll be moving your stable diffusion to a different folder :)
Thank you, but I think I did something wrong, even though I did every step exactly as you said!
> (quoting the manual BLIP download workaround above)
This is what I got.

These are my changes to the `interrogate.py` file, lines 50 to 52:

```python
blip_model = models.blip.blip_decoder(pretrained="C:/AI/stable-diffusion-webui/stable-diffusion-webui/repositories/BLIP/models/model_base_caption_capfilt_large.pth"
return blip_model, image_size=blip_image_eval_size, vit='base', med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"))
blip_model.eval()
```
Oh sorry, my bad, I accidentally changed the order of the lines, I guess. Whoever did the same, go here and check the lines against the original file: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/modules/interrogate.py
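For anyone comparing, the corrected block would look roughly like this (a sketch based on the file linked above and the snippet posted earlier; the local path is an example, and line numbers may differ between versions):

```python
# modules/interrogate.py, around line 50, after replacing blip_model_url with a local path
blip_model = models.blip.blip_decoder(
    pretrained="C:/AI/stable-diffusion-webui/stable-diffusion-webui/repositories/BLIP/models/model_base_caption_capfilt_large.pth",
    image_size=blip_image_eval_size,
    vit='base',
    med_config=os.path.join(paths.paths["BLIP"], "configs", "med_config.json"),
)
blip_model.eval()

return blip_model  # the return stays on its own line, after the call
```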
> (quoting the manual BLIP download workaround above)
Thank you so much! It worked for me too!
You are a genius!
Thanks :)
Just tried the fixes listed in this thread and I'm still getting the same issues; both downgrading transformers and setting the BLIP file path manually did not work. Anyone got any ideas? Ubuntu 20.04 for reference on OS; let me know if anyone needs additional information.
I also tried everything... M1 Pro.

```
ValueError: The following `model_kwargs` are not used by the model: ['encoder_hidden_states', 'encoder_attention_mask'] (note: typos in the generate arguments will also show up in this list)
TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
```

Edit: After a clean install on another M1 machine it works like a charm. Seems it was an issue with my installation.

Edit II: Seems that my problem was the transformers version. I installed the Dreambooth extension, which needs transformers 4.22.1, but then CLIP does not work, because it needs 4.19.2.
> conda activate webui

Thank you for this tip. What worked for me without reinstalling:

```
conda activate web-ui   # note the hyphen
pip uninstall transformers
pip install transformers==4.19.2
```
File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 167, in interrogate res += "
" TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
I am newby so have patience please.
This is my solution and works for me:
Error is probably due to wrong management res += "<error>"
the real error shows a mismatch between a len(4) matrix and a len(3) matrix
this is due to image Alpha channel (png) that should be removed before interrogation:
a very dirty solution:
go to modules directory
clear chache: delete all files in __pycache__ directory
open interrogate.py
add: from PIL import Image
after def interrogate(self, pil_image): add following three lines:
background = Image.new('RGBA', pil_image.size, (255,255,255))
alpha_composite = Image.alpha_composite(background, pil_image)
pil_image = alpha_composite.convert('RGB')
to remove alpha channel from image. Stop and restart Automatic1111.
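Standalone, the idea looks like this (a sketch; note that `Image.alpha_composite` requires an RGBA input, so images without an alpha channel should just be converted directly):

```python
from PIL import Image

def flatten_alpha(pil_image: Image.Image) -> Image.Image:
    """Composite an RGBA image onto a white background and return a plain RGB image."""
    if pil_image.mode != "RGBA":
        return pil_image.convert("RGB")  # nothing to flatten
    background = Image.new("RGBA", pil_image.size, (255, 255, 255, 255))
    return Image.alpha_composite(background, pil_image).convert("RGB")

# usage inside interrogate(): pil_image = flatten_alpha(pil_image)
```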
File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 167, in interrogate res += "" TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
I am newby so have patience please.
This is my solution and works for me:
Error is probably due to wrong management
res += "<error>"the real error shows a mismatch between a len(4) matrix and a len(3) matrix this is due to image Alpha channel (png) that should be removed before interrogation:a very dirty solution:
go to
modulesdirectory clear chache: delete all files in__pycache__directory openinterrogate.pyadd:from PIL import Imageafterdef interrogate(self, pil_image):add following three linebackground = Image.new('RGBA', pil_image.size, (255,255,255)) alpha_composite = Image.alpha_composite(background, pil_image) pil_image = alpha_composite.convert('RGB')to remove alpha channel from image. Stop and restart Automatic1111.
Thank you! This fixed my problem. I am using Automatic1111 with ROCm on Linux.
File "/Users/user/SD_Test/stable-diffusion-webui/modules/interrogate.py", line 167, in interrogate res += "" TypeError: unsupported operand type(s) for +=: 'NoneType' and 'str'
I am newby so have patience please.
This is my solution and works for me:
Error is probably due to wrong management
res += "<error>"the real error shows a mismatch between a len(4) matrix and a len(3) matrix this is due to image Alpha channel (png) that should be removed before interrogation:a very dirty solution:
go to
modulesdirectory clear chache: delete all files in__pycache__directory openinterrogate.pyadd:from PIL import Imageafterdef interrogate(self, pil_image):add following three lines:background = Image.new('RGBA', pil_image.size, (255,255,255)) alpha_composite = Image.alpha_composite(background, pil_image) pil_image = alpha_composite.convert('RGB')to remove alpha channel from image. Stop and restart Automatic1111.
What should it look like? Should it be like this? I'm not a programmer.
```python
def interrogate(self, pil_image):
    from PIL import Image
    # flatten the alpha channel onto a white background so the models get an RGB image
    # (guarding on mode, since Image.alpha_composite requires RGBA inputs)
    if pil_image.mode == 'RGBA':
        background = Image.new('RGBA', pil_image.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, pil_image)
        pil_image = alpha_composite.convert('RGB')

    res = ""
    shared.state.begin()
    shared.state.job = 'interrogate'

    try:
        if shared.cmd_opts.lowvram or shared.cmd_opts.medvram:
            lowvram.send_everything_to_cpu()
            devices.torch_gc()

        self.load()

        caption = self.generate_caption(pil_image)
        self.send_blip_to_ram()
        devices.torch_gc()

        res = caption

        clip_image = self.clip_preprocess(pil_image).unsqueeze(0).type(self.dtype).to(devices.device_interrogate)

        with torch.no_grad(), devices.autocast():
            image_features = self.clip_model.encode_image(clip_image).type(self.dtype)
            image_features /= image_features.norm(dim=-1, keepdim=True)

            for name, topn, items in self.categories():
                matches = self.rank(image_features, items, top_count=topn)
                for match, score in matches:
                    if shared.opts.interrogate_return_ranks:
                        res += f", ({match}:{score/100:.3f})"
                    else:
                        res += ", " + match
    except Exception:
        print("Error interrogating", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        res += "<error>"

    self.unload()
    shared.state.end()

    return res
```