LTXVPromptEnhancer: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
The node is very cool and it works sometimes, but most of the time I get this error. Occasionally it works if you keep retrying.
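For reference, this is the kind of mismatch PyTorch reports when a single op receives tensors from two devices. A minimal repro (my own sketch, not taken from the node itself; it only triggers on a CUDA machine):

import torch

if torch.cuda.is_available():
    weights = torch.randn(10, 4, device="cuda:0")  # tensor living on the GPU
    index = torch.tensor([0, 2, 5])                # index tensor left on the CPU
    # RuntimeError: Expected all tensors to be on the same device,
    # but found at least two devices, cuda:0 and cpu!
    torch.index_select(weights, 0, index)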
@Skol600ml Hey there! I ran into the same "Expected all tensors to be on the same device" error. Here’s how I fixed it by explicitly moving models and images to the same device. Just add the lines marked with # <-- add this line:
In the prompt enhancer loader (e.g., down_load_llm_model and down_load_image_captioner):
def down_load_llm_model(self, llm_name, load_device):
    model_path = ...
    llm_model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
    llm_model = llm_model.to(load_device)  # <-- add this line
    llm_tokenizer = AutoTokenizer.from_pretrained(model_path)
    return llm_model, llm_tokenizer
def down_load_image_captioner(self, image_captioner, load_device):
    model_path = ...
    image_caption_model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
    image_caption_model = image_caption_model.to(load_device)  # <-- add this line
    image_caption_processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    return image_caption_model, image_caption_processor
And in the enhance() method:
def enhance(...):
    ...
    if image_prompt is not None:
        permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :].to(model.device)  # <-- add .to(model.device)
        image_conditioning = [(permuted_image, 0, 1.0)]
    ...
After making these changes, I no longer see the device mismatch error. Hope it helps!
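If you want to verify where things actually live before and after the change, here is a quick sanity-check helper (my own sketch; report_devices is a hypothetical name, and it assumes the enhancer module exposes llm_model and image_caption_model as in the node source, i.e. you would pass it prompt_enhancer.model):

def report_devices(enhancer, image_prompt=None):
    # Print every device that each sub-model's parameters sit on.
    for name, module in (("llm_model", enhancer.llm_model),
                         ("image_caption_model", enhancer.image_caption_model)):
        devices = {p.device for p in module.parameters()}
        print(f"{name}: {devices}")
    # The image tensor coming from ComfyUI usually starts out on the CPU.
    if image_prompt is not None:
        print(f"image_prompt: {image_prompt.device}")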
Thank you, but I don't know which file you modified them in.
Never mind, it only works once on my side.
Sadly the solution only works once and then on a new generation gives the same error.
My recommendation is to use Ollama Generate node for the prompt enhancement until the problem is fixed.
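If you go the Ollama route outside ComfyUI, here is a rough sketch of hitting Ollama's generate endpoint directly (assumes a local Ollama server on the default port 11434 and that a model such as llama3.2 is already pulled; adjust names to your setup):

import json
import urllib.request

# Ask a locally running Ollama server to rewrite a short prompt.
payload = {
    "model": "llama3.2",
    "prompt": "Rewrite as a detailed cinematic video prompt: a cat jumps off a windowsill",
    "stream": False,
}
req = urllib.request.Request(
    "http://localhost:11434/api/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["response"])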
It doesn't work as it's supposed to. To (temporarily) fix the issue:
- Go inside the ComfyUI-LTXVideo folder
- Open prompt_enhancer_nodes.py in a code editor (e.g., VS Code or Notepad++).
- On line 121 add: llm_model = llm_model.to(load_device)
- Then AFTER editing the previous line, go to line 133 and add: image_caption_model = image_caption_model.to(load_device)
- Finally, on line 199, append .to(model.device) to permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :], so that it reads: permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :].to(model.device)
All of this is done in the prompt_enhancer_nodes.py file.
Final code should look like:
def down_load_llm_model(self, llm_name, load_device):
    model_path = self.model_path_download_if_needed(llm_name)
    llm_model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
    )
    llm_model = llm_model.to(load_device)
    llm_tokenizer = AutoTokenizer.from_pretrained(
        model_path,
    )
    return llm_model, llm_tokenizer
def down_load_image_captioner(self, image_captioner, load_device):
    model_path = self.model_path_download_if_needed(image_captioner)
    image_caption_model = AutoModelForCausalLM.from_pretrained(
        model_path, trust_remote_code=True
    )
    image_caption_model = image_caption_model.to(load_device)
    image_caption_processor = AutoProcessor.from_pretrained(
        model_path, trust_remote_code=True
    )
    return image_caption_model, image_caption_processor
The last part looks like this:
def enhance(
    self,
    prompt,
    prompt_enhancer: comfy.model_patcher.ModelPatcher,
    image_prompt: torch.Tensor = None,
    max_resulting_tokens=256,
):
    comfy.model_management.free_memory(
        prompt_enhancer.memory_required([]),
        comfy.model_management.get_torch_device(),
    )
    comfy.model_management.load_model_gpu(prompt_enhancer)
    model = prompt_enhancer.model
    image_conditioning = None
    if image_prompt is not None:
        permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :].to(model.device)
        image_conditioning = [(permuted_image, 0, 1.0)]
    enhanced_prompt = model(prompt, image_conditioning, max_resulting_tokens)
    return (enhanced_prompt[0],)
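For anyone wondering what the permute line is doing, here is a small standalone sketch with a dummy tensor (the (frames, height, width, channels) layout is my assumption based on how ComfyUI passes IMAGE tensors):

import torch

# ComfyUI IMAGE tensors arrive as (frames, height, width, channels), typically on the CPU.
image_prompt = torch.rand(1, 512, 512, 3)

# permute(3, 0, 1, 2) reorders to (channels, frames, height, width),
# and [None, :] adds a batch dimension -> (1, channels, frames, height, width).
permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :]
print(permuted_image.shape)  # torch.Size([1, 3, 1, 512, 512])

# The extra .to(model.device) is what keeps this tensor on the same device as the enhancer model.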
Hello all, I encountered the same problem and here is how I solved it. The file is at "ComfyUI_windows_portable\ComfyUI\custom_nodes\ComfyUI-LTXVideo\prompt_enhancer_nodes.py". Just modify the enhance procedure as below and you will see a noticeable speed improvement: before: 17', after: 6' on my 3080.
def enhance(
    self,
    prompt,
    prompt_enhancer: comfy.model_patcher.ModelPatcher,
    image_prompt: torch.Tensor = None,
    max_resulting_tokens=256,
):
    comfy.model_management.free_memory(
        prompt_enhancer.memory_required([]),
        comfy.model_management.get_torch_device(),
    )
    # comfy.model_management.load_model_gpu(prompt_enhancer)
    device = "cuda:0" if torch.cuda.is_available() else "cpu"  # <-- add
    model = prompt_enhancer.model.to(device)  # <-- add
    image_conditioning = None
    if image_prompt is not None:
        permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :].to(device)  # <-- add .to(device)
        image_conditioning = [(permuted_image, 0, 1.0)]
    enhanced_prompt = model(prompt, image_conditioning, max_resulting_tokens)
    return (enhanced_prompt[0],)
Unbelievable: I just came here to Issues to find an answer, and you solved this 7 minutes before I visited.
I tested this and it fixed the issue for me!
Edit: For those less familiar with Python, def enhance( needs to be indented after you copy/paste the code from @patrice74 verbatim, roughly as sketched below.
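In other words, the pasted method has to sit one indentation level inside the class, like this (a minimal sketch; the rest of the class body is elided):

class LTXVPromptEnhancer:
    # ... RETURN_TYPES, FUNCTION, CATEGORY, etc. ...

    def enhance(  # <-- indented one level under the class
        self,
        prompt,
        prompt_enhancer,
        image_prompt=None,
        max_resulting_tokens=256,
    ):
        ...  # body indented one further level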
You saved me a lot of time! Thank you.
@patrice74 Hero. Thanks, that fixed the problem!
@patrice74 Solved for me also! (I have an RTX 3080 10GB.)
I tried this multiple times and it still doesn't work. I'm not sure whether it's because my graphics card is too old; I've seen similar answers about this in other threads.
The ultimate solution: force the use of the CPU. But it will significantly slow down the process.
import os
import shutil

import comfy.model_management
import comfy.model_patcher
import folder_paths
import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

from .nodes_registry import comfy_node
from .prompt_enhancer_utils import generate_cinematic_prompt
LLM_NAME = ["unsloth/Llama-3.2-3B-Instruct"]
IMAGE_CAPTIONER = ["MiaoshouAI/Florence-2-large-PromptGen-v2.0"]
MODELS_PATH_KEY = "LLM"
class PromptEnhancer(torch.nn.Module):
    def __init__(
        self,
        image_caption_processor: AutoProcessor,
        image_caption_model: AutoModelForCausalLM,
        llm_model: AutoModelForCausalLM,
        llm_tokenizer: AutoTokenizer,
    ):
        super().__init__()
        self.image_caption_processor = image_caption_processor
        self.image_caption_model = image_caption_model
        self.llm_model = llm_model
        self.llm_tokenizer = llm_tokenizer
        # self.device = image_caption_model.device  # Original line, potentially points to GPU.
        self.device = torch.device("cpu")  # Modified to force CPU
        # model parameters and buffer sizes plus some extra 1GB.
        self.model_size = (
            self.get_model_size(self.image_caption_model)
            + self.get_model_size(self.llm_model)
            + 1073741824
        )
    def forward(self, prompt, image_conditioning, max_resulting_tokens):
        # Ensure models are on the correct device before use within generate_cinematic_prompt.
        # Although the models should ideally be moved to CPU during loading,
        # ensuring here as well for robustness if generate_cinematic_prompt handles devices internally.
        # Based on the original code structure, moving to self.device in the enhance method is the primary mechanism.
        enhanced_prompt = generate_cinematic_prompt(
            self.image_caption_model,
            self.image_caption_processor,
            self.llm_model,
            self.llm_tokenizer,
            prompt,
            image_conditioning,
            max_new_tokens=max_resulting_tokens,
        )
        return enhanced_prompt
    @staticmethod
    def get_model_size(model):
        total_size = sum(p.numel() * p.element_size() for p in model.parameters())  # Corrected
        total_size += sum(b.numel() * b.element_size() for b in model.buffers())  # Corrected
        return total_size

    def memory_required(self, input_shape):
        return self.model_size
@comfy_node(name="LTXVPromptEnhancerLoader") class LTXVPromptEnhancerLoader: @classmethod def INPUT_TYPES(s): return { "required": { "llm_name": ( "STRING", { "default": LLM_NAME, "tooltip": "The hugging face name of the llm model to load.", }, ), "image_captioner_name": ( "STRING", { "default": IMAGE_CAPTIONER, "tooltip": "The hugging face name of the image captioning model to load.", }, ), } }
RETURN_TYPES = ("LTXV_PROMPT_ENHANCER",)
RETURN_NAMES = ("prompt_enhancer",)
FUNCTION = "load"
CATEGORY = "lightricks/LTXV"
TITLE = "LTXV Prompt Enhancer (Down)Loader"
OUTPUT_NODE = False
    def model_path_download_if_needed(self, model_name):
        model_directory = os.path.join(folder_paths.models_dir, MODELS_PATH_KEY)
        os.makedirs(model_directory, exist_ok=True)

        model_name_ = model_name.rsplit("/", 1)[-1]
        model_path = os.path.join(model_directory, model_name_)

        if not os.path.exists(model_path):
            from huggingface_hub import snapshot_download

            try:
                snapshot_download(
                    repo_id=model_name,
                    local_dir=model_path,
                    local_dir_use_symlinks=False,
                )
            except Exception:
                shutil.rmtree(model_path, ignore_errors=True)
                raise

        return model_path
    def down_load_llm_model(self, llm_name, load_device):
        model_path = self.model_path_download_if_needed(llm_name)
        llm_model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
        )
        # llm_model = llm_model.to(load_device)  # Original commented-out line
        llm_tokenizer = AutoTokenizer.from_pretrained(
            model_path,
        )
        return llm_model, llm_tokenizer
    def down_load_image_captioner(self, image_captioner, load_device):
        model_path = self.model_path_download_if_needed(image_captioner)
        image_caption_model = AutoModelForCausalLM.from_pretrained(
            model_path, trust_remote_code=True
        )
        # image_caption_model = image_caption_model.to(load_device)  # Original commented-out line
        image_caption_processor = AutoProcessor.from_pretrained(
            model_path, trust_remote_code=True
        )
        return image_caption_model, image_caption_processor
    def load(self, llm_name, image_captioner_name):
        # load_device = comfy.model_management.get_torch_device()  # Original line
        load_device = torch.device("cpu")  # Modified to force CPU
        offload_device = comfy.model_management.vae_offload_device()
        llm_model, llm_tokenizer = self.down_load_llm_model(llm_name, load_device)
        image_caption_model, image_caption_processor = self.down_load_image_captioner(
            image_captioner_name, load_device
        )

        enhancer = PromptEnhancer(
            image_caption_processor, image_caption_model, llm_model, llm_tokenizer
        )
        # ModelPatcher will manage moving the model to load_device (CPU)
        patcher = comfy.model_patcher.ModelPatcher(
            enhancer,
            load_device,
            offload_device,
        )
        return (patcher,)
@comfy_node(name="LTXVPromptEnhancer") class LTXVPromptEnhancer: @classmethod def INPUT_TYPES(s): return { "required": { "prompt": ("STRING",), "prompt_enhancer": ("LTXV_PROMPT_ENHANCER",), "max_resulting_tokens": ( "INT", {"default": 256, "min": 32, "max": 512}, ), }, "optional": { "image_prompt": ("IMAGE",), }, }
RETURN_TYPES = ("STRING",)
RETURN_NAMES = ("str",)
FUNCTION = "enhance"
CATEGORY = "lightricks/LTXV"
TITLE = "LTXV Prompt Enhancer"
OUTPUT_NODE = False
    def enhance(
        self,
        prompt,
        prompt_enhancer: comfy.model_patcher.ModelPatcher,
        image_prompt: torch.Tensor = None,
        max_resulting_tokens=256,
    ):
        # Free memory for the model on the target device (CPU)
        comfy.model_management.free_memory(
            prompt_enhancer.memory_required([]),
            torch.device("cpu"),  # Modified to force CPU
        )
        # comfy.model_management.load_model_gpu(prompt_enhancer)  # Original commented-out line
        # model = prompt_enhancer.model  # Original line
        # device = "cuda:0" if torch.cuda.is_available() else "cpu"  # <-- original add
        device = torch.device("cpu")  # Modified to force CPU
        model = prompt_enhancer.model.to(device)  # <-- original add, now moves to CPU
        image_conditioning = None
        if image_prompt is not None:
            # Ensure image tensor is on the correct device (CPU)
            permuted_image = image_prompt.permute(3, 0, 1, 2)[None, :].to(device)  # <-- original add .to(device), now moves to CPU
            image_conditioning = [(permuted_image, 0, 1.0)]
        enhanced_prompt = model(prompt, image_conditioning, max_resulting_tokens)
        return (enhanced_prompt[0],)
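If you want to confirm the forced-CPU variant really keeps everything off the GPU, here is a small sanity check (assert_all_on_cpu is a hypothetical helper of mine; it assumes the same llm_model / image_caption_model attribute names as the class above):

def assert_all_on_cpu(enhancer, image_prompt=None):
    # Every parameter of both sub-models should report device type "cpu".
    for module in (enhancer.llm_model, enhancer.image_caption_model):
        assert all(p.device.type == "cpu" for p in module.parameters())
    # And so should the incoming image tensor, if there is one.
    if image_prompt is not None:
        assert image_prompt.device.type == "cpu"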