
Inference speed issue

Open kanghua309 opened this issue 7 months ago • 8 comments

A question: on my A800 machine, inference only reaches about 10 fps. Is that normal? Is there any way to speed it up?

kanghua309 avatar May 08 '25 08:05 kanghua309

@kanghua309 Hi, are you using realtime-inference? If you want more speed, you can try running the model through torch.compile.
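
For reference, a minimal sketch of that suggestion. The module below is a stand-in; in practice you would compile the loaded MuseTalk UNet, whose exact attribute names depend on the pipeline:

```python
# Minimal torch.compile sketch; `model` is a stand-in for the MuseTalk UNet.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(8, 8, 3, padding=1), nn.ReLU()).cuda().eval()

# "reduce-overhead" uses CUDA graphs to cut per-step launch overhead,
# which helps small, latency-sensitive real-time workloads.
model_c = torch.compile(model, mode="reduce-overhead")

x = torch.randn(4, 8, 256, 256, device="cuda")
with torch.no_grad():
    model_c(x)        # first call triggers (slow) compilation
    out = model_c(x)  # later calls run the optimized graph
```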

zzzweakman avatar May 10 '25 05:05 zzzweakman

On a single V100 SXM2 32GB, running the demo directly reaches about 12.5 FPS. Is there any way to speed it up further?

xuebofan avatar May 15 '25 01:05 xuebofan

With realtime-inference you should be able to reach 30+ fps on a V100, so that speed is below expectations. Could you post your log?

aidenyzhang avatar May 26 '25 09:05 aidenyzhang

I rewrote both the whisper audio feature extraction and the final paste-back of the lip-sync frames onto the face to run on the GPU. It reaches 50 fps.

Replace audio_processor.feature_extractor(pcm_array, sampling_rate=16000, return_tensors="pt").input_features with the following:

```python
import torch
import torchaudio
import torchaudio.transforms as T
import numpy as np


class FastWhisperFeatureExtractor:
    """
    Feature extractor fully aligned with OpenAI Whisper.
    Output: [1, 80, T], T <= 3000 (one frame per 10 ms).
    """

    def __init__(self, device="cuda", dtype=torch.float32, sampling_rate=16000):
        self.device = device
        self.dtype = dtype
        self.sampling_rate = sampling_rate

        self.n_mels = 80
        self.n_fft = 400
        self.hop_length = 160  # one frame per 10 ms
        self.win_length = 400
        self.n_frames = 3000

        # Official Whisper normalization constants
        self.mean = -4.2677393
        self.std = 4.5689974

        self.mel_transform = T.MelSpectrogram(
            sample_rate=sampling_rate,
            n_fft=self.n_fft,
            win_length=self.win_length,
            hop_length=self.hop_length,
            center=False,        # ❗ matches official Whisper
            pad_mode="reflect",
            power=1.0,           # ❗ Whisper uses the magnitude spectrum, not power
            norm="slaney",
            n_mels=self.n_mels,
            mel_scale="slaney",
        ).to(self.device)

    def extract(self, waveform: np.ndarray) -> torch.Tensor:
        """
        Input:  waveform: np.ndarray or torch.Tensor, shape [T] or [1, T], values in [-1.0, 1.0]
        Output: features: torch.Tensor, shape [1, 80, 3000], normalized
        """
        if isinstance(waveform, np.ndarray):
            waveform = torch.tensor(waveform, dtype=self.dtype)

        if waveform.ndim == 1:
            waveform = waveform.unsqueeze(0)  # [1, T]

        waveform = waveform.to(self.device)

        with torch.no_grad():
            mel_spec = self.mel_transform(waveform)                # [1, 80, T']
            log_mel = torch.log(torch.clamp(mel_spec, min=1e-10))  # natural log
            normed_log_mel = (log_mel - self.mean) / self.std      # normalize

        # pad or trim to 3000 frames
        T_now = normed_log_mel.size(-1)
        if T_now < self.n_frames:
            pad = torch.zeros((1, self.n_mels, self.n_frames - T_now),
                              dtype=normed_log_mel.dtype, device=self.device)
            normed_log_mel = torch.cat([normed_log_mel, pad], dim=-1)
        else:
            normed_log_mel = normed_log_mel[:, :, :self.n_frames]

        return normed_log_mel  # [1, 80, 3000]
```
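
A hypothetical drop-in usage of the extractor above (the pcm_array here is synthetic; in the real pipeline it is the 16 kHz chunk previously fed to the HF feature extractor):

```python
# Hypothetical drop-in usage; pcm_array stands in for the 16 kHz audio chunk
# that was previously passed to audio_processor.feature_extractor.
import numpy as np

extractor = FastWhisperFeatureExtractor(device="cuda")
pcm_array = np.random.uniform(-1.0, 1.0, size=16000 * 5).astype(np.float32)  # 5 s dummy audio
input_features = extractor.extract(pcm_array)
print(input_features.shape)  # torch.Size([1, 80, 3000]), already on the GPU
```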

Replace the face paste-back with the following:

```python
import torch
import torch.nn.functional as F
import numpy as np
import time
from concurrent.futures import ThreadPoolExecutor, Future


class RealTimeFaceComposer:
    def __init__(self, device="cuda"):
        print(f"🧠 RealTimeFaceComposer ready, using device: {device}", flush=True)
        self.device = device
        self.executor = ThreadPoolExecutor(max_workers=10)

    def _resize_torch(self, image: torch.Tensor, size: tuple) -> torch.Tensor:
        if image.ndim == 3:
            image = image.unsqueeze(0)  # [1, C, H, W]
        resized = F.interpolate(image, size=(size[1], size[0]),
                                mode="bilinear", align_corners=False)
        return resized.squeeze(0)

    def blend_tensor(
        self,
        face_tensor: torch.Tensor,
        base_tensor: torch.Tensor,
        mask_tensor: torch.Tensor,
        bbox: tuple,
        mask_crop_box: tuple,
    ) -> torch.Tensor:
        x1, y1, x2, y2 = bbox
        x_s, y_s, x_e, y_e = mask_crop_box
        h, w = y2 - y1, x2 - x1

        face_tensor = self._resize_torch(face_tensor, (w, h))
        mask_crop = mask_tensor[:, y1 - y_s:y2 - y_s, x1 - x_s:x2 - x_s]
        mask_resized = F.interpolate(mask_crop.unsqueeze(0), size=(h, w),
                                     mode='bilinear', align_corners=False)[0]

        base_crop = base_tensor[:, y1:y2, x1:x2]
        blended_crop = face_tensor * mask_resized + base_crop * (1 - mask_resized)

        output = base_tensor.clone()
        output[:, y1:y2, x1:x2] = blended_crop
        return output

    def blend_tensor_batch(
        self,
        face_tensor: torch.Tensor,  # (B, 3, H, W)
        base_tensor: torch.Tensor,  # (B, 3, H, W)
        mask_tensor: torch.Tensor,  # (B, 1, H, W)
        bboxes: list,               # list of (x1, y1, x2, y2)
        mask_crop_boxes: list,      # list of (x_s, y_s, x_e, y_e)
    ) -> torch.Tensor:
        B = face_tensor.shape[0]
        output = base_tensor.clone()

        for i in range(B):
            x1, y1, x2, y2 = bboxes[i]
            x_s, y_s, x_e, y_e = mask_crop_boxes[i]
            h, w = y2 - y1, x2 - x1

            face_crop = self._resize_torch(face_tensor[i], (w, h))
            mask_crop = mask_tensor[i, :, y1 - y_s:y2 - y_s, x1 - x_s:x2 - x_s]
            mask_resized = F.interpolate(mask_crop.unsqueeze(0), size=(h, w),
                                         mode='bilinear', align_corners=False)[0]

            base_crop = base_tensor[i, :, y1:y2, x1:x2]
            blended_crop = face_crop * mask_resized + base_crop * (1 - mask_resized)
            output[i, :, y1:y2, x1:x2] = blended_crop

        return output

    def tensor_batch_to_numpy(self, tensor: torch.Tensor) -> list:
        tensor = (tensor * 255.0).clamp(0, 255).byte().permute(0, 2, 3, 1).cpu().numpy()
        return [frame for frame in tensor]

    def _tensor_to_numpy(self, tensor: torch.Tensor) -> np.ndarray:
        return (tensor * 255.0).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy()

    def blend(
        self,
        face: np.ndarray,
        base: np.ndarray,
        mask: np.ndarray,
        bbox: tuple,
        mask_crop_box: tuple,
    ) -> np.ndarray:
        if mask.ndim == 3:
            mask = mask[:, :, 0]

        face_tensor = torch.from_numpy(face).permute(2, 0, 1).float().to(self.device) / 255.0
        base_tensor = torch.from_numpy(base).permute(2, 0, 1).float().to(self.device) / 255.0
        mask_tensor = torch.from_numpy(mask).unsqueeze(0).float().to(self.device) / 255.0

        output_tensor = self.blend_tensor(face_tensor, base_tensor, mask_tensor, bbox, mask_crop_box)
        return self._tensor_to_numpy(output_tensor)

    def async_blend_and_push(self, face_tensor, base_tensor, mask_tensor, bbox, mask_crop_box, queue):
        output_tensor = self.blend_tensor(face_tensor, base_tensor, mask_tensor, bbox, mask_crop_box)

        def task():
            try:
                frame = self._tensor_to_numpy(output_tensor)
                queue.append(frame)
            except Exception as e:
                print(f"⚠️ async_blend_and_push exception: {e}", flush=True)

        self.executor.submit(task)

    def async_blend_and_return(self, face_tensor, base_tensor, mask_tensor, bbox, mask_crop_box) -> Future:
        def task():
            try:
                output_tensor = self.blend_tensor(face_tensor, base_tensor, mask_tensor, bbox, mask_crop_box)
                frame = self._tensor_to_numpy(output_tensor)
                return frame
            except Exception as e:
                print(f"⚠️ async_blend_and_return exception: {e}", flush=True)
                return None

        return self.executor.submit(task)

    def async_blend_and_return_batch(self, face_tensor_batch, base_tensor_batch,
                                     mask_tensor_batch, bboxes, mask_crop_boxes) -> Future:
        output_batch = self.blend_tensor_batch(face_tensor_batch, base_tensor_batch,
                                               mask_tensor_batch, bboxes, mask_crop_boxes)
        # print(f"output_batch get size: {len(output_batch)}", flush=True)

        def task():
            try:
                return self.tensor_batch_to_numpy(output_batch)
            except Exception as e:
                print(f"⚠️ async_blend_and_return_batch exception: {e}", flush=True)
                return []
            # finally:
            #     print(f"📦 output_batch task over (size={len(output_batch)})", flush=True)

        return self.executor.submit(task)

    def async_blend_and_return_push(self, face_tensor_batch, base_tensor_batch,
                                    mask_tensor_batch, bboxes, mask_crop_boxes, queue):
        output_batch = self.blend_tensor_batch(face_tensor_batch, base_tensor_batch,
                                               mask_tensor_batch, bboxes, mask_crop_boxes)

        def task():
            try:
                for frame in self.tensor_batch_to_numpy(output_batch):
                    queue.append(frame)  # pass  # FIX IT
            except Exception as e:
                print(f"⚠️ async_blend_and_return_push exception: {e}", flush=True)
                return []

        return self.executor.submit(task)
```
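
A hypothetical call site for the composer (the frame, face crop, mask, and boxes below are dummies for illustration):

```python
# Hypothetical call site; all inputs are synthetic placeholders.
import numpy as np

composer = RealTimeFaceComposer(device="cuda")
face = np.random.randint(0, 255, (256, 256, 3), dtype=np.uint8)   # generated face crop
base = np.random.randint(0, 255, (720, 1280, 3), dtype=np.uint8)  # original video frame
mask = np.full((300, 300), 255, dtype=np.uint8)                   # blending mask crop

out_frame = composer.blend(
    face, base, mask,
    bbox=(500, 200, 700, 400),           # face region within the frame
    mask_crop_box=(450, 150, 750, 450),  # region the mask crop covers
)
print(out_frame.shape)  # (720, 1280, 3), uint8
```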



kanghua309 avatar Jun 04 '25 09:06 kanghua309


I think the paste-back of the lip-sync frames onto the face is already computationally cheap, so there is no need to GPU-accelerate that part: the speedup is marginal and it also consumes extra VRAM.

Scout9917 avatar Jun 11 '25 03:06 Scout9917

That's not normal. After some optimization my 4090 can run at 120-160 fps; your bottleneck is probably the CPU or the CPU-GPU bandwidth.
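
If you suspect a CPU-GPU bandwidth bottleneck, one generic way to check is to profile a few steps and see whether Memcpy HtoD/DtoH time dominates; a minimal sketch:

```python
# Generic bottleneck check: if Memcpy HtoD/DtoH dominates the table below,
# host<->device transfers (not the model) are the limiter. Pinned memory plus
# non_blocking copies is the usual mitigation.
import torch
from torch.profiler import profile, ProfilerActivity

x = torch.randn(4, 3, 256, 256).pin_memory()  # pinned host buffer

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
    for _ in range(20):
        g = x.to("cuda", non_blocking=True)  # HtoD copy
        g = g.mul_(2)                        # stand-in for model work
        _ = g.to("cpu")                      # DtoH copy
    torch.cuda.synchronize()

print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))
```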

tommyfan34 avatar Jul 10 '25 21:07 tommyfan34

That's not normal. After some optimization my 4090 can run at 120-160 fps; your bottleneck is probably the CPU or the CPU-GPU bandwidth.

What??? 120+ fps? Are you serious? Any pointers would be appreciated.

Scout9917 avatar Jul 11 '25 00:07 Scout9917

Could you briefly share your optimization approach? Thanks.

XieCong157312 avatar Aug 21 '25 03:08 XieCong157312