coremltools
coremltools copied to clipboard
Extremely Long Loading with default compute_units
🐞Describing the bug
I'm experiencing extremely long loading times when using the MLModel API to load a converted Core ML model: with the default compute_units, loading hangs indefinitely. When I set compute_units to ct.ComputeUnit.CPU_AND_GPU, the model loads successfully and quickly.
Stack Trace
- If applicable, please paste the complete stack trace.
To Reproduce
- I built a simple super-resolution model in PyTorch, traced it using torch.jit.trace, and then converted it to an mlpackage. There are no exotic operations; every operation is common in deep learning.
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleSAFM(nn.Module):
    """Gate one half of the projected features with a coarse, pooled branch.

    The input is projected with a 3x3 convolution and split into two channel
    halves. The first half is max-pooled by 8, passed through a depthwise 3x3
    convolution, upsampled back to the input resolution, and used (after
    ReLU) to modulate itself multiplicatively. The result is concatenated
    with the untouched second half and fused by a 1x1 convolution.

    Args:
        dim: Number of input and output channels. It should be even, because
            the projection is split in two and the depthwise convolution is
            built for ``dim // 2`` channels.
    """

    def __init__(self, dim):
        super().__init__()
        self.proj = nn.Conv2d(dim, dim, 3, 1, 1, bias=False)
        # Depthwise convolution: one group per channel of the pooled half.
        self.dwconv = nn.Conv2d(dim // 2, dim // 2, 3, 1, 1, groups=dim // 2, bias=False)
        self.out = nn.Conv2d(dim, dim, 1, 1, 0, bias=False)
        self.pool = nn.MaxPool2d(kernel_size=8, stride=8)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return a tensor with the same shape as ``x``.

        ``x`` needs a height and width of at least 8, since the pooled
        branch uses an 8x8 max-pool window.
        """
        # Remember the input resolution so the pooled branch can be restored.
        h, w = x.size()[-2:]
        x0, x1 = self.proj(x).chunk(2, dim=1)

        # Alternative left commented out in the original report:
        #   F.adaptive_max_pool2d(x0, (h // 8, w // 8))
        x2 = self.pool(x0)
        x2 = self.dwconv(x2)
        # Alternative left commented out in the original report:
        #   mode='bilinear'
        x2 = F.interpolate(x2, size=(h, w), mode='nearest')

        # Coarse branch acts as a gate on the full-resolution first half.
        x2 = self.act(x2) * x0
        x = torch.cat([x1, x2], dim=1)
        x = self.out(self.act(x))
        return x
class CCM(nn.Module):
    """Two-layer convolutional feed-forward block.

    A 3x3 convolution expands the channels to ``int(dim * ffn_scale)``, a
    ReLU follows, and a 1x1 convolution projects back to ``dim`` channels.
    Spatial size is preserved (stride 1, padding 1 on the 3x3 convolution).

    Args:
        dim: Number of input and output channels.
        ffn_scale: Channel expansion ratio for the hidden layer; the hidden
            width is truncated to an integer.
    """

    def __init__(self, dim, ffn_scale):
        super().__init__()
        hidden_dim = int(dim * ffn_scale)
        self.conv = nn.Sequential(
            nn.Conv2d(dim, hidden_dim, 3, 1, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden_dim, dim, 1, 1, 0, bias=False),
        )

    def forward(self, x):
        """Apply the expand -> ReLU -> project stack to ``x``."""
        return self.conv(x)
class AttBlock(nn.Module):
    """Run a ``SimpleSAFM`` block followed by a ``CCM`` block.

    There is no residual connection inside this block; the output of the
    second stage is returned directly.

    Args:
        dim: Channel count passed to both sub-blocks.
        ffn_scale: Channel expansion ratio forwarded to ``CCM``.
    """

    def __init__(self, dim, ffn_scale):
        super().__init__()
        self.conv1 = SimpleSAFM(dim)
        self.conv2 = CCM(dim, ffn_scale)

    def forward(self, x):
        """Apply ``conv1`` and then ``conv2`` to ``x``."""
        out = self.conv1(x)
        out = self.conv2(out)
        return out
class SAFMN_VIS24(nn.Module):
    """Small convolutional super-resolution network.

    Pipeline: a 3x3 convolution lifts the 3-channel input to ``dim``
    features, ``n_blocks`` ``AttBlock`` modules refine them with a single
    global skip connection around the whole stack, and a 3x3 convolution
    plus ``PixelShuffle`` produces a 3-channel image upscaled by
    ``upscaling_factor``.

    Args:
        dim: Feature width used throughout the body.
        n_blocks: Number of ``AttBlock`` modules in the body.
        ffn_scale: Channel expansion ratio forwarded to each ``AttBlock``.
        upscaling_factor: Spatial upscaling factor of the output image.
    """

    def __init__(self, dim=8, n_blocks=1, ffn_scale=2.0, upscaling_factor=4):
        super().__init__()
        self.scale = upscaling_factor
        self.to_feat = nn.Conv2d(3, dim, 3, 1, 1, bias=False)
        self.feats = nn.Sequential(*[AttBlock(dim, ffn_scale) for _ in range(n_blocks)])
        self.to_img = nn.Sequential(
            # PixelShuffle folds upscaling_factor**2 channels into space,
            # leaving 3 output channels.
            nn.Conv2d(dim, 3 * upscaling_factor**2, 3, 1, 1, bias=False),
            nn.PixelShuffle(upscaling_factor),
        )

    def forward(self, x):
        """Map a 3-channel ``(N, 3, H, W)`` input to the upscaled image."""
        x = self.to_feat(x)
        # Global residual: the block stack learns a correction to the features.
        x = self.feats(x) + x
        return self.to_img(x)
# `ct` is used below, so coremltools must be imported before this point.
import coremltools as ct

# Fixed-size example input; the converted model is specialised to this shape.
x = torch.randn(1, 3, 1280, 1280)
model = SAFMN_VIS24(dim=16, n_blocks=2, ffn_scale=1.5, upscaling_factor=2)
# Trace in inference mode, as recommended before converting to Core ML.
model.eval()
traced_model = torch.jit.trace(model, x)

# Convert the traced graph to an ML Program with float16 compute precision.
model_ct = ct.convert(
    traced_model,
    convert_to="mlprogram",
    source="pytorch",
    inputs=[ct.TensorType(name="x", shape=x.shape)],
    outputs=[ct.TensorType(name="output")],
    compute_precision=ct.precision.FLOAT16,
    minimum_deployment_target=ct.target.iOS16,
)

coreml_model_name = 'SRx2_lighter_1280.mlpackage'
model_ct.save(coreml_model_name)
- Load the model in two ways:
import coremltools as ct
# Case 1: compute units restricted to CPU and GPU -- works, loads quickly.
mlmodel = ct.models.MLModel(
    coreml_model_name,
    compute_units=ct.ComputeUnit.CPU_AND_GPU,
)

# Case 2: default compute_units -- loading takes forever.
mlmodel = ct.models.MLModel(coreml_model_name)
The loading works only when setting compute_units=ct.ComputeUnit.CPU_AND_GPU
System environment (please complete the following information):
- coremltools version: 7.2
- OS: Sonoma 14.5
- Any other relevant version information:
Pytorch==2.0.1
Additional context
- I also tried running a performance report in Xcode. Profiling never finishes when Compute Unit is set to ALL on real physical devices: macOS 14.5 (M1 Pro chip), iOS 17.5 (iPhone 13 Pro) and iOS 18.0 (iPhone 16 Pro). The performance report completes only when the compute unit is set to either 'CPU only' or 'CPU and GPU'.