Remove scipy dependency
It's a heavy dependency that requires external libraries and tooling to be installed. We only use it for the scipy.ndimage.zoom method, whose functionality is provided by both Pillow and OpenCV, which are already dependencies of this project.
Off the top of my head, either of these two implementations can replace the zoom method; note that the OpenCV one will be the more performant:
from PIL import Image
import numpy as np
def pillow_zoom(input_array, zoom_factor):
    """Resize an array with Pillow as a stand-in for ``scipy.ndimage.zoom``.

    Args:
        input_array: A 2D array, or a 3D array that is treated as a stack
            of 2D planes along axis 0. Each plane is handed to
            ``Image.fromarray`` as-is (no dtype conversion is done here).
        zoom_factor: Scalar factor applied to both axes of every 2D plane.

    Returns:
        The bicubically resampled array. For 3D input the length of axis 0
        is unchanged; only the last two axes are scaled.

    Raises:
        ValueError: If ``input_array`` is neither 2D nor 3D.
    """
    if input_array.ndim == 2:
        # round() rather than int(): scipy.ndimage.zoom rounds the scaled
        # size, and plain truncation also loses a pixel to float error
        # (e.g. 100 * 0.29 == 28.999999999999996 would become 28).
        new_height, new_width = (
            int(round(s * zoom_factor)) for s in input_array.shape
        )
        img = Image.fromarray(input_array)
        # PIL expects (width, height), the reverse of numpy's (rows, cols).
        resized = img.resize((new_width, new_height), Image.Resampling.BICUBIC)
        return np.array(resized)
    if input_array.ndim == 3:
        # Resize every plane along axis 0 independently, then restack.
        return np.stack(
            [pillow_zoom(plane, zoom_factor) for plane in input_array]
        )
    raise ValueError("Unsupported array dimension")
or
import cv2
import numpy as np
def opencv_zoom(input_array, zoom_factor):
    """Resize an array with OpenCV as a stand-in for ``scipy.ndimage.zoom``.

    Args:
        input_array: A 2D array, or a 3D array that is treated as a stack
            of 2D planes along axis 0. Each plane is handed to
            ``cv2.resize`` as-is (no dtype conversion is done here).
        zoom_factor: Scalar factor applied to both axes of every 2D plane.

    Returns:
        The bicubically interpolated array. For 3D input the length of
        axis 0 is unchanged; only the last two axes are scaled.

    Raises:
        ValueError: If ``input_array`` is neither 2D nor 3D.
    """
    if input_array.ndim == 2:
        # round() rather than int(): scipy.ndimage.zoom rounds the scaled
        # size, and plain truncation also loses a pixel to float error
        # (e.g. 100 * 0.29 == 28.999999999999996 would become 28).
        new_height, new_width = (
            int(round(s * zoom_factor)) for s in input_array.shape
        )
        # cv2.resize takes dsize as (width, height), the reverse of
        # numpy's (rows, cols) shape order.
        return cv2.resize(
            input_array,
            (new_width, new_height),
            interpolation=cv2.INTER_CUBIC,
        )
    if input_array.ndim == 3:
        # Resize every plane along axis 0 independently, then restack.
        return np.stack(
            [opencv_zoom(plane, zoom_factor) for plane in input_array]
        )
    raise ValueError("Unsupported array dimension")
I agree with you!
Could you send a PR and include some metrics (accuracy and performance)?
Also scipy is pinned in the requirements, which makes it harder to install mlx-vlm in an environment with other packages.
on it
https://github.com/Blaizzy/mlx-vlm/pull/268/
Would love for this to be merged. It would also solve #470.
done ✅