evision
evision copied to clipboard
Would an ImageDataGenerator belong here?
I am not sure whether this belongs in Evision or in a separate project; it's quite simple and is used all the time for Kaggle problems.
It follows the Keras ImageDataGenerator as closely as possible: https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/image/image_data_generator.py
Missing
- [ ] zoom
- [ ] shear
- [ ] brightness
- [ ] rescale
- [ ] rotation
- [ ] others
Use it like this:
# Options shared by the training and the evaluation stream.
common_opts = %{
  class_map: %{"NORMAL": 0, "PNEUMONIA": 1},
  cache: true,
  open_flags: OpenCV.cv_IMREAD_GRAYSCALE,
  width: 128,
  height: 128,
  rescale: 255
}

# Training stream: small batches, with random shifts and horizontal flips.
train_stream =
  common_opts
  |> Map.merge(%{
    path: "priv/chest_xray/train/",
    batch_size: 16,
    shape: {16, 1, 128, 128},
    width_shift_range: -100..100,
    height_shift_range: -100..100,
    horizontal_flip: true
  })
  |> ImageDataGenerator.stream()

# Evaluation stream: one oversized batch request and no augmentation.
test_stream =
  common_opts
  |> Map.merge(%{
    path: "priv/chest_xray/test/",
    batch_size: 99999,
    shape: {99999, 1, 128, 128}
  })
  |> ImageDataGenerator.stream()

# `model`, `fn_epoch` and `epochs` are expected to be defined by the caller.
optimizer = Axon.Optimizers.adam(0.001)

trained_model =
  model
  |> Axon.Loop.trainer(:categorical_cross_entropy, optimizer)
  |> Axon.Loop.metric(:accuracy)
  |> Axon.Loop.handle(:epoch_completed, fn_epoch)
  |> Axon.Loop.run(train_stream, epochs: epochs, compiler: EXLA)
defmodule ImageDataGenerator do
alias Evision, as: OpenCV
@doc """
Builds a lazy stream of `{images, labels}` batches from image folders on disk.

Every key of `class_map` names a sub-directory of `path`. Each file found there
is read, resized to `width` x `height`, and labelled with the integer that the
class maps to.

## Options (`args` is a map)

  * `:path`, `:class_map`, `:width`, `:height`, `:shape` - required
  * `:batch_size` - defaults to `1`; clamped to the number of images found
  * `:cache` - when truthy, the decoded images are kept in `:persistent_term`
  * `:open_flags` - forwarded to `imread!` as `flags:` when set
  * `:rescale` and the augmentation options are passed through untouched to
    `prepare_nx/3` and `transform/2`

Raises `KeyError` when a required option is missing, `ArgumentError` when no
image is found, and `MatchError` when the number of images is not a multiple
of the effective batch size.
"""
@spec stream(map()) :: Enumerable.t()
def stream(args) do
  # Fail fast on missing required options, even when the cache is warm.
  Enum.each([:path, :class_map, :width, :height, :shape], &Map.fetch!(args, &1))

  shape = args.shape
  use_cache = !!args[:cache]
  requested_batch_size = args[:batch_size] || 1

  # The key covers everything that influences the cached entries, including
  # the class map, because the labels are stored alongside the pixels.
  cache_key =
    {:cache_idg, Map.take(args, [:path, :class_map, :width, :height, :open_flags])}

  cached = if use_cache, do: :persistent_term.get(cache_key, nil)

  train =
    case cached do
      nil ->
        loaded = load_image_set(args)
        # Store under the SAME key that is read above, otherwise the cache never hits.
        if use_cache, do: :persistent_term.put(cache_key, loaded)
        loaded

      entries ->
        entries
    end

  total = length(train)

  if total == 0 do
    raise ArgumentError, "no images found under #{inspect(args.path)}"
  end

  batch_size = min(requested_batch_size, total)

  # Partial batches cannot be reshaped to `shape`, so they are rejected here.
  0 = rem(total, batch_size)

  # When the batch size was clamped and the leading dimension of `shape` was the
  # requested batch size, shrink it too so the reshape in prepare_nx/3 still fits.
  shape =
    if batch_size != requested_batch_size and is_tuple(shape) and tuple_size(shape) > 0 and
         elem(shape, 0) == requested_batch_size do
      put_elem(shape, 0, batch_size)
    else
      shape
    end

  stream_1(train, Map.merge(args, %{batch_size: batch_size, shape: shape}))
end

# Reads and resizes every image of every class directory into raw binaries.
defp load_image_set(args) do
  open_flags = args[:open_flags]

  Enum.flat_map(args.class_map, fn {name, label} ->
    "#{args.path}/#{name}/*"
    |> Path.wildcard()
    |> Enum.map(fn file ->
      # Only pass `flags:` when the caller actually supplied one.
      img =
        if open_flags do
          OpenCV.imread!(file, flags: open_flags)
        else
          OpenCV.imread!(file)
        end

      bin =
        img
        |> OpenCV.resize!([args.width, args.height])
        |> OpenCV.Mat.to_binary!()

      %{path: file, onehot: label, bin: bin}
    end)
  end)
end
@doc """
Turns an in-memory image set into a stream of `{x, y}` batches.

`train_set` is a list of maps with at least `:bin` (raw pixels) and `:onehot`
(integer label, encoded as one byte). `args.batch_size` must be a positive
integer. The set is reshuffled every time the stream is enumerated, each image
goes through `transform/2`, and each batch is converted by `prepare_nx/3`.
"""
@spec stream_1([map()], map()) :: Enumerable.t()
def stream_1(train_set, args) do
  Stream.resource(
    # Shuffle and split into batches once per enumeration, instead of
    # re-walking the whole list with length/1 and Enum.slice/3 for every batch.
    fn ->
      case Enum.shuffle(train_set) do
        [] -> []
        shuffled -> Enum.chunk_every(shuffled, args.batch_size)
      end
    end,
    fn
      [] ->
        {:halt, []}

      [batch | remaining] ->
        {images, labels} =
          batch
          |> Enum.map(fn %{bin: bin, onehot: onehot} ->
            {transform(bin, args), <<onehot::8>>}
          end)
          |> Enum.unzip()

        img_batch = IO.iodata_to_binary(images)
        label_batch = IO.iodata_to_binary(labels)

        {[prepare_nx(img_batch, label_batch, args)], remaining}
    end,
    fn _remaining -> :ok end
  )
end
@doc """
Converts raw image and label binaries into a pair of Nx tensors.

  * `img_batch` - concatenated unsigned 8-bit pixels, reshaped to `args.shape`
    and divided by `args.rescale` when that option is set
  * `label_batch` - one unsigned byte per sample, expanded to a one-hot style
    mask of shape `{args.batch_size, number_of_classes}`

The number of classes is derived from the integer labels in `args[:class_map]`
when present (highest label + 1) and is never smaller than 2, which is what
the two-class default produces.
"""
@spec prepare_nx(binary(), binary(), map()) :: {Nx.Tensor.t(), Nx.Tensor.t()}
def prepare_nx(img_batch, label_batch, args) do
  images =
    img_batch
    |> Nx.from_binary({:u, 8})
    |> Nx.reshape(args.shape)

  # Kept outside the pipe: the original note says `case` in a pipe does not compile with defn.
  x_train =
    if args[:rescale] do
      Nx.divide(images, args.rescale)
    else
      images
    end

  y_train =
    label_batch
    |> Nx.from_binary({:u, 8})
    |> Nx.reshape({args.batch_size, 1})
    # Broadcasting {batch, 1} against {classes} yields a {batch, classes} mask.
    |> Nx.equal(Nx.tensor(class_indices(args)))

  {x_train, y_train}
end

# Lists the label indices 0..highest_label, with at least the two classes 0 and 1.
defp class_indices(args) do
  labels =
    case args[:class_map] do
      nil -> []
      class_map -> for {_name, label} <- class_map, is_integer(label), do: label
    end

  highest = Enum.max(labels, fn -> 1 end)
  Enum.to_list(0..max(highest, 1))
end
@doc """
Applies the random augmentations enabled in `args` to one raw image binary.

`bin` holds unsigned 8-bit, single-channel pixels of an `args.width` x
`args.height` image. Supported options:

  * `:horizontal_flip` / `:vertical_flip` - flip with probability 1/2 when truthy
  * `:width_shift_range` / `:height_shift_range` - an enumerable (e.g. a range)
    of shifts expressed in thousandths of the image size; one value is drawn
    at random per image

Returns the transformed pixels as a binary. Zoom, shear and brightness are not
implemented yet.
"""
@spec transform(binary(), map()) :: binary()
def transform(bin, args) do
  # Mat.from_binary!/5 takes rows (height) before cols (width).
  img = OpenCV.Mat.from_binary!(bin, {:u, 8}, args.height, args.width, 1)

  img =
    if !!args[:horizontal_flip] and :rand.uniform(2) == 1 do
      # Flip code 1 mirrors around the vertical axis.
      OpenCV.flip!(img, 1)
    else
      img
    end

  img =
    if !!args[:vertical_flip] and :rand.uniform(2) == 1 do
      # Flip code 0 mirrors around the horizontal axis.
      OpenCV.flip!(img, 0)
    else
      img
    end

  img =
    if args[:width_shift_range] do
      factor = Enum.random(args.width_shift_range) / 1000
      opencv_shift_width(img, factor)
    else
      img
    end

  img =
    if args[:height_shift_range] do
      factor = Enum.random(args.height_shift_range) / 1000
      opencv_shift_height(img, factor)
    else
      img
    end

  # TODO: zoom
  # TODO: shear
  # TODO: brightness
  OpenCV.Mat.to_binary!(img)
end
@doc """
Shifts `img` horizontally by `factor` times its width (negative shifts left).

The output keeps the input size; uncovered pixels are filled by `warpAffine!`
with its default border handling.
"""
def opencv_shift_width(img, factor) do
  # A 2-D Mat reports its shape as {rows, cols}, i.e. {height, width}.
  {height, width} = OpenCV.Mat.shape!(img)
  # Kernel.round/1 also accepts an integer product (e.g. factor == 0).
  to_shift = round(width * factor)
  # 2x3 affine matrix: identity plus a translation along x.
  m = Evision.Nx.to_mat!(Nx.tensor([[1, 0, to_shift], [0, 1, 0]], type: {:f, 32}))
  # dsize is given as [width, height].
  Evision.warpAffine!(img, m, [width, height])
end
@doc """
Shifts `img` vertically by `factor` times its height (negative shifts up).

The output keeps the input size; uncovered pixels are filled by `warpAffine!`
with its default border handling.
"""
def opencv_shift_height(img, factor) do
  # A 2-D Mat reports its shape as {rows, cols}, i.e. {height, width}.
  {height, width} = OpenCV.Mat.shape!(img)
  # The vertical shift is relative to the HEIGHT, not the width.
  to_shift = round(height * factor)
  # 2x3 affine matrix: identity plus a translation along y.
  m = Evision.Nx.to_mat!(Nx.tensor([[1, 0, 0], [0, 1, to_shift]], type: {:f, 32}))
  # dsize is given as [width, height].
  Evision.warpAffine!(img, m, [width, height])
end
@doc """
Work in progress: scales `img` by `1.0 + factor` and returns the resized image.

The result is NOT cropped or translated back to the original size yet, so it
cannot be used as a drop-in augmentation.
"""
def opencv_zoom(img, factor) do
  scale = 1.0 + factor
  # A 2-D Mat reports its shape as {rows, cols}, i.e. {height, width}.
  {height, width} = OpenCV.Mat.shape!(img)
  new_width = trunc(width * scale)
  new_height = trunc(height * scale)

  # TODO: crop/translate the enlarged image back to width x height. Python sketch
  # from the original notes (example: 529x550 zoomed by 1.5 becomes 794x825):
  #   translation_matrix = np.float32([[1, 0, 70], [0, 1, 110]])
  #   img_translation = cv2.warpAffine(img, translation_matrix, (num_cols, num_rows))
  # dsize is given as [width, height].
  OpenCV.resize!(img, [new_width, new_height])
end
end
I feel like the ImageDataGenerator could perhaps be a separate project and use Evision as a dependency (maybe an optional one). Potentially, it could also use other image processing backends (so that users can select which one to do the actual job) in this way.
For example, if a user is already using Evision in their project, then they can use Evision as the backend. If they're not using Evision and there is some other tiny image processing library that could handle all needed operations, then they can choose to use the lightweight one.
WDYT?
Yeah, a separate project works. Also, what about transformations such as shear, zoom, shift, and brightness: would those belong in a separate project or here?
EDIT: Note that this is generally accomplished by imutils in Python, which is a utility library for OpenCV.
Yeah, I think a library similar to imutils could be the way to go.
IMHO, although these image processing/manipulating operations/functions could belong here, it would be easier to maintain and more flexible if it was a separate project.
You might find Image useful for the functions you referenced; it is built on Vix, the Elixir bindings for libvips. While Image might not have all the convenience functions you're after, libvips certainly does, and they are all exposed in Vix/Image.
I've just added experimental support for eVision interoperability. This is basically zero-copy data exchange between eVision/Nx/Image(Vix).
I'm closing this issue as I believe it would be better to implement this (and other related features) in another library.