evision ImageDataGenerator would belong here?

I am not sure whether this belongs in Evision or in a separate project; it's quite simple and used all the time for Kaggle problems.

It follows the Keras ImageDataGenerator as closely as possible: https://github.com/keras-team/keras-preprocessing/blob/master/keras_preprocessing/image/image_data_generator.py

Missing

[ ] zoom
[ ] shear
[ ] brightness
[ ] rescale
[ ] rotation
[ ] others

Use it like this:

    train_stream = ImageDataGenerator.stream(%{
        path: "priv/chest_xray/train/",
        class_map: %{"NORMAL": 0, "PNEUMONIA": 1},
        cache: true,
        open_flags: OpenCV.cv_IMREAD_GRAYSCALE,
        width: 128,
        height: 128,
        rescale: 255,
        batch_size: 16,
        shape: {16, 1, 128, 128},
        width_shift_range: -100..100,
        height_shift_range: -100..100,
        horizontal_flip: true
    })

    test_stream = ImageDataGenerator.stream(%{
        path: "priv/chest_xray/test/",
        class_map: %{"NORMAL": 0, "PNEUMONIA": 1},
        cache: true,
        open_flags: OpenCV.cv_IMREAD_GRAYSCALE,
        width: 128,
        height: 128,
        rescale: 255,
        batch_size: 99999,
        shape: {99999, 1, 128, 128},
    })

    trained_model = model
        |> Axon.Loop.trainer(:categorical_cross_entropy, Axon.Optimizers.adam(0.001))
        |> Axon.Loop.metric(:accuracy)
        |> Axon.Loop.handle(:epoch_completed, fn_epoch)
        |> Axon.Loop.run(train_stream, epochs: epochs, compiler: EXLA)

defmodule ImageDataGenerator do
  @moduledoc """
  Streams shuffled, optionally augmented image batches as `{x, y}` Nx tensor
  pairs, loosely modelled on Keras' `ImageDataGenerator`.

  Images are read from `<path>/<class name>/*`, resized to `width` x `height`
  and held in memory as raw 8-bit binaries. Every batch is decoded, randomly
  augmented (flips and shifts) and packed into tensors on demand.

  Zoom, shear, brightness and rotation augmentation are not implemented yet.
  """

  alias Evision, as: OpenCV

  # Keys `stream/1` cannot work without; checked up front so a missing key
  # fails immediately instead of in the middle of the first batch.
  @required_keys [:path, :class_map, :width, :height, :shape]

  # Every argument that influences the decoded data set. `:class_map` is part
  # of the key because the stored labels depend on it.
  @cache_key_fields [:path, :class_map, :width, :height, :open_flags]

  @doc """
  Builds a stream of `{x, y}` batches from the images below `args.path`.

  `args` is a map with the following keys.

  Required:

    * `:path` - directory containing one sub-directory per class
    * `:class_map` - maps each class directory name to its integer label
    * `:width`, `:height` - size every image is resized to
    * `:shape` - tensor shape of one image batch; when `:batch_size` has to be
      reduced (see below) the leading dimension is replaced to match

  Optional:

    * `:batch_size` - images per batch, defaults to `1`; a value larger than
      the data set is reduced to the data set size
    * `:cache` - when truthy, the decoded data set is kept in
      `:persistent_term` and reused by later calls with the same
      path/class map/size/flags
    * `:open_flags` - passed to `imread!` as `flags:` when present
    * `:rescale` - divisor applied to the pixel values
    * `:horizontal_flip`, `:vertical_flip` - enable random flips
    * `:width_shift_range`, `:height_shift_range` - integer ranges in
      thousandths of the image size (`-100..100` means up to +/-10%)

  Raises `KeyError` when a required key is missing and `ArgumentError` when no
  images are found, when `:batch_size` is not a positive integer, or when the
  number of images is not a multiple of the batch size.
  """
  def stream(args) do
    Enum.each(@required_keys, &Map.fetch!(args, &1))

    requested = args[:batch_size] || 1

    unless is_integer(requested) and requested > 0 do
      raise ArgumentError,
            ":batch_size must be a positive integer, got: #{inspect(requested)}"
    end

    train = fetch_train_set(args)
    total = length(train)

    if total == 0 do
      raise ArgumentError,
            "no images found under #{inspect(args.path)} for classes " <>
              inspect(Enum.map(args.class_map, fn {name, _label} -> name end))
    end

    batch_size = min(requested, total)

    if rem(total, batch_size) != 0 do
      raise ArgumentError,
            "the number of images (#{total}) must be a multiple of the " <>
              "batch size (#{batch_size})"
    end

    # A clamped batch no longer fills the requested shape, so the reshape in
    # prepare_nx/3 could never succeed without adjusting the leading dimension.
    shape =
      if batch_size < requested do
        clamp_leading_dim(args.shape, batch_size)
      else
        args.shape
      end

    stream_1(train, Map.merge(args, %{batch_size: batch_size, shape: shape}))
  end

  @doc """
  Turns an already loaded `train_set` into a stream of `{x, y}` batches.

  `train_set` is a list of maps carrying at least `:bin` (raw pixel data) and
  `:onehot` (integer label). The set is reshuffled every time the stream is
  enumerated, then cut into chunks of `args.batch_size` samples; each chunk is
  augmented with `transform/2` and converted with `prepare_nx/3`.
  """
  def stream_1(train_set, args) do
    Stream.resource(
      fn -> train_set |> Enum.shuffle() |> Enum.chunk_every(args.batch_size) end,
      fn
        [] -> {:halt, []}
        [samples | rest] -> {[build_batch(samples, args)], rest}
      end,
      fn _remaining -> :ok end
    )
  end

  @doc """
  Converts concatenated image and label binaries into `{x_train, y_train}`.

  `img_batch` is read as unsigned 8-bit data, reshaped to `args.shape` and,
  when `args[:rescale]` is set, divided by it. `label_batch` holds one byte
  per sample and is expanded to one column per class by comparing it against
  the class indices `0..max_label`. `max_label` is taken from the integer
  values of `args[:class_map]` and is never less than `1`, so the result has
  two columns when no class map is supplied.
  """
  def prepare_nx(img_batch, label_batch, args) do
    pixels =
      img_batch
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape(args.shape)

    # TODO: case do in pipe dont compile with defn
    x_train = if args[:rescale], do: Nx.divide(pixels, args.rescale), else: pixels

    class_indices = Nx.tensor(Enum.to_list(0..max_label(args)))

    y_train =
      label_batch
      |> Nx.from_binary({:u, 8})
      |> Nx.reshape({args.batch_size, 1})
      |> Nx.equal(class_indices)

    {x_train, y_train}
  end

  @doc """
  Applies the random augmentations enabled in `args` to one raw image binary
  and returns the augmented image as a binary again.

  `bin` must contain `args.width * args.height * channels` unsigned bytes; the
  channel count is derived from the binary size.
  """
  def transform(bin, args) do
    channels = max(div(byte_size(bin), args.width * args.height), 1)

    # Evision's Mat.from_binary!/5 takes rows (height) before cols (width).
    bin
    |> OpenCV.Mat.from_binary!({:u, 8}, args.height, args.width, channels)
    |> maybe_flip(args[:horizontal_flip], 1)
    |> maybe_flip(args[:vertical_flip], 0)
    |> maybe_shift(args[:width_shift_range], &opencv_shift_width/2)
    |> maybe_shift(args[:height_shift_range], &opencv_shift_height/2)
    # todo zoom
    # todo shear
    # todo brightness
    |> OpenCV.Mat.to_binary!()
  end

  @doc """
  Translates `img` horizontally by `factor` times its width (rounded to whole
  pixels), keeping the image size. A negative `factor` shifts to the left.
  """
  def opencv_shift_width(img, factor) do
    {width, height} = image_size(img)
    translate(img, round(width * factor), 0, width, height)
  end

  @doc """
  Translates `img` vertically by `factor` times its height (rounded to whole
  pixels), keeping the image size. A negative `factor` shifts upwards.
  """
  def opencv_shift_height(img, factor) do
    {width, height} = image_size(img)
    translate(img, 0, round(height * factor), width, height)
  end

  @doc """
  Work in progress: scales `img` by `1.0 + factor` and returns the resized
  image. Cropping or padding back to the original size is not implemented, so
  this is not wired into `transform/2` yet.
  """
  def opencv_zoom(img, factor) do
    scale = 1.0 + factor
    {width, height} = image_size(img)

    # TODO: crop/pad the result back to {width, height}, e.g. by translating
    # with warpAffine as the shift helpers do.
    OpenCV.resize!(img, [trunc(width * scale), trunc(height * scale)])
  end

  # Returns the decoded data set, going through :persistent_term when caching
  # is requested. Reads and writes use the same key.
  defp fetch_train_set(args) do
    if args[:cache] do
      key = {:cache_idg, Map.take(args, @cache_key_fields)}

      case :persistent_term.get(key, nil) do
        nil ->
          train_set = load_train_set(args)
          :persistent_term.put(key, train_set)
          train_set

        train_set ->
          train_set
      end
    else
      load_train_set(args)
    end
  end

  # Reads and resizes every file below `<path>/<class name>/` and pairs the raw
  # pixels with the class label.
  defp load_train_set(args) do
    # Leave `flags:` out entirely when no flags were given rather than
    # passing `flags: nil`.
    imread_opts = if args[:open_flags], do: [flags: args.open_flags], else: []

    Enum.flat_map(args.class_map, fn {name, label} ->
      "#{args.path}/#{name}/*"
      |> Path.wildcard()
      |> Enum.map(fn file ->
        bin =
          file
          |> OpenCV.imread!(imread_opts)
          |> OpenCV.resize!([args.width, args.height])
          |> OpenCV.Mat.to_binary!()

        %{path: file, onehot: label, bin: bin}
      end)
    end)
  end

  # Augments every sample of one chunk and packs the chunk into tensors.
  # Binaries are collected as iodata and flattened once per batch.
  defp build_batch(samples, args) do
    {images, labels} =
      samples
      |> Enum.map(fn %{bin: bin, onehot: onehot} ->
        {transform(bin, args), <<onehot::8>>}
      end)
      |> Enum.unzip()

    prepare_nx(IO.iodata_to_binary(images), IO.iodata_to_binary(labels), args)
  end

  # Replaces the first dimension of a batch shape; anything that is not a
  # non-empty tuple is passed through untouched.
  defp clamp_leading_dim(shape, batch_size) when is_tuple(shape) and tuple_size(shape) > 0,
    do: put_elem(shape, 0, batch_size)

  defp clamp_leading_dim(shape, _batch_size), do: shape

  # Highest integer label in the class map, never below 1.
  defp max_label(args) do
    labels = for {_name, label} <- args[:class_map] || [], is_integer(label), do: label
    Enum.max([1 | labels])
  end

  # Flips with a probability of 1/2 when the augmentation is enabled.
  defp maybe_flip(img, enabled, flip_code) do
    if enabled && :rand.uniform(2) == 1 do
      OpenCV.flip!(img, flip_code)
    else
      img
    end
  end

  # Picks a random entry of `range` (thousandths of the image size) and shifts.
  defp maybe_shift(img, range, shift_fun) do
    if range do
      shift_fun.(img, Enum.random(range) / 1000)
    else
      img
    end
  end

  # Mat.shape!/1 is {rows, cols} for single-channel and {rows, cols, channels}
  # for multi-channel images; returns the OpenCV-style {width, height}.
  defp image_size(img) do
    case OpenCV.Mat.shape!(img) do
      {rows, cols} -> {cols, rows}
      {rows, cols, _channels} -> {cols, rows}
    end
  end

  # Applies the affine translation [[1, 0, dx], [0, 1, dy]] with an unchanged
  # output size.
  defp translate(img, dx, dy, width, height) do
    matrix = OpenCV.Nx.to_mat!(Nx.tensor([[1, 0, dx], [0, 1, dy]], type: {:f, 32}))
    OpenCV.warpAffine!(img, matrix, [width, height])
  end
end

Mar 23 '22 13:03 vans163

I feel like the ImageDataGenerator could perhaps be a separate project and use Evision as a dependency (maybe an optional one). Potentially, it could also use other image processing backends (so that users can select which one to do the actual job) in this way.

For example, if a user is already using Evision in their project, then they can use Evision as the backend. If they're not using Evision and there is some other tiny image processing library that could handle all needed operations, then they can choose to use the lightweight one.

WDYT?

Mar 23 '22 18:03 cocoa-xu

Yeah, a separate project works. Also, what about transformations such as shear, zoom, shift and brightness: would those belong in a separate project too, or here?

EDIT: Note this is generally accomplished by imutils in python which is a utility library for opencv.

Mar 23 '22 22:03 vans163

Yeah, I think a library similar to imutils could be the way to go.

IMHO, although these image processing/manipulating operations/functions could belong here, it would be easier to maintain and more flexible if it was a separate project.

Mar 24 '22 09:03 cocoa-xu

You might find Image useful for the functions you referenced; it is based on Vix, the Elixir bindings for libvips. While Image might not have all the convenience functions you're after, libvips certainly does, and they are all exposed in Vix/Image.

I've just added experimental support for eVision interoperability. This is basically zero-copy data exchange between eVision/Nx/Image(Vix).

Sep 24 '22 22:09 kipcole9

I'm closing this issue as I believe it would be better to implement this (and other related features) in another library.

Oct 15 '22 04:10 cocoa-xu

evision evision copied to clipboard

ImageDataGenerator would belong here?

evision
evision copied to clipboard