
RandAugment has a memory leak

Open LukeWood opened this issue 3 years ago • 5 comments

RandAugment has a memory leak. I've reproduced this by piping a tf.data.Dataset through a RandAugment layer.
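
The pattern looks roughly like this (a minimal sketch, not the exact code I ran; the layer arguments are placeholders, full repro below):

import tensorflow as tf
import keras_cv

# Minimal sketch of the pattern; value_range is the only required argument.
layer = keras_cv.layers.RandAugment(value_range=(0, 255))

images = tf.random.uniform((8, 512, 512, 3), maxval=255)
dataset = tf.data.Dataset.from_tensor_slices(images).batch(4)
dataset = dataset.map(layer, num_parallel_calls=tf.data.AUTOTUNE)

for _ in range(100):  # repeated passes; process RSS keeps growing
  for batch in dataset:
    pass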

LukeWood avatar Jun 27 '22 19:06 LukeWood

Do you have a gist to reproduce this?

bhack avatar Jun 27 '22 19:06 bhack

Working on it, but I think applying it in a tf.data.Dataset map over any dataset can trigger it.

LukeWood avatar Jun 27 '22 22:06 LukeWood

@bhack

"""Tests for memory_leak."""

import gc
import os
import sys

import psutil
import tensorflow as tf
from absl.testing import parameterized

from keras_cv.layers import preprocessing
from keras_cv.layers import regularization

def cleanup_memory():
  # _colab_autocomplete_locals_ can hold references unnecessarily, so clear it.
  # https://cs.corp.google.com/search/?q=_colab_autocomplete_locals_
  sys.modules['__main__']._colab_autocomplete_locals_ = {}
  gc.collect()

def report_memory():
  """Prints and returns the current process RSS in MB."""
  mem_bytes = psutil.Process(os.getpid()).memory_info().rss
  mem_mb = mem_bytes / 1024.0 / 1024.0
  print('Memory: %.2f MB' % mem_mb)
  return mem_mb

class MemoryLeakTest(tf.test.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      # ("AutoContrast", preprocessing.AutoContrast, {
      #     "value_range": (0, 255)
      # }),
      # ("ChannelShuffle", preprocessing.ChannelShuffle, {
      #     "seed": 1
      # }),
      # ("CutMix", preprocessing.CutMix, {
      #     "seed": 1
      # }),
      # ("Equalization", preprocessing.Equalization, {
      #     "value_range": (0, 255)
      # }),
      # ("Grayscale", preprocessing.Grayscale, {}),
      # ("GridMask", preprocessing.GridMask, {
      #     "seed": 1
      # }),
      # ("MixUp", preprocessing.MixUp, {
      #     "seed": 1
      # }),
      # (
      #     "RandomChannelShift",
      #     preprocessing.RandomChannelShift,
      #     {
      #         "value_range": (0, 255),
      #         "factor": 0.5
      #     },
      # ),
      # (
      #     "Posterization",
      #     preprocessing.Posterization,
      #     {
      #         "bits": 3,
      #         "value_range": (0, 255)
      #     },
      # ),
      # (
      #     "RandomColorDegeneration",
      #     preprocessing.RandomColorDegeneration,
      #     {
      #         "factor": 0.5,
      #         "seed": 1
      #     },
      # ),
      # (
      #     "RandomCutout",
      #     preprocessing.RandomCutout,
      #     {
      #         "height_factor": 0.2,
      #         "width_factor": 0.2,
      #         "seed": 1
      #     },
      # ),
      # (
      #     "RandomHue",
      #     preprocessing.RandomHue,
      #     {
      #         "factor": 0.5,
      #         "value_range": (0, 255),
      #         "seed": 1
      #     },
      # ),
      # (
      #     "RandomSaturation",
      #     preprocessing.RandomSaturation,
      #     {
      #         "factor": 0.5,
      #         "seed": 1
      #     },
      # ),
      # (
      #     "RandomSharpness",
      #     preprocessing.RandomSharpness,
      #     {
      #         "factor": 0.5,
      #         "value_range": (0, 255),
      #         "seed": 1
      #     },
      # ),
      # (
      #     "RandomShear",
      #     preprocessing.RandomShear,
      #     {
      #         "x_factor": 0.3,
      #         "x_factor": 0.3,
      #         "seed": 1
      #     },
      # ),
      # ("Solarization", preprocessing.Solarization, {
      #     "value_range": (0, 255)
      # }),
      (
          "RandAugment",
          preprocessing.RandAugment,
          {
              "value_range": (0, 255),
              "magnitude": 0.5,
              "augmentations_per_image": 3,
              "rate": 0.3,
              "magnitude_stddev": 0.1,
          },
      ),
      # (
      #     "RandomAugmentationPipeline",
      #     preprocessing.RandomAugmentationPipeline,
      #     {
      #         "layers": [],
      #         "augmentations_per_image": 1,
      #         "rate": 1.0
      #     },
      # ),
      # (
      #     "RandomChoice",
      #     preprocessing.RandomChoice,
      #     {
      #         "layers": [],
      #         "seed": 3,
      #         "auto_vectorize": False
      #     },
      # ),
      # (
      #     "RandomColorJitter",
      #     preprocessing.RandomColorJitter,
      #     {
      #         "value_range": (0, 255),
      #         "brightness_factor": (-0.2, 0.5),
      #         "contrast_factor": (0.5, 0.9),
      #         "saturation_factor": (0.5, 0.9),
      #         "hue_factor": (0.5, 0.9),
      #         "seed": 1,
      #     },
      # ),
      # (
      #     "DropBlock2D",
      #     regularization.DropBlock2D,
      #     {
      #         "rate": 0.1,
      #         "block_size": (7, 7),
      #         "seed": 1234
      #     },
      # ),
      # (
      #     "StochasticDepth",
      #     regularization.StochasticDepth,
      #     {
      #         "rate": 0.1
      #     },
      # ),
      # (
      #     "DropPath",
      #     regularization.DropPath,
      #     {
      #         "rate": 0.2,
      #     },
      # ),
      # (
      #     "MaybeApply",
      #     preprocessing.MaybeApply,
      #     {
      #         "rate": 0.5,
      #         "layer": None,
      #         "seed": 1234,
      #     },
      # ),
      # (
      #     "RandomJpegQuality",
      #     preprocessing.RandomJpegQuality,
      #     {
      #         "factor": (75, 100)
      #     },
      # ),
  )
  def test_does_not_leak_memory(self, layer_cls, init_args):
    layer = layer_cls(**init_args)

    @tf.function
    def augment(x, y):
      # keras-cv preprocessing layers take and return a dict with
      # "images" and "labels" keys.
      inputs = {"images": x, "labels": y}
      inputs = layer(inputs)
      return inputs["images"], inputs["labels"]

    x = [tf.random.uniform((512, 512, 3)) for _ in range(100)]
    y = [tf.random.uniform(()) for _ in range(100)]

    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.batch(64)
    dataset = dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)

    trials = 100
    result_at_0 = None
    for trial in range(trials):
      # Drain the full dataset once per trial.
      for _ in dataset:
        pass
      cleanup_memory()
      result = report_memory()
      if trial == 0:
        result_at_0 = result

      # Fail as soon as memory grows more than 10% above the first trial.
      self.assertGreater(result_at_0 * 1.1, result, msg=f"Trial={trial}")
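
One caveat with the RSS check above: psutil reports whole-process RSS, which includes TensorFlow's native (C++) allocations, so it can't distinguish a Python-object leak from a native one. A sketch of a Python-level cross-check using the standard-library tracemalloc (illustrative only, not part of the test above):

import tracemalloc

tracemalloc.start()
baseline = tracemalloc.take_snapshot()

# ... run one or more passes over the dataset here ...

snapshot = tracemalloc.take_snapshot()
for stat in snapshot.compare_to(baseline, 'lineno')[:10]:
  print(stat)  # top Python-level allocation growth sites
tracemalloc.stop()

If tracemalloc shows no growth while RSS keeps climbing, the leak is likely on the native side.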

LukeWood avatar Jun 28 '22 20:06 LukeWood

Are you sure that it is related to RandAugment?

Isn't this simply a duplicate of https://github.com/tensorflow/tensorflow/issues/43969?
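
If it is that TF-side issue, the layer should be irrelevant: the same loop with an identity map should leak just as much. A minimal control sketch (shapes assumed from the repro above):

# Control: same pipeline shape, no keras-cv layer. If RSS still climbs
# across passes, the leak is in tf.data iteration itself, not RandAugment.
dataset = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform((8, 512, 512, 3))).batch(4)
dataset = dataset.map(lambda x: x, num_parallel_calls=tf.data.AUTOTUNE)

for _ in range(100):
  for batch in dataset:
    pass
  # measure RSS here, e.g. with report_memory() from the test above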

bhack avatar Jun 29 '22 17:06 bhack

Thanks for pointing this out. I'm not sure that it is; I'm only sure there is a memory leak.

LukeWood avatar Jun 29 '22 17:06 LukeWood