keras-cv
RandAugment has a memory leak
RandAugment has a memory leak. I've reproduced this by piping a tf.data.Dataset through a RandAugment layer.
Do you have a gist to reproduce this?
Working on it, but I think simply applying the layer in a tf.data.Dataset.map for any dataset can trigger it.
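Roughly this pattern (a minimal sketch; the layer arguments here are just illustrative):

import tensorflow as tf
from keras_cv.layers import preprocessing

# Map RandAugment over an arbitrary dataset and iterate it repeatedly;
# resident memory grows across passes.
layer = preprocessing.RandAugment(value_range=(0, 255))
images = tf.random.uniform((16, 224, 224, 3), maxval=255)
dataset = tf.data.Dataset.from_tensor_slices(images).batch(4)
dataset = dataset.map(layer, num_parallel_calls=tf.data.AUTOTUNE)
for _ in range(100):
    for batch in dataset:
        pass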
@bhack
"""Tests for memory_leak."""
import tensorflow as tf
from absl.testing import parameterized
from keras_cv import core
from keras_cv.layers import preprocessing
from keras_cv.layers import regularization
def cleanup_memory():
    import gc, sys

    # _colab_autocomplete_locals_ can hold references unnecessarily, so delete it.
    # https://cs.corp.google.com/search/?q=_colab_autocomplete_locals_
    sys.modules["__main__"]._colab_autocomplete_locals_ = {}
    gc.collect()


def report_memory():
    import os, psutil

    # Report this process's resident set size in MB.
    mem_bytes = psutil.Process(os.getpid()).memory_info().rss
    mem_mb = mem_bytes / 1024.0 / 1024.0
    print("Memory %.2f MB" % mem_mb)
    return mem_mb

class MemoryLeakTest(tf.test.TestCase, parameterized.TestCase):
    @parameterized.named_parameters(
        # ("AutoContrast", preprocessing.AutoContrast, {
        #     "value_range": (0, 255)
        # }),
        # ("ChannelShuffle", preprocessing.ChannelShuffle, {
        #     "seed": 1
        # }),
        # ("CutMix", preprocessing.CutMix, {
        #     "seed": 1
        # }),
        # ("Equalization", preprocessing.Equalization, {
        #     "value_range": (0, 255)
        # }),
        # ("Grayscale", preprocessing.Grayscale, {}),
        # ("GridMask", preprocessing.GridMask, {
        #     "seed": 1
        # }),
        # ("MixUp", preprocessing.MixUp, {
        #     "seed": 1
        # }),
        # (
        #     "RandomChannelShift",
        #     preprocessing.RandomChannelShift,
        #     {
        #         "value_range": (0, 255),
        #         "factor": 0.5
        #     },
        # ),
        # (
        #     "Posterization",
        #     preprocessing.Posterization,
        #     {
        #         "bits": 3,
        #         "value_range": (0, 255)
        #     },
        # ),
        # (
        #     "RandomColorDegeneration",
        #     preprocessing.RandomColorDegeneration,
        #     {
        #         "factor": 0.5,
        #         "seed": 1
        #     },
        # ),
        # (
        #     "RandomCutout",
        #     preprocessing.RandomCutout,
        #     {
        #         "height_factor": 0.2,
        #         "width_factor": 0.2,
        #         "seed": 1
        #     },
        # ),
        # (
        #     "RandomHue",
        #     preprocessing.RandomHue,
        #     {
        #         "factor": 0.5,
        #         "value_range": (0, 255),
        #         "seed": 1
        #     },
        # ),
        # (
        #     "RandomSaturation",
        #     preprocessing.RandomSaturation,
        #     {
        #         "factor": 0.5,
        #         "seed": 1
        #     },
        # ),
        # (
        #     "RandomSharpness",
        #     preprocessing.RandomSharpness,
        #     {
        #         "factor": 0.5,
        #         "value_range": (0, 255),
        #         "seed": 1
        #     },
        # ),
        # (
        #     "RandomShear",
        #     preprocessing.RandomShear,
        #     {
# "x_factor": 0.3,
# "x_factor": 0.3,
# "seed": 1
# },
# ),
# ("Solarization", preprocessing.Solarization, {
# "value_range": (0, 255)
# }),
(
"RandAugment",
preprocessing.RandAugment,
{
"value_range": (0, 255),
"magnitude": 0.5,
"augmentations_per_image": 3,
"rate": 0.3,
"magnitude_stddev": 0.1,
},
),
# (
# "RandomAugmentationPipeline",
# preprocessing.RandomAugmentationPipeline,
# {
# "layers": [],
# "augmentations_per_image": 1,
# "rate": 1.0
# },
# ),
# (
# "RandomChoice",
# preprocessing.RandomChoice,
# {
# "layers": [],
# "seed": 3,
# "auto_vectorize": False
# },
# ),
# (
# "RandomColorJitter",
# preprocessing.RandomColorJitter,
# {
# "value_range": (0, 255),
# "brightness_factor": (-0.2, 0.5),
# "contrast_factor": (0.5, 0.9),
# "saturation_factor": (0.5, 0.9),
# "hue_factor": (0.5, 0.9),
# "seed": 1,
# },
# ),
# (
# "DropBlock2D",
# regularization.DropBlock2D,
# {
# "rate": 0.1,
# "block_size": (7, 7),
# "seed": 1234
# },
# ),
# (
# "StochasticDepth",
# regularization.StochasticDepth,
# {
# "rate": 0.1
# },
# ),
# (
# "DropPath",
# regularization.DropPath,
# {
# "rate": 0.2,
# },
# ),
# (
# "MaybeApply",
# preprocessing.MaybeApply,
# {
# "rate": 0.5,
# "layer": None,
# "seed": 1234,
# },
# ),
# (
# "RandomJpegQuality",
# preprocessing.RandomJpegQuality,
# {
# "factor": (75, 100)
# },
# ),
)
    def test_leaks_memory(self, layer_cls, init_args):
        layer = layer_cls(**init_args)

        @tf.function
        def augment(x, y):
            inputs = {"images": x, "labels": y}
            inputs = layer(inputs)
            return inputs["images"], inputs["labels"]

        x = [tf.random.uniform((512, 512, 3)) for _ in range(100)]
        y = [tf.random.uniform(()) for _ in range(100)]
        dataset = tf.data.Dataset.from_tensor_slices((x, y))
        dataset = dataset.batch(64)
        dataset = dataset.map(augment, num_parallel_calls=tf.data.AUTOTUNE)

        trials = 100
        for trial in range(trials):
            # Drain the dataset so every batch passes through the layer.
            for (x, y) in iter(dataset):
                pass
            cleanup_memory()
            result = report_memory()
            if trial == 0:
                result_at_0 = result
            # Fail as soon as RSS grows more than 10% above the first trial.
            self.assertGreater(result_at_0 * 1.1, result, msg=f"Trial={trial}")
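The paste is missing the usual entry point; with the standard tf.test main guard added (not in the original paste), the file runs directly:

if __name__ == "__main__":
    tf.test.main()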
Are you sure that it is related to RandAugment?
Isn't this simply a duplicate of https://github.com/tensorflow/tensorflow/issues/43969?
Thanks for pointing this out. I'm not sure; I'm only sure that there is a memory leak.
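One way to narrow it down would be to call the layer eagerly in a loop, with no tf.data and no tf.function, and watch RSS. This is only a sketch (same layer arguments as the test above); if memory still grows here, the leak points at the layer rather than at dataset mapping or function tracing:

import os

import psutil
import tensorflow as tf
from keras_cv.layers import preprocessing

layer = preprocessing.RandAugment(
    value_range=(0, 255),
    magnitude=0.5,
    augmentations_per_image=3,
    rate=0.3,
    magnitude_stddev=0.1,
)
images = tf.random.uniform((64, 512, 512, 3), maxval=255)
process = psutil.Process(os.getpid())
for trial in range(100):
    layer(images)  # eager call, no tracing cache involved
    print("Trial %d: %.2f MB" % (trial, process.memory_info().rss / 1024**2))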