streaming icon indicating copy to clipboard operation
streaming copied to clipboard

Clean up garbage files

Open cassanof opened this issue 7 months ago • 1 comment

We really like the streaming library. The only issue is that it leaves garbage around the file system after every run, and therefore we adopted this ugly hack:

# Total number of attempts made to construct the dataset before giving up.
MAX_RETRIES = 2

# Path prefixes under which the cleanup helper is permitted to fall back to
# `sudo rm -rf`; anything outside of these is refused.
SUDO_RM_ALLOWED_DIR_PREFIXES = [
    "/tmp",
    "/dev/shm",
    "/data",
    "/scratch",
    "/mnt/localssd",
    "/mnt/nvme",
]
def create_streaming_dataset_with_local_cleanup(
    streams: List[Stream],
    batch_size: int,
    replication: int,
    shuffle: bool,
):
    """Build a ``StreamingDataset``, wiping local leftovers and retrying on failure.

    Construction is attempted up to ``MAX_RETRIES`` times. After each failed
    attempt except the last, the following paths are deleted before retrying:
    every ``stream.local``, ``/tmp/streaming``, and every entry in ``/dev/shm``
    whose name starts with ``00``.

    Args:
        streams: Streams forwarded to ``StreamingDataset``; each stream's
            ``local`` path is removed during cleanup.
        batch_size: Forwarded to ``StreamingDataset``.
        replication: Forwarded to ``StreamingDataset``.
        shuffle: Forwarded to ``StreamingDataset``.

    Returns:
        The successfully constructed ``StreamingDataset``.

    Raises:
        Exception: Whatever ``StreamingDataset`` raised on the final attempt.
        AssertionError: If a path that needs the ``sudo`` fallback is not
            located under one of ``SUDO_RM_ALLOWED_DIR_PREFIXES``.
    """
    # Function-scope import so this block does not depend on the module header.
    import subprocess

    def is_sudo_deletable(path: str) -> bool:
        """Return True if ``path`` is an allowed prefix or lies beneath one."""
        # normpath collapses ".." so "/tmp/../etc" cannot sneak past the check;
        # the trailing separator stops "/tmpfoo" from matching "/tmp".
        normalized = os.path.normpath(path)
        for prefix in SUDO_RM_ALLOWED_DIR_PREFIXES:
            root = os.path.normpath(prefix)
            if normalized == root or normalized.startswith(root.rstrip(os.sep) + os.sep):
                return True
        return False

    def delete_dir_sudo_if_no_perms(path: str):
        """Delete ``path`` (directory or file), escalating to ``sudo rm -rf`` on failure."""
        if not os.path.exists(path):
            return

        try:
            shutil.rmtree(path)
            return
        except NotADirectoryError as rmtree_err:
            # `path` is a plain file: remove it directly, and stop here when
            # that works instead of falling through to the sudo fallback.
            failure = rmtree_err
            try:
                os.remove(path)
                return
            except OSError as remove_err:
                failure = remove_err  # try sudo rm below
        except Exception as rmtree_err:
            # Copy to a separate name: the `as` target is unbound once the
            # handler exits, but the message below still needs the error.
            failure = rmtree_err

        logger.warning(f"Failed to delete directory {path}, retrying with sudo... {failure}")
        # Explicit raise instead of `assert` so the guard survives `python -O`.
        if not is_sudo_deletable(path):
            raise AssertionError(f"Directory {path} is not a valid directory to sudo delete")

        # Argument list (no shell) so spaces/metacharacters in the path are
        # passed through literally; "--" ends option parsing for rm.
        command = ["sudo", "rm", "-rf", "--", os.path.normpath(path)]
        try:
            completed = subprocess.run(command, check=False)
        except OSError as run_err:
            # e.g. sudo is not installed; cleanup stays best-effort.
            logger.warning(f"Could not run sudo rm for {path}: {run_err}")
            return
        if completed.returncode != 0:
            logger.warning(f"sudo rm -rf {path} exited with status {completed.returncode}")

    for attempt in range(MAX_RETRIES):
        try:
            dataset = StreamingDataset(
                streams=streams,
                predownload=None,
                shuffle=shuffle,
                batching_method="per_stream",
                batch_size=batch_size,
                replication=replication,
                download_timeout=300,
            )
            break
        except Exception as e:
            if attempt == MAX_RETRIES - 1:
                raise  # bare raise keeps the original traceback
            logger.warning(f"Failed to create streaming dataset, cleaning up local env and retrying... {e}")
            for stream in streams:
                # Skip streams without a local path rather than failing cleanup.
                if stream.local:
                    delete_dir_sudo_if_no_perms(stream.local)
            # delete /tmp/streaming
            delete_dir_sudo_if_no_perms("/tmp/streaming")
            # delete all files in /dev/shm/00*; the directory may not exist
            # on every platform, so check before listing it.
            if os.path.isdir("/dev/shm"):
                for entry in os.listdir("/dev/shm"):
                    if entry.startswith("00"):
                        delete_dir_sudo_if_no_perms(f"/dev/shm/{entry}")

    return dataset

Would be great to fix.

cassanof avatar Apr 02 '25 21:04 cassanof