heudiconv

Skip images that have already been converted instead of throwing an error

Open ChristianHinge opened this issue 11 months ago • 1 comment

Summary

When I run heudiconv twice on the exact same source data with overwrite=False, the second run fails with:

RuntimeError: was asked to convert into ... but destination already exists

Is this intended? I would love for heudiconv to skip everything it has already preprocessed.

Here is my heuristic script.

from __future__ import annotations

import logging
from typing import Optional
from heudiconv.utils import SeqInfo
import pandas as pd
import os
# Logger registered under the name "heudiconv"; presumably chosen so that
# messages from this heuristic go through heudiconv's own logging setup
# (TODO confirm).
lgr = logging.getLogger("heudiconv")

def create_key(
    template: Optional[str],
    outtype: tuple[str, ...] = ("nii.gz",),
    annotation_classes: None = None,
) -> tuple[str, tuple[str, ...], None]:
    """Bundle an output-path template into a conversion key tuple.

    Args:
        template: Format string describing the output path. Must be a
            non-empty string.
        outtype: Output type names stored in the key; defaults to
            ``("nii.gz",)``.
        annotation_classes: Passed through unchanged as the third element.

    Returns:
        The tuple ``(template, outtype, annotation_classes)``.

    Raises:
        ValueError: If ``template`` is ``None`` or an empty string.
    """
    # A single truthiness check covers both None and "".
    if template:
        return (template, outtype, annotation_classes)
    raise ValueError("Template must be a valid format string")

def name_to_key(names):
    """Map each recognised series name to its conversion key.

    Names starting with ``"pt"`` get a template under ``pet/``; names
    starting with ``"ct"`` or ``"mr"`` get a template under ``anat/``.
    ``None`` entries and names with any other prefix are left out of the
    result.

    Args:
        names: Iterable of series names (strings) and/or ``None``.

    Returns:
        Dict from series name to the key built by ``create_key``.
    """
    pet_prefix = 'sub-{subject}/{session}/pet/sub-{subject}_{session}_'
    anat_prefix = 'sub-{subject}/{session}/anat/sub-{subject}_{session}_'

    keys = {}
    for name in names:
        if name is None:
            continue
        if name.startswith("pt"):
            prefix = pet_prefix
        elif name.startswith(("ct", "mr")):
            prefix = anat_prefix
        else:
            # Unrecognised prefix: no key is produced for this name.
            continue
        keys[name] = create_key(prefix + name)
    return keys

def infotodict_base(
    seqinfo: list[SeqInfo],
    conversion_f
) -> dict[tuple[str, tuple[str, ...], None], list[str]]:
    """Assign series to conversion keys using a pickled lookup table.

    The pickle is expected to hold a pandas DataFrame with the columns
    ``SeriesInstanceUID`` and ``SERIES_NAME``. Rows whose ``SERIES_NAME``
    is missing are ignored.

    Args:
        seqinfo: Series descriptions; each item's ``series_uid`` is looked
            up in the table and its ``series_id`` is recorded.
        conversion_f: Location of the pickled table, in any form accepted
            by ``pd.read_pickle``.

    Returns:
        Dict from conversion key to the list of ``series_id`` values
        assigned to it. Every key produced by ``name_to_key`` is present,
        possibly with an empty list.
    """
    # NOTE(review): unpickling can execute arbitrary code; only point this
    # at files from a trusted source.
    pkl = pd.read_pickle(conversion_f)
    pkl = pkl[~pd.isna(pkl.SERIES_NAME)]
    conversion = dict(zip(pkl.SeriesInstanceUID, pkl.SERIES_NAME))
    names_to_keys = name_to_key(set(conversion.values()))
    info: dict[tuple[str, tuple[str, ...], None], list[str]] = {
        k: [] for k in names_to_keys.values()
    }
    for s in seqinfo:
        series_name = conversion.get(s.series_uid)
        if series_name is None:
            # Series is not listed in the table: nothing to convert.
            continue
        key = names_to_keys.get(series_name)
        if key is None:
            # name_to_key only produces keys for the prefixes it knows, so
            # a listed name may have no key. Skip it instead of raising
            # KeyError and aborting the whole run.
            lgr.warning(
                "No conversion key for series name %r (series %s); skipping",
                series_name,
                s.series_id,
            )
            continue
        info[key].append(s.series_id)

    return info

def infotodict(
    seqinfo
) -> dict[tuple[str, tuple[str, ...], None], list[str]]:
    """Map series to conversion keys using the fixed table ``pkl_file.pkl``.

    Delegates to ``infotodict_base`` with a hard-coded relative path for
    the pickled lookup table.

    Args:
        seqinfo: Series descriptions, forwarded unchanged.

    Returns:
        Whatever ``infotodict_base`` returns for ``seqinfo``.
    """
    conversion_table = "pkl_file.pkl"
    return infotodict_base(seqinfo, conversion_f=conversion_table)

Platform details:

Choose one:

  • [x] Local environment - python3.11
  • [ ] Container
  • Heudiconv version: 1.1.0

ChristianHinge avatar Mar 14 '24 20:03 ChristianHinge