MedNeXt icon indicating copy to clipboard operation
MedNeXt copied to clipboard

json file creation

Open amaryam333 opened this issue 7 months ago • 1 comments

How can we create the JSON file for our dataset? I have read the details in the documentation, but the code file for it is not found.

amaryam333 avatar May 21 '25 09:05 amaryam333

Sorry this is coming so late. Here is an example of creating the MedNeXt `dataset.json` (nnU-Net v1 style) for BraTS:

Source: nnunet_mednext/dataset_conversion/utils.py

import os
import json
import numpy as np
from typing import Tuple

# Paths and setup variables
# NOTE(review): these paths are Kaggle-specific — point nnUNet_raw_data_base
# at your own raw-data folder before running.
output_file = '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw_data/Task2023_BraTS/dataset.json'
imagesTr = '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw_data/Task2023_BraTS/imagesTr'
imagesTs = '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw_data/Task2023_BraTS/imagesTs'
labelsTr = '/kaggle/working/nnUNet_raw_data_base/nnUNet_raw_data/Task2023_BraTS/labelsTr'
# Modality names in channel order; index position becomes the key in the
# "modality" section of dataset.json (presumably BraTS MRI sequences — confirm).
modalities = ("t1c", "t1n", "t2f", "t2w")
# Segmentation label map: integer voxel value -> region name.
labels = {0 : "background", 1 : "TC", 2 : "WT", 3 : "ET"}
dataset_name = 'Task2023_BraTS'

def save_json(obj, file, sort_keys=True):
    """Serialize *obj* to *file* as pretty-printed JSON (4-space indent).

    Keys are sorted alphabetically unless sort_keys is False.
    """
    with open(file, 'w') as handle:
        json.dump(obj, handle, sort_keys=sort_keys, indent=4)

def subfiles(folder, suffix, join=True):
    """Recursively collect files under *folder* whose names end with *suffix*.

    Returns full paths when *join* is True, bare filenames otherwise.
    Walks subdirectories via os.walk, so nested files are included too.
    """
    return [
        os.path.join(parent, fname) if join else fname
        for parent, _, fnames in os.walk(folder)
        for fname in fnames
        if fname.endswith(suffix)
    ]

def get_identifiers_from_splitted_files(folder: str):
    """Return the sorted unique case identifiers found in *folder*.

    Each '.nii.gz' filename has its last 12 characters stripped — in the
    nnU-Net naming scheme that removes the '_XXXX.nii.gz' modality suffix
    (presumably, e.g. 'case_0000.nii.gz' -> 'case'; verify your filenames
    follow this convention). Returns a numpy array of unique names.
    """
    names = subfiles(folder, suffix='.nii.gz', join=False)
    return np.unique([name[:-12] for name in names])

def generate_dataset_json(output_file: str, imagesTr_dir: str, imagesTs_dir: str, modalities: Tuple,
                          labels: dict, dataset_name: str, sort_keys=True, license: str = "hands off!", dataset_description: str = "",
                          dataset_reference="", dataset_release='0.0'):
    """Build and write an nnU-Net v1 style dataset.json for a task.

    Case identifiers are derived from the '.nii.gz' files inside
    *imagesTr_dir* (and *imagesTs_dir*, when given — pass None to skip the
    test section). *modalities* maps channel index -> modality name and
    *labels* maps integer label value -> region name.
    """
    train_identifiers = get_identifiers_from_splitted_files(imagesTr_dir)
    # No test directory is a supported configuration — emit an empty test list.
    test_identifiers = [] if imagesTs_dir is None else get_identifiers_from_splitted_files(imagesTs_dir)

    json_dict = {
        'name': dataset_name,
        'description': dataset_description,
        'tensorImageSize': "4D",
        'reference': dataset_reference,
        # NOTE: 'licence' (British spelling) is the key nnU-Net v1 expects —
        # do not "fix" it to 'license'.
        'licence': license,
        'release': dataset_release,
        'modality': {str(idx): mod for idx, mod in enumerate(modalities)},
        'labels': {str(value): labels[value] for value in labels.keys()},
        'numTraining': len(train_identifiers),
        'numTest': len(test_identifiers),
        'training': [
            {'image': f"./imagesTr/{ident}.nii.gz", "label": f"./labelsTr/{ident}.nii.gz"}
            for ident in train_identifiers
        ],
        'test': [f"./imagesTs/{ident}.nii.gz" for ident in test_identifiers],
    }

    if not output_file.endswith("dataset.json"):
        print("WARNING: output file name is not dataset.json! This may be intentional or not. You decide. "
              "Proceeding anyways...")
    save_json(json_dict, output_file, sort_keys=sort_keys)
# Build and write dataset.json using the Kaggle paths configured above.
generate_dataset_json(output_file=output_file, 
                      imagesTr_dir=imagesTr, 
                      imagesTs_dir=imagesTs,
                      modalities=modalities,
                      labels=labels, 
                      dataset_name=dataset_name)

toufiqmusah avatar Oct 17 '25 20:10 toufiqmusah