
Training multiple fastai models and validating with DataFrame-based DataLoaders

unikill066 opened this issue on Feb 02, 2024 · 1 comment

I have a unique use case where I am attempting to isolate subjects in each run. I have 56 such runs that I want to automate. To achieve this, I created files such as 1_train.csv, 2_train.csv, ..., and 56_train.csv, along with corresponding 1_test.csv, 2_test.csv, ..., and 56_test.csv.

Contents of 1_train.csv look like this:

image_path label is_valid
multi/image1.png 2 FALSE
multi/image2.png 2 FALSE
multi/image3.png 2 FALSE

1_test.csv:

image_path label
multi/image11.png 2
multi/image12.png 2
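
As a quick aid, a minimal sketch (using the same placeholder paths as the code below) that pairs the numbered files after natural sorting, so 1_train.csv always lines up with 1_test.csv:

# Sketch: pair n_train.csv with n_test.csv by their numeric prefix and verify alignment.
from pathlib import Path
from natsort import natsorted

train_csvs = natsorted(Path("/path/to/fastai/crossvalidation/train").glob("*.csv"))
test_csvs = natsorted(Path("/path/to/fastai/crossvalidation/test").glob("*.csv"))

for tr, te in zip(train_csvs, test_csvs):
    # "1_train" -> "1" should match "1_test" -> "1"
    assert tr.stem.split("_")[0] == te.stem.split("_")[0], (tr.name, te.name)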

Now I am creating the 56 models and exporting them, along with their metrics, to a common folder. My current approach is below; if there is a more efficient way to accomplish this, please let me know (one possible restructuring is sketched after the code).

Question: each time the loop moves on to the next train/test pair and its learner, I assume the learners are distinct and their weights are initialized independently. Is this correct?
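
One way to sanity-check that assumption (a rough sketch, assuming a DataLoaders object such as the train_dls built in the code below) is to create two learners and compare their parameters directly:

# Sketch only: confirm that two vision_learner calls build independent models.
# Assumes `train_dls` is an ImageDataLoaders like the one created in the loop below.
learn_a = vision_learner(train_dls, resnet34)
learn_b = vision_learner(train_dls, resnet34)

pa, pb = next(learn_a.model.parameters()), next(learn_b.model.parameters())
print(pa.data_ptr() == pb.data_ptr())  # False: separate tensors, nothing shared between learners
# Note: with pretrained=True (the default) the body starts from the same ImageNet
# checkpoint each time, while the new classification head is re-initialized randomly.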

Code:

# Imports
import os, shutil, getpass, warnings
import numpy as np, pandas as pd, torch
import matplotlib.pyplot as plt
import fastai
from natsort import natsorted
from fastai.vision.all import *
from fastai.metrics import error_rate
from fastai.vision.learner import unet_learner
from torchvision.models.resnet import resnet34
from fastai.data.external import untar_data, URLs
from fastai.vision.data import SegmentationDataLoaders
from fastai.data.transforms import get_image_files, FuncSplitter, Normalize
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import auc, roc_curve, precision_recall_curve, classification_report

warnings.filterwarnings("ignore", category=UserWarning)
print(torch.cuda.get_device_name(0))  # confirm a GPU is visible

# filepaths and variables
dir_path_fp = Path("/path/to/fastai")
data_train_fp = Path("/path/to/fastai/crossvalidation/train")
data_test_fp = Path("/path/to/fastai/crossvalidation/test")
cross_val_models_fp = Path("/path/to/fastai/models")

base_filepath = dir_path_fp / 'data'
results_list, list_learners = list(), list()
file_train_list = natsorted(list(data_train_fp.glob('*.csv')))
file_test_list = natsorted(list(data_test_fp.glob('*.csv')))

# Learner list: build one set of DataLoaders and one Learner per training CSV (keeps all 56 learners in memory)
for train_file in file_train_list:
    train_df = pd.read_csv(train_file)
    train_dls = ImageDataLoaders.from_df(train_df, path=base_filepath, seed=42, 
                                   fn_col=0, label_col=1, valid_col='is_valid',
                                   item_tfms=RandomResizedCrop(256, min_scale=0.7), 
                                   batch_tfms=aug_transforms(), bs=64)
    learn = vision_learner(train_dls, resnet34, metrics=[accuracy, error_rate])
    list_learners.append({train_file: learn})

print("length of the learners list", len(list_learners))
print()

for idx, (learner_entry, train_file, test_file) in enumerate(zip(list_learners, file_train_list, file_test_list)):
    print(idx+1, "\n", learner_entry, "\n", test_file)

    learner = learner_entry[train_file]  # unpack the {train_file: learner} dict
    lrs = learner.lr_find()              # returns SuggestedLRs; use the suggested value
    learner.fit_one_cycle(500, lrs.valley, cbs=[EarlyStoppingCallback(monitor='valid_loss', patience=10)])

    # Plots
    interp = Interpretation.from_learner(learner)  # plot the highest-loss validation items
    interp.plot_top_losses(9, figsize=(15,10))
    plt.savefig(cross_val_models_fp/'{}_interpretation_top_losses.png'.format(train_file.stem))
    plt.clf()

    results = ClassificationInterpretation.from_learner(learner)  # confusion matrix on the validation set
    results.plot_confusion_matrix()
    plt.savefig(cross_val_models_fp/'{}_confusion_matrix.png'.format(train_file.stem))
    plt.clf()

    test_df = pd.read_csv(test_file)
    dl_test = learner.dls.test_dl(test_df, with_labels=True)
    preds, targets = learner.get_preds(dl=dl_test)
    pred_classes = preds.argmax(dim=-1)

    # Metrics (named `acc` so fastai's accuracy metric is not shadowed)
    acc = accuracy_score(targets, pred_classes)
    f1 = f1_score(targets, pred_classes, average='weighted')
    precision = precision_score(targets, pred_classes, average='weighted')
    recall = recall_score(targets, pred_classes, average='weighted')
    print("Testing Metrics: \n", acc, f1, precision, recall)
    results_list.append({'Train File': str(train_file), 'Test File': str(test_file),
        'Learner': '{}.pkl'.format(train_file.stem), 'Accuracy': acc, 'F1 Score': f1,
        'Precision': precision, 'Recall': recall})
    # print(classification_report(targets, pred_classes))

    learner.export(cross_val_models_fp/'{}.pkl'.format(train_file.stem))
results_df = pd.DataFrame(results_list)
results_df.to_csv(cross_val_models_fp/'results_new2.csv', index=False)
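
For comparison, a rough sketch (untested) of folding learner creation into the training loop itself, so only one model is held in memory at a time instead of all 56:

# Sketch: create, train and export one learner per iteration, then free it.
for train_file, test_file in zip(file_train_list, file_test_list):
    dls = ImageDataLoaders.from_df(pd.read_csv(train_file), path=base_filepath, seed=42,
                                   fn_col=0, label_col=1, valid_col='is_valid',
                                   item_tfms=RandomResizedCrop(256, min_scale=0.7),
                                   batch_tfms=aug_transforms(), bs=64)
    learner = vision_learner(dls, resnet34, metrics=[accuracy, error_rate])
    lrs = learner.lr_find()
    learner.fit_one_cycle(500, lrs.valley,
                          cbs=[EarlyStoppingCallback(monitor='valid_loss', patience=10)])
    learner.export(cross_val_models_fp/'{}.pkl'.format(train_file.stem))
    del learner
    torch.cuda.empty_cache()  # release cached GPU memory before the next run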

unikill066 · Feb 02 '24 19:02

Happy to look into that!

KossaiSbai · Feb 28 '24 20:02