fastai
fastai copied to clipboard
Training multiple fastai models and validating them using DataFrame-based dataloaders:
I have a unique use case where I am attempting to isolate subjects in each run. I have 56 such runs that I want to automate. To achieve this, I created files such as 1_train.csv, 2_train.csv, ..., and 56_train.csv, along with corresponding 1_test.csv, 2_test.csv, ..., and 56_test.csv.
The contents of 1_train.csv would look like this:
image_path | label | is_valid |
---|---|---|
multi/image1.png | 2 | FALSE |
multi/image2.png | 2 | FALSE |
multi/image3.png | 2 | FALSE |
And the contents of 1_test.csv:
image_path | label |
---|---|
multi/image11.png | 2 |
multi/image12.png | 2 |
Now, I am working on creating and exporting 56 models and their respective metrics to a common folder. I am currently following a specific approach, but if there is a more efficient way to accomplish this, please let me know.
Question: Each time I loop over the training, testing, and the learners, I assume the learners are distinct, and the weights are re-initialized. Is this correct?
Code:
# Imports
from natsort import natsorted
from fastai.vision.all import *
from fastai.metrics import error_rate
from fastai.vision.learner import unet_learner
from torchvision.models.resnet import resnet34
from fastai.data.external import untar_data, URLs
import fastai, numpy as np, pandas as pd, os, getpass
from fastai.vision.data import SegmentationDataLoaders
import numpy as np, torch, os, shutil, matplotlib.pyplot as plt
from fastai.data.transforms import get_image_files, FuncSplitter, Normalize
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import auc, roc_curve, precision_recall_curve, classification_report
# Silence UserWarning-category warnings for the whole run.
warnings.filterwarnings("ignore", category=UserWarning)
# Query the name of CUDA device 0 (the returned value is not stored).
torch.cuda.get_device_name(0)

# --- Accumulators filled by the loops further down ---
results_list = []
list_learners = []

# --- File paths ---
dir_path_fp = Path("/path/to/fastai")
data_train_fp = Path("/path/to/fastai/crossvalidation/train")
data_test_fp = Path("/path/to/fastai/crossvalidation/test")
cross_val_models_fp = Path("/path/to/fastai/models")
base_filepath = dir_path_fp / 'data'

# --- Per-run CSV files, in natural order (1, 2, ..., 10, 11, ...) ---
# Both lists are later consumed in lockstep, so the i-th train file is
# expected to correspond to the i-th test file.
file_train_list = natsorted(data_train_fp.glob('*.csv'))
file_test_list = natsorted(data_test_fp.glob('*.csv'))
# Build one learner per training CSV and collect them in `list_learners`.
# Each entry is a single-key dict mapping the CSV path to its learner.
for train_file in file_train_list:
    frame = pd.read_csv(train_file)

    # Column 0 holds the image path (relative to `base_filepath`), column 1
    # the label, and `is_valid` marks the rows used for validation.
    loader_kwargs = dict(
        path=base_filepath,
        seed=42,
        fn_col=0,
        label_col=1,
        valid_col='is_valid',
        item_tfms=RandomResizedCrop(256, min_scale=0.7),
        batch_tfms=aug_transforms(),
        bs=64,
    )
    loaders = ImageDataLoaders.from_df(frame, **loader_kwargs)

    # A fresh learner object is constructed on every iteration.
    learn = vision_learner(loaders, resnet34, metrics=[accuracy, error_rate])
    list_learners += [{train_file: learn}]

print("length of the learners list", len(list_learners))
print()
# Train, evaluate and export one model per (learner, train CSV, test CSV) triple.
#
# The three lists are walked in lockstep. `zip` silently stops at the shortest
# input, which would drop runs without any error, so fail loudly on a mismatch.
if not (len(list_learners) == len(file_train_list) == len(file_test_list)):
    raise ValueError(
        "Mismatched inputs: {} learners, {} train files, {} test files".format(
            len(list_learners), len(file_train_list), len(file_test_list)))

for idx, (learner_entry, train_file, test_file) in enumerate(
        zip(list_learners, file_train_list, file_test_list)):
    print(idx+1, "\n", learner_entry, "\n", test_file)

    # Each entry is a {train_file: learner} dict. Every step below must use
    # this iteration's `learner`; a module-level `learn` left over from
    # building the learners would always refer to the last one created.
    learner = learner_entry[train_file]
    run_name = train_file.stem

    # --- Training ---
    # `lr_find` returns a named tuple of suggestions; hand one scalar to
    # `fit_one_cycle` instead of the whole tuple. Falls back to the first
    # suggestion when the tuple has no `valley` field.
    suggestion = learner.lr_find()
    lr = getattr(suggestion, 'valley', suggestion[0])
    learner.fit_one_cycle(
        500, lr,
        cbs=[EarlyStoppingCallback(monitor='valid_loss', patience=10)])

    # --- Plots ---
    # A single interpretation object serves both plots, so predictions are
    # only computed once per model.
    interp = ClassificationInterpretation.from_learner(learner)

    interp.plot_top_losses(9, figsize=(15, 10))  # plotting losses
    plt.savefig(cross_val_models_fp / '{}_interpretation_top_losses.png'.format(run_name))
    plt.close()  # close rather than clear, so figures do not pile up across runs

    interp.plot_confusion_matrix()  # plotting confusion matrix
    plt.savefig(cross_val_models_fp / '{}_confusion_matrix.png'.format(run_name))
    plt.close()

    # --- Held-out test set ---
    test_df = pd.read_csv(test_file)
    dl_test = learner.dls.test_dl(test_df, with_labels=True)
    preds, targets = learner.get_preds(dl=dl_test)
    pred_classes = preds.argmax(dim=-1)

    # --- Metrics ---
    # Local names carry a `test_` prefix so the fastai `accuracy` metric
    # function is not shadowed by a float.
    test_accuracy = accuracy_score(targets, pred_classes)
    test_f1 = f1_score(targets, pred_classes, average='weighted')
    test_precision = precision_score(targets, pred_classes, average='weighted')
    test_recall = recall_score(targets, pred_classes, average='weighted')
    print("Testing Metrics: \n", test_accuracy, test_f1, test_precision, test_recall)

    results_list.append({
        'Train File': str(train_file),
        'Test File': str(test_file),
        'Learner': learner,
        'Accuracy': test_accuracy,
        'F1 Score': test_f1,
        'Precision': test_precision,
        'Recall': test_recall,
    })

    # Export the trained model next to its plots.
    learner.export(cross_val_models_fp / '{}.pkl'.format(run_name))

# One row per run, written once all runs have finished.
results_df = pd.DataFrame(results_list)
results_df.to_csv(cross_val_models_fp / 'results_new2.csv', index=False)
Happy to look into that!