MIScnn
FileNotFoundError in KiTS19 notebook example
The overall pipeline is the same as in the KiTS19 notebook example, apart from a few minor details:
# Library import
from miscnn.processing.preprocessor import Preprocessor
# Create and configure the Preprocessor class
pp = Preprocessor(data_io, data_aug=data_aug, batch_size=2, subfunctions=subfunctions, prepare_subfunctions=True,
                  prepare_batches=False, analysis="patchwise-crop", patch_shape=(80, 160, 160),
                  use_multiprocessing=True)
# Adjust the patch overlap for predictions
pp.patchwise_overlap = (40, 80, 80)
pp.patchwise_skip_blanks = True
...
# Exclude suspicious samples from the data set
del sample_list[133]
del sample_list[125]
del sample_list[68]
del sample_list[37]
del sample_list[23]
del sample_list[15]
# Create the training/validation sample ID list
import numpy as np
np.random.seed(123)
sample_list = np.random.permutation(sample_list).tolist()
validation_samples = sorted(sample_list[:50])
training_samples = sorted(sample_list[50:])
model.evaluate(training_samples, validation_samples, epochs=epochs, callbacks=[cb_lr, cb_es, cb_mc])
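As a quick sanity check (my own addition, not part of the notebook), the split can be verified right after it is created; it only uses the sample_list, training_samples and validation_samples defined above:
# Verify the split: sizes add up and the two ID lists are disjoint
assert len(training_samples) + len(validation_samples) == len(sample_list)
assert not set(training_samples) & set(validation_samples)
print(len(training_samples), "training /", len(validation_samples), "validation samples")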
The data was downloaded following the instructions at https://github.com/neheller/kits19, but when running the training I get this error:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-10-88f8eac878c5> in <module>
----> 1 model.evaluate(training_samples, validation_samples, epochs=epochs, callbacks=[cb_lr, cb_es, cb_mc])
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\model.py in evaluate(self, training_samples, validation_samples, epochs, iterations, callbacks, class_weight)
264 shuffle=self.shuffle_batches)
265 # Run training & validation process with the Keras fit
--> 266 history = self.model.fit(dataGen_training,
267 validation_data=dataGen_validation,
268 callbacks=callbacks,
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1047 training_utils.RespectCompiledTrainableState(self):
1048 # Creates a `tf.data.Dataset` and handles batch and epoch iteration.
-> 1049 data_handler = data_adapter.DataHandler(
1050 x=x,
1051 y=y,
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution)
1103
1104 adapter_cls = select_data_adapter(x, y)
-> 1105 self._adapter = adapter_cls(
1106 x,
1107 y,
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, shuffle, workers, use_multiprocessing, max_queue_size, model, **kwargs)
907 self._keras_sequence = x
908 self._enqueuer = None
--> 909 super(KerasSequenceAdapter, self).__init__(
910 x,
911 shuffle=False, # Shuffle is handed in the _make_callable override.
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, workers, use_multiprocessing, max_queue_size, model, **kwargs)
784 # Since we have to know the dtype of the python generator when we build the
785 # dataset, we have to look at a batch to infer the structure.
--> 786 peek, x = self._peek_and_restore(x)
787 peek = self._standardize_batch(peek)
788 peek = _process_tensorlike(peek)
~\anaconda3\envs\cfu_segmentation\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py in _peek_and_restore(x)
918 @staticmethod
919 def _peek_and_restore(x):
--> 920 return x[0], x
921
922 def _handle_multiprocessing(self, x, workers, use_multiprocessing,
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\data_generator.py in __getitem__(self, idx)
63 # Load a batch by generating it or by loading an already prepared
64 if self.preprocessor.prepare_batches : batch = self.load_batch(idx)
---> 65 else : batch = self.generate_batch(idx)
66 # Return the batch containing only an image or an image and segmentation
67 if self.training:
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\neural_network\data_generator.py in generate_batch(self, idx)
146 self.sample_list.extend(samples)
147 # create a new batch
--> 148 batches = self.preprocessor.run(samples, self.training,
149 self.validation)
150 # Create threading lock to avoid parallel access
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\processing\preprocessor.py in run(self, indices_list, training, validation)
131 sf.preprocessing(sample, training=training)
132 # Load sample from file with already processed subfunctions
--> 133 else : sample = self.data_io.sample_loader(index, backup=True)
134 # Cache sample object for prediction
135 if not training : self.cache[index] = sample
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\data_loading\data_io.py in sample_loader(self, index, load_seg, load_pred, backup)
84 def sample_loader(self, index, load_seg=True, load_pred=False, backup=False):
85 # If sample is a backup -> load it from pickle
---> 86 if backup : return self.load_sample_pickle(index)
87 # Load the image with the I/O interface
88 image, extended = self.interface.load_image(index)
c:\users\students\desktop\bacteria_cfu\code\miscnn-master\miscnn\data_loading\data_io.py in load_sample_pickle(self, index)
192 sample_path = os.path.join(self.batch_path, str(self.seed) + "." + \
193 index + ".pickle")
--> 194 with open(sample_path,'rb') as reader:
195 sample = pickle.load(reader)
196 return sample
FileNotFoundError: [Errno 2] No such file or directory: 'batches\\17488990.case_00058.pickle'
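The missing file suggests that the subfunction backup for case_00058 was never written into the batches directory. A small diagnostic sketch (my own addition; it only relies on the batch_path and seed attributes and the pickle naming visible in the traceback above) to list which backups actually exist:
# List the prepared backup pickles and report samples whose backup is missing
import os
existing = set(os.listdir(data_io.batch_path))
missing = [index for index in sample_list
           if str(data_io.seed) + "." + index + ".pickle" not in existing]
print("Samples without a prepared backup pickle:", missing)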
I have diagnosed the issue: the use_multiprocessing=True flag is the cause.
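For reference, this is the same Preprocessor call as above with only that flag changed, which avoids the error:
# Same configuration as above, but with multiprocessing disabled for the subfunction preparation
pp = Preprocessor(data_io, data_aug=data_aug, batch_size=2, subfunctions=subfunctions, prepare_subfunctions=True,
                  prepare_batches=False, analysis="patchwise-crop", patch_shape=(80, 160, 160),
                  use_multiprocessing=False)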
Hey @jumutc,
thanks for the bug report! Hmm, I cannot reproduce this batching I/O error on our side.
Could you please try to reproduce it in a Jupyter notebook or Google Colab?
Cheers, Dominik