Learner.summary() causes a grad error in Learner.lr_find()
Hello, I noticed a very strange bug while experimenting with chapter 13 of fastbook (convolutions and CNNs). I first defined my model as follows:
def conv(ni, nf, ks=3, act=True, stride=2):
    # use the stride argument rather than hardcoding 2
    res = nn.Conv2d(ni, nf, stride=stride, kernel_size=ks, padding=ks//2)
    if act: res = nn.Sequential(res, nn.ReLU())
    return res

model2 = nn.Sequential(
    conv(1, 4),        # 4 x 14 x 14
    nn.AvgPool2d(2),   # 4 x 7 x 7
    conv(4, 16),       # 16 x 4 x 4
    nn.AvgPool2d(2),   # 16 x 2 x 2
    nn.Flatten(),
    nn.Linear(16*2*2, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
).to(device)

cnn2 = Learner(dls=mnist, model=model2, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)
cnn2.summary()  # an order of magnitude fewer params
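(A quick way to double-check the shape comments above is to push a dummy batch through the model; this sketch assumes 28x28 single-channel MNIST inputs, which is what my mnist DataLoaders produce:

import torch

x = torch.randn(64, 1, 28, 28, device=device)  # fake batch of MNIST-sized images
print(model2(x).shape)                          # torch.Size([64, 10])
)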
The Learner builds and cnn2.summary() works fine, printing the model summary. In the next cell I have:
cnn2.lr_find()
This fails with the following runtime error:
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
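From what I understand, PyTorch raises this error whenever backward() is called on a tensor that is not attached to the autograd graph. A minimal repro of just the message (not my actual code):

import torch

loss = torch.tensor(3.14)  # plain tensor: requires_grad=False, no grad_fn
loss.backward()            # raises the same RuntimeError

Here is the full traceback from cnn2.lr_find():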
RuntimeError Traceback (most recent call last)
<ipython-input-88-b3ecb6afc810> in <cell line: 0>()
----> 1 cnn2.lr_find()
/usr/local/lib/python3.11/dist-packages/fastai/callback/schedule.py in lr_find(self, start_lr, end_lr, num_it, stop_div, show_plot, suggest_funcs)
293 n_epoch = num_it//len(self.dls.train) + 1
294 cb=LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div)
--> 295 with self.no_logging(): self.fit(n_epoch, cbs=cb)
296 if suggest_funcs is not None:
297 lrs, losses = tensor(self.recorder.lrs[num_it//10:-5]), tensor(self.recorder.losses[num_it//10:-5])
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt, start_epoch)
270 self.opt.set_hypers(lr=self.lr if lr is None else lr)
271 self.n_epoch = n_epoch
--> 272 self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
273
274 def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
205
206 def _with_events(self, f, event_type, ex, final=noop):
--> 207 try: self(f'before_{event_type}'); f()
208 except ex: self(f'after_cancel_{event_type}')
209 self(f'after_{event_type}'); final()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _do_fit(self)
259 for epoch in range(self.n_epoch):
260 self.epoch=epoch
--> 261 self._with_events(self._do_epoch, 'epoch', CancelEpochException)
262
263 def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False, start_epoch=0):
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
205
206 def _with_events(self, f, event_type, ex, final=noop):
--> 207 try: self(f'before_{event_type}'); f()
208 except ex: self(f'after_cancel_{event_type}')
209 self(f'after_{event_type}'); final()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _do_epoch(self)
253
254 def _do_epoch(self):
--> 255 self._do_epoch_train()
256 self._do_epoch_validate()
257
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _do_epoch_train(self)
245 def _do_epoch_train(self):
246 self.dl = self.dls.train
--> 247 self._with_events(self.all_batches, 'train', CancelTrainException)
248
249 def _do_epoch_validate(self, ds_idx=1, dl=None):
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
205
206 def _with_events(self, f, event_type, ex, final=noop):
--> 207 try: self(f'before_{event_type}'); f()
208 except ex: self(f'after_cancel_{event_type}')
209 self(f'after_{event_type}'); final()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in all_batches(self)
211 def all_batches(self):
212 self.n_iter = len(self.dl)
--> 213 for o in enumerate(self.dl): self.one_batch(*o)
214
215 def _backward(self): self.loss_grad.backward()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in one_batch(self, i, b)
241 b = self._set_device(b)
242 self._split(b)
--> 243 self._with_events(self._do_one_batch, 'batch', CancelBatchException)
244
245 def _do_epoch_train(self):
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
205
206 def _with_events(self, f, event_type, ex, final=noop):
--> 207 try: self(f'before_{event_type}'); f()
208 except ex: self(f'after_cancel_{event_type}')
209 self(f'after_{event_type}'); final()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _do_one_batch(self)
229 self('after_loss')
230 if not self.training or not len(self.yb): return
--> 231 self._do_grad_opt()
232
233 def _set_device(self, b):
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _do_grad_opt(self)
217
218 def _do_grad_opt(self):
--> 219 self._with_events(self._backward, 'backward', CancelBackwardException)
220 self._with_events(self._step, 'step', CancelStepException)
221 self.opt.zero_grad()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
205
206 def _with_events(self, f, event_type, ex, final=noop):
--> 207 try: self(f'before_{event_type}'); f()
208 except ex: self(f'after_cancel_{event_type}')
209 self(f'after_{event_type}'); final()
/usr/local/lib/python3.11/dist-packages/fastai/learner.py in _backward(self)
213 for o in enumerate(self.dl): self.one_batch(*o)
214
--> 215 def _backward(self): self.loss_grad.backward()
216 def _step(self): self.opt.step()
217
/usr/local/lib/python3.11/dist-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
624 inputs=inputs,
625 )
--> 626 torch.autograd.backward(
627 self, gradient, retain_graph, create_graph, inputs=inputs
628 )
/usr/local/lib/python3.11/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
345 # some Python versions print out the first line of a multi-line function
346 # calls in the traceback and some print out the last line
--> 347 _engine_run_backward(
348 tensors,
349 grad_tensors_,
/usr/local/lib/python3.11/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
821 unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
822 try:
--> 823 return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
824 t_outputs, *args, **kwargs
825 ) # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

However, if I comment out the cnn2.summary() line there is no error, and lr_find() produces the learning-rate plot below (which still looks odd to me, as lr_find plots usually don't look like this):

[learning-rate finder plot]
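Two things I plan to try as workarounds; these are sketches based on my guess that summary() somehow flips requires_grad off on the parameters, which I have not confirmed:

# guess: summary() left the parameters frozen, so turn grads back on
for p in cnn2.model.parameters():
    p.requires_grad_(True)
cnn2.lr_find()

# alternatively, just rebuild the Learner from scratch before lr_find()
cnn2 = Learner(dls=mnist, model=model2, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)
cnn2.lr_find()

Has anyone seen this before, or can explain why calling summary() first would break the backward pass?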