[BUG] TorchLayer doesn't work with diff_method='parameter-shift'
Expected behavior
The TorchLayer demo should train successfully with diff_method='parameter-shift', just as it does with the default differentiation method.
Actual behavior
The backward pass raises a NotImplementedError: differentiating broadcasted tapes with respect to their broadcasted parameters is not supported by the parameter-shift gradient transform (full traceback below).
Additional information
This issue originated from Forum thread 4940.
Source code
# TorchLayer demo, with the QNode switched to diff_method='parameter-shift'
import torch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons
# Set random seeds
torch.manual_seed(42)
np.random.seed(42)
X, y = make_moons(n_samples=200, noise=0.1)
y_ = torch.unsqueeze(torch.tensor(y), 1) # used for one-hot encoded labels
y_hot = torch.scatter(torch.zeros((200, 2)), 1, y_, 1)
c = ["#1f77b4" if y_ == 0 else "#ff7f0e" for y_ in y] # colours for each class
plt.axis("off")
plt.scatter(X[:, 0], X[:, 1], c=c)
plt.show()
import pennylane as qml
n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)
@qml.qnode(dev, diff_method='parameter-shift')
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]
n_layers = 6
weight_shapes = {"weights": (n_layers, n_qubits)}
qlayer = qml.qnn.TorchLayer(qnode, weight_shapes)
clayer_1 = torch.nn.Linear(2, 2)
clayer_2 = torch.nn.Linear(2, 2)
softmax = torch.nn.Softmax(dim=1)
layers = [clayer_1, qlayer, clayer_2, softmax]
model = torch.nn.Sequential(*layers)
opt = torch.optim.SGD(model.parameters(), lr=0.2)
loss = torch.nn.L1Loss()
X = torch.tensor(X, requires_grad=True).float()
y_hot = y_hot.float()
batch_size = 5
batches = 200 // batch_size
data_loader = torch.utils.data.DataLoader(
    list(zip(X, y_hot)), batch_size=5, shuffle=True, drop_last=True
)
epochs = 6
for epoch in range(epochs):
    running_loss = 0
    for xs, ys in data_loader:
        opt.zero_grad()
        loss_evaluated = loss(model(xs), ys)
        loss_evaluated.backward()
        opt.step()
        running_loss += loss_evaluated
    avg_loss = running_loss / batches
    print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))
y_pred = model(X)
predictions = torch.argmax(y_pred, axis=1).detach().numpy()
correct = [1 if p == p_true else 0 for p, p_true in zip(predictions, y)]
accuracy = sum(correct) / len(correct)
print(f"Accuracy: {accuracy * 100}%")
Tracebacks
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-8-d039678a60e4> in <cell line: 13>()
19
20 loss_evaluated = loss(model(xs), ys)
---> 21 loss_evaluated.backward()
22
23 opt.step()
11 frames
/usr/local/lib/python3.10/dist-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
523 inputs=inputs,
524 )
--> 525 torch.autograd.backward(
526 self, gradient, retain_graph, create_graph, inputs=inputs
527 )
/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
265 # some Python versions print out the first line of a multi-line function
266 # calls in the traceback and some print out the last line
--> 267 _engine_run_backward(
268 tensors,
269 grad_tensors_,
/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
742 unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
743 try:
--> 744 return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
745 t_outputs, *args, **kwargs
746 ) # Calls into the C++ engine to run the backward pass
/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py in apply(self, *args)
299 )
300 user_fn = vjp_fn if vjp_fn is not Function.vjp else backward_fn
--> 301 return user_fn(self, *args)
302
303 def apply_jvp(self, *args):
/usr/local/lib/python3.10/dist-packages/pennylane/workflow/interfaces/torch.py in new_backward(ctx, *flat_grad_outputs)
99 def new_backward(ctx, *flat_grad_outputs):
100 grad_outputs = pytree.tree_unflatten(flat_grad_outputs, ctx._out_struct)
--> 101 grad_inputs = orig_bw(ctx, *grad_outputs)
102 # None corresponds to the diff of out_struct_holder
103 return (None,) + tuple(grad_inputs)
/usr/local/lib/python3.10/dist-packages/pennylane/workflow/interfaces/torch.py in backward(ctx, *dy)
184 # dL/dz convention of PennyLane, autograd and jax. This converts between the formats
185 dy = _recursive_conj(dy)
--> 186 vjps = ctx.jpc.compute_vjp(ctx.tapes, dy)
187 # split tensor into separate entries
188 unpacked_vjps = []
/usr/local/lib/python3.10/dist-packages/pennylane/workflow/jacobian_products.py in compute_vjp(self, tapes, dy)
297 return _compute_vjps(jacs, dy, tapes)
298
--> 299 vjp_tapes, processing_fn = qml.gradients.batch_vjp(
300 tapes, dy, self._gradient_transform, gradient_kwargs=self._gradient_kwargs
301 )
/usr/local/lib/python3.10/dist-packages/pennylane/gradients/vjp.py in batch_vjp(tapes, dys, gradient_fn, reduction, gradient_kwargs)
500 # Loop through the tapes and dys vector
501 for tape, dy in zip(tapes, dys):
--> 502 g_tapes, fn = vjp(tape, dy, gradient_fn, gradient_kwargs=gradient_kwargs)
503 reshape_info.append(len(g_tapes))
504 processing_fns.append(fn)
/usr/local/lib/python3.10/dist-packages/pennylane/gradients/vjp.py in vjp(tape, dy, gradient_fn, gradient_kwargs)
361 pass
362
--> 363 gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)
364
365 def processing_fn(results, num=None):
/usr/local/lib/python3.10/dist-packages/pennylane/transforms/core/transform_dispatcher.py in __call__(self, *targs, **tkwargs)
98 start = 0
99 for tape in expanded_tapes:
--> 100 intermediate_tapes, post_processing_fn = self._transform(
101 tape, *targs, **tkwargs
102 )
/usr/local/lib/python3.10/dist-packages/pennylane/gradients/parameter_shift.py in param_shift(tape, argnum, shifts, gradient_recipes, fallback_fn, f0, broadcast)
1110 transform_name = "parameter-shift rule"
1111 assert_no_state_returns(tape.measurements, transform_name)
-> 1112 assert_no_trainable_tape_batching(tape, transform_name)
1113
1114 if argnum is None and not tape.trainable_params:
/usr/local/lib/python3.10/dist-packages/pennylane/gradients/gradient_transform.py in assert_no_trainable_tape_batching(tape, transform_name)
95 for idx in range(len(tape.trainable_params)):
96 if tape.get_operation(idx)[0].batch_size is not None:
---> 97 raise NotImplementedError(
98 "Computing the gradient of broadcasted tapes with respect to the broadcasted "
99 f"parameters using the {transform_name} gradient transform is currently not "
NotImplementedError: Computing the gradient of broadcasted tapes with respect to the broadcasted parameters using the parameter-shift rule gradient transform is currently not supported. See #4462 for details.
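The check that raises is assert_no_trainable_tape_batching: inside the Sequential model the batched inputs reaching the QNode come from clayer_1 and therefore carry gradients, so the tape is broadcasted in a trainable parameter, which is presumably exactly the case param_shift rejects. One possible workaround (a sketch, untested here) is to apply qml.transforms.broadcast_expand to the QNode so that each broadcasted tape is split into one tape per batch element before the gradient transform runs; whether this combination fully resolves the TorchLayer case is an assumption on my part, and it does cost extra circuit executions per batch.

# Workaround sketch (untested): expand broadcasted tapes before differentiation.
import pennylane as qml

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

@qml.transforms.broadcast_expand  # split each broadcasted tape into per-sample tapes
@qml.qnode(dev, diff_method="parameter-shift")
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

weight_shapes = {"weights": (6, n_qubits)}
qlayer = qml.qnn.TorchLayer(qnode, weight_shapes)  # drop-in replacement for the qlayer above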
System information
Name: PennyLane
Version: 0.37.0
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author:
Author-email:
License: Apache License 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, packaging, pennylane-lightning, requests, rustworkx, scipy, semantic-version, toml, typing-extensions
Required-by: PennyLane_Lightning
Platform info: Linux-6.1.85+-x86_64-with-glibc2.35
Python version: 3.10.12
Numpy version: 1.26.4
Scipy version: 1.13.1
Installed devices:
- lightning.qubit (PennyLane_Lightning-0.37.0)
- default.clifford (PennyLane-0.37.0)
- default.gaussian (PennyLane-0.37.0)
- default.mixed (PennyLane-0.37.0)
- default.qubit (PennyLane-0.37.0)
- default.qubit.autograd (PennyLane-0.37.0)
- default.qubit.jax (PennyLane-0.37.0)
- default.qubit.legacy (PennyLane-0.37.0)
- default.qubit.tf (PennyLane-0.37.0)
- default.qubit.torch (PennyLane-0.37.0)
- default.qutrit (PennyLane-0.37.0)
- default.qutrit.mixed (PennyLane-0.37.0)
- default.tensor (PennyLane-0.37.0)
- null.qubit (PennyLane-0.37.0)
Existing GitHub issues
- [X] I have searched existing GitHub issues to make sure the issue does not already exist.