
[BUG] TorchLayer doesn't work with diff_method='parameter-shift'

Open · CatalinaAlbornoz opened this issue 6 months ago • 3 comments

Expected behavior

The TorchLayer demo should run unchanged when the QNode is created with diff_method='parameter-shift'.

Actual behavior

The backward pass raises a NotImplementedError (full traceback below).

Additional information

This question originated from Forum thread 4940.

Source code

# Demo with parameter-shift
import torch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_moons

# Set random seeds
torch.manual_seed(42)
np.random.seed(42)

X, y = make_moons(n_samples=200, noise=0.1)
y_ = torch.unsqueeze(torch.tensor(y), 1)  # used for one-hot encoded labels
y_hot = torch.scatter(torch.zeros((200, 2)), 1, y_, 1)

c = ["#1f77b4" if y_ == 0 else "#ff7f0e" for y_ in y]  # colours for each class
plt.axis("off")
plt.scatter(X[:, 0], X[:, 1], c=c)
plt.show()

import pennylane as qml

n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, diff_method='parameter-shift')
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

n_layers = 6
weight_shapes = {"weights": (n_layers, n_qubits)}

qlayer = qml.qnn.TorchLayer(qnode, weight_shapes)

clayer_1 = torch.nn.Linear(2, 2)
clayer_2 = torch.nn.Linear(2, 2)
softmax = torch.nn.Softmax(dim=1)
layers = [clayer_1, qlayer, clayer_2, softmax]
model = torch.nn.Sequential(*layers)

opt = torch.optim.SGD(model.parameters(), lr=0.2)
loss = torch.nn.L1Loss()

X = torch.tensor(X, requires_grad=True).float()
y_hot = y_hot.float()

batch_size = 5
batches = 200 // batch_size

data_loader = torch.utils.data.DataLoader(
    list(zip(X, y_hot)), batch_size=batch_size, shuffle=True, drop_last=True
)

epochs = 6

for epoch in range(epochs):

    running_loss = 0

    for xs, ys in data_loader:
        opt.zero_grad()

        loss_evaluated = loss(model(xs), ys)
        loss_evaluated.backward()

        opt.step()

        running_loss += loss_evaluated

    avg_loss = running_loss / batches
    print("Average loss over epoch {}: {:.4f}".format(epoch + 1, avg_loss))

y_pred = model(X)
predictions = torch.argmax(y_pred, dim=1).detach().numpy()

correct = [1 if p == p_true else 0 for p, p_true in zip(predictions, y)]
accuracy = sum(correct) / len(correct)
print(f"Accuracy: {accuracy * 100}%")

Tracebacks

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
<ipython-input-8-d039678a60e4> in <cell line: 13>()
     19 
     20         loss_evaluated = loss(model(xs), ys)
---> 21         loss_evaluated.backward()
     22 
     23         opt.step()

11 frames
/usr/local/lib/python3.10/dist-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    523                 inputs=inputs,
    524             )
--> 525         torch.autograd.backward(
    526             self, gradient, retain_graph, create_graph, inputs=inputs
    527         )

/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    265     # some Python versions print out the first line of a multi-line function
    266     # calls in the traceback and some print out the last line
--> 267     _engine_run_backward(
    268         tensors,
    269         grad_tensors_,

/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py in _engine_run_backward(t_outputs, *args, **kwargs)
    742         unregister_hooks = _register_logging_hooks_on_whole_graph(t_outputs)
    743     try:
--> 744         return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    745             t_outputs, *args, **kwargs
    746         )  # Calls into the C++ engine to run the backward pass

/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py in apply(self, *args)
    299             )
    300         user_fn = vjp_fn if vjp_fn is not Function.vjp else backward_fn
--> 301         return user_fn(self, *args)
    302 
    303     def apply_jvp(self, *args):

/usr/local/lib/python3.10/dist-packages/pennylane/workflow/interfaces/torch.py in new_backward(ctx, *flat_grad_outputs)
     99     def new_backward(ctx, *flat_grad_outputs):
    100         grad_outputs = pytree.tree_unflatten(flat_grad_outputs, ctx._out_struct)
--> 101         grad_inputs = orig_bw(ctx, *grad_outputs)
    102         # None corresponds to the diff of out_struct_holder
    103         return (None,) + tuple(grad_inputs)

/usr/local/lib/python3.10/dist-packages/pennylane/workflow/interfaces/torch.py in backward(ctx, *dy)
    184         # dL/dz convention of PennyLane, autograd and jax. This converts between the formats
    185         dy = _recursive_conj(dy)
--> 186         vjps = ctx.jpc.compute_vjp(ctx.tapes, dy)
    187         # split tensor into separate entries
    188         unpacked_vjps = []

/usr/local/lib/python3.10/dist-packages/pennylane/workflow/jacobian_products.py in compute_vjp(self, tapes, dy)
    297             return _compute_vjps(jacs, dy, tapes)
    298 
--> 299         vjp_tapes, processing_fn = qml.gradients.batch_vjp(
    300             tapes, dy, self._gradient_transform, gradient_kwargs=self._gradient_kwargs
    301         )

/usr/local/lib/python3.10/dist-packages/pennylane/gradients/vjp.py in batch_vjp(tapes, dys, gradient_fn, reduction, gradient_kwargs)
    500     # Loop through the tapes and dys vector
    501     for tape, dy in zip(tapes, dys):
--> 502         g_tapes, fn = vjp(tape, dy, gradient_fn, gradient_kwargs=gradient_kwargs)
    503         reshape_info.append(len(g_tapes))
    504         processing_fns.append(fn)

/usr/local/lib/python3.10/dist-packages/pennylane/gradients/vjp.py in vjp(tape, dy, gradient_fn, gradient_kwargs)
    361         pass
    362 
--> 363     gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)
    364 
    365     def processing_fn(results, num=None):

/usr/local/lib/python3.10/dist-packages/pennylane/transforms/core/transform_dispatcher.py in __call__(self, *targs, **tkwargs)
     98                 start = 0
     99                 for tape in expanded_tapes:
--> 100                     intermediate_tapes, post_processing_fn = self._transform(
    101                         tape, *targs, **tkwargs
    102                     )

/usr/local/lib/python3.10/dist-packages/pennylane/gradients/parameter_shift.py in param_shift(tape, argnum, shifts, gradient_recipes, fallback_fn, f0, broadcast)
   1110     transform_name = "parameter-shift rule"
   1111     assert_no_state_returns(tape.measurements, transform_name)
-> 1112     assert_no_trainable_tape_batching(tape, transform_name)
   1113 
   1114     if argnum is None and not tape.trainable_params:

/usr/local/lib/python3.10/dist-packages/pennylane/gradients/gradient_transform.py in assert_no_trainable_tape_batching(tape, transform_name)
     95     for idx in range(len(tape.trainable_params)):
     96         if tape.get_operation(idx)[0].batch_size is not None:
---> 97             raise NotImplementedError(
     98                 "Computing the gradient of broadcasted tapes with respect to the broadcasted "
     99                 f"parameters using the {transform_name} gradient transform is currently not "

NotImplementedError: Computing the gradient of broadcasted tapes with respect to the broadcasted parameters using the parameter-shift rule gradient transform is currently not supported. See #4462 for details.
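
Until differentiating broadcasted trainable parameters is supported by parameter-shift (see #4462), here are two sketches of possible workarounds for the script above, neither verified here: stop requesting gradients with respect to the input data (only the TorchLayer weights then remain trainable, and they are not broadcasted), or expand the broadcasted tape into one tape per sample with qml.transforms.broadcast_expand before differentiating.

# Workaround sketch 1 (assumption: gradients w.r.t. the data are not needed):
# build X without requires_grad=True so the broadcasted inputs are not trainable.
X = torch.tensor(X).float()

# Workaround sketch 2 (assumption: qml.transforms.broadcast_expand can wrap the
# QNode handed to TorchLayer): split the batched execution into per-sample tapes,
# at the cost of additional circuit evaluations.
@qml.transforms.broadcast_expand
@qml.qnode(dev, diff_method="parameter-shift")
def qnode(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]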

System information

Name: PennyLane
Version: 0.37.0
Summary: PennyLane is a cross-platform Python library for quantum computing, quantum machine learning, and quantum chemistry. Train a quantum computer the same way as a neural network.
Home-page: https://github.com/PennyLaneAI/pennylane
Author: 
Author-email: 
License: Apache License 2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: appdirs, autograd, autoray, cachetools, networkx, numpy, packaging, pennylane-lightning, requests, rustworkx, scipy, semantic-version, toml, typing-extensions
Required-by: PennyLane_Lightning

Platform info:           Linux-6.1.85+-x86_64-with-glibc2.35
Python version:          3.10.12
Numpy version:           1.26.4
Scipy version:           1.13.1
Installed devices:
- lightning.qubit (PennyLane_Lightning-0.37.0)
- default.clifford (PennyLane-0.37.0)
- default.gaussian (PennyLane-0.37.0)
- default.mixed (PennyLane-0.37.0)
- default.qubit (PennyLane-0.37.0)
- default.qubit.autograd (PennyLane-0.37.0)
- default.qubit.jax (PennyLane-0.37.0)
- default.qubit.legacy (PennyLane-0.37.0)
- default.qubit.tf (PennyLane-0.37.0)
- default.qubit.torch (PennyLane-0.37.0)
- default.qutrit (PennyLane-0.37.0)
- default.qutrit.mixed (PennyLane-0.37.0)
- default.tensor (PennyLane-0.37.0)
- null.qubit (PennyLane-0.37.0)

Existing GitHub issues

  • [X] I have searched existing GitHub issues to make sure the issue does not already exist.

CatalinaAlbornoz · Aug 06 '24 22:08