Issues with fine-tuning a quantsim model in Keras
Using AIMET version 1.16.2 with tf-cpu.
Issue: when following the AIMET tutorial for Keras, I run into errors when re-loading the graph to fine-tune a quantsim model.
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import preprocess_input

from aimet_common.defs import QuantScheme
from aimet_tensorflow import quantsim
from aimet_tensorflow.common import graph_eval
from aimet_tensorflow.utils import graph_saver
from aimet_tensorflow.utils.convert_tf_sess_to_keras import (
    save_tf_session_single_gpu,
    load_tf_sess_variables_to_keras_single_gpu
)

# instantiating a resnet50 model, wrapping it in a quantsim model, and saving it
model = tf.keras.applications.resnet50.ResNet50()
sess = tf.keras.backend.get_session()
input_op, output_op = model.inputs[0].name, model.outputs[0].name
sim = quantsim.QuantizationSimModel(
    sess,
    starting_op_names=[input_op[:input_op.index(":")]],
    output_op_names=[output_op[:output_op.index(":")]],
    quant_scheme=QuantScheme.post_training_tf_enhanced,
    config_file='/usr/local/lib/python3.6/dist-packages/aimet_common/quantsim_config/default_config.json'
)
save_tf_session_single_gpu(sim.session, "/tmp/aimet", input_op, output_op)

# loading the resnet50 + quantsim model into a subclassed keras model
tf.keras.backend.clear_session()
tf.keras.backend.set_learning_phase(1)
model = load_tf_sess_variables_to_keras_single_gpu("/tmp/aimet", [])

# compiling the model and attempting to train on random data
model.compile("adam", "categorical_crossentropy")
x_train = np.random.rand(32, 224, 224, 3)
y_train = np.random.randint(0, 1000, size=(32,))
x_train = preprocess_input(x_train)
y_train = tf.keras.utils.to_categorical(y_train, 1000)
history = model.fit(x_train, y_train, epochs=1, batch_size=1, shuffle=False)
Full exception stack trace:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in get_attr(self, name)
2379 with c_api_util.tf_buffer() as buf:
-> 2380 c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf)
2381 data = c_api.TF_GetBuffer(buf)
InvalidArgumentError: Operation 'model/StatefulPartitionedCall' has no attr named '_XlaCompile'.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn)
344 try:
--> 345 xla_compile = op.get_attr("_XlaCompile")
346 xla_separate_compiled_gradients = op.get_attr(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in get_attr(self, name)
2383 # Convert to ValueError for backwards compatibility.
-> 2384 raise ValueError(str(e))
2385 x = attr_value_pb2.AttrValue()
ValueError: Operation 'model/StatefulPartitionedCall' has no attr named '_XlaCompile'.
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in get_attr(self, name)
2379 with c_api_util.tf_buffer() as buf:
-> 2380 c_api.TF_OperationGetAttrValueProto(self._c_op, name, buf)
2381 data = c_api.TF_GetBuffer(buf)
InvalidArgumentError: Operation 'conv5_block3_3_bn_1/cond/ReadVariableOp_2/Switch' has no attr named '_XlaCompile'.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn)
344 try:
--> 345 xla_compile = op.get_attr("_XlaCompile")
346 xla_separate_compiled_gradients = op.get_attr(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in get_attr(self, name)
2383 # Convert to ValueError for backwards compatibility.
-> 2384 raise ValueError(str(e))
2385 x = attr_value_pb2.AttrValue()
ValueError: Operation 'conv5_block3_3_bn_1/cond/ReadVariableOp_2/Switch' has no attr named '_XlaCompile'.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
471 preferred_dtype=default_dtype,
--> 472 as_ref=input_arg.is_ref)
473 if input_arg.number_attr and len(
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in internal_convert_n_to_tensor(values, dtype, name, as_ref, preferred_dtype, ctx)
1361 preferred_dtype=preferred_dtype,
-> 1362 ctx=ctx))
1363 return ret
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py in internal_convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, ctx, accepted_result_types)
1272 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r" %
-> 1273 (dtype.name, value.dtype.name, value))
1274 return value
ValueError: Tensor conversion requested dtype float32 for Tensor with dtype resource: <tf.Tensor 'gradients/conv5_block3_3_bn_1/cond/ReadVariableOp_2/Switch_grad/Switch_1:1' shape=() dtype=resource>
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-15-7cb75defa0d7> in <module>
8 y_train = tf.keras.utils.to_categorical(y_train, 1000)
9
---> 10 history = model.fit(x_train, y_train, epochs=1, batch_size=1, shuffle=False)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
725 max_queue_size=max_queue_size,
726 workers=workers,
--> 727 use_multiprocessing=use_multiprocessing)
728
729 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
673 validation_steps=validation_steps,
674 validation_freq=validation_freq,
--> 675 steps_name='steps_per_epoch')
676
677 def evaluate(self,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in model_iteration(model, inputs, targets, sample_weights, batch_size, epochs, verbose, callbacks, val_inputs, val_targets, val_sample_weights, shuffle, initial_epoch, steps_per_epoch, validation_steps, validation_freq, mode, validation_in_fit, prepared_feed_values_from_dataset, steps_name, **kwargs)
187 # function we recompile the metrics based on the updated
188 # sample_weight_mode value.
--> 189 f = _make_execution_function(model, mode)
190
191 # Prepare validation data. Hold references to the iterator and the input list
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_arrays.py in _make_execution_function(model, mode)
564 if model._distribution_strategy:
565 return distributed_training_utils._make_execution_function(model, mode)
--> 566 return model._make_execution_function(mode)
567
568
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _make_execution_function(self, mode)
2181 def _make_execution_function(self, mode):
2182 if mode == ModeKeys.TRAIN:
-> 2183 self._make_train_function()
2184 return self.train_function
2185 if mode == ModeKeys.TEST:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in _make_train_function(self)
2113 # Training updates
2114 updates = self.optimizer.get_updates(
-> 2115 params=self._collected_trainable_weights, loss=self.total_loss)
2116 # Unconditional updates
2117 updates += self.get_updates_for(None)
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in get_updates(self, loss, params)
496
497 def get_updates(self, loss, params):
--> 498 grads = self.get_gradients(loss, params)
499 grads_and_vars = list(zip(grads, params))
500 self._assert_valid_dtypes([
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py in get_gradients(self, loss, params)
387 with backend.get_graph().as_default(), backend.name_scope(self._name +
388 "/gradients"):
--> 389 grads = gradients.gradients(loss, params)
390 for grad, param in zip(grads, params):
391 if grad is None:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_impl.py in gradients(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients)
156 ys, xs, grad_ys, name, colocate_gradients_with_ops,
157 gate_gradients, aggregation_method, stop_gradients,
--> 158 unconnected_gradients)
159 # pylint: enable=protected-access
160
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
677 # functions.
678 in_grads = _MaybeCompile(grad_scope, op, func_call,
--> 679 lambda: grad_fn(op, *out_grads))
680 else:
681 # For function call ops, we add a 'SymbolicGradient'
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn)
348 xla_scope = op.get_attr("_XlaScope").decode()
349 except ValueError:
--> 350 return grad_fn() # Exit early
351
352 if not xla_compile:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in <lambda>()
677 # functions.
678 in_grads = _MaybeCompile(grad_scope, op, func_call,
--> 679 lambda: grad_fn(op, *out_grads))
680 else:
681 # For function call ops, we add a 'SymbolicGradient'
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _registered_grad_fn(op, *doutputs)
713 @ops.RegisterGradient(self._gradient_name)
714 def _registered_grad_fn(op, *doutputs): # pylint: disable=unused-variable
--> 715 return self._rewrite_forward_and_call_backward(op, *doutputs)
716 return self._gradient_name
717
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _rewrite_forward_and_call_backward(self, op, *doutputs)
659 def _rewrite_forward_and_call_backward(self, op, *doutputs):
660 """Add outputs to the forward call and feed them to the grad function."""
--> 661 forward_function, backwards_function = self.forward_backward(len(doutputs))
662 if not backwards_function.outputs:
663 return []
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in forward_backward(self, num_doutputs)
580 if forward_backward is not None:
581 return forward_backward
--> 582 forward, backward = self._construct_forward_backward(num_doutputs)
583 self._cached_function_pairs[num_doutputs] = (forward, backward)
584 return forward, backward
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _construct_forward_backward(self, num_doutputs)
627 args=[], kwargs={},
628 signature=signature,
--> 629 func_graph=backwards_graph)
630 backwards_graph_captures = backwards_graph.external_captures
631 captures_from_forward = [
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/eager/function.py in _backprop_function(*grad_ys)
617 self._func_graph.inputs,
618 grad_ys=grad_ys,
--> 619 src_graph=self._func_graph)
620
621 with self._func_graph.as_default():
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients, unconnected_gradients, src_graph)
677 # functions.
678 in_grads = _MaybeCompile(grad_scope, op, func_call,
--> 679 lambda: grad_fn(op, *out_grads))
680 else:
681 # For function call ops, we add a 'SymbolicGradient'
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in _MaybeCompile(scope, op, func, grad_fn)
348 xla_scope = op.get_attr("_XlaScope").decode()
349 except ValueError:
--> 350 return grad_fn() # Exit early
351
352 if not xla_compile:
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gradients_util.py in <lambda>()
677 # functions.
678 in_grads = _MaybeCompile(grad_scope, op, func_call,
--> 679 lambda: grad_fn(op, *out_grads))
680 else:
681 # For function call ops, we add a 'SymbolicGradient'
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/control_flow_grad.py in _SwitchGrad(op, *grad)
86 false_grad = switch(grad[0], op.inputs[1])[0]
87 true_grad = switch(grad[1], op.inputs[1])[1]
---> 88 return merge([false_grad, true_grad])[0], None
89
90
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/control_flow_ops.py in merge(inputs, name)
399 return gen_control_flow_ops.ref_merge(inputs, name)
400 else:
--> 401 return gen_control_flow_ops.merge(inputs, name)
402 else:
403 # If there is a mix of tensors and indexed slices, then convert the
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_control_flow_ops.py in merge(inputs, name)
444 _attr_N = len(inputs)
445 _, _, _op = _op_def_lib._apply_op_helper(
--> 446 "Merge", inputs=inputs, name=name)
447 _result = _op.outputs[:]
448 _inputs_flat = _op.inputs
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
498 (prefix, dtype.name))
499 else:
--> 500 raise TypeError("%s that don't all match." % prefix)
501 else:
502 raise TypeError(
TypeError: Tensors in list passed to 'inputs' of 'Merge' Op have types [float32, resource] that don't all match.
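For diagnosis, here is a minimal sketch (assuming the repro script above has just run, so the reloaded model lives in the current default Keras session/graph) that uses only standard TF 1.x graph inspection to list the batch-norm cond Switch/Merge ops named in the traceback together with their input dtypes:
# diagnostic sketch: list control-flow ops from the batch-norm cond branches
# in the reloaded graph, together with the dtypes of their inputs
graph = tf.keras.backend.get_session().graph
for op in graph.get_operations():
    if op.type in ("Switch", "Merge") and "/cond/" in op.name:
        print(op.name, op.type, [t.dtype.name for t in op.inputs])
Several of these Switch ops take resource-dtype variable handles as inputs, which is consistent with the [float32, resource] Merge dtype mismatch reported in the TypeError above.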
@btuan Thank you for reporting this issue. @quic-hanwxion, could you take a quick look at this?
Hi @btuan, apologies for not getting back to you on this earlier. At the moment, we have only tested the recipe for AIMET compression support for Keras models; we have yet to test and extend support for QAT with Keras models. Please track this issue for updates: https://github.com/quic/aimet/issues/765
Closing this issue due to inactivity. Please re-open it or create a new issue if you need further help.