tensorflow
tensorflow copied to clipboard
Failure converting Gemma 2B models to TFLite
I tried converting the Google Gemma 2B model to TFLite, but the conversion ends in failure.
1. System information
- Ubuntu 22.04
- TensorFlow installation (installed with keras-nlp) :
- TensorFlow library (installed with keras-nlp):
2. Code
import os
import keras
import os
import numpy as np
import keras_nlp
import tensorflow as tf
import tensorflow_text as tf_text
from tensorflow import keras
from tensorflow.lite.python import interpreter
import time
# Kaggle credentials used when downloading the preset, plus the Keras backend
# selection. The credential values look like redacted placeholders.
# NOTE(review): `keras` is already imported above, before KERAS_BACKEND is
# set here, so this backend assignment may come too late to have any effect
# - confirm, and move it ahead of the keras import if it is meant to matter.
os.environ.update(
    {
        "KAGGLE_USERNAME": "rag",
        "KAGGLE_KEY": "e7c",
        "KERAS_BACKEND": "tensorflow",  # Alternatives: "jax" or "torch".
    }
)

# Preprocessor that packs prompts to a fixed length of 4096 tokens and
# appends an end token.
preprocessor = keras_nlp.models.GemmaCausalLMPreprocessor.from_preset(
    "gemma_2b_en",
    sequence_length=4096,
    add_end_token=True,
)

# The Gemma 2B causal language model that is converted to TFLite below.
generator = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
def run_inference(input, generate_tflite):
    """Run one prompt through the converted TFLite model and print the result.

    Args:
        input: Prompt handed to ``preprocessor.generate_preprocess``.
        generate_tflite: Serialized TFLite model content (the value returned
            by ``converter.convert()``).
    """
    # tf_text's registrar makes the TensorFlow Text custom ops available to
    # the interpreter.
    interp = interpreter.InterpreterWithCustomOps(
        model_content=generate_tflite,
        custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS,
    )

    preprocessor_output = preprocessor.generate_preprocess(
        input, sequence_length=preprocessor.sequence_length
    )

    # Use a local name that does not shadow the module-level `generator`
    # model.
    signature_runner = interp.get_signature_runner('serving_default')

    # A TFLite signature runner accepts its inputs only as keyword
    # arguments, so the preprocessed dict has to be unpacked rather than
    # passed as a single positional argument.
    output = signature_runner(**preprocessor_output)

    # NOTE(review): this assumes the signature exposes a single output named
    # "output_0"; if the exported function returns a dict, the output names
    # may follow its keys instead - confirm against the converted model.
    output = preprocessor.generate_postprocess(output["output_0"])
    print("\nGenerated with TFLite:\n", output)
# Trace the model's generate function with a fixed input signature so that it
# can be handed to the TFLite converter.
generate_function = generator.make_generate_function()

# Reuse the preprocessor's configured length instead of repeating the literal,
# so the exported signature cannot drift from what run_inference feeds in.
sequence_length = preprocessor.sequence_length

# tf.TensorSpec defaults to float32. The dtypes are spelled out so the
# exported signature matches the preprocessor output fed at inference time.
# NOTE(review): int32 token ids and a boolean padding mask are what the
# keras_nlp preprocessor is expected to emit - confirm for the installed
# version.
concrete_func = generate_function.get_concrete_function({
    "token_ids": tf.TensorSpec([None, sequence_length], dtype=tf.int32),
    "padding_mask": tf.TensorSpec([None, sequence_length], dtype=tf.bool),
})

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [concrete_func], generator
)
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS,  # enable TensorFlow ops.
]
converter.allow_custom_ops = True
converter.target_spec.experimental_select_user_tf_ops = [
    "UnsortedSegmentJoin",
    "UpperBound",
]
converter._experimental_guarantee_all_funcs_one_use = True
generate_tflite = converter.convert()

# Persist the converted model before running inference so the artifact is
# kept even if the inference step fails. The file name is unchanged from the
# original script, although the model being converted is Gemma.
with open('unquantized_mistral.tflite', 'wb') as f:
    f.write(generate_tflite)

run_inference("I'm enjoying a", generate_tflite)
3. Failure after conversion
I am getting this error:
tensorflow/core.py":65:1))))))))))))))))))))))))))]): error: missing attribute 'value' LLVM ERROR: Failed to infer result type(s). Aborted (core dumped)
5. (optional) Any other info / logs
2024-02-22 06:34:41.094712: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-02-22 06:34:41.094742: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-02-22 06:34:41.095691: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp58p378bn
2024-02-22 06:34:41.140303: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-02-22 06:34:41.140329: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmp58p378bn
2024-02-22 06:34:41.233389: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-02-22 06:34:41.264724: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-02-22 06:34:43.697440: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmp58p378bn
2024-02-22 06:34:44.189111: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 3093423 microseconds.
2024-02-22 06:34:45.009212: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
loc(fused["ReadVariableOp:", callsite("decoder_block_0_1/attention_1/attention_output_1/Cast/ReadVariableOp@__inference_generate_step_12229"("/workspace/gem.py":38:1) at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":258:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":235:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":212:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":214:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_decoder_block.py":147:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_attention.py":193:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at 
callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/einsum_dense.py":218:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/numpy.py":2414:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/numpy.py":90:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/numpy.py":91:1 at "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/core.py":65:1))))))))))))))))))))))))))]): error: missing attribute 'value'
LLVM ERROR: Failed to infer result type(s).
Aborted (core dumped)