tensorflow icon indicating copy to clipboard operation
tensorflow copied to clipboard

Failure converting Gemma 2B models to TFLite

Open RageshAntonyHM opened this issue 1 year ago • 59 comments

I tried converting the Google Gemma 2B model to TFLite, but the conversion fails.

1. System information

  • Ubuntu 22.04
  • TensorFlow installation (installed with keras-nlp) :
  • TensorFlow library (installed with keras-nlp):

2. Code

import os
import time

import numpy as np

# Credentials for downloading the Gemma preset from Kaggle.
os.environ["KAGGLE_USERNAME"] = "rag"
os.environ["KAGGLE_KEY"] = 'e7c'
# The Keras backend must be selected BEFORE keras / keras_nlp are imported;
# setting it after the import has no effect.
os.environ["KERAS_BACKEND"] = "tensorflow"  # Or "jax" or "torch".

import keras
import keras_nlp
import tensorflow as tf
import tensorflow_text as tf_text
from tensorflow import keras
from tensorflow.lite.python import interpreter

# Tokenizer/detokenizer for Gemma; sequence_length must match the
# TensorSpec used for the concrete function below (4096).
preprocessor = keras_nlp.models.GemmaCausalLMPreprocessor.from_preset(
    'gemma_2b_en', sequence_length=4096, add_end_token=True
)
# The full causal-LM model whose generate function is converted to TFLite.
generator = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")

def run_inference(input, generate_tflite):
  """Run the converted TFLite flatbuffer on a prompt and print the result.

  Args:
    input: Prompt string to feed the generator.
    generate_tflite: Serialized TFLite model bytes produced by the converter.
  """
  # The generate graph uses TF.Text ops, which must be registered as
  # custom ops on the interpreter.
  interp = interpreter.InterpreterWithCustomOps(
      model_content=generate_tflite,
      custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS)
  interp.get_signature_list()

  # Tokenize/pad the prompt into {"token_ids": ..., "padding_mask": ...}.
  preprocessor_output = preprocessor.generate_preprocess(
    input, sequence_length=preprocessor.sequence_length
  )
  # Named `runner` so the module-level Keras model `generator` is not shadowed.
  runner = interp.get_signature_runner('serving_default')
  # SignatureRunner.__call__ accepts inputs as keyword arguments only, so
  # the preprocessed dict must be unpacked rather than passed positionally.
  output = runner(**preprocessor_output)
  output = preprocessor.generate_postprocess(output["output_0"])
  print("\nGenerated with TFLite:\n", output)

# Trace the model's generate step into a concrete function for conversion.
generate_function = generator.make_generate_function()
# NOTE(review): tf.TensorSpec defaults to float32, but Gemma's token_ids
# and padding_mask are integer tensors; the float default inserts Casts
# into the traced graph and is a likely contributor to the MLIR
# "missing attribute 'value'" crash — pin the dtype explicitly.
concrete_func = generate_function.get_concrete_function({
  "token_ids": tf.TensorSpec([None, 4096], tf.int32),
  "padding_mask": tf.TensorSpec([None, 4096], tf.int32)
})


# Pass the model as the trackable object so variables stay reachable.
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func],
                                                            generator)
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
]
converter.allow_custom_ops = True
# TF ops used by the tokenizer that must be explicitly whitelisted.
converter.target_spec.experimental_select_user_tf_ops = ["UnsortedSegmentJoin", "UpperBound"]
converter._experimental_guarantee_all_funcs_one_use = True
generate_tflite = converter.convert()

# Save the flatbuffer BEFORE the inference sanity check, so a failure
# during inference does not lose the (expensive) conversion result.
# Filename fixed: this is a Gemma model, not Mistral.
with open('unquantized_gemma.tflite', 'wb') as f:
  f.write(generate_tflite)

run_inference("I'm enjoying a", generate_tflite)

3. Failure after conversion

I am getting this error:

error: missing attribute 'value'
LLVM ERROR: Failed to infer result type(s).
Aborted (core dumped)

4. (optional) Any other info / logs

2024-02-22 06:34:41.094712: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-02-22 06:34:41.094742: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-02-22 06:34:41.095691: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp58p378bn
2024-02-22 06:34:41.140303: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-02-22 06:34:41.140329: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /tmp/tmp58p378bn
2024-02-22 06:34:41.233389: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-02-22 06:34:41.264724: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-02-22 06:34:43.697440: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /tmp/tmp58p378bn
2024-02-22 06:34:44.189111: I tensorflow/cc/saved_model/loader.cc:316] SavedModel load for tags { serve }; Status: success: OK. Took 3093423 microseconds.
2024-02-22 06:34:45.009212: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
loc(fused["ReadVariableOp:", callsite("decoder_block_0_1/attention_1/attention_output_1/Cast/ReadVariableOp@__inference_generate_step_12229"("/workspace/gem.py":38:1) at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":258:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":235:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":212:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_causal_lm.py":214:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_decoder_block.py":147:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras_nlp/models/gemma/gemma_attention.py":193:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/layer.py":816:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":118:1 at 
callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/operation.py":42:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py":157:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/einsum_dense.py":218:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/ops/numpy.py":2414:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/numpy.py":90:1 at callsite("/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/numpy.py":91:1 at "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/core.py":65:1))))))))))))))))))))))))))]): error: missing attribute 'value'
LLVM ERROR: Failed to infer result type(s).
Aborted (core dumped)

RageshAntonyHM avatar Feb 22 '24 07:02 RageshAntonyHM