The saved Keras model cannot be loaded.
After training, I saved my model, and now I can't load it. I've tried everything, but it always gives me a custom_objects error.
I based my code on the miniature GPT example in the Keras docs.
code:
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import ops
from tensorflow.keras.layers import TextVectorization
import numpy as np
import string
import random
import tensorflow
import tensorflow.data as tf_data
import tensorflow.strings as tf_strings
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """
    Mask the upper half of the dot product matrix in self attention.
    This prevents flow of information from future tokens to current token.
    1's in the lower triangle, counting from the lower right corner.
    """
    i = ops.arange(n_dest)[:, None]
    j = ops.arange(n_src)
    m = i >= j - n_src + n_dest
    mask = ops.cast(m, dtype)
    mask = ops.reshape(mask, [1, n_dest, n_src])
    mult = ops.concatenate(
        [ops.expand_dims(batch_size, -1), ops.convert_to_tensor([1, 1])], 0
    )
    return ops.tile(mask, mult)
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
vocab_size = 20000 # Only consider the top 20k words
maxlen = 80 # Max sequence size
embed_dim = 256 # Embedding size for each token
num_heads = 4 # Number of attention heads
feed_forward_dim = 256 # Hidden layer size in feed forward network inside transformer
def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype="int32")
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    transformer_block = TransformerBlock(embed_dim, num_heads, feed_forward_dim)
    x = transformer_block(x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        "Adam",
        loss=[loss_fn, None],
    )  # No loss and optimization based on word embeddings from transformer block
    return model
batch_size = 128
# The dataset contains each review in a separate text file
# The text files are present in four different folders
# Create a list all files
filenames = [
    "datasets/data_1.txt",
    "datasets/data_2.txt",
    "datasets/data_3.txt",
    "datasets/data_4.txt",
]
# directories = [
# "aclImdb/train/pos",
# "aclImdb/train/neg",
# "aclImdb/test/pos",
# "aclImdb/test/neg",
# ]
# for dir in directories:
# for f in os.listdir(dir):
# filenames.append(os.path.join(dir, f))
print(f"{len(filenames)} files")
# Create a dataset from text files
random.shuffle(filenames)
text_ds = tf_data.TextLineDataset(filenames)
text_ds = text_ds.shuffle(buffer_size=256)
text_ds = text_ds.batch(batch_size)
# print(dir(text_ds))
def custom_standardization(input_string):
    """Remove html line-break tags and handle punctuation"""
    lowercased = tf_strings.lower(input_string)
    stripped_html = tf_strings.regex_replace(lowercased, "<br />", " ")
    return tf_strings.regex_replace(stripped_html, f"([{string.punctuation}])", r" \1")
# Create a vectorization layer and adapt it to the text
vectorize_layer = TextVectorization(
    standardize=custom_standardization,
    max_tokens=vocab_size - 1,
    output_mode="int",
    output_sequence_length=maxlen + 1,
)
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary() # To get words back from token indices
def prepare_lm_inputs_labels(text):
    """
    Shift word sequences by 1 position so that the target for position (i) is
    word at position (i+1). The model will use all words up till position (i)
    to predict the next word.
    """
    text = tensorflow.expand_dims(text, -1)
    tokenized_sentences = vectorize_layer(text)
    x = tokenized_sentences[:, :-1]
    y = tokenized_sentences[:, 1:]
    print(tokenized_sentences)
    print(y)
    return x, y
text_ds = text_ds.map(prepare_lm_inputs_labels, num_parallel_calls=tf_data.AUTOTUNE)
text_ds = text_ds.prefetch(tf_data.AUTOTUNE)
class TextGenerator(keras.callbacks.Callback):
    """A callback to generate text from a trained model.
    1. Feed some starting prompt to the model
    2. Predict probabilities for the next token
    3. Sample the next token and add it to the next input

    Arguments:
        max_tokens: Integer, the number of tokens to be generated after prompt.
        start_tokens: List of integers, the token indices for the starting prompt.
        index_to_word: List of strings, obtained from the TextVectorization layer.
        top_k: Integer, sample from the `top_k` token predictions.
        print_every: Integer, print after this many epochs.
    """

    def __init__(
        self, max_tokens, start_tokens, index_to_word, top_k=10, print_every=1
    ):
        self.max_tokens = max_tokens
        self.start_tokens = start_tokens
        self.index_to_word = index_to_word
        self.print_every = print_every
        self.k = top_k

    def sample_from(self, logits):
        logits, indices = ops.top_k(logits, k=self.k, sorted=True)
        indices = np.asarray(indices).astype("int32")
        preds = keras.activations.softmax(ops.expand_dims(logits, 0))[0]
        preds = np.asarray(preds).astype("float32")
        return np.random.choice(indices, p=preds)

    def detokenize(self, number):
        return self.index_to_word[number]

    def on_epoch_end(self, epoch, logs=None):
        start_tokens = [_ for _ in self.start_tokens]
        if (epoch + 1) % self.print_every != 0:
            return
        num_tokens_generated = 0
        tokens_generated = []
        while num_tokens_generated <= self.max_tokens:
            pad_len = maxlen - len(start_tokens)
            sample_index = len(start_tokens) - 1
            if pad_len < 0:
                x = start_tokens[:maxlen]
                sample_index = maxlen - 1
            elif pad_len > 0:
                x = start_tokens + [0] * pad_len
            else:
                x = start_tokens
            x = np.array([x])
            y, _ = self.model.predict(x, verbose=0)
            sample_token = self.sample_from(y[0][sample_index])
            tokens_generated.append(sample_token)
            start_tokens.append(sample_token)
            num_tokens_generated = len(tokens_generated)
        txt = " ".join(
            [self.detokenize(_) for _ in self.start_tokens + tokens_generated]
        )
        print(f"generated text:\n{txt}\n")
# Tokenize starting prompt
word_to_index = {}
for index, word in enumerate(vocab):
    word_to_index[word] = index

start_prompt = "what is keras ?"
start_tokens = [word_to_index.get(_, 1) for _ in start_prompt.split()]
num_tokens_generated = 40
text_gen_callback = TextGenerator(num_tokens_generated, start_tokens, vocab)

max_epoch = 25  # assumed value; `max_ecpoch` was not defined in the posted snippet
model = create_model()
model.fit(text_ds, verbose=2, epochs=max_epoch, callbacks=[text_gen_callback])
model.save("model.keras")
### Now, when I try to load my model:
from tensorflow.keras import models
br_model = models.load_model("model.keras")
br_model.summary()
I'm getting these errors:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:718, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
717 try:
--> 718 instance = cls.from_config(inner_config)
719 except TypeError as e:
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\model.py:517, in Model.from_config(cls, config, custom_objects)
515 from keras.src.models.functional import functional_from_config
--> 517 return functional_from_config(
518 cls, config, custom_objects=custom_objects
519 )
521 # Either the model has a custom __init__, or the config
522 # does not contain all the information necessary to
523 # revive a Functional model. This happens when the user creates
(...)
526 # In this case, we fall back to provide all config into the
527 # constructor of the class.
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\functional.py:517, in functional_from_config(cls, config, custom_objects)
516 for layer_data in config["layers"]:
--> 517 process_layer(layer_data)
519 # Then we process nodes in order of layer depth.
520 # Nodes that cannot yet be processed (if the inbound node
521 # does not yet exist) are re-enqueued, and the process
522 # is repeated until all nodes are processed.
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\models\functional.py:501, in functional_from_config.<locals>.process_layer(layer_data)
500 else:
--> 501 layer = serialization_lib.deserialize_keras_object(
502 layer_data, custom_objects=custom_objects
503 )
504 created_layers[layer_name] = layer
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:694, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
692 return obj
--> 694 cls = _retrieve_class_or_fn(
695 class_name,
696 registered_name,
697 module,
698 obj_type="class",
699 full_config=config,
700 custom_objects=custom_objects,
701 )
703 if isinstance(cls, types.FunctionType):
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:812, in _retrieve_class_or_fn(name, registered_name, module, obj_type, full_config, custom_objects)
810 return obj
--> 812 raise TypeError(
813 f"Could not locate {obj_type} '{name}'. "
814 "Make sure custom classes are decorated with "
815 "`@keras.saving.register_keras_serializable()`. "
816 f"Full object config: {full_config}"
817 )
TypeError: Could not locate class 'TokenAndPositionEmbedding'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In[2], line 5
1 # from tensorflow.keras.utils import custom_object_scope
2 # from tensorflow.keras.utils import get_custom_objects
3 from tensorflow.keras import models
----> 5 br_model = models.load_model("E:\\Alkaou\Python Projects\\models\\br_model.keras")
7 br_model.summary()
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_api.py:176, in load_model(filepath, custom_objects, compile, safe_mode)
173 is_keras_zip = True
175 if is_keras_zip:
--> 176 return saving_lib.load_model(
177 filepath,
178 custom_objects=custom_objects,
179 compile=compile,
180 safe_mode=safe_mode,
181 )
182 if str(filepath).endswith((".h5", ".hdf5")):
183 return legacy_h5_format.load_model_from_hdf5(
184 filepath, custom_objects=custom_objects, compile=compile
185 )
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_lib.py:152, in load_model(filepath, custom_objects, compile, safe_mode)
147 raise ValueError(
148 "Invalid filename: expected a `.keras` extension. "
149 f"Received: filepath={filepath}"
150 )
151 with open(filepath, "rb") as f:
--> 152 return _load_model_from_fileobj(
153 f, custom_objects, compile, safe_mode
154 )
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\saving_lib.py:170, in _load_model_from_fileobj(fileobj, custom_objects, compile, safe_mode)
168 # Construct the model from the configuration file in the archive.
169 with ObjectSharingScope():
--> 170 model = deserialize_keras_object(
171 config_dict, custom_objects, safe_mode=safe_mode
172 )
174 all_filenames = zf.namelist()
175 if _VARS_FNAME + ".h5" in all_filenames:
File E:\Alkaou\Python Projects\ia_code\venv\lib\site-packages\keras\src\saving\serialization_lib.py:720, in deserialize_keras_object(config, custom_objects, safe_mode, **kwargs)
718 instance = cls.from_config(inner_config)
719 except TypeError as e:
--> 720 raise TypeError(
721 f"{cls} could not be deserialized properly. Please"
722 " ensure that components that are Python object"
723 " instances (layers, models, etc.) returned by"
724 " `get_config()` are explicitly deserialized in the"
725 " model's `from_config()` method."
726 f"\n\nconfig={config}.\n\nException encountered: {e}"
727 )
728 build_config = config.get("build_config", None)
729 if build_config and not instance.built:
TypeError: <class 'keras.src.models.functional.Functional'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.
config={'module': 'keras.src.models.functional', 'class_name': 'Functional', 'config': {'name': 'functional_11', 'trainable': True, 'layers': [{'module': 'keras.layers', 'class_name': 'InputLayer', 'config': {'batch_shape': [None, 80], 'dtype': 'int32', 'sparse': False, 'name': 'input_layer_8'}, 'registered_name': None, 'name': 'input_layer_8', 'inbound_nodes': []}, {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}, {'module': None, 'class_name': 'TransformerBlock', 'config': {'embed_dim': 256, 'num_heads': 2, 'ff_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TransformerBlock', 'build_config': {'input_shape': [None, 80, 256]}, 'name': 'transformer_block_3', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80, 256], 'dtype': 'float32', 'keras_history': ['token_and_position_embedding_11', 0, 0]}}], 'kwargs': {}}]}, {'module': 'keras.layers', 'class_name': 'Dense', 'config': {'name': 'dense_11', 'trainable': True, 'dtype': 'float32', 'units': 20000, 'activation': 'linear', 'use_bias': True, 'kernel_initializer': {'module': 'keras.initializers', 'class_name': 'GlorotUniform', 'config': {'seed': None}, 'registered_name': None}, 'bias_initializer': {'module': 'keras.initializers', 'class_name': 'Zeros', 'config': {}, 'registered_name': None}, 'kernel_regularizer': None, 'bias_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}, 'registered_name': None, 'build_config': {'input_shape': [None, 80, 256]}, 'name': 'dense_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80, 256], 'dtype': 'float32', 'keras_history': ['transformer_block_3', 0, 0]}}], 'kwargs': {}}]}], 'input_layers': [['input_layer_8', 0, 0]], 'output_layers': [['dense_11', 0, 0], ['transformer_block_3', 0, 0]]}, 'registered_name': 'Functional', 'build_config': {'input_shape': None}, 'compile_config': {'optimizer': 'Adam', 'loss': [{'module': 'keras.losses', 'class_name': 'SparseCategoricalCrossentropy', 'config': {'name': 'sparse_categorical_crossentropy', 'reduction': 'sum_over_batch_size', 'from_logits': True, 'ignore_class': None}, 'registered_name': None}, None], 'loss_weights': None, 'metrics': None, 'weighted_metrics': None, 'run_eagerly': False, 'steps_per_execution': 1, 'jit_compile': False}}.
Exception encountered: Could not locate class 'TokenAndPositionEmbedding'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'class_name': 'TokenAndPositionEmbedding', 'config': {'maxlen': 80, 'vocab_size': 20000, 'embed_dim': 256, 'trainable': True, 'dtype': 'float32'}, 'registered_name': 'TokenAndPositionEmbedding', 'build_config': {'input_shape': [None, 80]}, 'name': 'token_and_position_embedding_11', 'inbound_nodes': [{'args': [{'class_name': '__keras_tensor__', 'config': {'shape': [None, 80], 'dtype': 'int32', 'keras_history': ['input_layer_8', 0, 0]}}], 'kwargs': {}}]}
Versions:
Python 3.10.10
tensorflow==2.16.1
keras==3.3.3
You can follow the instructions in the error message and register the custom objects.
Here are the multiple ways of doing it: https://keras.io/guides/serialization_and_saving/#custom-objects
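For instance, one of the ways from that guide is a custom object scope around the load call. This is only a minimal sketch, assuming the TokenAndPositionEmbedding and TransformerBlock classes from the script above are defined (or imported) in the loading script; note their __init__ methods also need to accept and forward **kwargs, since the saved config carries extra keys such as trainable and dtype:

from tensorflow import keras

# Any class listed in the scope can be resolved by name while the model
# config is deserialized, without passing a custom_objects= argument.
with keras.saving.custom_object_scope(
    {
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        "TransformerBlock": TransformerBlock,
    }
):
    br_model = keras.models.load_model("model.keras")
    br_model.summary()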
I've tried everything possible, but I still can't load my saved model. I made a git repository; please visit my code to help me.
The code is here:
https://github.com/alkaou/GenIA_LLM.git
You are writing your own Keras custom layers, but Keras passes its own arguments (like trainable or non-trainable) into its layers, so you should pass **kwargs through to all the layers and models, because we don't know what arguments are passed during training.
Either you take the arguments passed to the custom classes from outside create_model(), like these:
vocab_size = 20000  # Only consider the top 20k words
maxlen = 80  # Max sequence size
embed_dim = 256  # Embedding size for each token
num_heads = 4  # Number of attention heads
feed_forward_dim = 256  # Hidden layer size in feed forward network inside transformer
and give them to custom_objects in load_model's arguments, or you can return them in get_config().
Method 1:
class TransformerBlock(layers.Layer):
    # Accept **kwargs, which carries the arguments (such as trainable or dtype)
    # that Keras supplies when building or reloading the layer.
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)  # pass it to super so it properly flows into the original keras.layers.Layer
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)


# same with TokenAndPositionEmbedding
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
Then, while loading, you have to pass all of the arguments into custom_objects in load_model:
keras.models.load_model(
    "br_model.keras",
    custom_objects={
        "vocab_size": 20000,
        "maxlen": 80,
        "embed_dim": 256,
        "num_heads": 4,
        "feed_forward_dim": 256,
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        "TransformerBlock": TransformerBlock,
    },
)
Method 2: or you could add get_config() and from_config() to the custom layers and avoid passing the hyperparameters in custom_objects:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = ops.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, "bool")
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        config = super().get_config().copy()
        config.update(
            {
                "embed_dim": self.att.key_dim,
                "num_heads": self.att.num_heads,
                "ff_dim": self.ffn.layers[0].units,
                "rate": self.dropout1.rate,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)


class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        config = super().get_config().copy()
        config.update(
            {
                "maxlen": self.pos_emb.input_dim,
                "vocab_size": self.token_emb.input_dim,
                "embed_dim": self.token_emb.output_dim,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        return cls(**config)
And when loading the model:
keras.models.load_model(
    "br_model.keras",
    custom_objects={
        "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        "TransformerBlock": TransformerBlock,
    },
)
See whether this solves it
Thank you very much. It's working.
Brother, close this as completed. Thank you.
Is it possible to use the Keras register_serializable decorator here and avoid using the custom scope?
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
Just another user here, @emi-dm. I think the answer is yes (in fact, it is recommended). I'd assume that adding the decorator to Method 2 should work without declaring custom objects.
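For what it's worth, here is a minimal sketch of that idea, assuming the Method 2 classes above (with **kwargs and get_config()); the package name "gen_ia_llm" is just illustrative. Note the decorator has to be in place when the model is saved so the registered name ends up in the .keras file; a file saved before registering may still need custom_objects:

import keras
from keras import layers, ops

# Registering the class records it in Keras' serialization registry under
# "gen_ia_llm>TokenAndPositionEmbedding", so load_model() can resolve it by name.
@keras.saving.register_keras_serializable(package="gen_ia_llm")
class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(0, maxlen, 1)
        return self.token_emb(x) + self.pos_emb(positions)

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "maxlen": self.pos_emb.input_dim,
                "vocab_size": self.token_emb.input_dim,
                "embed_dim": self.token_emb.output_dim,
            }
        )
        return config

# TransformerBlock would be decorated the same way. As long as this module
# is imported before loading, no custom_objects dict or scope is needed:
br_model = keras.models.load_model("model.keras")
br_model.summary()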
This issue is stale because it has been open for 14 days with no activity. It will be closed if no further activity occurs. Thank you.
This issue was closed because it has been inactive for 28 days. Please reopen if you'd like to work on this further.