CTranslate2
bug: alibi + multi_query_attention crash
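The custom loader below is essentially a copy of the built-in GPTBigCodeLoader with alibi=True (and alibi_use_positive_positions=True) substituted for the model's learned position embeddings. The conversion itself completes, but generation from the converted model crashes with the ValueError at the end of this report. Full repro: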
try:
    import transformers
except ImportError:
    pass

import ctranslate2
from ctranslate2.specs import transformer_spec
from ctranslate2.converters.transformers import (
    TransformersConverter,
    register_loader,
    ModelLoader,
    _SUPPORTED_ACTIVATIONS,
)
@register_loader("GPTBigCodeConfig")
class GPTBigCodeMHALoader(ModelLoader):
    @property
    def architecture_name(self):
        return "GPTBigCodeForCausalLM"

    def get_model_spec(self, model):
        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
            model.config.n_layer,
            model.config.n_head,
            pre_norm=True,
            activation=_SUPPORTED_ACTIVATIONS[model.config.activation_function],
            multi_query_attention=True,
            alibi=True,
            alibi_use_positive_positions=True,
        )
        self.set_decoder(spec.decoder, model.transformer)
        self.set_linear(spec.decoder.projection, model.lm_head)
        return spec

    def set_vocabulary(self, spec, tokens):
        spec.register_vocabulary(tokens)

    def get_vocabulary(self, model, tokenizer):
        tokens = super().get_vocabulary(model, tokenizer)
        extra_ids = model.config.vocab_size - len(tokens)
        for i in range(extra_ids):
            tokens.append("<extra_id_%d>" % i)
        return tokens

    def set_config(self, config, model, tokenizer):
        config.bos_token = tokenizer.bos_token
        config.eos_token = tokenizer.eos_token
        config.unk_token = tokenizer.unk_token

    def set_decoder(self, spec, module):
        spec.scale_embeddings = False
        self.set_embeddings(spec.embeddings, module.wte)
        # The learned position embeddings are intentionally skipped,
        # since ALiBi is enabled in the spec above:
        # self.set_position_encodings(spec.position_encodings, module.wpe)
        self.set_layer_norm(spec.layer_norm, module.ln_f)
        for layer_spec, layer in zip(spec.layer, module.h):
            self.set_layer_norm(layer_spec.self_attention.layer_norm, layer.ln_1)
            self.set_linear(layer_spec.self_attention.linear[0], layer.attn.c_attn)
            self.set_linear(layer_spec.self_attention.linear[1], layer.attn.c_proj)
            self.set_layer_norm(layer_spec.ffn.layer_norm, layer.ln_2)
            self.set_linear(layer_spec.ffn.linear_0, layer.mlp.c_fc)
            self.set_linear(layer_spec.ffn.linear_1, layer.mlp.c_proj)
converter = TransformersConverter(
    "bigcode/gpt_bigcode-santacoder",
    load_as_float16=False,
    low_cpu_mem_usage=True,
    trust_remote_code=False,
)
converter.convert("./bigcode_alibi", force=True)

generator = ctranslate2.Generator("./bigcode_alibi")
results = generator.generate_batch([["python"]], max_length=100)
Generation then fails with:
ValueError: can't index dimension 3 for a storage with rank 3
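For comparison, here is a minimal sketch of the same conversion without registering the custom loader, so that CTranslate2's built-in GPTBigCodeLoader (multi-query attention with the model's learned wpe position embeddings, no ALiBi) handles the model. This is only a sanity check under the assumption that the stock path works; the output directory name ./bigcode_no_alibi is made up for the example.

# Sanity check (assumed-working path): same model, stock loader, no ALiBi.
import ctranslate2
from ctranslate2.converters.transformers import TransformersConverter

converter = TransformersConverter(
    "bigcode/gpt_bigcode-santacoder",
    load_as_float16=False,
    low_cpu_mem_usage=True,
    trust_remote_code=False,
)
converter.convert("./bigcode_no_alibi", force=True)  # hypothetical output dir

generator = ctranslate2.Generator("./bigcode_no_alibi")
results = generator.generate_batch([["python"]], max_length=100)
print(results[0].sequences[0])

If this path works while the ALiBi variant crashes, the rank mismatch in the ValueError presumably comes from the ALiBi bias being applied to the packed multi-query attention tensor, which has one fewer dimension than the multi-head layout the bias indexing seems to expect.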