BELLE
How much RAM and GPU memory does the model need at runtime?
On my machine, 60 GB of RAM ran out during AutoModelForCausalLM.from_pretrained.
Running on GPU is recommended. Without quantization, loading the 7B model takes roughly 28 GB of GPU memory (7B parameters at 4 bytes each in fp32).
It is running on the GPU, but host RAM spikes first while the model is loading.
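The spike typically comes from materializing the full checkpoint in host RAM before the weights are moved to the GPU. A minimal sketch that may lower the peak, assuming a transformers version with the low_cpu_mem_usage flag (with device_map set it is already enabled by default in recent releases):

from transformers import AutoModelForCausalLM

# Stream checkpoint shards into the model one at a time instead of
# building a second full copy of the weights in host RAM first.
model = AutoModelForCausalLM.from_pretrained(
    "BelleGroup/BELLE-7B-2M",
    load_in_8bit=True,        # 8-bit quantization on load (requires bitsandbytes)
    device_map="auto",        # place layers on the available GPU(s)
    low_cpu_mem_usage=True,   # avoid the extra fp32 copy in host RAM
)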
Colab Pro:
!pip uninstall transformers -y
!pip install bitsandbytes
!pip install -q datasets loralib sentencepiece
!pip install transformers
!pip install -q git+https://github.com/huggingface/peft.git
from transformers import AutoTokenizer, AutoModelForCausalLM
import sys
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb
from datasets import load_dataset
import transformers
from transformers import AutoConfig
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model, get_peft_model_state_dict
MICRO_BATCH_SIZE = 4 # this could actually be 5 but i like powers of 2
BATCH_SIZE = 128
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3 # we don't need 3 tbh
LEARNING_RATE = 3e-4 # the Karpathy constant
CUTOFF_LEN = 256 # 256 accounts for about 96% of the data
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
VAL_SET_SIZE = 2000
model_path = "./" # You can modify the path for storing the local model
model = AutoModelForCausalLM.from_pretrained(
    "BelleGroup/BELLE-7B-2M",
    load_in_8bit=True,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("BelleGroup/BELLE-7B-2M", add_eos_token=True)
print("Human:")
line = input()
while line:
    inputs = 'Human: ' + line.strip() + '\n\nAssistant:'
    input_ids = tokenizer(inputs, return_tensors="pt").input_ids
    input_ids = input_ids.to(model.device)
    outputs = model.generate(input_ids, max_new_tokens=200, do_sample=True, top_k=30, top_p=0.85, temperature=0.35, repetition_penalty=1.2)
    rets = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    # Strip the echoed prompt so only the model's reply is shown
    print("Assistant:\n" + rets[0].strip().replace(inputs, ""))
    print("\n------------------------------------------------\nHuman:")
    line = input()
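For the fine-tuning that the peft imports and constants above are preparing for, here is a hedged sketch of the usual wiring (prepare_model_for_int8_training matches the peft version installed above; newer releases renamed it prepare_model_for_kbit_training). The target_modules value is an assumption: BELLE-7B-2M is Bloom-based, whose fused attention projection is named query_key_value, so verify it against the loaded model before relying on it.

# Freeze the int8 base weights, cast layer norms and the LM head to fp32,
# and enable input gradients, which stabilizes training on an 8-bit base.
model = prepare_model_for_int8_training(model)

lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=["query_key_value"],  # assumption: Bloom-style fused QKV projection
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check: only the LoRA adapters should be trainable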
If we want to train the model further, how many cards do we need? Are 8 A100s with 42 GB of VRAM each enough?
You can work it out with this guide: https://huggingface.co/docs/transformers/perf_train_gpu_one#anatomy-of-models-memory
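A rough back-of-the-envelope check following that guide: mixed-precision AdamW keeps fp16 weights and gradients plus an fp32 master copy and fp32 optimizer state, about 16 bytes per parameter before activations.

params = 7e9                  # 7B parameters
weights = 2 * params          # fp16 weights
grads = 2 * params            # fp16 gradients
optim = (4 + 4 + 4) * params  # fp32 master copy + Adam momentum + variance
print((weights + grads + optim) / 1024**3)  # ~104 GiB before activations

That total is far beyond any single 42 GB card, which is why the sharding suggestion below matters.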
That is enough; we recommend DeepSpeed ZeRO stage 2 or above.
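A minimal sketch of passing a ZeRO stage-2 configuration to the HF Trainer, reusing the constants defined above; the values are illustrative, not tuned:

from transformers import TrainingArguments

ds_config = {
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "fp16": {"enabled": True},
    "zero_optimization": {
        "stage": 2,                  # shard optimizer state and gradients across the GPUs
        "overlap_comm": True,
        "contiguous_gradients": True,
    },
}

training_args = TrainingArguments(
    output_dir="./output",
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    num_train_epochs=EPOCHS,
    fp16=True,
    deepspeed=ds_config,  # the Trainer accepts a dict or a path to a JSON file
)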
I recall LLaMA's batch size during pretraining was very large, on the order of a few million tokens. If the batch size shrinks during fine-tuning, could the model diverge?
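Note that the script above already compensates through gradient accumulation, so the effective batch size per optimizer step stays at 128 sequences rather than 4:

effective_batch = MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS
print(effective_batch)  # 128: gradients from 32 micro-batches are summed before each update

Whether 128 sequences matches pretraining-scale batches is a separate question, but it is the effective batch size commonly used in published LoRA fine-tuning recipes.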