Running localGPT
I have tried running localGPT and get the following error: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 67108864 bytes.
C:\AI\LocalGPT>python run_localGPT.py
Running on: cuda
load INSTRUCTOR_Transformer
max_seq_length 512
Using embedded DuckDB with persistence: data will be stored in: C:\AI\LocalGPT/DB
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ C:\AI\LocalGPT\run_localGPT.py:88 in main.""" │
│ ❱ 1130 │ │ return self.main(*args, **kwargs) │
│ 1131 │
│ 1132 │
│ 1133 class Command(BaseCommand): │
│ │
│ C:\Python\lib\site-packages\click\core.py:1055 in main │
│ │
│ 1052 │ │ try: │
│ 1053 │ │ │ try: │
│ 1054 │ │ │ │ with self.make_context(prog_name, args, **extra) as ctx: │
│ ❱ 1055 │ │ │ │ │ rv = self.invoke(ctx) │
│ 1056 │ │ │ │ │ if not standalone_mode: │
│ 1057 │ │ │ │ │ │ return rv │
│ 1058 │ │ │ │ │ # it's not safe to ctx.exit(rv) here! │
│ │
│ C:\Python\lib\site-packages\click\core.py:1404 in invoke │
│ │
│ 1401 │ │ │ echo(style(message, fg="red"), err=True) │
│ 1402 │ │ │
│ 1403 │ │ if self.callback is not None: │
│ ❱ 1404 │ │ │ return ctx.invoke(self.callback, **ctx.params) │
│ 1405 │ │
│ 1406 │ def shell_complete(self, ctx: Context, incomplete: str) -> t.List["CompletionItem"]: │
│ 1407 │ │ """Return a list of completions for the incomplete value. Looks │
│ │
│ C:\Python\lib\site-packages\click\core.py:760 in invoke │
│ │
│ 757 │ │ │
│ 758 │ │ with augment_usage_errors(__self): │
│ 759 │ │ │ with ctx: │
│ ❱ 760 │ │ │ │ return __callback(*args, **kwargs) │
│ 761 │ │
│ 762 │ def forward( │
│ 763 │ │ __self, __cmd: "Command", *args: t.Any, **kwargs: t.Any # noqa: B902 │
│ │
│ C:\AI\LocalGPT\run_localGPT.py:61 in main │
│ │
│ 58 │ # Prepare the LLM │
│ 59 │ # callbacks = [StreamingStdOutCallbackHandler()] │
│ 60 │ # load the LLM for generating Natural Language responses. │
│ ❱ 61 │ llm = load_model() │
│ 62 │ qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, r │
│ 63 │ # Interactive questions and answers │
│ 64 │ while True: │
│ │
│ C:\AI\LocalGPT\run_localGPT.py:21 in load_model │
│ │
│ 18 │ model_id = "TheBloke/vicuna-7B-1.1-HF" │
│ 19 │ tokenizer = LlamaTokenizer.from_pretrained(model_id) │
│ 20 │ │
│ ❱ 21 │ model = LlamaForCausalLM.from_pretrained(model_id, │
│ 22 │ │ │ │ │ │ │ │ │ │ │ # load_in_8bit=True, # set these options i │
│ 23 │ │ │ │ │ │ │ │ │ │ │ # device_map=1#'auto', │
│ 24 │ │ │ │ │ │ │ │ │ │ │ # torch_dtype=torch.float16, │
│ │
│ C:\Python\lib\site-packages\transformers\modeling_utils.py:2611 in from_pretrained │
│ │
│ 2608 │ │ │ init_contexts.append(init_empty_weights()) │
│ 2609 │ │ │
│ 2610 │ │ with ContextManagers(init_contexts): │
│ ❱ 2611 │ │ │ model = cls(config, *model_args, **model_kwargs) │
│ 2612 │ │ │
│ 2613 │ │ # Check first if we are from_pt │
│ 2614 │ │ if use_keep_in_fp32_modules: │
│ │
│ C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:615 in __init__ │
│ │
│ 612 class LlamaForCausalLM(LlamaPreTrainedModel): │
│ 613 │ def __init__(self, config): │
│ 614 │ │ super().__init__(config) │
│ ❱ 615 │ │ self.model = LlamaModel(config) │
│ 616 │ │ │
│ 617 │ │ self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) │
│ 618 │
│ │
│ C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:446 in __init__ │
│ │
│ 443 │ │ self.vocab_size = config.vocab_size │
│ 444 │ │ │
│ 445 │ │ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.pad │
│ ❱ 446 │ │ self.layers = nn.ModuleList([LlamaDecoderLayer(config) for _ in range(config.num │
│ 447 │ │ self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) │
│ 448 │ │ │
│ 449 │ │ self.gradient_checkpointing = False │
│ │
│ C:\Python\lib\site-packages\transformers\models\llama\modeling_llama.py:446 in
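For reference, the from_pretrained call in load_model() has its memory-saving options commented out (run_localGPT.py lines 21-24 in the traceback above). A minimal sketch of what enabling them might look like, not a confirmed fix: torch.float16 roughly halves the fp32 footprint, device_map="auto" assumes the accelerate package is installed, and the optional load_in_8bit additionally needs bitsandbytes and a CUDA GPU.

```python
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_id = "TheBloke/vicuna-7B-1.1-HF"  # same model id as run_localGPT.py uses
tokenizer = LlamaTokenizer.from_pretrained(model_id)

# Load in half precision and let accelerate place the weights, so the full
# fp32 model is never materialized in CPU RAM first.
model = LlamaForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    # load_in_8bit=True,  # optional; requires bitsandbytes and a CUDA GPU
)
```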
I get a similar error on my 8 GB RAM Windows 11 notebook: RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes. This seems to imply there is not enough RAM on my notebook, but I'm not sure. I only have the sample PDF in the source documents folder.
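As a rough sanity check on the RAM question (assuming the default ~7-billion-parameter vicuna model from the script above), the weights alone dwarf 8 GB at full precision:

```python
# Back-of-the-envelope memory footprint for ~7e9 parameters.
params = 7e9
for dtype, nbytes in {"fp32": 4, "fp16": 2, "int8": 1}.items():
    print(f"{dtype}: ~{params * nbytes / 2**30:.0f} GiB")
# fp32: ~26 GiB, fp16: ~13 GiB, int8: ~7 GiB
```

So the allocation failure is consistent with running out of system RAM while the fp32 weights are being loaded, regardless of how many documents are in the source documents folder.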