funcchain
funcchain copied to clipboard
Document how to reuse LLM (without having it restart on every run)
so that the message `llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from .models/mistral-7b-instruct-v0.1.Q4_K_M.gguf...`
is only shown once.
from funcchain import chain, settings
from funcchain.model.defaults import ChatLlamaCpp, get_gguf_model
from pydantic import BaseModel
# Name of the GGUF model to load.
model = "Mistral-7B-Instruct-v0.1-GGUF"
# NOTE(review): model_file is assigned but not referenced again in this snippet.
model_file = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# Build the llama.cpp chat model a single time at module level and store it
# on funcchain's settings object, before any chain() call is made.
# Presumably this is what lets later calls reuse the loaded model instead of
# reloading it -- confirm against funcchain's settings handling.
settings.llm = ChatLlamaCpp(
    # The result of get_gguf_model(...) exposes .as_posix(), so it is
    # presumably a pathlib-style path to the model file -- TODO confirm.
    model_path=get_gguf_model(model, "Q4_K_M", settings).as_posix(),
)
# Result type used as the return annotation of hello(): one string field per
# target language.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic may expose a docstring in the model's schema -- confirm before
# adding one.
class Translated(BaseModel):
    chinese: str
    english: str
    french: str
# Accepts the text to translate and is annotated as returning a Translated.
# The body only delegates to funcchain's chain(), called with no arguments.
# NOTE(review): presumably chain() derives the request from this function's
# docstring, arguments and return annotation -- verify against funcchain docs.
# The docstring is therefore left exactly as written.
def hello(text: str) -> Translated:
    """
    Translate text into three languages.
    """
    return chain()
# Call hello() three times in the same process. settings.llm was assigned only
# once, so per the issue text the model-loading message is expected to appear
# a single time rather than once per call.
hello("Hello!")
hello("one")
hello("two")
I was doing this, expecting it to work (and it appeared to), but it was silently using <class 'langchain_openai.chat_models.base.ChatOpenAI'> instead!
from funcchain import chain, settings
from funcchain.model.defaults import ChatLlamaCpp, get_gguf_model
from langchain_community.llms import LlamaCpp
from pydantic import BaseModel
from langchain_community.llms.llamacpp import LlamaCpp
from huggingface_hub import hf_hub_download
# Hugging Face repository id and the GGUF file to fetch from it.
model = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
model_file = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# hf_hub_download's return value is passed on as the local model path.
model_path = hf_hub_download(model, model_file)
# This is langchain_community's LlamaCpp, not the ChatLlamaCpp imported from
# funcchain.
llm = LlamaCpp(model_path=model_path)
# Presumably silences llama.cpp's console logging -- TODO confirm.
llm.client.verbose = False
# Per the issue text, funcchain ended up using ChatOpenAI despite this
# assignment.
# NOTE(review): possibly because LlamaCpp is not a chat model and funcchain
# falls back to a default -- unverified, confirm against funcchain.
settings.llm = llm
# Result type used as the return annotation of hello(): one string field per
# target language.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic may expose a docstring in the model's schema -- confirm before
# adding one.
class Translated(BaseModel):
    chinese: str
    english: str
    french: str
# Accepts the text to translate and is annotated as returning a Translated.
# The body only delegates to funcchain's chain(), called with no arguments.
# NOTE(review): presumably chain() derives the request from this function's
# docstring, arguments and return annotation -- verify against funcchain docs.
# The docstring is therefore left exactly as written.
def hello(text: str) -> Translated:
    """
    Translate text into three languages.
    """
    return chain()
# Single call to hello(); per the issue text this ran, but against ChatOpenAI
# and not the local LlamaCpp model.
hello("Hello!")