autollm
autollm copied to clipboard
HuggingFace Embeddings Problems
After creating all the embeddings, I get a "currently loading" error from the embeddings model.
Traceback (most recent call last):
File "/home/frankie/Documents/falopitaConIA/app.py", line 53, in <module>
query_engine = AutoQueryEngine.from_defaults(
File "/home/frankie/.local/lib/python3.10/site-packages/autollm/auto/query_engine.py", line 258, in from_defaults
...
...
litellm.exceptions.APIConnectionError: Model BAAI/bge-small-zh is currently loading
here is my code:
# import required functions, classes
from autollm import AutoQueryEngine, AutoFastAPI
from autollm.utils.document_reading import read_github_repo_as_documents, read_files_as_documents
import os
import time
import uvicorn

# --- document reading params ---
required_exts = [".md"]  # optional, only read files with these extensions
relative_folder_path = "docs"  # relative path from the repo root to the folder containing documents
# use the variable instead of re-hard-coding "docs" so the two stay in sync
documents = read_files_as_documents(input_dir=relative_folder_path, required_exts=required_exts)

# --- llm params ---
llm_model = "ollama/llama2"
#llm_max_tokens = 512
llm_temperature = 1
llm_api_base = "http://localhost:11434"

# NOTE(review): hard-coding secrets in source is a security risk — prefer exporting
# HUGGINGFACE_API_KEY in the shell. setdefault() lets an already-exported key win.
os.environ.setdefault('HUGGINGFACE_API_KEY', "hf_XXXXXXXXXXXXXXXvHu")

system_prompt = """
"""
query_wrapper_prompt = '''
The document information is below.
---------------------
{context_str}
---------------------
Using the document information and mostly relying on it,
answer the query.
{query_str}
'''
enable_cost_calculator = True
# Served through the HuggingFace Inference API via litellm. Cold models answer
# with "currently loading" (HTTP 503) until they are spun up — see the retry
# logic in _build_query_engine below. An alternative that avoids the remote API
# entirely is a local embedding model, e.g. "local:BAAI/bge-small-zh".
embed_model = "huggingface/BAAI/bge-small-zh"
chunk_size = 248
chunk_overlap = 48
context_window = 8200

# --- vector store params ---
vector_store_type = "LanceDBVectorStore"
lancedb_uri = "./.lancedb"
lancedb_table_name = "vectors"
use_async = True
exist_ok = True
overwrite_existing = True

# --- query engine params ---
similarity_top_k = 3
response_mode = "compact_accumulate"
structured_answer_filtering = False


def _build_query_engine(max_retries: int = 5, wait_seconds: float = 20.0):
    """Build the AutoQueryEngine, retrying while the HF embedding model loads.

    The HuggingFace Inference API returns a "currently loading" error (surfaced
    by litellm as APIConnectionError) while a cold model is being spun up.
    Retrying after a short wait usually succeeds once the model is warm.

    Args:
        max_retries: how many construction attempts before giving up.
        wait_seconds: pause between attempts, giving the model time to load.

    Returns:
        The configured query engine.

    Raises:
        Exception: re-raises the last error if it is not a model-loading error
            or if all retries are exhausted.
    """
    for attempt in range(max_retries):
        try:
            return AutoQueryEngine.from_defaults(
                documents=documents,
                llm_model=llm_model,
                llm_temperature=llm_temperature,
                llm_api_base=llm_api_base,
                system_prompt=system_prompt,
                query_wrapper_prompt=query_wrapper_prompt,
                enable_cost_calculator=enable_cost_calculator,
                embed_model=embed_model,
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                context_window=context_window,
                vector_store_type=vector_store_type,
                lancedb_uri=lancedb_uri,
                lancedb_table_name=lancedb_table_name,
                use_async=use_async,
                exist_ok=exist_ok,
                overwrite_existing=overwrite_existing,
                similarity_top_k=similarity_top_k,
                response_mode=response_mode,
                structured_answer_filtering=structured_answer_filtering,
            )
        except Exception as exc:  # litellm.APIConnectionError while model loads
            if "currently loading" not in str(exc) or attempt == max_retries - 1:
                raise
            time.sleep(wait_seconds)


if __name__ == "__main__":
    query_engine = _build_query_engine()
    #response = query_engine.query(query_input)
    #print(response.response)
    app = AutoFastAPI.from_query_engine(query_engine)
    uvicorn.run(app, host="0.0.0.0", port=8000)