move llmware base directory
Problem: I'm running out of storage on my primary drive
Suggested fix: allow an environment variable to set the base path
Example Refactor: llmware\llmware\configs.py
- use the LLMWARE_BASE_PATH environment variable to set _base_fp
- revert to using the existing USERPROFILE path if the configured directory does not exist
Hi @c-w-m,
Thank you for the feature request! I think it is definitely something that should at least be considered for implementation.
@c-w-m - could you please take a look at the LLMWareConfig class in configs.py - it provides a variety of options for setting the home path and where artifacts are stored locally - I am copying and pasting the relevant config parameters below found in this file:
class LLMWareConfig:
    """LLMWare global configuration object - use set/get to update """

    # Base location: the user's home directory plus the llmware data folder.
    # The home directory is read from USERPROFILE on Windows and from HOME on
    # every other platform; os.environ.get() yields None when it is unset.
    _base_fp = {
        "home_path": os.environ.get(
            "USERPROFILE" if platform.system() == "Windows" else "HOME"
        ),
        "llmware_path_name": "llmware_data" + os.sep,
    }

    # Folder names for each artifact type; every value carries a trailing
    # path separator.
    _fp = {
        "model_repo_path_name": "model_repo" + os.sep,
        "library_path_name": "accounts" + os.sep,
        "input_path_name": "input_channel" + os.sep,
        "parser_path_name": "parser_history" + os.sep,
        "query_path_name": "query_history" + os.sep,
        "prompt_path_name": "prompt_history" + os.sep,
        "tmp_path_name": "tmp" + os.sep,
    }
To set the parameter, call LLMWareConfig().set_home("/new/path/"), or set the os.environ variable that the class reads (HOME, or USERPROFILE on Windows).
Please confirm back if this resolves/advances the issue ...
I'm also wrestling with this! Bottom line: I am able to reassign the _base_fp with the following method,
@classmethod
def set_home(cls, new_value):
    """Store new_value as the "home_path" entry of the class-level _base_fp dict."""
    base_paths = cls._base_fp
    base_paths["home_path"] = new_value
HOWEVER... I'm just unable to change where the db is created/saved. I've tried a dozen things to relocate the db (sqlite), but nothing works.
What do llmware authors recommend? This example clearly reproduces the dilemma, where the db file always defaults to the originally initialized file path, even while all other requisite directories and files adhere to the new home path. As an aside (not shown in the example below) if I manually create the expected directory where the db will be created and accessed, then ingestion operations can be successfully run, the files will process to their new_home_path, but the database will be accessible in the default out-of-the-box path.
import os
from llmware.library import Library
from llmware.configs import LLMWareConfig
def setup_workspace(new_home_path, active_db):
    """Apply a home path and active database to LLMWareConfig, then set up the workspace.

    Each step is announced on stdout immediately before it runs.

    Args:
        new_home_path: value passed to LLMWareConfig.set_home().
        active_db: value passed to LLMWareConfig.set_active_db().
    """
    config = LLMWareConfig

    print(f"Setting home path to: {new_home_path}")
    config.set_home(new_home_path)

    print(f"Setting active DB to: {active_db}")
    config.set_active_db(active_db)

    print("Calling setup_llmware_workspace")
    config.setup_llmware_workspace()
def check_directory_status():
    """Print whether each path reported by LLMWareConfig exists, and its contents if so."""
    config = LLMWareConfig
    # (label, path) pairs, queried up front in the order they are reported.
    labelled_paths = [
        ("Home", config.get_home()),
        ("LLMWare", config.get_llmware_path()),
        ("Library", config.get_library_path()),
        ("Model Repo", config.get_model_repo_path()),
        ("Input", config.get_input_path()),
        ("Parser", config.get_parser_path()),
        ("Query", config.get_query_path()),
        ("Prompt", config.get_prompt_path()),
        ("Temp", config.get_tmp_path()),
    ]

    for label, location in labelled_paths:
        present = os.path.exists(location)
        status = "Exists" if present else "Does not exist"
        print(f"Directory {label}: {status} at {location}")
        if not present:
            continue
        print(f" Contents: {os.listdir(location)}")
def create_library(library_name, account_name):
    """Create a new llmware Library, reporting any failure instead of raising.

    Args:
        library_name: name passed to Library().create_new_library().
        account_name: account passed to Library().create_new_library().

    Returns:
        The object returned by create_new_library(), or None if any
        exception was raised (the error text is printed to stdout).
    """
    try:
        print(f"Attempting to create library: {library_name} for account: {account_name}")
        new_library = Library().create_new_library(library_name, account_name)
    except Exception as err:
        # Any failure is reported and signalled with None so the caller's
        # remaining diagnostics still run.
        print(f"Error creating library: {str(err)}")
        return None
    else:
        return new_library
def find_existing_ancestor(path):
    """Walk upward from *path* to find the closest ancestor directory that exists.

    Args:
        path: file or directory path whose ancestors are inspected.

    Returns:
        A ``(missing, found)`` tuple. ``missing`` lists every ancestor that
        was checked and does not exist, nearest first. ``found`` is the first
        ancestor that exists, or None when the walk reaches the top of the
        path without finding one. The topmost component (the filesystem root,
        a drive root, or the empty string of a relative path) is not checked.
    """
    missing = []
    current = os.path.dirname(path)
    # os.path.dirname() returns its argument unchanged at the top of a path
    # ("/" on POSIX, "C:\\" on Windows) and "" once a relative path runs out
    # of components. Stopping at that fixed point terminates on every
    # platform; comparing against a hard-coded "/" loops forever on Windows
    # and for relative paths.
    while current and os.path.dirname(current) != current:
        if os.path.exists(current):
            return missing, current
        missing.append(current)
        current = os.path.dirname(current)
    return missing, None


if __name__ == "__main__":
    # note target directory -- in this case "new_llmware_fp" must exist.
    # modify new_home_path below for your environment to test
    new_home_path = "/Volumes/ExternalStorage/new_llmware_fp"
    active_db = "sqlite"

    setup_workspace(new_home_path, active_db)

    print("\nChecking directory status after setup:")
    check_directory_status()

    library_name = "example_library"
    account_name = "example_account"
    library = create_library(library_name, account_name)

    if library:
        print(f"Library created at: {library.library_main_path}")
        print("Checking library-specific directories:")
        for dir_name in ["file_copy_path", "image_path", "dataset_path", "nlp_path", "output_path", "tmp_path", "embedding_path"]:
            path = getattr(library, dir_name)
            exists = os.path.exists(path)
            print(f"{dir_name}: {'Exists' if exists else 'Does not exist'} at {path}")
            if exists:
                print(f" Contents: {os.listdir(path)}")
    else:
        print("Failed to create library")

    # Where the sqlite file is expected to be if the db followed the new home path.
    expected_db_path = os.path.join(new_home_path, "llmware_data", "accounts", "sqlite_llmware.db")
    print("\nDatabase file:")
    print(f"Expected path: {expected_db_path}")
    print(f"Exists: {os.path.exists(expected_db_path)}")
    if os.path.exists(expected_db_path):
        print(f"File size: {os.path.getsize(expected_db_path)} bytes")
    else:
        # Report how far up the expected path the directory tree actually exists.
        print("Checking parent directories:")
        missing_dirs, existing_dir = find_existing_ancestor(expected_db_path)
        for parent_dir in missing_dirs:
            print(f" {parent_dir} does not exist")
        if existing_dir is not None:
            print(f" {existing_dir} exists")
            print(f" Contents: {os.listdir(existing_dir)}")

    print("\nFinal LLMWareConfig settings:")
    print(f"Home: {LLMWareConfig.get_home()}")
    print(f"Active DB: {LLMWareConfig.get_active_db()}")
    print(f"Library Path: {LLMWareConfig.get_library_path()}")
Setting the environment variable per @doberst does not do the job, either. I would seriously be grateful for help on this issue. It has been a considerable snafu and is regrettably inhibiting adoption of llmware. Thank you.