Changed default local LLM config and README memory requirements
curvedinf committed Aug 5, 2024
Parent: 7981c62 · Commit: 4a7da7c
Showing 5 changed files with 33 additions and 17 deletions.
README.md (20 changes: 18 additions & 2 deletions)
@@ -29,8 +29,8 @@ In this section are recipes to run `dir-assistant` in basic capacity to get you

### Quickstart with Local Default Model (Phi 3 128k)

-To get started locally, you can download a default llm model. This model requires approximately 6GB of
-memory to run effectively. To run via CPU:
+To get started locally, you can download a default llm model. Default configuration with this model requires
+14GB of memory, but you will be able to adjust the configuration to fit lower memory requirements. To run via CPU:

```shell
pip install dir-assistant
@@ -215,6 +215,22 @@ The options available for `llama-cpp-python` are documented in the
What the options do is also documented in the
[llama.cpp CLI documentation](https://github.com/ggerganov/llama.cpp/blob/master/examples/main/README.md).

+The most important `llama-cpp-python` options are related to tuning the LLM to your system's VRAM:
+
+* Setting `n_ctx` lower will reduce the amount of VRAM required to run, but will decrease the amount of
+file text that can be included when running a prompt.
+* `CONTEXT_FILE_RATIO` sets the proportion of prompt history to file text to be included when sent to the LLM.
+Higher ratios mean more file text and less prompt history. More file text generally improves comprehension.
+* If your llm `n_ctx` is smaller than your embed `n_ctx` times `CONTEXT_FILE_RATIO`, your file text chunks
+have the potential to be larger than your llm context, and thus will not be included. To ensure all files
+can be included, make sure your embed context is smaller than `n_ctx` times `CONTEXT_FILE_RATIO`.
+* Larger embed `n_ctx` will chunk your files into larger sizes, which allows LLMs to understand them more
+easily.
+* `n_batch` must be smaller than the `n_ctx` of a model, but setting it higher will probably improve
+performance.
+
+For other tips about tuning Llama.cpp, explore their documentation and do some google searches.
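
As a concrete illustration of the rule in the bullets above (not part of this diff), the sketch below plugs in the new defaults from `dir_assistant/config.py`. The embed `n_ctx` of 8192 is an assumed example value, and the check only mirrors the rule of thumb, not dir-assistant's actual chunking code:

```python
# Hypothetical illustration of the n_ctx / CONTEXT_FILE_RATIO rule of thumb.
# LLM_N_CTX and CONTEXT_FILE_RATIO mirror the defaults introduced in this
# commit; EMBED_N_CTX is an assumed example, not the project's default.

CONTEXT_FILE_RATIO = 0.9  # share of the LLM prompt reserved for file text
LLM_N_CTX = 12000         # LLAMA_CPP_OPTIONS['n_ctx'] after this commit
EMBED_N_CTX = 8192        # assumed LLAMA_CPP_EMBED_OPTIONS['n_ctx']

# Files are chunked at the embedding context size, so a chunk can be up to
# EMBED_N_CTX tokens. The LLM prompt only reserves LLM_N_CTX * CONTEXT_FILE_RATIO
# tokens for file text, so any chunk larger than that budget cannot be included.
file_text_budget = LLM_N_CTX * CONTEXT_FILE_RATIO  # 10800 tokens with these values

if EMBED_N_CTX <= file_text_budget:
    print(f"OK: {EMBED_N_CTX}-token chunks fit the {file_text_budget:.0f}-token file budget")
else:
    print(f"Too large: lower embed n_ctx below {file_text_budget:.0f} or raise llm n_ctx")
```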

## Running

```shell
dir_assistant/config.py (5 changes: 3 additions & 2 deletions)
@@ -16,16 +16,17 @@
        'build/',
        '.idea/',
        '__pycache__',
+        'dist/',
    ],
-    'CONTEXT_FILE_RATIO': 0.5,
+    'CONTEXT_FILE_RATIO': 0.9,
    'ACTIVE_MODEL_IS_LOCAL': False,
    'USE_CGRAG': True,
    'PRINT_CGRAG': False,
    'MODELS_PATH': '~/.local/share/dir-assistant/models/',
    'EMBED_MODEL': '',
    'LLM_MODEL': '',
    'LLAMA_CPP_OPTIONS': {
-        'n_ctx': 8192,
+        'n_ctx': 12000,
        'verbose': False,
    },
    'LLAMA_CPP_EMBED_OPTIONS': {
dir_assistant/setkey.py (1 change: 0 additions & 1 deletion)
@@ -2,7 +2,6 @@


def setkey(args, config_dict):
-    args.api_name, args.api_key
    config_dict['DIR_ASSISTANT']['LITELLM_API_KEYS'][args.api_name] = args.api_key
    save_config(config_dict)
    print(f"Set {args.api_name} API key successfully.")
dir_assistant/start.py (22 changes: 11 additions & 11 deletions)
@@ -51,12 +51,18 @@ def start(args, config_dict):
    use_cgrag = config_dict['USE_CGRAG']
    print_cgrag = config_dict['PRINT_CGRAG']

-    if embed_model_file == '':
-        print("You must specify an embedding model in config.json. See readme for more information. Exiting...")
+    if config_dict["EMBED_MODEL"] == "":
+        print("""You must specify EMBED_MODEL. Use 'dir-assistant config open' and \
+see readme for more information. Exiting...""")
        exit(1)

-    if active_model_is_local and llm_model_file == '':
-        print("You must specify an local LLM model in config.json. See readme for more information. Exiting...")
+    if active_model_is_local:
+        if config_dict["LLM_MODEL"] == "":
+            print("""You must specify LLM_MODEL. Use 'dir-assistant config open' and \
+see readme for more information. Exiting...""")
+            exit(1)
+    elif lite_llm_model == "":
+        print("""You must specify LITELLM_MODEL. Use 'dir-assistant config open' and see readme \
+for more information. Exiting...""")
        exit(1)

    ignore_paths = args.i__ignore if args.i__ignore else []
@@ -84,9 +90,6 @@ def start(args, config_dict):
    # Initialize the LLM model
    if active_model_is_local:
        print(f"{Fore.LIGHTBLACK_EX}Loading local LLM model...{Style.RESET_ALL}")
-        if config_dict['LLM_MODEL'] == "":
-            print("You must specify an LLM model in config.json. See readme for more information. Exiting...")
-            exit(1)
        llm = LlamaCppRunner(
            model_path=llm_model_file,
            llama_cpp_options=llama_cpp_options,
@@ -101,9 +104,6 @@
        )
    else:
        print(f"{Fore.LIGHTBLACK_EX}Loading remote LLM model...{Style.RESET_ALL}")
-        if lite_llm_model == "":
-            print("You must specify a LiteLLM model in config.json. See readme for more information. Exiting...")
-            exit(1)
        llm = LiteLLMRunner(
            lite_llm_model=lite_llm_model,
            lite_llm_model_uses_system_message=lite_llm_model_uses_system_message,
setup.py (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@

setup(
    name="dir-assistant",
-    version="1.0.0",
+    version="1.0.1",
    description="Chat with your current directory's files using a local or API LLM.",
    long_description=README,
    long_description_content_type='text/markdown',
