Commit 6f6768d · Thomas (Tom) Gardos committed · Parents: 30045eb, 902a706

Merge pull request #40 from DL4DS/remove_tinyllama

Remove TinyLlama from LFS and add caching mechanism

code/modules/chat/chat_model_loader.py CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
@@ -14,6 +16,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
+            filename=self.config["llm_params"]["local_llm_params"]["filename"],
+            cache_dir=model_cache_path
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] == "openai":
             llm = ChatOpenAI(
@@ -21,7 +31,7 @@ class ChatModelLoader:
             )
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
             n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-            model_path = self.config["llm_params"]["local_llm_params"]["model"]
+            model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
             llm = LlamaCpp(
                 model_path=model_path,
                 n_batch=n_batch,
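
The new _verify_model_cache method delegates downloading to hf_hub_download, which only fetches the GGUF file when it is not already present under cache_dir, so repeated startups reuse the cached copy. Below is a minimal sketch of the same flow, assuming the repo_id and filename added to config.yml and using the LLAMA_PATH directory from constants.py as the cache directory; how the 'tiny-llama' value in config.yml gets resolved to that directory is not shown in this diff.

from pathlib import Path

from huggingface_hub import hf_hub_download

# Keys mirror local_llm_params in config.yml; using the LLAMA_PATH value from
# constants.py as the cache directory is an assumption for this sketch.
local_llm_params = {
    "model": "../storage/models/tinyllama",  # cache directory, not a file
    "repo_id": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    "filename": "tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
}

# First run downloads into the cache directory; later runs find the file
# already cached and return immediately.
cached_path = hf_hub_download(
    repo_id=local_llm_params["repo_id"],
    filename=local_llm_params["filename"],
    cache_dir=local_llm_params["model"],
)

# _verify_model_cache re-locates the file by globbing the hub cache layout.
globbed = list(Path(local_llm_params["model"]).glob("*/snapshots/*/*.gguf"))
print(cached_path, globbed[0] if globbed else None)

Since hf_hub_download already returns the resolved local file path, that return value could be passed straight to LlamaCpp; the glob is simply the approach this commit takes to recover the path from the cache directory.
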
code/modules/config/config.yml CHANGED
@@ -34,6 +34,8 @@ llm_params:
   local_llm_params:
     model: 'tiny-llama'
     temperature: 0.7
+    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
+    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
   pdf_reader: 'llama' # str [llama, pymupdf, gpt]
 
 chat_logging:
@@ -50,4 +52,4 @@ splitter_options:
   chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove : null # int or None
   last_chunks_to_remove : null # int or None
-  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
+  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
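
The repo_id and filename keys pin the local model to a specific GGUF artifact on the Hugging Face Hub. When editing these values, a quick sanity check such as the sketch below can confirm the filename actually exists in the repository before it is handed to hf_hub_download; the check is illustrative and not part of this commit.

from huggingface_hub import list_repo_files

# Values added to config.yml by this commit.
repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
filename = "tinyllama-1.1b-chat-v1.0.Q5_0.gguf"

# Fetch the repo's file listing and fail fast on a typo in the filename.
repo_files = list_repo_files(repo_id)
if filename not in repo_files:
    available = ", ".join(f for f in repo_files if f.endswith(".gguf"))
    raise ValueError(f"{filename!r} not found in {repo_id!r}; available GGUF files: {available}")
print(f"{filename} is available in {repo_id}")
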
code/modules/config/constants.py CHANGED
@@ -86,5 +86,5 @@ Question: {question}
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama-1.1b-chat-v1.0.Q5_K_M.gguf"
+LLAMA_PATH = "../storage/models/tinyllama"
 MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"
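
LLAMA_PATH now points at a cache directory rather than an LFS-tracked .gguf file, which is what lets the model weights drop out of Git LFS. Assuming hf_hub_download has populated it with the standard huggingface_hub cache layout (models--<org>--<repo>/snapshots/<revision>/<filename>), the small sketch below shows why the */snapshots/*/*.gguf glob in _verify_model_cache finds the file; the layout is an assumption about the library's cache structure, not something visible in this diff.

from pathlib import Path

# Directory-style LLAMA_PATH from constants.py.
LLAMA_PATH = "../storage/models/tinyllama"

# Expected contents after a successful hf_hub_download into this cache_dir:
#   models--TheBloke--TinyLlama-1.1B-Chat-v1.0-GGUF/snapshots/<hash>/tinyllama-1.1b-chat-v1.0.Q5_0.gguf
for gguf in Path(LLAMA_PATH).glob("*/snapshots/*/*.gguf"):
    print(gguf.relative_to(LLAMA_PATH))
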