Merge pull request #40 from DL4DS/remove_tinyllama
Remove TinyLlama from LFS and add caching mechanism
code/modules/chat/chat_model_loader.py
CHANGED
@@ -5,6 +5,8 @@ from langchain_community.llms import LlamaCpp
 import torch
 import transformers
 import os
+from pathlib import Path
+from huggingface_hub import hf_hub_download
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 
@@ -14,6 +16,14 @@ class ChatModelLoader:
         self.config = config
         self.huggingface_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+    def _verify_model_cache(self, model_cache_path):
+        hf_hub_download(
+            repo_id=self.config["llm_params"]["local_llm_params"]["repo_id"],
+            filename=self.config["llm_params"]["local_llm_params"]["filename"],
+            cache_dir=model_cache_path
+        )
+        return str(list(Path(model_cache_path).glob("*/snapshots/*/*.gguf"))[0])
+
     def load_chat_model(self):
         if self.config["llm_params"]["llm_loader"] == "openai":
             llm = ChatOpenAI(
@@ -21,7 +31,7 @@ class ChatModelLoader:
             )
         elif self.config["llm_params"]["llm_loader"] == "local_llm":
             n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
-            model_path = self.config["llm_params"]["local_llm_params"]["model"]
+            model_path = self._verify_model_cache(self.config["llm_params"]["local_llm_params"]["model"])
             llm = LlamaCpp(
                 model_path=model_path,
                 n_batch=n_batch,
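For context, a minimal standalone sketch of the caching flow that the new _verify_model_cache implements: hf_hub_download fetches the GGUF into the configured cache directory (a no-op once it is already cached), and the snapshot path is then handed to LlamaCpp. The repo_id and filename mirror the config.yml change below; the cache-directory value is illustrative, standing in for whatever the config's local_llm_params.model entry points to.

# Sketch only: repo_id/filename come from this PR's config.yml; the
# cache directory is a placeholder for the configured model path.
from pathlib import Path

from huggingface_hub import hf_hub_download

cache_dir = "../storage/models/tinyllama"  # illustrative cache location

# Downloads the file on the first call; subsequent calls reuse the cache.
downloaded_path = hf_hub_download(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
    filename="tinyllama-1.1b-chat-v1.0.Q5_0.gguf",
    cache_dir=cache_dir,
)

# The PR resolves the file by globbing the hub cache layout
# (models--<org>--<repo>/snapshots/<revision>/<file>); hf_hub_download
# already returns that resolved path, so either value can be passed to
# LlamaCpp as model_path.
globbed_path = str(next(Path(cache_dir).glob("*/snapshots/*/*.gguf")))
print(downloaded_path, globbed_path)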
code/modules/config/config.yml
CHANGED
@@ -34,6 +34,8 @@ llm_params:
   local_llm_params:
     model: 'tiny-llama'
     temperature: 0.7
+    repo_id: 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF' # HuggingFace repo id
+    filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
   pdf_reader: 'llama' # str [llama, pymupdf, gpt]
 
 chat_logging:
@@ -50,4 +52,4 @@ splitter_options:
   chunk_separators : ["\n\n", "\n", " ", ""] # list of strings
   front_chunks_to_remove : null # int or None
   last_chunks_to_remove : null # int or None
-  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
+  delimiters_to_remove : ['\t', '\n', ' ', ' '] # list of strings
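At runtime the two new keys are looked up under llm_params.local_llm_params, as _verify_model_cache does above. A minimal sketch of reading them, assuming the config file is loaded with PyYAML (the PR itself does not show the loading code):

import yaml

with open("code/modules/config/config.yml") as f:
    config = yaml.safe_load(f)

local_llm = config["llm_params"]["local_llm_params"]
print(local_llm["repo_id"])   # 'TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF'
print(local_llm["filename"])  # 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf'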
code/modules/config/constants.py
CHANGED
@@ -86,5 +86,5 @@ Question: {question}
 
 # Model Paths
 
-LLAMA_PATH = "../storage/models/tinyllama
+LLAMA_PATH = "../storage/models/tinyllama"
 MISTRAL_PATH = "storage/models/mistral-7b-v0.1.Q4_K_M.gguf"