johnpaulbin committed
Commit 49c7346 · verified · 1 Parent(s): 26149dc

Update app.py

Files changed (1): app.py +15 -9
app.py CHANGED
@@ -2,17 +2,23 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import gradio as gr
 
-# Download the model from Hugging Face
-model_name = "johnpaulbin/articulate-V1-Q8_0-GGUF"
-model_file = "articulate-V1-q8_0.gguf"  # Verify the exact file name in the repository
-model_path = hf_hub_download(repo_id=model_name, filename=model_file)
+# Download the base model
+base_model_repo = "QuantFactory/Qwen2.5-1.5B-Instruct-GGUF"
+base_model_file = "Qwen2.5-1.5B-Instruct.Q8_0.gguf"
+base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)
 
-# Initialize the Llama model
+# Download the LoRA adapter
+adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
+adapter_file = "articulate-V1-q8_0.gguf"
+adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
+
+# Initialize the Llama model with base model and adapter
 llm = Llama(
-    model_path=model_path,
-    n_ctx=1028,  # Context length
-    n_threads=2,  # Number of CPU threads
-    n_gpu_layers=0  # Run on CPU only
+    model_path=base_model_path,
+    lora_path=adapter_path,
+    n_ctx=1024,  # Context length, set manually since adapter lacks it
+    n_threads=2,  # Adjust based on your system
+    n_gpu_layers=0  # Set to >0 if GPU acceleration is desired and supported
 )
 
 # Define the translation function
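
The hunk ends at the "# Define the translation function" comment, so the function itself is not part of this commit. For context, a minimal sketch of how the rest of app.py might use the `llm` and `gr` objects configured above, built on llama-cpp-python's create_chat_completion chat API and a plain Gradio interface; the function name `translate`, the prompt wording, and the widget labels are illustrative assumptions, not code from the repository:

# Hypothetical continuation of app.py; not part of this commit.
def translate(text, target_language):
    # create_chat_completion is llama-cpp-python's chat API;
    # the system prompt below is an assumed translation instruction.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": f"Translate the user's text to {target_language}."},
            {"role": "user", "content": text},
        ],
        max_tokens=256,
    )
    return response["choices"][0]["message"]["content"]

# Assumed Gradio wiring for the Space; labels are placeholders.
demo = gr.Interface(
    fn=translate,
    inputs=[gr.Textbox(label="Text"), gr.Textbox(label="Target language")],
    outputs=gr.Textbox(label="Translation"),
)
demo.launch()

A continuation along these lines needs nothing beyond the `llm` and `gr` objects set up in the diff, which is consistent with the commit touching only the download and initialization block.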