johnpaulbin committed
Commit 49c7346 · verified · 1 Parent(s): 26149dc

Update app.py

Files changed (1): app.py +15 -9
app.py CHANGED
@@ -2,17 +2,23 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import gradio as gr
 
-# Download the model from Hugging Face
-model_name = "johnpaulbin/articulate-V1-Q8_0-GGUF"
-model_file = "articulate-V1-q8_0.gguf"  # Verify the exact file name in the repository
-model_path = hf_hub_download(repo_id=model_name, filename=model_file)
+# Download the base model
+base_model_repo = "QuantFactory/Qwen2.5-1.5B-Instruct-GGUF"
+base_model_file = "Qwen2.5-1.5B-Instruct.Q8_0.gguf"
+base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)
 
-# Initialize the Llama model
+# Download the LoRA adapter
+adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
+adapter_file = "articulate-V1-q8_0.gguf"
+adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
+
+# Initialize the Llama model with base model and adapter
 llm = Llama(
-    model_path=model_path,
-    n_ctx=1028,  # Context length
-    n_threads=2,  # Number of CPU threads
-    n_gpu_layers=0  # Run on CPU only
+    model_path=base_model_path,
+    lora_path=adapter_path,
+    n_ctx=1024,  # Context length, set manually since adapter lacks it
+    n_threads=2,  # Adjust based on your system
+    n_gpu_layers=0  # Set to >0 if GPU acceleration is desired and supported
 )
 
 # Define the translation function
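
The hunk ends at the "# Define the translation function" comment, so the function itself is not part of this commit. For context, a minimal sketch of how the rest of app.py might use the `llm` and `gr` objects configured above, built on llama-cpp-python's create_chat_completion chat API and a plain Gradio interface; the function name `translate`, the prompt wording, and the widget labels are illustrative assumptions, not code from the repository:

# Hypothetical continuation of app.py; not part of this commit.
def translate(text, target_language):
    # create_chat_completion is llama-cpp-python's chat API;
    # the system prompt below is an assumed translation instruction.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": f"Translate the user's text to {target_language}."},
            {"role": "user", "content": text},
        ],
        max_tokens=256,
    )
    return response["choices"][0]["message"]["content"]

# Assumed Gradio wiring for the Space; labels are placeholders.
demo = gr.Interface(
    fn=translate,
    inputs=[gr.Textbox(label="Text"), gr.Textbox(label="Target language")],
    outputs=gr.Textbox(label="Translation"),
)
demo.launch()

A continuation along these lines needs nothing beyond the `llm` and `gr` objects set up in the diff, which is consistent with the commit touching only the download and initialization block.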