import multiprocessing

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the base model
base_model_repo = "johnpaulbin/articulate-11-expspanish-base-merged-Q8_0-GGUF"
base_model_file = "articulate-11-expspanish-base-merged-q8_0.gguf"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)
# Download the LoRA adapter
adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
adapter_file = "articulate-V1-q8_0.gguf"
adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
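
# Note: hf_hub_download caches files locally (by default under
# ~/.cache/huggingface/hub), so repeated launches reuse the downloaded
# copies instead of fetching them again.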

# Choose a thread count from the available CPU cores. Using roughly half the
# logical cores is a reasonable default for llama.cpp, which tends to scale
# with physical cores rather than hyper-threads.
cpu_count = multiprocessing.cpu_count()
optimal_threads = max(2, cpu_count // 2)
print(f"Initializing model with {optimal_threads} threads...")

# Initialize the Llama model with the base weights plus the LoRA adapter
llm = Llama(
    model_path=base_model_path,
    lora_path=adapter_path,
    n_ctx=512,                  # Context length; set manually since the adapter lacks it
    n_threads=optimal_threads,  # Computed above; adjust for your system
    use_mmap=True,              # Memory-map the model file to reduce RAM pressure
    n_gpu_layers=0              # Set to >0 if GPU acceleration is available and supported
)
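
# Optional sanity check (a minimal sketch; the tag-based prompt format matches
# the one used in translate() below, not anything documented by the model card).
# Uncomment to verify the model and adapter respond before wiring up the UI:
# out = llm.create_completion("[ENGLISH]Hello[SPANISH]", max_tokens=8, temperature=0)
# print(out["choices"][0]["text"])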

# Translation function used by the Gradio interface
def translate(direction, text):
    # Map the selected direction to source/target language tags
    if direction == "English to Spanish":
        source_lang = "ENGLISH"
        target_lang = "SPANISH"
    elif direction == "Spanish to English":
        source_lang = "SPANISH"
        target_lang = "ENGLISH"
    elif direction == "Korean to English":
        source_lang = "KOREAN"
        target_lang = "ENGLISH"
    elif direction == "English to Korean":
        source_lang = "ENGLISH"
        target_lang = "KOREAN"
    else:
        return "Invalid direction"

    # Construct the prompt for raw completion
    prompt = f"[{source_lang}]{text}[{target_lang}]"
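    # e.g. "[ENGLISH]Good morning[SPANISH]" -- the model is expected to
    # complete the prompt with the translated text.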

    # Generate with deterministic settings (greedy decoding)
    response = llm.create_completion(
        prompt,
        max_tokens=200,  # Cap output length
        temperature=0,   # Greedy decoding
        top_k=1          # Always pick the most probable token
    )

    # Extract and return the generated text
    return response['choices'][0]['text'].strip()
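
# Quick illustration of the function above (the output shown is illustrative,
# not an actual model response):
#   translate("English to Spanish", "Good morning")  # -> e.g. "Buenos días"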

# Define the Gradio interface
direction_options = ["English to Spanish", "Spanish to English", "Korean to English", "English to Korean"]
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Dropdown(choices=direction_options, label="Translation Direction"),
        gr.Textbox(lines=5, label="Input Text")
    ],
    outputs=gr.Textbox(lines=5, label="Translation"),
    title="Translation App",
    description="Translate text between English, Spanish, and Korean using the Articulate V1 model."
)
# Launch the app
iface.launch(debug=True)