import multiprocessing

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the base model
base_model_repo = "johnpaulbin/articulate-11-expspanish-base-merged-Q8_0-GGUF"
base_model_file = "articulate-11-expspanish-base-merged-q8_0.gguf"
base_model_path = hf_hub_download(repo_id=base_model_repo, filename=base_model_file)
# Download the LoRA adapter
adapter_repo = "johnpaulbin/articulate-V1-Q8_0-GGUF"
adapter_file = "articulate-V1-q8_0.gguf"
adapter_path = hf_hub_download(repo_id=adapter_repo, filename=adapter_file)
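
# Note: hf_hub_download caches files locally (by default under
# ~/.cache/huggingface/hub), so repeated launches reuse the downloaded
# copies instead of fetching them again.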

# Choose a thread count from the available CPU cores. Using roughly half the
# logical cores is a reasonable default for llama.cpp, which tends to scale
# with physical cores rather than hyper-threads.
cpu_count = multiprocessing.cpu_count()
optimal_threads = max(2, cpu_count // 2)
print(f"Initializing model with {optimal_threads} threads...")

# Initialize the Llama model with the base weights plus the LoRA adapter
llm = Llama(
    model_path=base_model_path,
    lora_path=adapter_path,
    n_ctx=512,                  # Context length; set manually since the adapter lacks it
    n_threads=optimal_threads,  # Computed above; adjust for your system
    use_mmap=True,              # Memory-map the model file to reduce RAM pressure
    n_gpu_layers=0              # Set to >0 if GPU acceleration is available and supported
)
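
# Optional sanity check (a minimal sketch; the tag-based prompt format matches
# the one used in translate() below, not anything documented by the model card).
# Uncomment to verify the model and adapter respond before wiring up the UI:
# out = llm.create_completion("[ENGLISH]Hello[SPANISH]", max_tokens=8, temperature=0)
# print(out["choices"][0]["text"])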

# Translation function used by the Gradio interface
def translate(direction, text):
    # Map the selected direction to source/target language tags
    if direction == "English to Spanish":
        source_lang = "ENGLISH"
        target_lang = "SPANISH"
    elif direction == "Spanish to English":
        source_lang = "SPANISH"
        target_lang = "ENGLISH"
    elif direction == "Korean to English":
        source_lang = "KOREAN"
        target_lang = "ENGLISH"
    elif direction == "English to Korean":
        source_lang = "ENGLISH"
        target_lang = "KOREAN"
    else:
        return "Invalid direction"

    # Construct the prompt for raw completion
    prompt = f"[{source_lang}]{text}[{target_lang}]"
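    # e.g. "[ENGLISH]Good morning[SPANISH]" -- the model is expected to
    # complete the prompt with the translated text.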

    # Generate with deterministic settings (greedy decoding)
    response = llm.create_completion(
        prompt,
        max_tokens=200,  # Cap output length
        temperature=0,   # Greedy decoding
        top_k=1          # Always pick the most probable token
    )

    # Extract and return the generated text
    return response['choices'][0]['text'].strip()
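
# Quick illustration of the function above (the output shown is illustrative,
# not an actual model response):
#   translate("English to Spanish", "Good morning")  # -> e.g. "Buenos días"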

# Define the Gradio interface
direction_options = ["English to Spanish", "Spanish to English", "Korean to English", "English to Korean"]
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Dropdown(choices=direction_options, label="Translation Direction"),
        gr.Textbox(lines=5, label="Input Text")
    ],
    outputs=gr.Textbox(lines=5, label="Translation"),
    title="Translation App",
    description="Translate text between English, Spanish, and Korean using the Articulate V1 model."
)
# Launch the app
iface.launch(debug=True)