Spaces:
Sleeping
Sleeping
File size: 4,551 Bytes
7f36089 b27a850 7f36089 c2b521a b27a850 7f36089 b27a850 c2b521a 9c9d112 c2b521a b27a850 49c7346 c2b521a b27a850 7f36089 49c7346 c2b521a 7f36089 b27a850 c2b521a b27a850 c2b521a 7f36089 26149dc c2b521a b27a850 c2b521a b27a850 c2b521a b27a850 c2b521a b27a850 26149dc 7f36089 b27a850 c2b521a b27a850 c2b521a 7f36089 c2b521a b27a850 c2b521a b27a850 c2b521a b27a850 7f36089 c2b521a b27a850 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr
import multiprocessing
import time
import os
# Model paths
def get_model_path(repo_id: str, filename: str) -> str:
    """Fetch one file from a Hugging Face Hub repository.

    Prints a progress line and then delegates to ``hf_hub_download``.

    Args:
        repo_id: Hub repository identifier the file lives in.
        filename: Name of the file inside that repository.

    Returns:
        The value returned by ``hf_hub_download`` -- per the
        huggingface_hub documentation, the local path of the downloaded
        (or already cached) file.
    """
    print(f"Obtaining {filename}...")
    return hf_hub_download(repo_id=repo_id, filename=filename)
# Get models: the merged base weights first, then the adapter file.
base_model_path = get_model_path(
    repo_id="johnpaulbin/articulate-11-expspanish-base-merged-Q8_0-GGUF",
    filename="articulate-11-expspanish-base-merged-q8_0.gguf",
)
adapter_path = get_model_path(
    repo_id="johnpaulbin/articulate-V1-Q8_0-GGUF",
    filename="articulate-V1-q8_0.gguf",
)
# Conservative CPU settings to avoid memory corruption
cpu_count = multiprocessing.cpu_count()
# Use half of the reported cores, clamped to the range [1, 8].
optimal_threads = min(8, max(1, cpu_count // 2))
# Reduced batch size to prevent memory issues
batch_size = 128
print(f"Initializing model with {optimal_threads} threads and batch size {batch_size}...")
# Initialize model with safer parameters and report how long loading took.
start_time = time.time()
llm = Llama(
    # Weights: base model plus the adapter file passed as a LoRA.
    model_path=base_model_path,
    lora_path=adapter_path,
    use_mmap=True,
    # Compute: no layers offloaded to a GPU, capped thread count.
    n_gpu_layers=0,
    n_threads=optimal_threads,
    # Sizes: short context and a smaller batch size for stability.
    n_ctx=512,
    n_batch=batch_size,
    verbose=False,
)
print(f"Model loaded in {time.time() - start_time:.2f} seconds")
# Simple translation cache (limited size). Keys are "<direction>:<text>"
# strings; the oldest entry is evicted first (dict insertion order).
translation_cache = {}
MAX_CACHE_SIZE = 50  # Reduced cache size

# Direction label -> (source tag, target tag) used to build the prompt.
# Built once at import time instead of on every call.
_LANG_MAP = {
    "English to Spanish": ("ENGLISH", "SPANISH"),
    "Spanish to English": ("SPANISH", "ENGLISH"),
    "Korean to English": ("KOREAN", "ENGLISH"),
    "English to Korean": ("ENGLISH", "KOREAN"),
}


def translate(direction, text):
    """Translate ``text`` according to ``direction`` using the global ``llm``.

    The model is prompted with ``[SOURCE]text[TARGET]`` and asked for at most
    128 tokens with temperature 0 and ``top_k=1``. Successful results are
    memoised in ``translation_cache`` (at most ``MAX_CACHE_SIZE`` entries).

    Args:
        direction: One of the keys of the language table, e.g.
            ``"English to Spanish"``.
        text: The text to translate; surrounding whitespace is ignored.

    Returns:
        The stripped model output on success; ``""`` when ``text`` is empty
        or whitespace-only; ``"Invalid direction"`` for an unknown
        direction; or an ``"Error during translation: ..."`` message when
        the model call fails. Error strings are never cached.
    """
    # Validate input
    if not text or not text.strip():
        return ""
    text = text.strip()

    # Simple cache lookup
    cache_key = f"{direction}:{text}"
    try:
        return translation_cache[cache_key]
    except KeyError:
        pass

    languages = _LANG_MAP.get(direction)
    if languages is None:
        return "Invalid direction"
    source_lang, target_lang = languages

    # Create prompt
    prompt = f"[{source_lang}]{text}[{target_lang}]"

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()
    try:
        # Generate translation with conservative settings
        response = llm.create_completion(
            prompt,
            max_tokens=128,  # Conservative token limit
            temperature=0.0,  # Deterministic
            top_k=1,  # Most likely token only
            top_p=1.0,  # No sampling
            repeat_penalty=1.0,
            stream=False,
        )
        translation = response["choices"][0]["text"].strip()
    except Exception as e:
        # Deliberately broad: this function is a UI callback, so any failure
        # is reported as text to the caller instead of propagating.
        print(f"Translation error: {e}")
        return f"Error during translation: {str(e)}"

    # Manage cache size. Guarding on a positive limit keeps eviction from
    # calling next() on an empty dict if the limit is ever set to 0.
    if MAX_CACHE_SIZE > 0:
        while len(translation_cache) >= MAX_CACHE_SIZE:
            # Remove oldest entry
            translation_cache.pop(next(iter(translation_cache)))
        translation_cache[cache_key] = translation

    # Log performance
    inference_time = time.perf_counter() - started
    print(f"Translation completed in {inference_time:.3f}s")
    return translation
# Create Gradio interface
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost -- confirm the row layout is as intended.
with gr.Blocks(title="Translation App") as iface:
    gr.Markdown("## Fast Translation App")

    with gr.Row():
        direction = gr.Dropdown(
            choices=["English to Spanish", "Spanish to English", "Korean to English", "English to Korean"],
            label="Translation Direction",
            value="English to Spanish",
        )

    with gr.Row():
        input_text = gr.Textbox(lines=5, label="Input Text")
        output_text = gr.Textbox(lines=5, label="Translation")

    # Add translate button: a click sends (direction, input text) to
    # translate() and shows its return value in the output box.
    translate_btn = gr.Button("Translate")
    translate_btn.click(fn=translate, inputs=[direction, input_text], outputs=output_text)

    # Examples WITHOUT caching (to avoid memory issues)
    gr.Examples(
        examples=[
            ["English to Spanish", "Hello, how are you today?"],
            ["Spanish to English", "Hola, ¿cómo estás hoy?"],
            ["English to Korean", "The weather is nice today."],
            ["Korean to English", "오늘 날씨가 좋습니다."],
        ],
        inputs=[direction, input_text],
        cache_examples=False,  # Disabled caching to prevent memory issues
    )

# Launch with safer settings
iface.launch(debug=False, show_error=True)