johnpaulbin committed
Commit 26149dc · verified · 1 Parent(s): 7f36089

Update app.py

Files changed (1):
  1. app.py +37 -26
app.py CHANGED
@@ -4,43 +4,54 @@ import gradio as gr
 
 # Download the model from Hugging Face
 model_name = "johnpaulbin/articulate-V1-Q8_0-GGUF"
-model_file = "articulate-V1-Q8_0.gguf"  # Replace with the actual GGUF file name from the repository
+model_file = "articulate-V1-q8_0.gguf"  # Verify the exact file name in the repository
 model_path = hf_hub_download(repo_id=model_name, filename=model_file)
 
-# Initialize the Llama model with llama-cpp-python
+# Initialize the Llama model
 llm = Llama(
     model_path=model_path,
-    n_ctx=1024,     # Context length (adjust as needed)
-    n_threads=2,    # Number of CPU threads
-    n_gpu_layers=0  # Run on CPU only (no GPU in free Spaces tier)
+    n_ctx=1028,     # Context length
+    n_threads=2,    # Number of CPU threads
+    n_gpu_layers=0  # Run on CPU only
 )
 
-# Define the chat function for Gradio
-def chat(message, history):
-    # Build the message list with history and current user input
-    messages = []
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
+# Define the translation function
+def translate(direction, text):
+    # Determine source and target languages based on direction
+    if direction == "English to Spanish":
+        source_lang = "ENGLISH"
+        target_lang = "SPANISH"
+    elif direction == "Spanish to English":
+        source_lang = "SPANISH"
+        target_lang = "ENGLISH"
+    else:
+        return "Invalid direction"
 
-    # Perform inference with greedy decoding
-    response = llm.create_chat_completion(
-        messages=messages,
-        max_tokens=100,   # Limit output length
-        top_k=1,          # Greedy decoding: select the top token
-        temperature=0.01  # Low temperature for determinism (top_k=1 is sufficient)
+    # Construct the prompt for raw completion
+    prompt = f"[{source_lang}]{text}[{target_lang}]"
+
+    # Generate completion with deterministic settings (greedy decoding)
+    response = llm.create_completion(
+        prompt,
+        max_tokens=200,  # Limit output length
+        temperature=0,   # Greedy decoding
+        top_k=1          # Select the most probable token
    )
 
     # Extract and return the generated text
-    generated_text = response['choices'][0]['message']['content']
-    return generated_text
+    return response['choices'][0]['text'].strip()
 
-# Create the Gradio ChatInterface
-iface = gr.ChatInterface(
-    fn=chat,
-    title="Articulate V1 Chatbot",
-    description="Chat with the Articulate V1 model (Llama 3-based) using greedy decoding."
+# Define the Gradio interface
+direction_options = ["English to Spanish", "Spanish to English"]
+iface = gr.Interface(
+    fn=translate,
+    inputs=[
+        gr.Dropdown(choices=direction_options, label="Translation Direction"),
+        gr.Textbox(lines=5, label="Input Text")
+    ],
+    outputs=gr.Textbox(lines=5, label="Translation"),
+    title="Translation App",
+    description="Translate text between English and Spanish using the Articulate V1 model."
 )
 
 # Launch the app
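
Review note: the updated model_file comment says to verify the exact file name, and GGUF quant suffixes are case-sensitive (Q8_0 vs q8_0), which is exactly what this commit changes. A minimal sketch for checking the name against the repo with huggingface_hub's list_repo_files; the check is illustrative, not part of the commit:

from huggingface_hub import list_repo_files

# Print every GGUF file in the model repo so model_file can be copied verbatim
files = list_repo_files("johnpaulbin/articulate-V1-Q8_0-GGUF")
print([f for f in files if f.endswith(".gguf")])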
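
The rewrite replaces the chat loop with a raw completion over a [SOURCE]text[TARGET] prompt and pins decoding to the most probable token (temperature=0, top_k=1). A quick smoke test of the new translate() helper; the sample sentences are invented, and iface.launch() is assumed to be the unchanged line following the hunk, matching the trailing "# Launch the app" comment:

# Hypothetical smoke test, run after the model loads (not part of the commit)
print(translate("English to Spanish", "The weather is nice today."))
print(translate("Spanish to English", "¿Dónde está la biblioteca?"))

# Launch the app
iface.launch()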