Spaces:

ai4bharat
/

IndicTrans3-beta

Running on Zero

App Files Files Community

sumanthd committed on Mar 11

Commit

c5ecbf5

1 Parent(s): b063b6f

add model inference

Browse files

Files changed (1) hide show

app.py +65 -14

app.py CHANGED Viewed

@@ -1,9 +1,24 @@
 import gradio as gr
-model = None
-tokenizer = None
-# device = 0 if torch.cuda.is_available() else -1
 LANGUAGES = {
     "Hindi": "hin_Deva",
@@ -29,9 +44,51 @@ LANGUAGES = {
     "Bodo": "brx_Deva"
 }
-def translate(src_lang, text, tgt_lang):
-    return "Translation output will appear here..."
 def store_feedback(rating, feedback_text):
     if not rating:
@@ -59,12 +116,6 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
-                src_lang = gr.Dropdown(
-                    ["English"],
-                    value="English",
-                    label="Translate From",
-                    elem_id="translate-from"
-                )
                 text_input = gr.Textbox(
                     placeholder="Enter text to translate...",
@@ -90,7 +141,7 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
                 )
         btn_submit = gr.Button("Translate")
-        btn_submit.click(fn=translate, inputs=[src_lang, text_input, tgt_lang], outputs=text_output)
         gr.Examples(
             examples=[
@@ -100,9 +151,9 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
                 ["English", "Hello, how are you today? I hope you're doing well.", "Marathi"],
                 ["English", "Hello, how are you today? I hope you're doing well.", "Malayalam"]
             ],
-            inputs=[src_lang, text_input, tgt_lang],
             outputs=text_output,
-            fn=translate,
             cache_examples=True,
             examples_per_page=5
         )

+import torch
+import spaces
+from collections.abc import Iterator
+from threading import Thread
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+MAX_MAX_NEW_TOKENS = 4096
+DEFAULT_MAX_NEW_TOKENS = 2048
+MAX_INPUT_TOKEN_LENGTH = 4096
+if not torch.cuda.is_available():
+    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+if torch.cuda.is_available():
+    model_id = "ai4bharat/IndicTrans3-beta"
+    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.use_default_system_prompt = False
 LANGUAGES = {
     "Hindi": "hin_Deva",
     "Bodo": "brx_Deva"
 }
+# def translate(src_lang, text, tgt_lang):
+#     return "Translation output will appear here..."
+@spaces.GPU
+def generate(
+    tgt_lang: str,
+    message: str,
+    max_new_tokens: int = 1024,
+    temperature: float = 0.6,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2,
+) -> Iterator[str]:
+    conversation = []
+    conversation.append({"role": "user", "content": f"Translate the following text to {tgt_lang}: {message}"})
+    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = input_ids.to(model.device)
+    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+    )
+    t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+        yield "".join(outputs)
 def store_feedback(rating, feedback_text):
     if not rating:
     with gr.Column(elem_id="col-container"):
         with gr.Row():
             with gr.Column():
                 text_input = gr.Textbox(
                     placeholder="Enter text to translate...",
                 )
         btn_submit = gr.Button("Translate")
+        btn_submit.click(fn=generate, inputs=[tgt_lang, text_input, 4096, 0, 50, 0], outputs=text_output)
         gr.Examples(
             examples=[
                 ["English", "Hello, how are you today? I hope you're doing well.", "Marathi"],
                 ["English", "Hello, how are you today? I hope you're doing well.", "Malayalam"]
             ],
+            inputs=[tgt_lang, text_input, 4096, 0, 50, 0],
             outputs=text_output,
+            fn=generate,
             cache_examples=True,
             examples_per_page=5
         )