# Hugging Face Space: aryachakraborty/Voice_Assistant_smolLM2 (status: Running)
# File size: 3,364 bytes - revision cdff304
### ARIA – AI-Responsive Interactive Assistant

from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from web_search_tool import web_search
import warnings
warnings.filterwarnings("ignore", message="Failed to load image Python extension")


# Flask application object that the route handlers attach to.
app = Flask(__name__)

# Load the tokenizer and model once at import time, placing the model on the
# GPU when PyTorch reports one and on the CPU otherwise.
checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)
model = model.to(device)

# Shown when there is no usable query or the model produces no text.
_FALLBACK_REPLY = "Sorry, couldn't understand your query. Can you ask again?"


def _build_system_prompt(mode, user_query):
    """Return the system prompt for the requested assistant mode.

    Args:
        mode: "web" embeds the result of ``web_search(user_query)`` as
            context, "reasoning" selects the step-by-step prompt, and any
            other value selects the creative prompt.
        user_query: The user's question; only used for the web search.

    Returns:
        The system prompt string.
    """
    if mode == "web":
        try:
            context = web_search(user_query)
        except Exception as e:
            # Best effort: a failed search must not fail the whole request.
            context = "No additional context could be retrieved."
            print("Web search failed:", e)

        return (
            "You are a voice assistant that answers in a polite and professional tone. "
            "Use the following context to help answer the question:\n"
            f"{context}\n"
            "If the context is insufficient, still try to give the best possible answer."
        )

    if mode == "reasoning":
        return (
            "You are a voice assistant specialized in logical reasoning and problem-solving. "
            "Break down the problem systematically and explain your thought process step by step. "
            "Consider different angles and approaches to arrive at a well-reasoned conclusion."
        )

    # Creative mode (also the fallback for unrecognised mode values).
    return (
        "You are a voice assistant with a flair for creative and imaginative responses. "
        "Think outside the box and provide unique, innovative perspectives while maintaining "
        "relevance to the query. Feel free to use metaphors, analogies, and vivid descriptions."
    )


def _generate_reply(messages):
    """Run the chat model on *messages* and return only the generated text.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        The decoded completion with the prompt removed and surrounding
        whitespace stripped; may be an empty string.
    """
    # add_generation_prompt=True ends the prompt with the assistant header so
    # the model answers as the assistant rather than continuing another turn.
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Calling the tokenizer (instead of .encode) also yields the attention
    # mask, which generate() receives through **inputs.
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)

    outputs = model.generate(**inputs, max_new_tokens=128)

    # The output sequence starts with the prompt tokens. Slice them off
    # rather than splitting the decoded text on the word "assistant", which
    # also occurs in every system prompt and may occur in the reply itself.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the assistant page and, on POST, answer the submitted query.

    Form fields read on POST:
        query: The user's question. Missing or blank values get the fallback
            reply without running the model.
        mode: "web" (default), "reasoning", or anything else for creative.

    Returns:
        The rendered ``index.html`` template with ``response`` set to the
        assistant's reply (an empty string for GET requests).
    """
    response = ""
    if request.method == "POST":
        user_query = request.form.get("query", "").strip()
        mode = request.form.get("mode", "web")  # Default to web search mode

        if user_query:
            messages = [
                {"role": "system", "content": _build_system_prompt(mode, user_query)},
                {"role": "user", "content": user_query},
            ]
            response = _generate_reply(messages)

        if not response:
            response = _FALLBACK_REPLY

    return render_template("index.html", response=response)

if __name__ == "__main__":
    import os

    # Debug mode is opt-in (FLASK_DEBUG=1). It enables Werkzeug's interactive
    # debugger, which lets anyone who can reach the server execute code, and
    # it starts the reloader, which re-executes this module and therefore
    # loads the model a second time.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled)