File size: 6,039 Bytes
5cc1949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""Customizable Chatbot Main Functions"""

import os
import threading
from typing import Optional, Any

from dotenv import load_dotenv
from genai_voice.processing.audio import Audio
from genai_voice.models.open_ai import CustomOpenAIModel
from genai_voice.config.defaults import Config
from genai_voice.logger.log_utils import log, LogLevels

from genai_voice.defintions.prompts import (
    TRAVEL_AGENT_PROMPT,
    PROMPTS_TO_CONTEXT_DATA_FILE,
)

# load environment variables from .env file
load_dotenv(override=True)


class ChatBot:
    """Voice-enabled chatbot wrapping an OpenAI chat model.

    Loads a system prompt plus a grounding-context file, builds the LLM
    prompt template, and optionally captures speech from a microphone and
    plays responses through the speakers.
    """

    def __init__(
        self,
        prompt: Optional[str] = None,
        context_file_path: Optional[str] = None,
        model_name: str = Config.MODEL_GPT_TURBO_NAME,
        mic_id: Any = None,
        enable_speakers: bool = False,
        threaded: bool = False,
    ) -> None:
        """
        Initialize the chatbot.

        prompt:            System prompt for the LLM. Defaults to the travel-agent
                           prompt (with its matching context file) when omitted.
        context_file_path: Context file name, resolved relative to the "data"
                           directory. Required when a custom prompt is supplied.
        model_name:        Model identifier; only the GPT turbo model is supported.
        mic_id:            The index of the mic to enable, or None to skip mic setup.
        enable_speakers:   Whether or not audio will be played.
        threaded:          Plays back audio in separate thread, can interfere with
                           the speech detector.

        Raises ValueError when no context file is resolvable, the context file
        does not exist, or the model is unsupported.
        """
        if not prompt:
            prompt = TRAVEL_AGENT_PROMPT
            context_file_path = PROMPTS_TO_CONTEXT_DATA_FILE[TRAVEL_AGENT_PROMPT]

        # Fail fast with a clear message: a custom prompt without a context file
        # would otherwise crash below with an opaque TypeError from os.path.join.
        if not context_file_path:
            raise ValueError(
                "A context_file_path is required when a custom prompt is provided."
            )

        log(f"Context file: {context_file_path}", log_level=LogLevels.ON)

        # Ensure our context file exists
        self.context_file_path = os.path.join("data", context_file_path)
        if not os.path.exists(self.context_file_path):
            raise ValueError(
                f"Provided context file path does not exist: {self.context_file_path}"
            )
        self.model_name = model_name

        if self.model_name == Config.MODEL_GPT_TURBO_NAME:
            self.__client = CustomOpenAIModel(api_key=Config.OPENAI_API_KEY)
        else:
            raise ValueError(f"Model {self.model_name} is not currently supported.")

        # Whether or not to use speakers
        self.__enable_speakers = enable_speakers

        # Whether or not to thread playback
        self.__threaded = threaded

        # Initialize audio library
        self.audio = Audio()

        # Initialize mic only when explicitly requested
        if mic_id is not None:
            self.initialize_microphone(mic_id)

        # Get data for LLM Context
        self.context = self.get_context_data()

        # Get initial prompt
        self.prompt = prompt

        # Prompt template to initialize LLM
        self.llm_prompt = self.__client.build_prompt(
            prompt=self.prompt, context=self.context
        )

    def get_completion_from_messages(self, messages):
        """
        Send the message list to the configured OpenAI model and return its reply.
        """
        # use default config for model
        return self.__client.generate(messages=messages, config=None)

    def get_context_data(self) -> str:
        """Read and return the entire grounding-context file as one string."""
        with open(self.context_file_path, "r", encoding="utf-8") as f:
            return f.read()

    def respond(self, prompt, llm_history: Optional[list] = None):
        """
        Get a response for `prompt`, replaying `llm_history` (a list of
        [user, assistant] pairs) as conversation context. Plays the reply on
        the speakers when enabled, then returns the reply text.
        """
        if not llm_history:
            log("Empty history. Creating a state list to track histories.")
            llm_history = []
        # Rebuild the message list: system prompt first, then prior turns.
        context = [self.llm_prompt]
        for interaction in llm_history:
            context.append({"role": "user", "content": f"{interaction[0]}"})
            context.append({"role": "assistant", "content": f"{interaction[1]}"})

        context.append({"role": "user", "content": f"{prompt}"})
        llm_response = self.get_completion_from_messages(context)

        if self.__enable_speakers:
            # With threads
            if self.__threaded:
                speaker_thread = threading.Thread(
                    target=self.audio.communicate, args=(llm_response,)
                )
                speaker_thread.start()
            # Without threads
            else:
                self.audio.communicate(llm_response)
        return llm_response

    def initialize_microphone(self, mic_id):
        """
        Initialize microphone object with the indicated ID.
        For best results a headset with a mic is recommended.
        """
        self.audio.initialize_microphone(mic_id)

    def recognize_speech_from_mic(self):
        """
        Listens for speech
        return: The text of the captured speech
        """
        return self.audio.recognize_speech_from_mic()

    def communicate(self, message):
        """
        Plays a message on the speakers
        message: the message
        """
        self.audio.communicate(message)

    def get_prompt_from_streamlit_audio(self, audio) -> Optional[str]:
        """Converts audio captured from streamlit to text; None for empty audio."""
        if not audio:
            return None
        return self.audio.transcribe_from_transformer(audio)

    def get_prompt_from_gradio_audio(self, audio):
        """
        Converts audio captured from gradio to text.
        See https://www.gradio.app/guides/real-time-speech-recognition for more info.
        audio: object containing sampling frequency and raw audio data
        """
        log(f"Getting prompt from audio device: {audio}")
        if not audio:
            return None
        return self.audio.get_prompt_from_gradio_audio(audio)

    def get_prompt_from_file(self, file):
        """
        Converts audio from a file to text.
        file: the path to the audio file
        """

        return self.audio.get_prompt_from_file(file)


if __name__ == "__main__":
    # Enable speakers (adjust to True or False as needed)
    ENABLE_SPEAKERS = True

    # pylint: disable=invalid-name
    human_prompt = ""
    history = []

    # Create the chatbot
    chatbot = ChatBot(enable_speakers=ENABLE_SPEAKERS)

    # Main loop: the first turn sends an empty prompt so the bot opens the
    # conversation. Exit check is case/whitespace-insensitive so that
    # "Goodbye" or "goodbye " also terminates the session.
    while human_prompt.strip().lower() != "goodbye":
        response = chatbot.respond(human_prompt, history)
        history.append([human_prompt, response])
        human_prompt = input(f"\n{response}\n\n")