Spaces:

Gigaverse
/

ivrit-ai-streaming

Sleeping

File size: 1,905 Bytes

# Helpers for converting faster-whisper Segment and Word objects to and from plain dictionaries
from faster_whisper.transcribe import Segment, Word



# Serialize a Word instance into a plain dictionary
def word_to_dict(word: Word) -> dict:
    """Return the fields of *word* as a dictionary.

    The result has the keys "start", "end", "word" and "probability", in
    that order, each holding the value of the same-named attribute of
    *word*.
    """
    field_names = ("start", "end", "word", "probability")
    return {name: getattr(word, name) for name in field_names}

# Rebuild a Word instance from a plain dictionary
def dict_to_word(data: dict) -> Word:
    """Construct a Word from the entries of *data*.

    Reads the keys "start", "end", "word" and "probability" and passes
    them to ``Word`` as keyword arguments; any other keys are ignored.

    Raises:
        KeyError: if one of the four keys is missing from *data*.
    """
    start = data["start"]
    end = data["end"]
    text = data["word"]
    probability = data["probability"]
    return Word(start=start, end=end, word=text, probability=probability)

# Serialize a Segment instance into a plain dictionary
def segment_to_dict(segment: Segment) -> dict:
    """Return the fields of *segment* as a dictionary.

    Every scalar attribute is copied under its own name. The "words" entry
    is a list with one ``word_to_dict`` result per word when
    ``segment.words`` is truthy, and ``None`` otherwise (so an empty word
    list is also stored as ``None``).
    """
    scalar_fields = (
        "id",
        "seek",
        "start",
        "end",
        "text",
        "tokens",
        "temperature",
        "avg_logprob",
        "compression_ratio",
        "no_speech_prob",
    )
    result = {name: getattr(segment, name) for name in scalar_fields}

    # "words" goes last so the key order matches the field order above.
    if segment.words:
        result["words"] = [word_to_dict(item) for item in segment.words]
    else:
        result["words"] = None
    return result

# Rebuild a Segment instance from a plain dictionary
def dict_to_segment(data: dict) -> Segment:
    """Construct a Segment from the entries of *data*.

    The scalar keys are passed to ``Segment`` unchanged. ``data["words"]``
    is converted entry by entry with ``dict_to_word`` when it is truthy;
    otherwise the segment is built with ``words=None``.

    Raises:
        KeyError: if any expected key (including "words") is missing.
    """
    scalar_keys = (
        "id",
        "seek",
        "start",
        "end",
        "text",
        "tokens",
        "temperature",
        "avg_logprob",
        "compression_ratio",
        "no_speech_prob",
    )
    kwargs = {key: data[key] for key in scalar_keys}

    if data["words"]:
        words = [dict_to_word(entry) for entry in data["words"]]
    else:
        words = None
    return Segment(**kwargs, words=words)

def get_raw_words_from_segments(segments: list[Segment]) -> str:
    """Join the text of every word in *segments* with single spaces.

    Segments whose ``words`` attribute is falsy (``None`` or empty) are
    skipped. Each word's ``word`` string is used as-is, without stripping.
    Returns an empty string when no words are found.
    """
    collected = []
    for seg in segments:
        if not seg.words:
            continue
        collected.extend(item.word for item in seg.words)
    return " ".join(collected)