Spaces:
Sleeping
Sleeping
File size: 1,905 Bytes
861eb71 d8dadfc 861eb71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# Helpers for converting faster-whisper Segment and Word objects to and from dictionaries
from faster_whisper.transcribe import Segment, Word
# Function to dump a Word instance to a dictionary
def word_to_dict(word: Word) -> dict:
    """Dump a Word instance to a plain dictionary.

    Args:
        word: Object exposing ``start``, ``end``, ``word`` and
            ``probability`` attributes.

    Returns:
        A dict with the keys ``"start"``, ``"end"``, ``"word"`` and
        ``"probability"``, in that order, holding the attribute values
        unchanged.
    """
    field_names = ("start", "end", "word", "probability")
    return {name: getattr(word, name) for name in field_names}
# Function to load a Word instance from a dictionary
def dict_to_word(data: dict) -> Word:
    """Load a Word instance from a dictionary.

    Args:
        data: Mapping with the keys ``"start"``, ``"end"``, ``"word"`` and
            ``"probability"``. Any other keys are ignored.

    Returns:
        A ``Word`` built from those four values.

    Raises:
        KeyError: If one of the four keys is missing from ``data``.
    """
    start = data["start"]
    end = data["end"]
    text = data["word"]
    probability = data["probability"]
    return Word(start=start, end=end, word=text, probability=probability)
# Function to dump a Segment instance to a dictionary
def segment_to_dict(segment: Segment) -> dict:
    """Dump a Segment instance to a plain dictionary.

    Args:
        segment: Object exposing the attributes ``id``, ``seek``, ``start``,
            ``end``, ``text``, ``tokens``, ``temperature``, ``avg_logprob``,
            ``compression_ratio``, ``no_speech_prob`` and ``words``.

    Returns:
        A dict with one key per attribute listed above. ``"words"`` holds a
        list of dictionaries produced by ``word_to_dict`` when
        ``segment.words`` is non-empty, and ``None`` when it is ``None`` or
        empty.
    """
    scalar_fields = (
        "id",
        "seek",
        "start",
        "end",
        "text",
        "tokens",
        "temperature",
        "avg_logprob",
        "compression_ratio",
        "no_speech_prob",
    )
    payload = {name: getattr(segment, name) for name in scalar_fields}

    # A missing or empty word list is serialized as None.
    if segment.words:
        payload["words"] = [word_to_dict(entry) for entry in segment.words]
    else:
        payload["words"] = None
    return payload
# Function to load a Segment instance from a dictionary
def dict_to_segment(data: dict) -> Segment:
    """Load a Segment instance from a dictionary.

    Args:
        data: Mapping with the keys ``"id"``, ``"seek"``, ``"start"``,
            ``"end"``, ``"text"``, ``"tokens"``, ``"temperature"``,
            ``"avg_logprob"``, ``"compression_ratio"`` and
            ``"no_speech_prob"``. The ``"words"`` key is optional; when
            present and non-empty it must be a list of dictionaries accepted
            by ``dict_to_word``.

    Returns:
        A ``Segment`` built from those values. Its ``words`` is a list of
        ``Word`` objects, or ``None`` when ``"words"`` is absent, ``None``
        or empty.

    Raises:
        KeyError: If any key other than ``"words"`` is missing.
    """
    # An absent "words" key is treated like None or an empty list, so a
    # dictionary without word-level data still loads.
    raw_words = data.get("words")
    words = [dict_to_word(item) for item in raw_words] if raw_words else None

    return Segment(
        id=data["id"],
        seek=data["seek"],
        start=data["start"],
        end=data["end"],
        text=data["text"],
        tokens=data["tokens"],
        temperature=data["temperature"],
        avg_logprob=data["avg_logprob"],
        compression_ratio=data["compression_ratio"],
        no_speech_prob=data["no_speech_prob"],
        words=words,
    )
def get_raw_words_from_segments(segments: list[Segment]) -> str:
    """Join the word text of every segment into one space-separated string.

    Args:
        segments: Segments to read. Segments whose ``words`` is ``None`` or
            empty are skipped.

    Returns:
        The ``word`` attribute of each word, in segment order, joined with a
        single space. The word text is used as-is, with no stripping. An
        empty string is returned when no segment has words.
    """
    pieces = []
    for segment in segments:
        if not segment.words:
            continue
        pieces.extend(entry.word for entry in segment.words)
    return " ".join(pieces)