Update app.py
app.py (changed)
```diff
@@ -3,6 +3,7 @@ import torch
 import numpy as np
 import soundfile as sf
 import librosa
+import time
 from transformers import pipeline
 
 pipe = pipeline(
@@ -13,13 +14,16 @@ pipe = pipeline(
 )
 
 def transcribe(audio_data):
-
-
+    log_messages = []
+
+    start_time = time.time()
+    log_messages.append("Загрузка файла...")
+
     if audio_data is None:
-        return "Ошибка: не получены аудиоданные"
-
+        return "Ошибка: не получены аудиоданные", "\n".join(log_messages)
+
     wav_file = "temp_audio.wav"
-
+
     if isinstance(audio_data, tuple):
         audio_array, sample_rate = audio_data
         sf.write(wav_file, audio_array, sample_rate)
@@ -27,18 +31,25 @@
         audio_array, sample_rate = librosa.load(audio_data, sr=16000)
         sf.write(wav_file, audio_array, sample_rate)
     else:
-        return "Ошибка: неизвестный формат аудиоданных"
-
+        return "Ошибка: неизвестный формат аудиоданных", "\n".join(log_messages)
+
+    log_messages.append(f"Загрузка файла завершена за {time.time() - start_time:.2f} сек")
+
+    start_time = time.time()
+    log_messages.append("Работа модели... в процессе")
     result = pipe(wav_file)
-
+    log_messages.append(f"Работа модели завершена за {time.time() - start_time:.2f} сек")
+
+    return result["text"], "\n".join(log_messages)
 
 with gr.Blocks() as app:
     gr.Markdown("## Распознавание речи с Whisper")
-
+
     audio_data = gr.Audio(type="filepath")
     text_output = gr.Textbox(label="Распознанный текст")
-
+    log_output = gr.Textbox(label="Лог выполнения", interactive=False)
+
     btn = gr.Button("Распознать")
-    btn.click(transcribe, inputs=audio_data, outputs=text_output)
+    btn.click(transcribe, inputs=audio_data, outputs=[text_output, log_output])
 
 app.launch(debug=True)
```
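For reference, below is a sketch of how app.py reads after this commit, assembled from the two hunks above. The imports above `numpy`, the `pipeline(...)` arguments, and the `elif` branch between the hunks are not shown in the diff, so those lines (the `gradio` import, the `openai/whisper-small` checkpoint, and the `isinstance(audio_data, str)` check) are assumptions for illustration only.

```python
# Sketch of app.py after this commit. Lines marked "assumed" are outside the
# diff hunks and are illustrative placeholders, not part of the change.
import gradio as gr  # assumed: gradio is imported above the shown hunks (gr.* is used below)
import torch         # appears in the hunk context line of the original file
import numpy as np
import soundfile as sf
import librosa
import time          # added by this commit
from transformers import pipeline

# Assumed placeholder: the real pipeline(...) arguments sit between the hunks
# and are not shown in the diff; a Whisper ASR checkpoint is implied by the UI title.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def transcribe(audio_data):
    log_messages = []

    start_time = time.time()
    log_messages.append("Загрузка файла...")

    if audio_data is None:
        return "Ошибка: не получены аудиоданные", "\n".join(log_messages)

    wav_file = "temp_audio.wav"

    if isinstance(audio_data, tuple):
        # Numpy-style input: unpack and write a temporary WAV for the pipeline.
        audio_array, sample_rate = audio_data
        sf.write(wav_file, audio_array, sample_rate)
    elif isinstance(audio_data, str):  # assumed branch condition; this line is between the hunks
        # Filepath input: resample to 16 kHz before writing.
        audio_array, sample_rate = librosa.load(audio_data, sr=16000)
        sf.write(wav_file, audio_array, sample_rate)
    else:
        return "Ошибка: неизвестный формат аудиоданных", "\n".join(log_messages)

    log_messages.append(f"Загрузка файла завершена за {time.time() - start_time:.2f} сек")

    # Time the model run separately from file loading.
    start_time = time.time()
    log_messages.append("Работа модели... в процессе")
    result = pipe(wav_file)
    log_messages.append(f"Работа модели завершена за {time.time() - start_time:.2f} сек")

    return result["text"], "\n".join(log_messages)

with gr.Blocks() as app:
    gr.Markdown("## Распознавание речи с Whisper")

    audio_data = gr.Audio(type="filepath")
    text_output = gr.Textbox(label="Распознанный текст")
    log_output = gr.Textbox(label="Лог выполнения", interactive=False)

    btn = gr.Button("Распознать")
    # transcribe now returns (text, log), so both textboxes are wired as outputs.
    btn.click(transcribe, inputs=audio_data, outputs=[text_output, log_output])

app.launch(debug=True)
```

Since the component is declared as `gr.Audio(type="filepath")`, Gradio passes the recording to `transcribe` as a path string, so the `librosa` branch is the one normally exercised; the tuple branch only matters if the component type is changed to numpy-style audio.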