Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload 2 files
Browse files- app.py +10 -9
- languages.py +102 -0
app.py
CHANGED
@@ -14,6 +14,7 @@ from fastrtc import (
|
|
14 |
get_turn_credentials,
|
15 |
)
|
16 |
from gradio.utils import get_space
|
|
|
17 |
|
18 |
cur_dir = Path(__file__).parent
|
19 |
|
@@ -23,39 +24,39 @@ load_dotenv()
|
|
23 |
client = AsyncClient(timeout=30)
|
24 |
|
25 |
|
26 |
-
# Pre-change revision (the lines marked "-" on the removed side of the diff).
async def transcribe_file(audio: tuple[int, np.ndarray]):
    """Send one audio clip to the hosted transcription endpoint.

    Args:
        audio: An ``(int, ndarray)`` pair handed to ``audio_to_bytes`` for
            encoding (presumably ``(sample_rate, samples)`` -- confirm
            against the caller).

    Returns:
        The body of the HTTP response as text. The request asks for
        ``response_format="text"``.
    """
    response = await client.post(
        url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        # The bearer token is read from the HF_TOKEN environment variable.
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data={"response_format": "text"},
    )
    return response.text
|
34 |
|
35 |
|
36 |
-
# Pre-change revision (the lines marked "-" on the removed side of the diff).
async def transcribe(audio: tuple[int, np.ndarray], transcript: str):
    """Transcribe one audio segment and append it to the running transcript.

    Args:
        audio: The audio segment, forwarded unchanged to ``transcribe_file``.
        transcript: The transcript text accumulated so far.

    Yields:
        A single ``AdditionalOutputs`` wrapping the previous transcript, a
        space, and the newly transcribed text.
    """
    text = await transcribe_file(audio)
    yield AdditionalOutputs(transcript + " " + text)
|
39 |
|
40 |
|
41 |
# Pre-change revision (removed side of the diff), rebuilt as valid Python.
# The textbox is both an extra input to `transcribe` and the component that
# receives its AdditionalOutputs.
transcript = gr.Textbox(label="Transcript")

stream = Stream(
    ReplyOnPause(transcribe),
    modality="audio",
    mode="send",
    additional_inputs=[transcript],
    additional_outputs=[transcript],
    # The handler returns its second argument unchanged (presumably the
    # newly produced value replacing the current one -- confirm).
    additional_outputs_handler=lambda a, b: b,
    # The original line had no trailing comma here, which made the whole
    # call a SyntaxError.
    rtc_configuration=get_turn_credentials_async,
    # 604_800 is 7 * 24 * 3600 (one week, assuming ttl is in seconds).
    server_rtc_configuration=get_turn_credentials(ttl=604_800),
    # A limit of 20 applies only when get_space() returns a truthy value.
    concurrency_limit=20 if get_space() else None,
    time_limit=300,
    ui_args={"title": ""},
)

# Separate upload/microphone UI that calls transcribe_file directly.
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[gr.Audio(label="Upload Audio", sources=["upload", "microphone"])],
    outputs=gr.Textbox(label="Transcript"),
)
|
61 |
|
|
|
14 |
get_turn_credentials,
|
15 |
)
|
16 |
from gradio.utils import get_space
|
17 |
+
from languages import LANGUAGES
|
18 |
|
19 |
cur_dir = Path(__file__).parent
|
20 |
|
|
|
24 |
client = AsyncClient(timeout=30)
|
25 |
|
26 |
|
27 |
+
async def transcribe_file(audio: tuple[int, np.ndarray], language: str = ""):
    """Send one audio clip to the hosted transcription endpoint.

    Args:
        audio: An ``(int, ndarray)`` pair handed to ``audio_to_bytes`` for
            encoding (presumably ``(sample_rate, samples)`` -- confirm
            against the caller).
        language: Value forwarded as the ``language`` form field. When it is
            empty or ``None`` (for example, a dropdown with nothing selected)
            the field is left out of the request instead of being sent as an
            empty string.
            NOTE(review): this assumes the endpoint treats ``language`` as
            optional -- confirm.

    Returns:
        The body of the HTTP response as text. The request asks for
        ``response_format="text"``.

    Raises:
        An HTTP status error from ``response.raise_for_status()`` when the
        endpoint answers with an error status (``httpx.HTTPStatusError``,
        assuming ``client`` is an ``httpx.AsyncClient``).
    """
    form_fields = {"response_format": "text"}
    if language:
        form_fields["language"] = language

    response = await client.post(
        url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
        # The bearer token is read from the HF_TOKEN environment variable.
        headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
        files={"file": audio_to_bytes(audio)},
        data=form_fields,
    )
    # Without this check an error body would be returned as if it were a
    # transcript and shown to the user.
    response.raise_for_status()
    return response.text
|
35 |
|
36 |
|
37 |
+
async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
    """Transcribe one audio segment and append it to the running transcript.

    Args:
        audio: The audio segment, forwarded unchanged to ``transcribe_file``.
        transcript: The transcript text accumulated so far; may be empty.
        language: Language value forwarded unchanged to ``transcribe_file``.

    Yields:
        A single ``AdditionalOutputs`` wrapping the updated transcript: the
        previous transcript and the new text joined by one space.
    """
    text = await transcribe_file(audio, language)
    # Join only the non-empty parts, so the first utterance does not begin
    # with a stray space and a None transcript does not raise TypeError.
    combined = " ".join(part for part in (transcript, text) if part)
    yield AdditionalOutputs(combined)
|
40 |
|
41 |
|
42 |
# The textbox is both an extra input to `transcribe` and the component that
# receives its AdditionalOutputs.
transcript = gr.Textbox(label="Transcript")

stream = Stream(
    # NOTE(review): 48_100 is not one of the usual audio sample rates
    # (44_100 and 48_000 are) -- confirm the value is intentional.
    ReplyOnPause(transcribe, input_sample_rate=48_100),
    modality="audio",
    mode="send",
    # The order matches the extra parameters of `transcribe`:
    # (transcript, language).
    additional_inputs=[
        transcript,
        gr.Dropdown(choices=LANGUAGES, label="Language"),
    ],
    additional_outputs=[transcript],
    # The handler returns its second argument unchanged (presumably the
    # newly produced value replacing the current one -- confirm).
    additional_outputs_handler=lambda a, b: b,
    rtc_configuration=get_turn_credentials_async,
    # 604_800 is 7 * 24 * 3600 (one week, assuming ttl is in seconds).
    server_rtc_configuration=get_turn_credentials(ttl=604_800),
    # A limit of 20 applies only when get_space() returns a truthy value.
    concurrency_limit=20 if get_space() else None,
    time_limit=300,
    ui_args={"title": ""},
)

# Separate upload/microphone UI that calls transcribe_file directly. It has
# its own language dropdown, passed as the second argument.
iface = gr.Interface(
    fn=transcribe_file,
    inputs=[
        gr.Audio(label="Upload Audio", sources=["upload", "microphone"]),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
    ],
    outputs=gr.Textbox(label="Transcript"),
)
|
62 |
|
languages.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# (display name, language token) pairs. Each token wraps a short language
# code in "<|" and "|>".
# NOTE(review): these look like Whisper-style language tokens -- confirm
# against the transcription endpoint before adding or renaming entries.
LANGUAGES = [
    ("English", "<|en|>"),
    ("Chinese", "<|zh|>"),
    ("German", "<|de|>"),
    ("Spanish", "<|es|>"),
    ("Russian", "<|ru|>"),
    ("Korean", "<|ko|>"),
    ("French", "<|fr|>"),
    ("Japanese", "<|ja|>"),
    ("Portuguese", "<|pt|>"),
    ("Turkish", "<|tr|>"),
    ("Polish", "<|pl|>"),
    ("Catalan", "<|ca|>"),
    ("Dutch", "<|nl|>"),
    ("Arabic", "<|ar|>"),
    ("Swedish", "<|sv|>"),
    ("Italian", "<|it|>"),
    ("Indonesian", "<|id|>"),
    ("Hindi", "<|hi|>"),
    ("Finnish", "<|fi|>"),
    ("Vietnamese", "<|vi|>"),
    ("Hebrew", "<|he|>"),
    ("Ukrainian", "<|uk|>"),
    ("Greek", "<|el|>"),
    ("Malay", "<|ms|>"),
    ("Czech", "<|cs|>"),
    ("Romanian", "<|ro|>"),
    ("Danish", "<|da|>"),
    ("Hungarian", "<|hu|>"),
    ("Tamil", "<|ta|>"),
    ("Norwegian", "<|no|>"),
    ("Thai", "<|th|>"),
    ("Urdu", "<|ur|>"),
    ("Croatian", "<|hr|>"),
    ("Bulgarian", "<|bg|>"),
    ("Lithuanian", "<|lt|>"),
    ("Latin", "<|la|>"),
    ("Maori", "<|mi|>"),
    ("Malayalam", "<|ml|>"),
    ("Welsh", "<|cy|>"),
    ("Slovak", "<|sk|>"),
    ("Telugu", "<|te|>"),
    ("Persian", "<|fa|>"),
    ("Latvian", "<|lv|>"),
    ("Bengali", "<|bn|>"),
    ("Serbian", "<|sr|>"),
    ("Azerbaijani", "<|az|>"),
    ("Slovenian", "<|sl|>"),
    ("Kannada", "<|kn|>"),
    ("Estonian", "<|et|>"),
    ("Macedonian", "<|mk|>"),
    ("Breton", "<|br|>"),
    ("Basque", "<|eu|>"),
    ("Icelandic", "<|is|>"),
    ("Armenian", "<|hy|>"),
    ("Nepali", "<|ne|>"),
    ("Mongolian", "<|mn|>"),
    ("Bosnian", "<|bs|>"),
    ("Kazakh", "<|kk|>"),
    ("Albanian", "<|sq|>"),
    ("Swahili", "<|sw|>"),
    ("Galician", "<|gl|>"),
    ("Marathi", "<|mr|>"),
    ("Punjabi", "<|pa|>"),
    ("Sinhala", "<|si|>"),
    ("Khmer", "<|km|>"),
    ("Shona", "<|sn|>"),
    ("Yoruba", "<|yo|>"),
    ("Somali", "<|so|>"),
    ("Afrikaans", "<|af|>"),
    ("Occitan", "<|oc|>"),
    ("Georgian", "<|ka|>"),
    ("Belarusian", "<|be|>"),
    ("Tajik", "<|tg|>"),
    ("Sindhi", "<|sd|>"),
    ("Gujarati", "<|gu|>"),
    ("Amharic", "<|am|>"),
    ("Yiddish", "<|yi|>"),
    ("Lao", "<|lo|>"),
    ("Uzbek", "<|uz|>"),
    ("Faroese", "<|fo|>"),
    ("Haitian Creole", "<|ht|>"),
    ("Pashto", "<|ps|>"),
    ("Turkmen", "<|tk|>"),
    ("Norwegian Nynorsk", "<|nn|>"),
    ("Maltese", "<|mt|>"),
    ("Sanskrit", "<|sa|>"),
    ("Luxembourgish", "<|lb|>"),
    ("Burmese", "<|my|>"),
    ("Tibetan", "<|bo|>"),
    ("Tagalog", "<|tl|>"),
    ("Malagasy", "<|mg|>"),
    ("Assamese", "<|as|>"),
    ("Tatar", "<|tt|>"),
    ("Hawaiian", "<|haw|>"),
    ("Lingala", "<|ln|>"),
    ("Hausa", "<|ha|>"),
    ("Bashkir", "<|ba|>"),
    ("Javanese", "<|jw|>"),
    ("Sundanese", "<|su|>"),
    ("Cantonese", "<|yue|>"),
]
|