freddyaboulton HF Staff committed on
Commit
bd507ca
·
verified ·
1 Parent(s): 0a71e28

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +10 -9
  2. languages.py +102 -0
app.py CHANGED
@@ -14,6 +14,7 @@ from fastrtc import (
14
  get_turn_credentials,
15
  )
16
  from gradio.utils import get_space
 
17
 
18
  cur_dir = Path(__file__).parent
19
 
@@ -23,39 +24,39 @@ load_dotenv()
23
  client = AsyncClient(timeout=30)
24
 
25
 
26
- async def transcribe_file(audio: tuple[int, np.ndarray]):
27
  response = await client.post(
28
  url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
29
  headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
30
  files={"file": audio_to_bytes(audio)},
31
- data={"response_format": "text"},
32
  )
33
  return response.text
34
 
35
 
36
- async def transcribe(audio: tuple[int, np.ndarray], transcript: str):
37
- text = await transcribe_file(audio)
38
  yield AdditionalOutputs(transcript + " " + text)
39
 
40
 
41
  transcript = gr.Textbox(label="Transcript")
42
  stream = Stream(
43
- ReplyOnPause(transcribe),
44
  modality="audio",
45
  mode="send",
46
- additional_inputs=[transcript],
47
  additional_outputs=[transcript],
48
  additional_outputs_handler=lambda a, b: b,
49
- rtc_configuration=get_turn_credentials_async if get_space() else None,
50
  server_rtc_configuration=get_turn_credentials(ttl=604_800),
51
  concurrency_limit=20 if get_space() else None,
52
- time_limit=300,
53
  ui_args={"title": ""},
54
  )
55
 
56
  iface = gr.Interface(
57
  fn=transcribe_file,
58
- inputs=[gr.Audio(label="Upload Audio", sources=["upload", "microphone"])],
59
  outputs=gr.Textbox(label="Transcript"),
60
  )
61
 
 
14
  get_turn_credentials,
15
  )
16
  from gradio.utils import get_space
17
+ from languages import LANGUAGES
18
 
19
  cur_dir = Path(__file__).parent
20
 
 
24
  client = AsyncClient(timeout=30)
25
 
26
 
27
+ async def transcribe_file(audio: tuple[int, np.ndarray], language: str):
28
  response = await client.post(
29
  url="https://douatiz8x2itm3yn.us-east-1.aws.endpoints.huggingface.cloud/api/v1/audio/transcriptions",
30
  headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
31
  files={"file": audio_to_bytes(audio)},
32
+ data={"response_format": "text", "language": language},
33
  )
34
  return response.text
35
 
36
 
37
+ async def transcribe(audio: tuple[int, np.ndarray], transcript: str, language: str):
38
+ text = await transcribe_file(audio, language)
39
  yield AdditionalOutputs(transcript + " " + text)
40
 
41
 
42
  transcript = gr.Textbox(label="Transcript")
43
  stream = Stream(
44
+ ReplyOnPause(transcribe, input_sample_rate=48_100),
45
  modality="audio",
46
  mode="send",
47
+ additional_inputs=[transcript, gr.Dropdown(choices=LANGUAGES, label="Language")],
48
  additional_outputs=[transcript],
49
  additional_outputs_handler=lambda a, b: b,
50
+ rtc_configuration=get_turn_credentials_async,
51
  server_rtc_configuration=get_turn_credentials(ttl=604_800),
52
  concurrency_limit=20 if get_space() else None,
53
+ time_limit=300,
54
  ui_args={"title": ""},
55
  )
56
 
57
  iface = gr.Interface(
58
  fn=transcribe_file,
59
+ inputs=[gr.Audio(label="Upload Audio", sources=["upload", "microphone"]), gr.Dropdown(choices=LANGUAGES, label="Language")],
60
  outputs=gr.Textbox(label="Transcript"),
61
  )
62
 
languages.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LANGUAGES = [
2
+ ("English", "<|en|>"),
3
+ ("Chinese", "<|zh|>"),
4
+ ("German", "<|de|>"),
5
+ ("Spanish", "<|es|>"),
6
+ ("Russian", "<|ru|>"),
7
+ ("Korean", "<|ko|>"),
8
+ ("French", "<|fr|>"),
9
+ ("Japanese", "<|ja|>"),
10
+ ("Portuguese", "<|pt|>"),
11
+ ("Turkish", "<|tr|>"),
12
+ ("Polish", "<|pl|>"),
13
+ ("Catalan", "<|ca|>"),
14
+ ("Dutch", "<|nl|>"),
15
+ ("Arabic", "<|ar|>"),
16
+ ("Swedish", "<|sv|>"),
17
+ ("Italian", "<|it|>"),
18
+ ("Indonesian", "<|id|>"),
19
+ ("Hindi", "<|hi|>"),
20
+ ("Finnish", "<|fi|>"),
21
+ ("Vietnamese", "<|vi|>"),
22
+ ("Hebrew", "<|he|>"),
23
+ ("Ukrainian", "<|uk|>"),
24
+ ("Greek", "<|el|>"),
25
+ ("Malay", "<|ms|>"),
26
+ ("Czech", "<|cs|>"),
27
+ ("Romanian", "<|ro|>"),
28
+ ("Danish", "<|da|>"),
29
+ ("Hungarian", "<|hu|>"),
30
+ ("Tamil", "<|ta|>"),
31
+ ("Norwegian", "<|no|>"),
32
+ ("Thai", "<|th|>"),
33
+ ("Urdu", "<|ur|>"),
34
+ ("Croatian", "<|hr|>"),
35
+ ("Bulgarian", "<|bg|>"),
36
+ ("Lithuanian", "<|lt|>"),
37
+ ("Latin", "<|la|>"),
38
+ ("Maori", "<|mi|>"),
39
+ ("Malayalam", "<|ml|>"),
40
+ ("Welsh", "<|cy|>"),
41
+ ("Slovak", "<|sk|>"),
42
+ ("Telugu", "<|te|>"),
43
+ ("Persian", "<|fa|>"),
44
+ ("Latvian", "<|lv|>"),
45
+ ("Bengali", "<|bn|>"),
46
+ ("Serbian", "<|sr|>"),
47
+ ("Azerbaijani", "<|az|>"),
48
+ ("Slovenian", "<|sl|>"),
49
+ ("Kannada", "<|kn|>"),
50
+ ("Estonian", "<|et|>"),
51
+ ("Macedonian", "<|mk|>"),
52
+ ("Breton", "<|br|>"),
53
+ ("Basque", "<|eu|>"),
54
+ ("Icelandic", "<|is|>"),
55
+ ("Armenian", "<|hy|>"),
56
+ ("Nepali", "<|ne|>"),
57
+ ("Mongolian", "<|mn|>"),
58
+ ("Bosnian", "<|bs|>"),
59
+ ("Kazakh", "<|kk|>"),
60
+ ("Albanian", "<|sq|>"),
61
+ ("Swahili", "<|sw|>"),
62
+ ("Galician", "<|gl|>"),
63
+ ("Marathi", "<|mr|>"),
64
+ ("Punjabi", "<|pa|>"),
65
+ ("Sinhala", "<|si|>"),
66
+ ("Khmer", "<|km|>"),
67
+ ("Shona", "<|sn|>"),
68
+ ("Yoruba", "<|yo|>"),
69
+ ("Somali", "<|so|>"),
70
+ ("Afrikaans", "<|af|>"),
71
+ ("Occitan", "<|oc|>"),
72
+ ("Georgian", "<|ka|>"),
73
+ ("Belarusian", "<|be|>"),
74
+ ("Tajik", "<|tg|>"),
75
+ ("Sindhi", "<|sd|>"),
76
+ ("Gujarati", "<|gu|>"),
77
+ ("Amharic", "<|am|>"),
78
+ ("Yiddish", "<|yi|>"),
79
+ ("Lao", "<|lo|>"),
80
+ ("Uzbek", "<|uz|>"),
81
+ ("Faroese", "<|fo|>"),
82
+ ("Haitian Creole", "<|ht|>"),
83
+ ("Pashto", "<|ps|>"),
84
+ ("Turkmen", "<|tk|>"),
85
+ ("Norwegian Nynorsk", "<|nn|>"),
86
+ ("Maltese", "<|mt|>"),
87
+ ("Sanskrit", "<|sa|>"),
88
+ ("Luxembourgish", "<|lb|>"),
89
+ ("Burmese", "<|my|>"),
90
+ ("Tibetan", "<|bo|>"),
91
+ ("Tagalog", "<|tl|>"),
92
+ ("Malagasy", "<|mg|>"),
93
+ ("Assamese", "<|as|>"),
94
+ ("Tatar", "<|tt|>"),
95
+ ("Hawaiian", "<|haw|>"),
96
+ ("Lingala", "<|ln|>"),
97
+ ("Hausa", "<|ha|>"),
98
+ ("Bashkir", "<|ba|>"),
99
+ ("Javanese", "<|jw|>"),
100
+ ("Sundanese", "<|su|>"),
101
+ ("Cantonese", "<|yue|>"),
102
+ ]