Update app.py
app.py CHANGED
@@ -1,6 +1,4 @@
 import streamlit as st
-from streamlit_webrtc import webrtc_streamer, AudioProcessorBase
-import av
 import numpy as np
 import tempfile
 import soundfile as sf
@@ -8,15 +6,7 @@ import os
 import time
 import re
 from openai import OpenAI
-
-# ------------------ Audio Processor ------------------
-class AudioRecorder(AudioProcessorBase):
-    def __init__(self):
-        self.recorded_frames = []
-
-    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
-        self.recorded_frames.append(frame)
-        return frame
+from streamlit_audio_recorder import audio_recorder
 
 # ------------------ App Configuration ------------------
 st.set_page_config(page_title="Document AI Assistant", layout="wide")
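The import changes swap the streamlit-webrtc stack (webrtc_streamer, AudioProcessorBase, av) for a one-shot recorder component. A minimal sketch of the new component in isolation, assuming the package exposes audio_recorder and returns the captured clip as WAV bytes, as the usage later in this diff suggests:

import streamlit as st
from streamlit_audio_recorder import audio_recorder

# Renders a mic widget; returns WAV bytes once the speaker pauses
# for ~2 s (pause_threshold), or None while idle.
audio_bytes = audio_recorder(pause_threshold=2.0)
if audio_bytes:
    st.audio(audio_bytes, format="audio/wav")  # play back the capture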
@@ -71,40 +61,34 @@ with col1:
 # ------------------ Voice Input Processing ------------------
 with col2:
     st.markdown("### 🎙️ Voice Input (Optional)")
-    webrtc_ctx = webrtc_streamer(
-        audio_processor_factory=AudioRecorder,
-        media_stream_constraints={"audio": True, "video": False},
-        async_processing=True,
-    )
-
-    if webrtc_ctx.audio_processor and not webrtc_ctx.state.playing and webrtc_ctx.audio_processor.recorded_frames:
+    audio_bytes = audio_recorder(pause_threshold=2.0)
+
+    if audio_bytes:
         st.info("Transcribing your voice...")
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp.write(audio_bytes)
+            tmp_path = tmp.name
+
+        with open(tmp_path, "rb") as audio_file:
+            try:
+                whisper_result = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file,
+                    response_format="json"
+                )
+                transcript = whisper_result.text.strip()
+                confidence = getattr(whisper_result, "confidence", "N/A")
+
+                if transcript:
+                    st.success(f"Recognized: {transcript}")
+                    st.caption(f"🧠 Confidence: {confidence}")
+                    if preview_toggle:
+                        st.session_state.transcript_preview = transcript
+                    else:
+                        st.session_state.messages.append({"role": "user", "content": transcript})
+                        st.rerun()
+            except Exception as e:
+                st.error(f"❌ Transcription failed: {str(e)}")
 
     if st.session_state.transcript_preview:
         st.markdown("---")
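The round-trip above (bytes → named temp file → Whisper) is the core of the new flow and can be isolated into a helper. A sketch under the same assumptions as the diff (an OpenAI client built from OPENAI_API_KEY, WAV input); note that whisper-1's json response exposes only the text, which is why any confidence lookup has to be defensive:

import tempfile
from openai import OpenAI

client = OpenAI()  # picks up OPENAI_API_KEY from the environment

def transcribe_wav(audio_bytes: bytes) -> str:
    # The transcriptions endpoint wants a named file with an audio
    # extension, so spill the bytes to disk first.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        tmp.write(audio_bytes)
        tmp_path = tmp.name
    with open(tmp_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="json",
        )
    return result.text.strip()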
@@ -151,7 +135,7 @@ with col2:
         st.session_state.messages.append({"role": "assistant", "content": assistant_message})
 
         image_match = re.search(
-            r'https://raw
+            r'https://raw\.githubusercontent\.com/AndrewLORTech/surgical-pathology-manual/main/[\w\-/]*\.png',
             assistant_message
         )
         if image_match:
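The new search pattern pins image links to raw.githubusercontent.com paths in the AndrewLORTech/surgical-pathology-manual repo ending in .png (the pattern it replaces is truncated in this view). A quick standalone check, with a made-up path purely for illustration:

import re

PATTERN = (r'https://raw\.githubusercontent\.com/AndrewLORTech/'
           r'surgical-pathology-manual/main/[\w\-/]*\.png')

sample = ("See https://raw.githubusercontent.com/AndrewLORTech/"
          "surgical-pathology-manual/main/figs/slide-01.png for the image.")
match = re.search(PATTERN, sample)
if match:
    print(match.group(0))  # the matched .png URL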
@@ -164,4 +148,4 @@ with col2:
 
     for message in reversed(st.session_state.messages):
         role, content = message["role"], message["content"]
-        st.chat_message(role).write(content)
+        st.chat_message(role).write(content)
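The rendering loop assumes st.session_state.messages holds {"role", "content"} dicts in chronological order; reversed() shows the newest turn first. A self-contained sketch of that convention (the seed messages are hypothetical):

import streamlit as st

if "messages" not in st.session_state:
    st.session_state.messages = [  # hypothetical seed turns
        {"role": "user", "content": "What stain is this?"},
        {"role": "assistant", "content": "H&E at low power."},
    ]

for message in reversed(st.session_state.messages):
    st.chat_message(message["role"]).write(message["content"])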