Commit a38c567 · Parent: e63867f

Added Audio Support

Files changed:
- __pycache__/utils.cpython-312.pyc (+0 -0)
- app.py (+42 -11)
- requirements.txt (+4 -1)
- utils.py (+19 -1)

__pycache__/utils.cpython-312.pyc
CHANGED
Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ
app.py
CHANGED

@@ -7,7 +7,8 @@ from utils import (
     retrieve_context_approx,
     build_prompt,
     ask_gemini,
-    load_documents_gradio,
+    load_documents_gradio,
+    transcribe
 )

 client = authenticate()

@@ -37,20 +38,50 @@ def handle_question(query):
     answer = ask_gemini(prompt, client)
     return f"### My Insights :\n\n{answer.strip()}"

+def route_question(text_input, audio_input):
+    if text_input.strip():
+        return handle_question(text_input)
+    elif audio_input is not None:
+        transcribed = transcribe(audio_input)
+        return handle_question(transcribed)
+    else:
+        return "Please provide a question by typing or speaking."

-#gr.Image(value="bg.JPG", visible=True)
-[… nine further removed lines, not captured in this view]
+def show_audio():
+    return gr.update(visible=True)

+css="""
+#micbttn {
+    background-color: #FFCCCB;
+    font-size: 30px;
+    height: 59px;
+}

+#micINP {
+    background-color: #FFCCCB;
+}
+"""

+with gr.Blocks(css=css, theme='NoCrypt/miku') as demo:
+    gr.Markdown("## Ask Questions from Your Uploaded Documents")
+    file_input = gr.File(label="Upload Your File", file_types=['.pdf', '.txt', '.docx', '.csv', '.json', '.pptx', '.xml', '.xlsx'], file_count='multiple')

+    process_btn = gr.Button("Process Document")
+    status = gr.Textbox(label="Processing Status")
+
+    gr.Markdown("### Ask your question (type or speak):")
+
+    with gr.Row():
+        text_question = gr.Textbox(placeholder="Type your question...", scale=9, show_label=False)
+        mic_btn = gr.Button("🎤", scale=1, elem_id="micbttn")
+
+    audio_input = gr.Audio(sources=["microphone"], type="numpy", visible=False, label=None, elem_id="micINP")
+
+    submit_btn = gr.Button("Submit")
+    answer = gr.Markdown()
+
+    process_btn.click(upload_and_process, inputs=file_input, outputs=status)
+    mic_btn.click(show_audio, outputs=audio_input)
+    submit_btn.click(route_question, inputs=[text_question, audio_input], outputs=answer)

 demo.launch(share=True)  # Or demo.deploy(hf_space="your-username/your-space-name")
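For reference, the pattern the new UI relies on can be exercised in isolation: a hidden microphone gr.Audio component is revealed by a button whose handler returns gr.update(visible=True), and a single submit handler prefers typed text and falls back to the recorded clip. The sketch below is a stand-in under stated assumptions (it targets Gradio 4.x and uses dummy echo handlers in place of the app's handle_question, transcribe, and upload_and_process); it is not code from this commit.

import gradio as gr

def route(text, audio):
    # Prefer typed text; otherwise fall back to the recorded clip, mirroring route_question.
    if text and text.strip():
        return f"You typed: {text.strip()}"
    if audio is not None:
        sr, y = audio  # gr.Audio(type="numpy") yields a (sample_rate_hz, ndarray) tuple
        return f"Received {len(y) / sr:.1f} s of audio at {sr} Hz"
    return "Please provide a question by typing or speaking."

def show_audio():
    # Returning gr.update(visible=True) to an output component toggles its visibility.
    return gr.update(visible=True)

with gr.Blocks() as demo:
    text_in = gr.Textbox(show_label=False, placeholder="Type your question...")
    mic_btn = gr.Button("🎤")
    audio_in = gr.Audio(sources=["microphone"], type="numpy", visible=False)
    submit = gr.Button("Submit")
    out = gr.Markdown()

    mic_btn.click(show_audio, outputs=audio_in)
    submit.click(route, inputs=[text_in, audio_in], outputs=out)

demo.launch()

One design note: because the audio component starts with visible=False, the microphone widget only appears after the 🎤 button is clicked, so purely text-based users never see it.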
requirements.txt
CHANGED

@@ -11,4 +11,7 @@ unstructured[pdf]
 unstructured[docx]
 unstructured[ppt]
 unstructured[excel]
-unstructured[xml]
+unstructured[xml]
+torch
+torchaudio
+transformers
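Of the new entries, transformers provides the speech-recognition pipeline and torch is its inference backend; torchaudio is added alongside them in this commit. A minimal environment check (a sketch, not part of the repository) that the Whisper model referenced in utils.py can be loaded:

from transformers import pipeline
import numpy as np

# Downloads openai/whisper-base.en on first run, then transcribes one second of silence.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
silence = np.zeros(16000, dtype=np.float32)
print(asr({"sampling_rate": 16000, "raw": silence})["text"])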
utils.py
CHANGED

@@ -12,6 +12,7 @@ warnings.filterwarnings("ignore")
 from google import genai
 from google.genai import types
 from sentence_transformers import SentenceTransformer
+from transformers import pipeline
 from langchain_community.document_loaders import(
     UnstructuredPDFLoader,
     TextLoader,

@@ -138,4 +139,21 @@ def ask_gemini(prompt, client):
         contents=[prompt],
         config=types.GenerateContentConfig(max_output_tokens=2048, temperature=0.5, seed=42),
     )
-    return response.text
+    return response.text
+
+# Speech2Text:
+def transcribe(audio, model="openai/whisper-base.en"):
+    if audio is None:
+        raise ValueError("No audio detected!")
+
+    transcriber = pipeline("automatic-speech-recognition", model=model)
+    sr, y = audio  # sampling rate (Hz) and the amplitude array
+
+    if y.ndim > 1:  # convert stereo (2 channels, L and R) to mono
+        y = y.mean(1)
+
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))  # normalize the amplitudes to the range [-1, 1]
+
+    result = transcriber({"sampling_rate": sr, "raw": y})
+    return result["text"]
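transcribe() expects the same (sample_rate, array) tuple that gr.Audio(type="numpy") passes to the app: the rate in hertz and the samples typically as int16, which the function converts to normalized float32 before handing them to the Whisper pipeline (it also assumes numpy is imported as np elsewhere in utils.py, which this hunk does not show). A small sketch of calling it outside the UI, using a synthesized tone as a hypothetical stand-in for a recording:

import numpy as np
from utils import transcribe

# One second of a 440 Hz tone as int16, mimicking the (sample_rate, array)
# tuple produced by gr.Audio(type="numpy").
sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
y = (0.3 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

# Whisper returns little of interest for a pure tone, but the call exercises the
# full mono/float32/normalization path end to end.
print(transcribe((sr, y)))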