Spaces: Running

Commit 3cb4983
Parent(s): 0d7db6d

Update Screenshot img upload. Allow request interrupt previous prompt. Allow question editable

Files changed:
- app.py +98 -90
- requirements.txt +10 -9
- statics/edit.png +0 -0
- statics/index.html +10 -5
- statics/script.js +112 -43
- statics/styles.css +31 -38
app.py
CHANGED
Old version:
@@ -1,60 +1,44 @@
-#
-import os, tempfile
 from pathlib import Path
 from typing import Dict

-# Server
 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles

-# AI
-import torch  # For transformer
 from google import genai
 from google.genai import types

-#
 from pydub import AudioSegment
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-import numpy as np

-
-
-

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
-    raise RuntimeError("GEMINI_API_KEY

-# Tiny Whisper model is light enough for CPU Spaces; change if GPU is available
 ASR_MODEL_ID = "openai/whisper-small.en"
-ASR_LANGUAGE = "en"
-SAMPLE_RATE =

-
-# ── FastAPI App ───────────────────────────
-############################################

 app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-
-# Serve frontend assets
 app.mount("/statics", StaticFiles(directory="statics"), name="statics")

-############################################
-# ── Global objects (lazy‑loaded) ──────────
-############################################
-
-# Globals
-processor = None
-model = None
-
 # Enable Logging for Debugging
 import psutil
 import logging
@@ -84,84 +68,108 @@ def check_system_resources():
     logger.warning("⚠️ High Disk usage detected!")
 check_system_resources()

-
-@app.on_event("startup")
-async def load_models():
-    global processor, model
-    cache = Path("model_cache"); cache.mkdir(exist_ok=True)
-    # in startup (Transformer Whisper processing)
-    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
-    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
-    # Force English transcription – never translate
-    forced = processor.get_decoder_prompt_ids(language="english", task="transcribe")
-    model.config.forced_decoder_ids = forced
-    model.to("cpu")
-    model.eval()
-    logger.info("[STARTUP] Whisper loaded ✔")

-
-
-# ── Helpers ───────────────────────────────
-############################################

 def build_prompt(question: str) -> str:
-    """Craft a prompt that elicits concise, structured answers."""
     return (
-        "You are a helpful career
-        "question clearly and concisely
-        "
-        "Ensure your answer is less than 200 words.\n\n"
-        f"Interview question: \"{question}\""
     )

-def
-    return psutil.Process().memory_info().rss / 1_048_576

-
-
-

 @app.get("/")
-async def root() -> FileResponse:
-    """Serve the single‑page app."""
     logger.info("[STATIC] Serving frontend")
     return FileResponse(Path("statics/index.html"))


 @app.post("/voice-transcribe")
-async def voice_transcribe(file: UploadFile = File(...)):
-    """Receive audio, transcribe, push to Gemini, return answer."""
     if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
-        raise HTTPException(
-    #
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        tmp.write(await file.read())
-
     try:
-
-
-        audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2**15)
         inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
         ids = model.generate(inputs.input_features.to(model.device))
         question = processor.decode(ids[0], skip_special_tokens=True).strip()
-        if not question:
-
-
-
-
-        client = genai.Client(api_key=GEMINI_API_KEY)
-        response = client.models.generate_content(
-            model="gemini-2.5-flash-preview-04-17",
-            contents=prompt
-        )
-        answer = response.text.strip()
-        logger.info(f"[LLM] Decision answer: {answer}")
-        return JSONResponse(
-            {
-                "question": question,
-                "answer": answer,
-                "memory_mb": round(memory_usage_mb(), 1),
-            }
-        )
     finally:
-        os.remove(tmp_path)
New version:

+# Interview Q&A – FastAPI backend
+import base64, io, json, logging, os, tempfile
 from pathlib import Path
 from typing import Dict

 from fastapi import FastAPI, File, UploadFile, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, FileResponse
 from fastapi.staticfiles import StaticFiles

+# AI / LLM
 from google import genai
 from google.genai import types

+# ASR
+import numpy as np
 from pydub import AudioSegment
 from transformers import WhisperProcessor, WhisperForConditionalGeneration

+# Misc
+from PIL import Image
+
+##############################################################################

 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
+    raise RuntimeError("❌ GEMINI_API_KEY must be set as env var")

 ASR_MODEL_ID = "openai/whisper-small.en"
+ASR_LANGUAGE = "en"
+SAMPLE_RATE = 16_000

+##############################################################################

 app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
 )
 app.mount("/statics", StaticFiles(directory="statics"), name="statics")

 # Enable Logging for Debugging
 import psutil
 import logging

 …

     logger.warning("⚠️ High Disk usage detected!")
 check_system_resources()

+##############################################################################

+# Global ASR (lazy-loaded)
+processor = model = None

 def build_prompt(question: str) -> str:
     return (
+        "You are a helpful career-coach AI. Answer the following interview "
+        "question clearly and concisely (≤200 words). Use markdown when helpful.\n\n"
+        f"Interview question: \"{question.strip()}\""
     )

+def memory_mb() -> float:
+    return round(psutil.Process().memory_info().rss / 1_048_576, 1)

+@app.on_event("startup")
+async def load_models():
+    global processor, model
+    cache = Path("model_cache"); cache.mkdir(exist_ok=True)
+    processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
+    model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
+    forced = processor.get_decoder_prompt_ids(task="transcribe", language="english")
+    model.config.forced_decoder_ids = forced
+    model.to("cpu").eval()
+    log.info("🔊 Whisper loaded ✔")

 @app.get("/")
+async def root() -> FileResponse:  # serve SPA
     logger.info("[STATIC] Serving frontend")
     return FileResponse(Path("statics/index.html"))

+##############################################################################
+# ── MAIN ENDPOINTS ──────────────────────────────────────────────────────────
+
+def call_gemini(prompt: str, vision_parts=None) -> str:
+    client = genai.Client(api_key=GEMINI_API_KEY)
+    kwargs: Dict = {}
+    if vision_parts:  # multimodal call
+        kwargs["contents"] = vision_parts + [{"text": prompt}]
+    else:
+        kwargs["contents"] = prompt
+    resp = client.models.generate_content(
+        model="gemini-2.5-flash-preview-04-17", **kwargs
+    )
+    return resp.text.strip()

 @app.post("/voice-transcribe")
+async def voice_transcribe(file: UploadFile = File(...)):
     if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
+        raise HTTPException(415, "Unsupported audio type")
+    # Write temporary audio file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+        tmp.write(await file.read()); tmp_path = tmp.name
+    # Audio processing and transcription
     try:
+        seg = AudioSegment.from_file(tmp_path).set_frame_rate(SAMPLE_RATE).set_channels(1)
+        audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2 ** 15)
         inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
         ids = model.generate(inputs.input_features.to(model.device))
         question = processor.decode(ids[0], skip_special_tokens=True).strip()
+        if not question:
+            raise ValueError("No speech detected")
+
+        answer = call_gemini(build_prompt(question))
+        return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})
     finally:
+        os.remove(tmp_path)
+
+@app.post("/image-question")
+async def image_question(file: UploadFile = File(...)):
+    if file.content_type not in {"image/png", "image/jpeg"}:
+        raise HTTPException(415, "Unsupported image type")
+    # Read file and decode
+    raw = await file.read()
+    b64 = base64.b64encode(raw).decode()
+    # Send image data
+    vision_part = [{
+        "inline_data": {
+            "mime_type": file.content_type,
+            "data": b64
+        }
+    }]
+    # Ask Gemini to return JSON so we can split Q & A
+    prompt = (
+        "From the screenshot extract the interview question (English). "
+        "Then answer concisely (≤200 words). "
+        "Return pure JSON: {\"question\":\"...\",\"answer\":\"...\"}"
+    )
+    # Send prompt and image
+    text = call_gemini(prompt, vision_part)
+    try:
+        parsed = json.loads(text)
+        question, answer = parsed["question"], parsed["answer"]
+    except (json.JSONDecodeError, KeyError):
+        # Fallback: treat whole reply as answer
+        question, answer = "[Extracted from screenshot]", text
+    return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})

+@app.post("/text-question")
+async def text_question(payload: Dict):
+    question = (payload.get("question") or "").strip()
+    if not question:
+        raise HTTPException(400, "question is required")
+    answer = call_gemini(build_prompt(question))
+    return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})
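For reference, a minimal client sketch for the two endpoints this commit adds (`/text-question` and `/image-question`). The local base URL, the `requests` dependency, and the `screenshot.png` file are illustrative assumptions, not part of the commit.

```python
# Smoke test for the new endpoints; assumes the app is running locally
# (e.g. `uvicorn app:app --port 7860`) and that `requests` is installed.
import requests

BASE = "http://localhost:7860"  # hypothetical local URL; on Spaces, use the Space URL

# /text-question takes a JSON body with a "question" field
r = requests.post(f"{BASE}/text-question",
                  json={"question": "Tell me about a time you handled conflict."})
print(r.json()["answer"])

# /image-question takes a multipart PNG/JPEG upload
with open("screenshot.png", "rb") as fh:  # any local screenshot, placeholder name
    r = requests.post(f"{BASE}/image-question",
                      files={"file": ("screenshot.png", fh, "image/png")})
print(r.json())  # {"question": "...", "answer": "...", "memory_mb": ...}
```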
requirements.txt
CHANGED
@@ -1,23 +1,24 @@

Old version:

 # Core server
 fastapi
 uvicorn[standard]
-aiofiles # Static
-python-multipart # File uploads

 # Voice‑to‑text (Whisper via Transformers)
-transformers
-torch
-huggingface_hub
 accelerate

-# Audio
 pydub
 ffmpeg-python
-openai-whisper

 # Gemini Flash 2.5
 google-genai
-python-dotenv # Optional – read GOOGLE_API_KEY

 # Utilities
-psutil # Lightweight health logging

New version:

 # Core server
 fastapi
 uvicorn[standard]
+aiofiles # Static files

 # Voice‑to‑text (Whisper via Transformers)
+transformers # For whisper
+torch # Just to run transformer so don't remove
 accelerate

+# Audio & Image
 pydub
 ffmpeg-python
+openai-whisper # pulls tiny‑en / small‑en
+pillow

 # Gemini Flash 2.5
 google-genai

 # Utilities
+psutil # Lightweight health logging
+python-multipart # File uploads
+huggingface_hub
+python-dotenv
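For local testing, a minimal launcher sketch based on the dependencies above. The port (7860, the usual Spaces default) and the placeholder key are assumptions: app.py raises at import time unless a real GEMINI_API_KEY is present in the environment.

```python
# Hypothetical local launcher (not part of the commit); assumes
# `pip install -r requirements.txt` has already been run.
import os
import uvicorn

os.environ.setdefault("GEMINI_API_KEY", "your-key-here")  # placeholder, not a real key
uvicorn.run("app:app", host="0.0.0.0", port=7860)         # 7860 is the usual Spaces port
```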
statics/edit.png
ADDED
statics/index.html
CHANGED
@@ -13,11 +13,16 @@

Old version:

 <body>
   <main class="container">
     <h1>Interview Q&A Assistant</h1>
-    <p class="subtitle"
-
-
-    <
-    <
     <h2>AI Answer</h2>
     <pre id="answer-output" class="output markdown"></pre>
   </section>

New version:

 <body>
   <main class="container">
     <h1>Interview Q&A Assistant</h1>
+    <p class="subtitle">🎙 Hold the button, ask your interview question, release to get an answer.<br>
+       📸 Or upload a screenshot of the question.</p>
+    <button id="record-button" class="record-btn">🎤 Hold to Ask</button>
+    <button id="screenshot-button" class="screenshot-btn">📸 Drop your Screenshot</button>
+    <input id="file-input" type="file" accept="image/*" hidden />
+    <section class="output-section">
+      <h2>Your Question
+        <img src="/statics/edit.png" id="edit-btn" class="edit-icon" title="Edit & re-ask">
+      </h2>
+      <div id="question-output" class="output" contenteditable="false"></div>
     <h2>AI Answer</h2>
     <pre id="answer-output" class="output markdown"></pre>
   </section>
statics/script.js
CHANGED
Old version:
@@ -1,13 +1,14 @@
 /*******************************
  * Interview Q&A Frontend JS *
  *******************************/

-
-const recordBtn = document.getElementById("record-button");
-const questionEl = document.getElementById("question-output");
-const answerEl = document.getElementById("answer-output");
-
-// Typing animation util
 function typeEffect(el, text, speed = 30) {
   el.textContent = "";
   let idx = 0;
@@ -18,53 +19,121 @@ function typeEffect(el, text, speed = 30) {
   }, speed);
 }

-
 let mediaRecorder, chunks = [];
-// Initialise media data
 async function initMedia() {
   try {
-    const
-
-
-
-    // Stop btn start write and send audio chunk
-    mediaRecorder.onstop = async () => {
-      const audioBlob = new Blob(chunks, { type: "audio/wav" });
-      chunks = [];
-      // Build form data
-      const form = new FormData();
-      form.append("file", audioBlob, "record.wav");
-      // UX feedback
-      typeEffect(questionEl, "⌛ Transcribing…");
-      answerEl.textContent = "";
-      try {
-        const res = await fetch("/voice-transcribe", { method: "POST", body: form });
-        if (!res.ok) throw new Error(`HTTP ${res.status}`);
-        const data = await res.json();
-        // render markdown after a small delay for dramatic effect
-        typeEffect(questionEl, data.question || "[no speech detected]");
-        setTimeout(() => typeEffect(answerEl, data.answer || "[no answer]"), 500);
-      } catch (err) {
-        typeEffect(answerEl, "❌ " + err.message);
-      }
-    };
   } catch (err) {
-
   }
-}

-
 function bindRecordBtn() {
-  if (!mediaRecorder) return;
   recordBtn.addEventListener("mousedown", () => mediaRecorder.start());
   recordBtn.addEventListener("mouseup", () => mediaRecorder.stop());
-  // Touch devices
   recordBtn.addEventListener("touchstart", e => { e.preventDefault(); mediaRecorder.start(); });
-  recordBtn.addEventListener("touchend", e => { e.preventDefault(); mediaRecorder.stop();
 }

-
 window.addEventListener("DOMContentLoaded", async () => {
-  try {
-
-
New version:

 /*******************************
  * Interview Q&A Frontend JS *
  *******************************/
+const recordBtn = document.getElementById("record-button");
+const screenshotBtn = document.getElementById("screenshot-button");
+const fileInput = document.getElementById("file-input");
+const questionEl = document.getElementById("question-output");
+const answerEl = document.getElementById("answer-output");
+const editBtn = document.getElementById("edit-btn");

+/* ─────────────────── Typing effect utility ─────────────────── */
 function typeEffect(el, text, speed = 30) {
   el.textContent = "";
   let idx = 0;

 …

   }, speed);
 }

+/* ─────────────────── Abort-controller wrapper ───────────────── */
+let currentController = null;
+function fetchWithAbort(url, opts = {}) {
+  if (currentController) currentController.abort(); // cancel previous req
+  currentController = new AbortController();
+  return fetch(url, { ...opts, signal: currentController.signal });
+}
+
+/* ─────────────────── Audio recording setup ─────────────────── */
 let mediaRecorder, chunks = [];
 async function initMedia() {
+  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+  mediaRecorder = new MediaRecorder(stream);
+
+  mediaRecorder.ondataavailable = e => chunks.push(e.data);
+
+  mediaRecorder.onstop = async () => {
+    const audioBlob = new Blob(chunks, { type: "audio/wav" });
+    chunks = [];
+
+    const form = new FormData();
+    form.append("file", audioBlob, "record.wav");
+
+    questionEl.textContent = "⌛ Transcribing…";
+    answerEl.innerHTML = "";
+
+    try {
+      const res = await fetchWithAbort("/voice-transcribe", { method: "POST", body: form });
+      if (!res.ok) throw new Error(`HTTP ${res.status}`);
+      const data = await res.json();
+      displayQa(data);
+    } catch (err) {
+      answerEl.textContent = "❌ " + err.message;
+    }
+  };
+}
+
+/* ─────────────── Screenshot / image-question upload ─────────── */
+fileInput.addEventListener("change", async (e) => {
+  const file = e.target.files[0];
+  if (!file) return;
+  const form = new FormData();
+  form.append("file", file);
+
+  questionEl.textContent = "⌛ Processing screenshot…";
+  answerEl.innerHTML = "";
+
   try {
+    const res = await fetchWithAbort("/image-question", { method: "POST", body: form });
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const data = await res.json();
+    displayQa(data);
   } catch (err) {
+    answerEl.textContent = "❌ " + err.message;
+  } finally {
+    fileInput.value = ""; // reset for next upload
   }
+});
+screenshotBtn.addEventListener("click", () => fileInput.click());

+/* ─────────────────── Hold-to-record UX ─────────────────────── */
 function bindRecordBtn() {
   recordBtn.addEventListener("mousedown", () => mediaRecorder.start());
   recordBtn.addEventListener("mouseup", () => mediaRecorder.stop());
   recordBtn.addEventListener("touchstart", e => { e.preventDefault(); mediaRecorder.start(); });
+  recordBtn.addEventListener("touchend", e => { e.preventDefault(); mediaRecorder.stop(); });
 }

+/* ─────────────────── Editable question block ───────────────── */
+function enableEdit() {
+  questionEl.contentEditable = "true";
+  questionEl.classList.add("editing");
+  questionEl.focus();
+}
+
+async function sendEditedQuestion(text) {
+  questionEl.contentEditable = "false";
+  questionEl.classList.remove("editing");
+  answerEl.textContent = "⌛ Thinking…";
+  try {
+    const res = await fetchWithAbort("/text-question", {
+      method : "POST",
+      headers: { "Content-Type": "application/json" },
+      body   : JSON.stringify({ question: text })
+    });
+    if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    const data = await res.json();
+    displayQa(data);
+  } catch (err) {
+    answerEl.textContent = "❌ " + err.message;
+  }
+}
+
+editBtn.addEventListener("click", () => enableEdit());
+questionEl.addEventListener("keydown", (e) => {
+  if (e.key === "Enter") {
+    e.preventDefault();
+    const text = questionEl.innerText.trim();
+    if (text) sendEditedQuestion(text);
+  }
+});
+
+/* ─────────────────────── helpers ───────────────────────────── */
+function displayQa(data) {
+  typeEffect(questionEl, data.question || "[no question]");
+  const html = DOMPurify.sanitize(marked.parse(data.answer || ""));
+  setTimeout(() => { answerEl.innerHTML = html; }, 400); // nice stagger
+}
+
+/* ─────────────────────── bootstrap ─────────────────────────── */
 window.addEventListener("DOMContentLoaded", async () => {
+  try {
+    await initMedia();
+    bindRecordBtn();
+  } catch {
+    alert("Microphone permission is required.");
+  }
+});
statics/styles.css
CHANGED
Old version:
@@ -1,41 +1,34 @@
-/* Simple, clean aesthetic */
 :root {
-  --primary
-  --
-  --bg: #f8f9fc;
-  --mono: "Courier New", monospace;
 }

-
-
-
-
-
-
-
-
-
-
-
-
-.
-
-
-
-
-
-
-
-
-
-.
-
-
-
-}
-
-
-.output.markdown h3, .output.markdown h2 { color:#55f; margin:6px 0; }
-.output.markdown strong { font-weight:bold; color:#fff; background:#333; padding:0 4px; border-radius:3px; }
-.output.markdown em { font-style:italic; color:#ffd700; }
-.output.markdown ul { margin:4px 0 4px 20px; }
New version:

 :root {
+  --primary:#0052cc; --accent:#ff4d4d; --sub:#66adff; --bg:#f8f9fc;
+  --mono:"Courier New",monospace;
 }

+/* Layout -----------------------------------------------------------------*/
+html,body{margin:0;padding:0;background:var(--bg);font-family:Arial,Helvetica,sans-serif}
+.container{max-width:720px;margin:20px auto;padding:24px;background:#fff;
+  border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,.08)}
+h1{margin-top:0;text-align:center;color:var(--primary)}
+.subtitle{text-align:center;color:#444;margin-bottom:16px}
+
+/* Buttons ----------------------------------------------------------------*/
+.record-btn,.screenshot-btn{
+  display:block;margin:0 auto 10px;padding:14px 28px;border:none;border-radius:50px;
+  font-size:17px;color:#fff;cursor:pointer;transition:background .25s}
+.record-btn {background:var(--accent)} .record-btn:hover {background:#b80e0e}
+.screenshot-btn{background:var(--sub)} .screenshot-btn:hover{background:#1f5089}
+
+/* Output blocks -----------------------------------------------------------*/
+.output-section h2{margin:22px 0 8px;color:var(--primary);display:flex;align-items:center;gap:6px}
+.output{
+  background:#000;color:#0f0;padding:16px;min-height:60px;border-radius:4px;
+  overflow-x:auto;font-family:var(--mono);white-space:pre-wrap;word-wrap:break-word}
+
+/* Edit icon ---------------------------------------------------------------*/
+.edit-icon{width:18px;height:18px;cursor:pointer;opacity:.6;transition:opacity .2s}
+.edit-icon:hover{opacity:1}
+
+/* Markdown tweaks ---------------------------------------------------------*/
+.output.markdown h3,.output.markdown h2{color:#55f;margin:6px 0}
+.output.markdown strong{font-weight:bold;color:#fff;background:#333;padding:0 4px;border-radius:3px}
+.output.markdown em{font-style:italic;color:#ffd700}
+.output.markdown ul{margin:4px 0 4px 20px}