LiamKhoaLe committed
Commit 3cb4983 · 1 Parent(s): 0d7db6d

Update screenshot image upload. Allow a new request to interrupt the previous prompt. Allow the question to be edited.

Files changed (6)
  1. app.py +98 -90
  2. requirements.txt +10 -9
  3. statics/edit.png +0 -0
  4. statics/index.html +10 -5
  5. statics/script.js +112 -43
  6. statics/styles.css +31 -38
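
For reference, the two new routes added in app.py below (/text-question and /image-question) accept plain HTTP requests and return JSON with "question", "answer", and "memory_mb". A minimal client sketch follows; the base URL, port, and use of the requests library are illustrative assumptions, not part of this commit.

# Illustrative client sketch – assumes the Space is reachable locally on the
# default port and that the requests package is installed.
import requests

BASE = "http://localhost:7860"  # assumption; substitute the deployed Space URL

# JSON endpoint used by the editable-question flow
r = requests.post(f"{BASE}/text-question", json={"question": "Tell me about yourself."})
print(r.json()["answer"])

# Multipart endpoint used by the screenshot-upload flow (PNG or JPEG only)
with open("question.png", "rb") as fh:
    r = requests.post(f"{BASE}/image-question",
                      files={"file": ("question.png", fh, "image/png")})
print(r.json()["question"], "->", r.json()["answer"])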
app.py CHANGED
@@ -1,60 +1,44 @@
- # Access site: https://binkhoale1812-interview-ai.hf.space/
- import os, tempfile
from pathlib import Path
from typing import Dict

- # Server
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles

- # AI + LLM
- import torch # For transformer
from google import genai
from google.genai import types

- # Audio Transcribe
from pydub import AudioSegment
from transformers import WhisperProcessor, WhisperForConditionalGeneration
- import numpy as np

- ############################################
- # ── Configuration ────────────────────────
- ############################################

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
-     raise RuntimeError("GEMINI_API_KEY environment variable must be set!")

- # Tiny Whisper model is light enough for CPU Spaces; change if GPU is available
ASR_MODEL_ID = "openai/whisper-small.en"
- ASR_LANGUAGE = "en" # Force to English for interview setting
- SAMPLE_RATE = 16000

- ############################################
- # ── FastAPI App ───────────────────────────
- ############################################

app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
-     allow_origins=["*"],
-     allow_methods=["*"],
-     allow_headers=["*"],
)
-
- # Serve frontend assets
app.mount("/statics", StaticFiles(directory="statics"), name="statics")

- ############################################
- # ── Global objects (lazy‑loaded) ──────────
- ############################################
-
- # Globals
- processor = None
- model = None
-
# Enable Logging for Debugging
import psutil
import logging
@@ -84,84 +68,108 @@ def check_system_resources():
        logger.warning("⚠️ High Disk usage detected!")
check_system_resources()

- # Startup
- @app.on_event("startup")
- async def load_models():
-     global processor, model
-     cache = Path("model_cache"); cache.mkdir(exist_ok=True)
-     # in startup (Transformer Whisper processing)
-     processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
-     model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
-     # Force English transcription – never translate
-     forced = processor.get_decoder_prompt_ids(language="english", task="transcribe")
-     model.config.forced_decoder_ids = forced
-     model.to("cpu")
-     model.eval()
-     logger.info("[STARTUP] Whisper loaded ✔")

-
- ############################################
- # ── Helpers ───────────────────────────────
- ############################################

def build_prompt(question: str) -> str:
-     """Craft a prompt that elicits concise, structured answers."""
    return (
-         "You are a helpful careercoach AI. Answer the following interview "
-         "question clearly and concisely, offering practical insights when appropriate.\n"
-         "Use markdown for **bold**, *italic*, and bullet‑lists when helpful. \n"
-         "Ensure your answer is less than 200 words.\n\n"
-         f"Interview question: \"{question}\""
    )

- def memory_usage_mb() -> float:
-     return psutil.Process().memory_info().rss / 1_048_576 # bytes→MiB

- ############################################
- # ── Routes ────────────────────────────────
- ############################################

@app.get("/")
- async def root() -> FileResponse:
-     """Serve the single‑page app."""
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))


@app.post("/voice-transcribe")
- async def voice_transcribe(file: UploadFile = File(...)): # noqa: B008
-     """Receive audio, transcribe, push to Gemini, return answer."""
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
-         raise HTTPException(status_code=415, detail="Unsupported audio type")
-     # Save to a temp file (Whisper expects a filename/bytes)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-         tmp.write(await file.read())
-         tmp_path = tmp.name
    try:
-         # ── 1. Transcribe
-         seg = AudioSegment.from_file(tmp_path).set_frame_rate(SAMPLE_RATE).set_channels(1)
-         audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2**15)
        inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        ids = model.generate(inputs.input_features.to(model.device))
        question = processor.decode(ids[0], skip_special_tokens=True).strip()
-         if not question: raise ValueError("Could not detect speech")
-         logger.info(f"[VOICE] Detected transcribe: {question}")
-         # ── 2. LLM answer
-         prompt = build_prompt(question)
-         # Gemini Flash 2.5 tuned for short latency
-         client = genai.Client(api_key=GEMINI_API_KEY)
-         response = client.models.generate_content(
-             model="gemini-2.5-flash-preview-04-17",
-             contents=prompt
-         )
-         answer = response.text.strip()
-         logger.info(f"[LLM] Decision answer: {answer}")
-         return JSONResponse(
-             {
-                 "question": question,
-                 "answer": answer,
-                 "memory_mb": round(memory_usage_mb(), 1),
-             }
-         )
    finally:
-         os.remove(tmp_path) # Rm audio when done
+ # Interview Q&A – FastAPI backend
+ import base64, io, json, logging, os, tempfile
from pathlib import Path
from typing import Dict

from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, FileResponse
from fastapi.staticfiles import StaticFiles

+ # AI / LLM
from google import genai
from google.genai import types

+ # ASR
+ import numpy as np
from pydub import AudioSegment
from transformers import WhisperProcessor, WhisperForConditionalGeneration

+ # Misc
+ from PIL import Image
+
+ ##############################################################################

GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
+     raise RuntimeError("GEMINI_API_KEY must be set as env var")

ASR_MODEL_ID = "openai/whisper-small.en"
+ ASR_LANGUAGE = "en"
+ SAMPLE_RATE = 16_000

+ ##############################################################################

app = FastAPI(title="Interview Q&A Assistant", docs_url="/docs")
app.add_middleware(
    CORSMiddleware,
+     allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
)
app.mount("/statics", StaticFiles(directory="statics"), name="statics")

# Enable Logging for Debugging
import psutil
import logging

        logger.warning("⚠️ High Disk usage detected!")
check_system_resources()

+ ##############################################################################

+ # Global ASR (lazy-loaded)
+ processor = model = None

def build_prompt(question: str) -> str:
    return (
+         "You are a helpful career-coach AI. Answer the following interview "
+         "question clearly and concisely (≤200 words). Use markdown when helpful.\n\n"
+         f"Interview question: \"{question.strip()}\""
    )

+ def memory_mb() -> float:
+     return round(psutil.Process().memory_info().rss / 1_048_576, 1)

+ @app.on_event("startup")
+ async def load_models():
+     global processor, model
+     cache = Path("model_cache"); cache.mkdir(exist_ok=True)
+     processor = WhisperProcessor.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
+     model = WhisperForConditionalGeneration.from_pretrained(ASR_MODEL_ID, cache_dir=cache)
+     forced = processor.get_decoder_prompt_ids(task="transcribe", language="english")
+     model.config.forced_decoder_ids = forced
+     model.to("cpu").eval()
+     logger.info("🔊 Whisper loaded ✔")

@app.get("/")
+ async def root() -> FileResponse: # serve SPA
    logger.info("[STATIC] Serving frontend")
    return FileResponse(Path("statics/index.html"))

+ ##############################################################################
+ # ── MAIN ENDPOINTS ──────────────────────────────────────────────────────────
+
+ def call_gemini(prompt: str, vision_parts=None) -> str:
+     client = genai.Client(api_key=GEMINI_API_KEY)
+     kwargs: Dict = {}
+     if vision_parts: # multimodal call
+         kwargs["contents"] = vision_parts + [{"text": prompt}]
+     else:
+         kwargs["contents"] = prompt
+     resp = client.models.generate_content(
+         model="gemini-2.5-flash-preview-04-17", **kwargs
+     )
+     return resp.text.strip()

@app.post("/voice-transcribe")
+ async def voice_transcribe(file: UploadFile = File(...)):
    if file.content_type not in {"audio/wav", "audio/x-wav", "audio/mpeg"}:
+         raise HTTPException(415, "Unsupported audio type")
+     # Write temporary audio file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+         tmp.write(await file.read()); tmp_path = tmp.name
+     # Audio processing and transcription
    try:
+         seg = AudioSegment.from_file(tmp_path).set_frame_rate(SAMPLE_RATE).set_channels(1)
+         audio = np.array(seg.get_array_of_samples()).astype(np.float32) / (2 ** 15)
        inputs = processor(audio, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        ids = model.generate(inputs.input_features.to(model.device))
        question = processor.decode(ids[0], skip_special_tokens=True).strip()
+         if not question:
+             raise ValueError("No speech detected")
+
+         answer = call_gemini(build_prompt(question))
+         return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})
    finally:
+         os.remove(tmp_path)
+
+ @app.post("/image-question")
+ async def image_question(file: UploadFile = File(...)):
+     if file.content_type not in {"image/png", "image/jpeg"}:
+         raise HTTPException(415, "Unsupported image type")
+     # Read file and decode
+     raw = await file.read()
+     b64 = base64.b64encode(raw).decode()
+     # Send image data
+     vision_part = [{
+         "inline_data": {
+             "mime_type": file.content_type,
+             "data": b64
+         }
+     }]
+     # Ask Gemini to return JSON so we can split Q & A
+     prompt = (
+         "From the screenshot extract the interview question (English). "
+         "Then answer concisely (≤200 words). "
+         "Return pure JSON: {\"question\":\"...\",\"answer\":\"...\"}"
+     )
+     # Send prompt and image
+     text = call_gemini(prompt, vision_part)
+     try:
+         parsed = json.loads(text)
+         question, answer = parsed["question"], parsed["answer"]
+     except (json.JSONDecodeError, KeyError):
+         # Fallback: treat whole reply as answer
+         question, answer = "[Extracted from screenshot]", text
+     return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})
+
+ @app.post("/text-question")
+ async def text_question(payload: Dict):
+     question = (payload.get("question") or "").strip()
+     if not question:
+         raise HTTPException(400, "question is required")
+     answer = call_gemini(build_prompt(question))
+     return JSONResponse({"question": question, "answer": answer, "memory_mb": memory_mb()})
requirements.txt CHANGED
@@ -1,23 +1,24 @@
# Core server
fastapi
uvicorn[standard]
- aiofiles # Static file serving
- python-multipart # File uploads

# Voice‑to‑text (Whisper via Transformers)
- transformers # For whisper
- torch # Just to run transformer so don't remove
- huggingface_hub
accelerate

- # Audio
pydub
ffmpeg-python
- openai-whisper # pulls tiny‑en / small‑en

# Gemini Flash 2.5
google-genai
- python-dotenv # Optional – read GOOGLE_API_KEY

# Utilities
- psutil # Lightweight health logging
# Core server
fastapi
uvicorn[standard]
+ aiofiles # Static files

# Voice‑to‑text (Whisper via Transformers)
+ transformers # For whisper
+ torch # Just to run transformer so don't remove
accelerate

+ # Audio & Image
pydub
ffmpeg-python
+ openai-whisper # pulls tiny‑en / small‑en
+ pillow

# Gemini Flash 2.5
google-genai

# Utilities
+ psutil # Lightweight health logging
+ python-multipart # File uploads
+ huggingface_hub
+ python-dotenv
statics/edit.png ADDED
statics/index.html CHANGED
@@ -13,11 +13,16 @@
<body>
  <main class="container">
    <h1>Interview Q&amp;A Assistant</h1>
-     <p class="subtitle">Hold the button, ask your interview question, release to get an answer.</p>
-     <button id="record-button" class="record-btn">🎙 Hold&nbsp;to&nbsp;Ask</button>
-     <section class="output-section">
-       <h2>Your Question</h2>
-       <pre id="question-output" class="output"></pre>
      <h2>AI&nbsp;Answer</h2>
      <pre id="answer-output" class="output markdown"></pre>
    </section>
<body>
  <main class="container">
    <h1>Interview Q&amp;A Assistant</h1>
+     <p class="subtitle">🎙&nbsp;Hold the button, ask your interview question, release to get an answer.<br>
+       📸&nbsp;Or upload a screenshot of the question.</p>
+     <button id="record-button" class="record-btn">🎤 Hold&nbsp;to&nbsp;Ask</button>
+     <button id="screenshot-button" class="screenshot-btn">📸 Drop&nbsp;your&nbsp;Screenshot</button>
+     <input id="file-input" type="file" accept="image/*" hidden />
+     <section class="output-section">
+       <h2>Your Question
+         <img src="/statics/edit.png" id="edit-btn" class="edit-icon" title="Edit & re-ask">
+       </h2>
+       <div id="question-output" class="output" contenteditable="false"></div>
      <h2>AI&nbsp;Answer</h2>
      <pre id="answer-output" class="output markdown"></pre>
    </section>
statics/script.js CHANGED
@@ -1,13 +1,14 @@
/*******************************
 * Interview Q&A Frontend JS *
 *******************************/

- // Elements
- const recordBtn = document.getElementById("record-button");
- const questionEl = document.getElementById("question-output");
- const answerEl = document.getElementById("answer-output");
-
- // Typing animation util
function typeEffect(el, text, speed = 30) {
  el.textContent = "";
  let idx = 0;
@@ -18,53 +19,121 @@ function typeEffect(el, text, speed = 30) {
  }, speed);
}

- // Audio recording setup
let mediaRecorder, chunks = [];
- // Initialise media data
async function initMedia() {
  try {
-     const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-     mediaRecorder = new MediaRecorder(stream);
-     // Continuously push available data
-     mediaRecorder.ondataavailable = e => chunks.push(e.data);
-     // Stop btn start write and send audio chunk
-     mediaRecorder.onstop = async () => {
-       const audioBlob = new Blob(chunks, { type: "audio/wav" });
-       chunks = [];
-       // Build form data
-       const form = new FormData();
-       form.append("file", audioBlob, "record.wav");
-       // UX feedback
-       typeEffect(questionEl, "⌛ Transcribing…");
-       answerEl.textContent = "";
-       try {
-         const res = await fetch("/voice-transcribe", { method: "POST", body: form });
-         if (!res.ok) throw new Error(`HTTP ${res.status}`);
-         const data = await res.json();
-         // render markdown after a small delay for dramatic effect
-         typeEffect(questionEl, data.question || "[no speech detected]");
-         setTimeout(() => typeEffect(answerEl, data.answer || "[no answer]"), 500);
-       } catch (err) {
-         typeEffect(answerEl, "❌ " + err.message);
-       }
-     };
  } catch (err) {
-     alert("Microphone access denied please allow permissions.");
  }
- }

- // Hold-to-record UX
function bindRecordBtn() {
-   if (!mediaRecorder) return;
  recordBtn.addEventListener("mousedown", () => mediaRecorder.start());
  recordBtn.addEventListener("mouseup", () => mediaRecorder.stop());
-   // Touch devices
  recordBtn.addEventListener("touchstart", e => { e.preventDefault(); mediaRecorder.start(); });
-   recordBtn.addEventListener("touchend", e => { e.preventDefault(); mediaRecorder.stop(); });
}

- // Init on page load
window.addEventListener("DOMContentLoaded", async () => {
-   try { await initMedia(); bindRecordBtn(); }
-   catch (e) { alert("Mic permission required"); }
- });
/*******************************
 * Interview Q&A Frontend JS *
 *******************************/
+ const recordBtn = document.getElementById("record-button");
+ const screenshotBtn = document.getElementById("screenshot-button");
+ const fileInput = document.getElementById("file-input");
+ const questionEl = document.getElementById("question-output");
+ const answerEl = document.getElementById("answer-output");
+ const editBtn = document.getElementById("edit-btn");

+ /* ─────────────────── Typing effect utility ─────────────────── */
function typeEffect(el, text, speed = 30) {
  el.textContent = "";
  let idx = 0;
  }, speed);
}

+ /* ─────────────────── Abort-controller wrapper ───────────────── */
+ let currentController = null;
+ function fetchWithAbort(url, opts = {}) {
+   if (currentController) currentController.abort(); // cancel previous req
+   currentController = new AbortController();
+   return fetch(url, { ...opts, signal: currentController.signal });
+ }
+
+ /* ─────────────────── Audio recording setup ─────────────────── */
let mediaRecorder, chunks = [];
async function initMedia() {
+   const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+   mediaRecorder = new MediaRecorder(stream);
+
+   mediaRecorder.ondataavailable = e => chunks.push(e.data);
+
+   mediaRecorder.onstop = async () => {
+     const audioBlob = new Blob(chunks, { type: "audio/wav" });
+     chunks = [];
+
+     const form = new FormData();
+     form.append("file", audioBlob, "record.wav");
+
+     questionEl.textContent = "⌛ Transcribing…";
+     answerEl.innerHTML = "";
+
+     try {
+       const res = await fetchWithAbort("/voice-transcribe", { method: "POST", body: form });
+       if (!res.ok) throw new Error(`HTTP ${res.status}`);
+       const data = await res.json();
+       displayQa(data);
+     } catch (err) {
+       answerEl.textContent = "❌ " + err.message;
+     }
+   };
+ }
+
+ /* ─────────────── Screenshot / image-question upload ─────────── */
+ fileInput.addEventListener("change", async (e) => {
+   const file = e.target.files[0];
+   if (!file) return;
+   const form = new FormData();
+   form.append("file", file);
+
+   questionEl.textContent = "⌛ Processing screenshot…";
+   answerEl.innerHTML = "";
+
  try {
+     const res = await fetchWithAbort("/image-question", { method: "POST", body: form });
+     if (!res.ok) throw new Error(`HTTP ${res.status}`);
+     const data = await res.json();
+     displayQa(data);
  } catch (err) {
+     answerEl.textContent = "❌ " + err.message;
+   } finally {
+     fileInput.value = ""; // reset for next upload
  }
+ });
+ screenshotBtn.addEventListener("click", () => fileInput.click());

+ /* ─────────────────── Hold-to-record UX ─────────────────────── */
function bindRecordBtn() {
  recordBtn.addEventListener("mousedown", () => mediaRecorder.start());
  recordBtn.addEventListener("mouseup", () => mediaRecorder.stop());
  recordBtn.addEventListener("touchstart", e => { e.preventDefault(); mediaRecorder.start(); });
+   recordBtn.addEventListener("touchend", e => { e.preventDefault(); mediaRecorder.stop(); });
}

+ /* ─────────────────── Editable question block ───────────────── */
+ function enableEdit() {
+   questionEl.contentEditable = "true";
+   questionEl.classList.add("editing");
+   questionEl.focus();
+ }
+
+ async function sendEditedQuestion(text) {
+   questionEl.contentEditable = "false";
+   questionEl.classList.remove("editing");
+   answerEl.textContent = "⌛ Thinking…";
+   try {
+     const res = await fetchWithAbort("/text-question", {
+       method : "POST",
+       headers: { "Content-Type": "application/json" },
+       body   : JSON.stringify({ question: text })
+     });
+     if (!res.ok) throw new Error(`HTTP ${res.status}`);
+     const data = await res.json();
+     displayQa(data);
+   } catch (err) {
+     answerEl.textContent = "❌ " + err.message;
+   }
+ }
+
+ editBtn.addEventListener("click", () => enableEdit());
+ questionEl.addEventListener("keydown", (e) => {
+   if (e.key === "Enter") {
+     e.preventDefault();
+     const text = questionEl.innerText.trim();
+     if (text) sendEditedQuestion(text);
+   }
+ });
+
+ /* ─────────────────────── helpers ───────────────────────────── */
+ function displayQa(data) {
+   typeEffect(questionEl, data.question || "[no question]");
+   const html = DOMPurify.sanitize(marked.parse(data.answer || ""));
+   setTimeout(() => { answerEl.innerHTML = html; }, 400); // nice stagger
+ }
+
+ /* ─────────────────────── bootstrap ─────────────────────────── */
window.addEventListener("DOMContentLoaded", async () => {
+   try {
+     await initMedia();
+     bindRecordBtn();
+   } catch {
+     alert("Microphone permission is required.");
+   }
+ });
statics/styles.css CHANGED
@@ -1,41 +1,34 @@
- /* Simple, clean aesthetic */
:root {
-   --primary: #0052cc;
-   --accent: #ff6666;
-   --bg: #f8f9fc;
-   --mono: "Courier New", monospace;
}

- html,body {
-   margin: 0; padding: 0; background: var(--bg); font-family: Arial, sans-serif;
- }
-
- .container {
-   max-width: 720px; margin: 40px auto; padding: 24px;
-   background: #fff; border-radius: 8px; box-shadow: 0 4px 12px rgba(0,0,0,.08);
- }
-
- h1 { margin-top: 0; text-align: center; color: var(--primary); }
- .subtitle { text-align: center; color: #444; margin-bottom: 32px; }
-
- .record-btn {
-   display: block; margin: 0 auto 24px; padding: 14px 28px;
-   background: var(--accent); color: #fff; border: none; border-radius: 50px;
-   font-size: 17px; cursor: pointer; transition: background .25s;
- }
-
- .record-btn:hover { background: #ff4d4d; }
-
- .output-section h2 { margin: 24px 0 8px; color: var(--primary); }
-
- .output {
-   background: #000; color: #0f0; padding: 16px; min-height: 60px;
-   border-radius: 4px; overflow-x: auto; font-family: var(--mono);
-   white-space: pre-wrap; word-wrap: break-word;
- }
-
- /* --- new markdown styling --- */
- .output.markdown h3, .output.markdown h2 { color:#55f; margin:6px 0; }
- .output.markdown strong { font-weight:bold; color:#fff; background:#333; padding:0 4px; border-radius:3px; }
- .output.markdown em { font-style:italic; color:#ffd700; }
- .output.markdown ul { margin:4px 0 4px 20px; }
:root {
+   --primary:#0052cc; --accent:#ff4d4d; --sub:#66adff; --bg:#f8f9fc;
+   --mono:"Courier New",monospace;
}

+ /* Layout -----------------------------------------------------------------*/
+ html,body{margin:0;padding:0;background:var(--bg);font-family:Arial,Helvetica,sans-serif}
+ .container{max-width:720px;margin:20px auto;padding:24px;background:#fff;
+   border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,.08)}
+ h1{margin-top:0;text-align:center;color:var(--primary)}
+ .subtitle{text-align:center;color:#444;margin-bottom:16px}
+
+ /* Buttons ----------------------------------------------------------------*/
+ .record-btn,.screenshot-btn{
+   display:block;margin:0 auto 10px;padding:14px 28px;border:none;border-radius:50px;
+   font-size:17px;color:#fff;cursor:pointer;transition:background .25s}
+ .record-btn {background:var(--accent)}   .record-btn:hover {background:#b80e0e}
+ .screenshot-btn{background:var(--sub)}   .screenshot-btn:hover{background:#1f5089}
+
+ /* Output blocks -----------------------------------------------------------*/
+ .output-section h2{margin:22px 0 8px;color:var(--primary);display:flex;align-items:center;gap:6px}
+ .output{
+   background:#000;color:#0f0;padding:16px;min-height:60px;border-radius:4px;
+   overflow-x:auto;font-family:var(--mono);white-space:pre-wrap;word-wrap:break-word}
+
+ /* Edit icon ---------------------------------------------------------------*/
+ .edit-icon{width:18px;height:18px;cursor:pointer;opacity:.6;transition:opacity .2s}
+ .edit-icon:hover{opacity:1}
+
+ /* Markdown tweaks ---------------------------------------------------------*/
+ .output.markdown h3,.output.markdown h2{color:#55f;margin:6px 0}
+ .output.markdown strong{font-weight:bold;color:#fff;background:#333;padding:0 4px;border-radius:3px}
+ .output.markdown em{font-style:italic;color:#ffd700}
+ .output.markdown ul{margin:4px 0 4px 20px}