memorease committed on
Commit
e78d49b
·
verified ·
1 Parent(s): 6259ab9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -6
app.py CHANGED
@@ -3,29 +3,53 @@ import json
3
  from sentence_transformers import SentenceTransformer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
 
 
6
 
# Small embedding model (uses less RAM).
embedder = SentenceTransformer("paraphrase-MiniLM-L3-v2")

# Load the JSON data: a list of records, each read below for its
# "description" field.
with open("memory_questions.json", "r") as f:
    memory_data = json.load(f)

# Encode every stored description once, at start-up.
memory_texts = [entry["description"] for entry in memory_data]
memory_embeddings = embedder.encode(memory_texts)
16
 
 
def generate_question(user_memory):
    """Return the stored question whose description best matches the input.

    ``user_memory`` is encoded with ``embedder`` and scored by cosine
    similarity against ``memory_embeddings``; the ``question`` field of the
    highest-scoring ``memory_data`` entry is returned.
    """
    query_vector = embedder.encode([user_memory])
    # cosine_similarity returns one row per query; there is a single query.
    scores = cosine_similarity(query_vector, memory_embeddings)[0]
    closest_entry = memory_data[np.argmax(scores)]
    return closest_entry['question']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
# Gradio UI: one text box in, one text box out, backed by generate_question.
memory_box = gr.Textbox(label="Your Memory")
question_box = gr.Textbox(label="Generated Question")

iface = gr.Interface(
    fn=generate_question,
    inputs=memory_box,
    outputs=question_box,
    title="MemoRease - Semantic Memory Question Generator",
    description="Find the most semantically similar question from your memory set.",
)

iface.launch()
 
3
  from sentence_transformers import SentenceTransformer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import numpy as np
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM
7
+ import torch
8
 
# 1. Embedding model for semantic similarity (small and fast).
embedder = SentenceTransformer("paraphrase-MiniLM-L3-v2")

# 2. LLM for question generation (TinyLlama).
llm_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(llm_model_id)
model = AutoModelForCausalLM.from_pretrained(llm_model_id)

# 3. Load memory-question data.
# The encoding is given explicitly so that decoding the JSON file does not
# depend on the platform's default locale encoding.
with open("memory_questions.json", "r", encoding="utf-8") as f:
    memory_data = json.load(f)

# Encode every stored description once, at start-up; generate_question
# compares each incoming text against these vectors.
memory_texts = [item['description'] for item in memory_data]
memory_embeddings = embedder.encode(memory_texts)
23
 
# 4. Find the most relevant stored memory, then have the LLM write a question about it.
def generate_question(user_memory):
    """Generate a question about the stored memory closest to ``user_memory``.

    The input is encoded with ``embedder`` and scored by cosine similarity
    against ``memory_embeddings``. The ``description`` of the best-scoring
    ``memory_data`` entry is inserted into a prompt, and ``model`` produces up
    to 50 new tokens without sampling.

    Args:
        user_memory: Free-text memory entered by the user.

    Returns:
        The generated text with the prompt removed and surrounding whitespace
        stripped, or an empty string when ``user_memory`` is empty, ``None``
        or whitespace-only.
    """
    # Blank input has no meaningful nearest neighbour; skip the model call.
    if not user_memory or not user_memory.strip():
        return ""

    # (a) Find the most similar stored memory.
    user_embedding = embedder.encode([user_memory])
    # cosine_similarity returns one row per query; there is a single query.
    similarities = cosine_similarity(user_embedding, memory_embeddings)[0]
    best_match_index = int(np.argmax(similarities))
    matched_memory = memory_data[best_match_index]['description']

    # (b) Build the prompt.
    # NOTE(review): the role tags are concatenated without the end-of-turn
    # separators that the model's own chat template may expect; consider
    # tokenizer.apply_chat_template -- confirm against the model card.
    prompt = (
        "<|system|>You are a helpful assistant who asks clear, meaningful "
        "questions based on short memories."
        f"<|user|>Memory: {matched_memory}\n"
        "Generate a question that starts with What, Why, Who, When, or How."
        "<|assistant|>"
    )

    # (c) Generate with the LLM, passing the attention mask explicitly
    # instead of leaving generate() to infer it.
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    output = model.generate(
        input_ids,
        attention_mask=inputs["attention_mask"],
        max_new_tokens=50,
        do_sample=False,
    )

    # (d) Keep only the newly generated part. The returned sequence starts
    # with the prompt tokens, so slice them off by length instead of
    # searching the decoded text for the "<|assistant|>" marker.
    prompt_length = input_ids.shape[-1]
    result = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    return result.strip()
45
 
# 5. Gradio UI: one text box in, one text box out, backed by generate_question.
memory_box = gr.Textbox(label="Your Memory")
question_box = gr.Textbox(label="Generated Question")

iface = gr.Interface(
    fn=generate_question,
    inputs=memory_box,
    outputs=question_box,
    title="MemoRease – LLM-Enhanced Question Generator",
    description="Enter a memory. We'll find a similar one and generate a clear, meaningful question using TinyLLaMA.",
)

iface.launch()