vumichien committed on
Commit
11aa943
·
1 Parent(s): ba982f5

Add application file

Browse files
app/routes/embedding_routes.py CHANGED
@@ -11,7 +11,7 @@ router = APIRouter()
11
  @router.post("/create-embeddings")
12
  async def create_embeddings(file: UploadFile = File(...)):
13
  """
14
- Create embeddings from an uploaded Excel or CSV file.
15
 
16
  - **file**: The Excel or CSV file containing questions and answers
17
 
@@ -31,8 +31,15 @@ async def create_embeddings(file: UploadFile = File(...)):
31
  detail="Unsupported file type. Please upload an Excel (.xlsx, .xls) or CSV (.csv) file.",
32
  )
33
 
 
 
 
 
 
34
  # Create a temporary file to store the uploaded file
35
- temp_file_path = f"temp_{file.filename}"
 
 
36
  try:
37
  # Save the uploaded file
38
  with open(temp_file_path, "wb") as buffer:
@@ -50,4 +57,9 @@ async def create_embeddings(file: UploadFile = File(...)):
50
  finally:
51
  # Clean up the temporary file
52
  if os.path.exists(temp_file_path):
53
- os.remove(temp_file_path)
 
 
 
 
 
 
11
  @router.post("/create-embeddings")
12
  async def create_embeddings(file: UploadFile = File(...)):
13
  """
14
+ Create embeddings from an uploaded Excel or CSV file containing question-answer pairs.
15
 
16
  - **file**: The Excel or CSV file containing questions and answers
17
 
 
31
  detail="Unsupported file type. Please upload an Excel (.xlsx, .xls) or CSV (.csv) file.",
32
  )
33
 
34
+ # Ensure temp directory exists
35
+ temp_dir = "/app/temp"
36
+ if not os.path.exists(temp_dir):
37
+ os.makedirs(temp_dir, exist_ok=True)
38
+
39
  # Create a temporary file to store the uploaded file
40
+ safe_filename = os.path.basename(file.filename).replace(" ", "_")
41
+ temp_file_path = os.path.join(temp_dir, f"temp_{safe_filename}")
42
+
43
  try:
44
  # Save the uploaded file
45
  with open(temp_file_path, "wb") as buffer:
 
57
  finally:
58
  # Clean up the temporary file
59
  if os.path.exists(temp_file_path):
60
+ try:
61
+ os.remove(temp_file_path)
62
+ except Exception as e:
63
+ print(
64
+ f"Warning: Could not remove temporary file {temp_file_path}: {str(e)}"
65
+ )
app/services/embedding_service.py CHANGED
@@ -7,8 +7,11 @@ from typing import List, Dict, Tuple, Any
7
 
8
  from app.services.model_service import get_model, reload_embeddings
9
 
 
 
 
10
  # Ensure data directory exists
11
- os.makedirs("data", exist_ok=True)
12
 
13
 
14
  def remove_prefix(text: str, prefix_pattern: str) -> str:
@@ -55,7 +58,8 @@ def save_raw_data(qa_list: List[Dict[str, str]]) -> None:
55
  """
56
  Save the raw question-answer pairs to a JSON file.
57
  """
58
- with open("data/raw.json", "w", encoding="utf-8") as json_file:
 
59
  json.dump(qa_list, json_file, ensure_ascii=False, indent=2)
60
 
61
 
@@ -74,11 +78,15 @@ def create_and_save_embeddings(qa_list: List[Dict[str, str]]) -> None:
74
  answer_embeddings = model.encode(answers, convert_to_numpy=True)
75
 
76
  # Save embeddings as numpy arrays
77
- np.save("data/question_embeddings.npy", question_embeddings)
78
- np.save("data/answer_embeddings.npy", answer_embeddings)
 
 
 
 
79
 
80
  # Save the original data
81
- with open("data/qa_data.json", "w", encoding="utf-8") as f:
82
  json.dump(qa_list, f, ensure_ascii=False, indent=2)
83
 
84
 
 
7
 
8
  from app.services.model_service import get_model, reload_embeddings
9
 
10
+ # Define data directory path
11
+ DATA_DIR = "/app/data"
12
+
13
  # Ensure data directory exists
14
+ os.makedirs(DATA_DIR, exist_ok=True)
15
 
16
 
17
  def remove_prefix(text: str, prefix_pattern: str) -> str:
 
58
  """
59
  Save the raw question-answer pairs to a JSON file.
60
  """
61
+ raw_path = os.path.join(DATA_DIR, "raw.json")
62
+ with open(raw_path, "w", encoding="utf-8") as json_file:
63
  json.dump(qa_list, json_file, ensure_ascii=False, indent=2)
64
 
65
 
 
78
  answer_embeddings = model.encode(answers, convert_to_numpy=True)
79
 
80
  # Save embeddings as numpy arrays
81
+ q_emb_path = os.path.join(DATA_DIR, "question_embeddings.npy")
82
+ a_emb_path = os.path.join(DATA_DIR, "answer_embeddings.npy")
83
+ qa_data_path = os.path.join(DATA_DIR, "qa_data.json")
84
+
85
+ np.save(q_emb_path, question_embeddings)
86
+ np.save(a_emb_path, answer_embeddings)
87
 
88
  # Save the original data
89
+ with open(qa_data_path, "w", encoding="utf-8") as f:
90
  json.dump(qa_list, f, ensure_ascii=False, indent=2)
91
 
92
 
app/services/model_service.py CHANGED
@@ -1,8 +1,12 @@
1
  import json
2
  import numpy as np
 
3
  from sentence_transformers import SentenceTransformer
4
  from typing import List, Dict, Tuple, Any, Optional
5
 
 
 
 
6
  # Global variables to store model and data
7
  _model = None
8
  _question_embeddings = None
@@ -37,10 +41,14 @@ def load_embeddings() -> Tuple[np.ndarray, np.ndarray, List[Dict[str, str]]]:
37
  global _question_embeddings, _answer_embeddings, _qa_data
38
 
39
  try:
40
- _question_embeddings = np.load("data/question_embeddings.npy")
41
- _answer_embeddings = np.load("data/answer_embeddings.npy")
 
 
 
 
42
 
43
- with open("data/qa_data.json", "r", encoding="utf-8") as f:
44
  _qa_data = json.load(f)
45
 
46
  return _question_embeddings, _answer_embeddings, _qa_data
 
1
  import json
2
  import numpy as np
3
+ import os
4
  from sentence_transformers import SentenceTransformer
5
  from typing import List, Dict, Tuple, Any, Optional
6
 
7
+ # Define data directory path
8
+ DATA_DIR = "/app/data"
9
+
10
  # Global variables to store model and data
11
  _model = None
12
  _question_embeddings = None
 
41
  global _question_embeddings, _answer_embeddings, _qa_data
42
 
43
  try:
44
+ q_emb_path = os.path.join(DATA_DIR, "question_embeddings.npy")
45
+ a_emb_path = os.path.join(DATA_DIR, "answer_embeddings.npy")
46
+ qa_data_path = os.path.join(DATA_DIR, "qa_data.json")
47
+
48
+ _question_embeddings = np.load(q_emb_path)
49
+ _answer_embeddings = np.load(a_emb_path)
50
 
51
+ with open(qa_data_path, "r", encoding="utf-8") as f:
52
  _qa_data = json.load(f)
53
 
54
  return _question_embeddings, _answer_embeddings, _qa_data