seanpedrickcase committed
Commit 96d818b · 1 Parent(s): b4066c5

Updated Dockerfile so it actually works. Modified config file to include/exclude models and change relevant options as needed. Fixed thumbs up/down feedback.

Dockerfile CHANGED
@@ -1,44 +1,81 @@
-FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm
+FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm AS builder
 
-RUN apt-get update \
-    && apt-get install -y \
+RUN apt-get update && \
+    apt-get install -y \
     g++ \
     make \
     cmake \
+    pkg-config \
     unzip \
-    libcurl4-openssl-dev \
-    git \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
+    libcurl4-openssl-dev \
+    build-essential \
+    libopenblas-dev \
+    git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip
+RUN python3 -m pip install --upgrade pip
+
+# Optional: CMake args for BLAS for llama-cpp-python installation
+ENV CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
 
 WORKDIR /src
 
-COPY requirements.txt .
+COPY requirements_aws.txt .
 
-RUN pip install --no-cache-dir -r requirements_cpu.txt
+RUN pip install torch==2.5.1+cpu --target=/install --index-url https://download.pytorch.org/whl/cpu \
+    && pip install --no-cache-dir --target=/install sentence-transformers==4.1.0 --no-deps \
+    && pip install --no-cache-dir --target=/install span-marker==1.7.0 --no-deps \
+    && pip install --no-cache-dir --target=/install langchain-huggingface==0.1.2 --no-deps \
+    && pip install --no-cache-dir --target=/install keybert==0.9.0 --no-deps \
+    && pip install --no-cache-dir --target=/install -r requirements_aws.txt
+
+# Stage 2: Final runtime image
+FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm
+
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas0 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
+
+# Create required directories
+RUN mkdir -p /home/user/app/{output,input,tld,logs,usage,feedback,config} \
+    && chown -R user:user /home/user/app
+
+# Copy installed packages from builder stage
+COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
+
 # Switch to the "user" user
 USER user
+
 # Set home to the user's home directory
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH \
-    PYTHONPATH=$HOME/app \
-    PYTHONUNBUFFERED=1 \
-    GRADIO_ALLOW_FLAGGING=never \
-    GRADIO_NUM_PORTS=1 \
-    GRADIO_SERVER_NAME=0.0.0.0 \
-    GRADIO_THEME=huggingface \
-    SYSTEM=spaces \
+ENV APP_HOME=/home/user
+
+ENV PATH=$APP_HOME/.local/bin:$PATH \
+    PYTHONPATH=$APP_HOME/app \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860 \
+    GRADIO_ANALYTICS_ENABLED=False \
+    TLDEXTRACT_CACHE=$APP_HOME/app/tld/.tld_set_snapshot \
+    SYSTEM=spaces \
     LLAMA_CUBLAS=0
 
 # Set the working directory to the user's home directory
-WORKDIR $HOME/app
+WORKDIR $APP_HOME/app
 
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . $HOME/app
+COPY --chown=user . $APP_HOME/app
+
+# Ensure permissions are really user:user again after copying
+RUN chown -R user:user $APP_HOME/app && chmod -R u+rwX $APP_HOME/app
 
 CMD ["python", "app.py"]
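Note on the builder stage: torch, sentence-transformers, span-marker, langchain-huggingface and keybert are installed into /install with --no-deps and then copied straight into the runtime image's site-packages, so any transitive dependency they need must already be covered by requirements_aws.txt. A quick import smoke test inside the built container can confirm nothing is missing; this is a hypothetical helper script, not part of the commit:

# smoke_test_imports.py - hypothetical check, not included in the repo
import importlib

# Packages installed with --no-deps in the builder stage, plus the app's core deps
packages = ["torch", "sentence_transformers", "span_marker",
            "langchain_huggingface", "keybert", "llama_cpp", "gradio"]

for name in packages:
    try:
        module = importlib.import_module(name)
        print(f"OK   {name} {getattr(module, '__version__', '?')}")
    except Exception as err:  # missing transitive dependencies surface here
        print(f"FAIL {name}: {err}")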
app.py CHANGED
@@ -4,7 +4,7 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import gradio as gr
 import pandas as pd
-from torch import float16
+from torch import float16, float32
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
@@ -15,7 +15,7 @@ from chatfuncs.ingest import embed_faiss_save_to_zip
 from chatfuncs.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
 from chatfuncs.aws_functions import upload_file_to_s3
 from chatfuncs.auth import authenticate_user
-from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES
+from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES, RUN_GEMINI_MODELS, LOAD_LARGE_MODEL
 from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
 import chatfuncs.chatfuncs as chatf
 import chatfuncs.ingest as ing
@@ -94,17 +94,17 @@ def create_hf_model(model_name:str, hf_token=HF_TOKEN):
             model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
         else:
             if hf_token:
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token, torch_dtype=float32) # , torch_dtype=float16 - not compatible with CPU and Gemma 3
             else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=float32) # , torch_dtype=float16
     else:
         if "flan" in model_name:
             model = AutoModelForSeq2SeqLM.from_pretrained(model_name)#, torch_dtype=torch.float16)
         else:
            if hf_token:
-                model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, torch_dtype=float32) # , torch_dtype=float16
            else:
-                model = AutoModelForCausalLM.from_pretrained(model_name) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=float32) # , torch_dtype=float16
 
     if hf_token:
         tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length, token=hf_token)
@@ -212,6 +212,7 @@ with app:
 
     session_hash_textbox = gr.Textbox(value="", visible=False)
     s3_logs_output_textbox = gr.Textbox(label="S3 logs", visible=False)
+    latest_user_rating_data_path = gr.Textbox(label="output_ratings_textbox", visible=False)
 
     access_logs_state = gr.State(access_logs_data_folder + 'dataset1.csv')
    access_s3_logs_loc_state = gr.State(access_logs_data_folder)
@@ -222,9 +223,8 @@ with app:
 
     gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
 
-    gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}), can reason a little better, but is much slower (See Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded.If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
-
-
+    gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}, if available), can reason a little better, but is much slower (See Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded.If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
+
     with gr.Row():
         current_source = gr.Textbox(label="Current data source(s)", value=DEFAULT_DATA_SOURCE, scale = 10)
         current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
@@ -233,10 +233,11 @@ with app:
 
     with gr.Row():
         #chat_height = 500
-        chatbot = gr.Chatbot(value=None, avatar_images=('user.jfif', 'bot.jpg'), scale = 1, resizable=True, show_copy_all_button=True, show_copy_button=True, show_share_button=True, type='messages') # , height=chat_height
-        with gr.Accordion("Open this tab to see the source paragraphs used to generate the answer", open = True):
-            sources = gr.HTML(value = "Source paragraphs with the most relevant text will appear here") # , height=chat_height
+        chatbot = gr.Chatbot(value=None, avatar_images=('user.jfif', 'bot.jpg'), scale = 1, resizable=True, show_copy_all_button=True, show_copy_button=True, show_share_button=None, type='messages', max_height=500)
+        with gr.Accordion("Source paragraphs with the most relevant text will appear here", open = True):
+            sources = gr.HTML(value = "No relevant source paragraphs currently loaded", max_height=500) # , height=chat_height
 
+    gr.Markdown("Make sure that your questions are as specific as possible to allow the search engine to find the most relevant text to your query.")
     with gr.Row():
         message = gr.Textbox(
             label="Enter your question here",
@@ -247,12 +248,11 @@ with app:
         clear = gr.Button(value="Clear chat", variant="secondary", scale=1)
         stop = gr.Button(value="Stop generating", variant="stop", scale=1)
 
-    examples_set = gr.Radio(label="Example questions",
-                            choices=default_examples_set)
+    examples_set = gr.Radio(label="Example questions", choices=default_examples_set)
 
     current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)
 
-    with gr.Tab("Load in a different file to chat with"):
+    with gr.Tab("Load in a different file/webpage"):
         with gr.Accordion("PDF file", open = False):
             in_pdf = gr.File(label="Upload pdf", file_count="multiple", file_types=['.pdf'])
             load_pdf = gr.Button(value="Load in file", variant="secondary", scale=0)
@@ -272,15 +272,25 @@ with app:
             ingest_embed_out = gr.Textbox(label="File/web page preparation progress")
             file_out_box = gr.File(file_count='single', file_types=['.zip'])
 
-    with gr.Tab("Advanced features"):
+    with gr.Tab("Advanced settings - change model/model options"):
         out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
         temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
         with gr.Row():
-            model_choice = gr.Radio(label="Choose a chat model", value=SMALL_MODEL_NAME, choices = default_model_choices)
-            in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=True)
-            change_model_button = gr.Button(value="Load model", scale=0)
-        with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
+            with gr.Column(scale=3):
+                model_choice = gr.Radio(label="Choose a chat model", value=SMALL_MODEL_NAME, choices = default_model_choices)
+                if RUN_GEMINI_MODELS == "1":
+                    in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=True)
+                else:
+                    in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=False)
+            with gr.Column(scale=1):
+                change_model_button = gr.Button(value="Load model")
+
+        if LOAD_LARGE_MODEL == "1":
+            with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False, visible=True):
+                gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
+        else:
+            with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False, visible=False):
+                gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=False)
 
         load_text = gr.Text(label="Load status")
 
@@ -318,7 +328,8 @@ with app:
     clear.click(lambda: None, None, chatbot, queue=False)
 
     # Thumbs up or thumbs down voting function
-    chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], None)
+    chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], [latest_user_rating_data_path]).\
+        success(fn = upload_file_to_s3, inputs=[latest_user_rating_data_path, latest_user_rating_data_path], outputs=[s3_logs_output_textbox])
 
 
     ###
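The feedback fix works because chatf.vote now returns the path of the CSV it wrote, the .like() output stores that path in a hidden textbox, and the chained .success() step hands it to upload_file_to_s3. A minimal sketch of the same wiring pattern, with illustrative component and function names rather than the app's own:

import gradio as gr

def record_vote(like: gr.LikeData, history: list[dict]) -> str:
    # Stand-in for chatf.vote: write feedback somewhere and return the file path
    print("liked" if like.liked else "disliked", like.value)
    return "feedback/thumbs_up_down_data.csv"

def push_to_s3(local_path: str, s3_key: str) -> str:
    # Stand-in for upload_file_to_s3(local_file_path, s3_key)
    return f"Would upload {local_path} to {s3_key}"

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    feedback_path = gr.Textbox(visible=False)
    s3_status = gr.Textbox(visible=False)

    # .like() fires on thumbs up/down; .success() only runs if the vote handler succeeded
    chatbot.like(record_vote, [chatbot], [feedback_path]).\
        success(push_to_s3, inputs=[feedback_path, feedback_path], outputs=[s3_status])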
chatfuncs/aws_functions.py CHANGED
@@ -1,9 +1,7 @@
 from typing import Type, List
 import pandas as pd
 import boto3
-import tempfile
 import os
-from chatfuncs.helper_functions import get_or_create_env_var
 from chatfuncs.config import AWS_REGION, RUN_AWS_FUNCTIONS, QA_CHATBOT_BUCKET
 
 PandasDataFrame = Type[pd.DataFrame]
@@ -17,7 +15,7 @@ if RUN_AWS_FUNCTIONS == "1":
         bucket_name = os.environ['']
         session = boto3.Session() # profile_name="default"
     except Exception as e:
-        print(e)
+        print("Failed to start boto3 session due to:", e)
 
 def get_assumed_role_info():
     sts_endpoint = 'https://sts.' + AWS_REGION + '.amazonaws.com'
chatfuncs/chatfuncs.py CHANGED
@@ -34,7 +34,7 @@ from langchain.docstore.document import Document
 
 from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma
 from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
-from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS
+from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
 
 model_object = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run
@@ -1187,35 +1187,41 @@ def hide_block():
 
 # Vote function
 
-def vote(data: gr.LikeData, chat_history, instruction_prompt_out, model_type):
-    import os
-    import pandas as pd
-
-    chat_history_last = str(str(chat_history[-1][0]) + " - " + str(chat_history[-1][1]))
+def vote(data: gr.LikeData, chat_history:list[dict], instruction_prompt_out:str, model_type:str, feedback_folder:str=FEEDBACK_LOGS_FOLDER):
+
+    query_text = next(
+        (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'user'),
+        "")
+
+    response_text = next(
+        (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'assistant'),
+        "")
+
+    chat_history_latest = str(query_text + " - " + response_text)
+
+    if isinstance(data.value, list): chosen_response = data.value[-1]
+    else: chosen_response = data.value
 
     response_df = pd.DataFrame(data={"thumbs_up":data.liked,
-                                     "chosen_response":data.value,
+                                     "chosen_response":chosen_response,
                                      "input_prompt":instruction_prompt_out,
-                                     "chat_history":chat_history_last,
+                                     "chat_history":chat_history_latest,
                                      "model_type": model_type,
                                      "date_time": pd.Timestamp.now()}, index=[0])
 
     if data.liked:
-        print("You upvoted this response: " + data.value)
-
-        if os.path.isfile("thumbs_up_data.csv"):
-            existing_thumbs_up_df = pd.read_csv("thumbs_up_data.csv")
-            thumbs_up_df_concat = pd.concat([existing_thumbs_up_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
-            thumbs_up_df_concat.to_csv("thumbs_up_data.csv")
-        else:
-            response_df.to_csv("thumbs_up_data.csv")
+        print("You upvoted this response:", chosen_response)
 
     else:
-        print("You downvoted this response: " + data.value)
-
-        if os.path.isfile("thumbs_down_data.csv"):
-            existing_thumbs_down_df = pd.read_csv("thumbs_down_data.csv")
-            thumbs_down_df_concat = pd.concat([existing_thumbs_down_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
-            thumbs_down_df_concat.to_csv("thumbs_down_data.csv")
-        else:
-            response_df.to_csv("thumbs_down_data.csv")
+        print("You downvoted this response:", chosen_response)
+
+    output_data_path = feedback_folder + "thumbs_up_down_data.csv"
+
+    if os.path.isfile(output_data_path):
+        existing_thumbs_down_df = pd.read_csv(output_data_path)
+        thumbs_down_df_concat = pd.concat([existing_thumbs_down_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
+        thumbs_down_df_concat.to_csv(output_data_path)
+    else:
+        response_df.to_csv(output_data_path)
+
+    return output_data_path
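Up- and down-votes now accumulate in a single thumbs_up_down_data.csv under the feedback logs folder, with a thumbs_up boolean column, so later analysis is straightforward; a small sketch, assuming the file sits in a local feedback/ folder:

import pandas as pd

# Path assumed: FEEDBACK_LOGS_FOLDER + "thumbs_up_down_data.csv"
feedback = pd.read_csv("feedback/thumbs_up_down_data.csv")

# Up-vote rate per model, and overall counts of up/down votes
print(feedback.groupby("model_type")["thumbs_up"].mean())
print(feedback["thumbs_up"].value_counts())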
 
 
 
 
 
chatfuncs/config.py CHANGED
@@ -163,8 +163,12 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
 
 ###
 # RUN CONFIG
+RUN_GEMINI_MODELS = get_or_create_env_var('RUN_GEMINI_MODELS', '1')
+
 GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
 
+# NOTE THAT THIS IS REQUIRED
+
 HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
 
 
@@ -181,17 +185,28 @@ SMALL_MODEL_NAME = get_or_create_env_var("SMALL_MODEL_NAME", "Gemma 3 1B (small,
 
 SMALL_MODEL_REPO_ID = get_or_create_env_var("SMALL_MODEL_REPO_ID", 'google/gemma-3-1b-it') #'Qwen/Qwen2-0.5B-Instruct')
 
+LOAD_LARGE_MODEL = get_or_create_env_var("LOAD_LARGE_MODEL", '0')
+
 LARGE_MODEL_NAME = get_or_create_env_var("LARGE_MODEL_NAME", "Phi 3.5 Mini (larger, slow)")
 
 LARGE_MODEL_REPO_ID = get_or_create_env_var("LARGE_MODEL_REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF") # "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
+
 LARGE_MODEL_GGUF_FILE = get_or_create_env_var("LARGE_MODEL_GGUF_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
 
+# Build up options for models
+default_model_choices = [SMALL_MODEL_NAME]
+
+if LOAD_LARGE_MODEL == "1":
+    default_model_choices.append(LARGE_MODEL_NAME)
+
 if RUN_AWS_FUNCTIONS == "1":
-    default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"]'
-else:
-    default_model_choices = f'["{SMALL_MODEL_NAME}", "{LARGE_MODEL_NAME}", "gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]'
+    default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"])
+
+if RUN_GEMINI_MODELS == "1":
+    default_model_choices.extend(["gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "models/gemini-2.5-pro-exp-03-25"])
 
-DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", default_model_choices)
+DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
 
 EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "BAAI/bge-base-en-v1.5") #"mixedbread-ai/mxbai-embed-xsmall-v1"
 
chatfuncs/model_load.py CHANGED
@@ -17,15 +17,15 @@ temperature: float = 0.1
 top_k: int = 3
 top_p: float = 1
 repetition_penalty: float = 1.15
-flan_alpaca_repetition_penalty: float = 1.3
+#flan_alpaca_repetition_penalty: float = 1.3
 last_n_tokens: int = 64
 max_new_tokens: int = 1024
 seed: int = 42
 reset: bool = False
 stream: bool = True
 threads: int = threads
-batch_size:int = 256
-context_length:int = 2048
+batch_size:int = 128
+context_length:int = 4096
 sample = True
 
 # Bedrock parameters
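These defaults are consumed by the llama-cpp-python loader elsewhere in model_load.py (not shown in this hunk), so the doubled context window and smaller batch apply the next time the GGUF model is loaded. A rough, hypothetical sketch of opening the default GGUF model with the new values, using the repo and file defaults from config.py:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Repo and file names are the config.py defaults; n_ctx/n_batch mirror the values changed above
model_path = hf_hub_download(
    repo_id="QuantFactory/Phi-3.5-mini-instruct-GGUF",
    filename="Phi-3.5-mini-instruct.Q4_K_M.gguf",
)

llm = Llama(
    model_path=model_path,
    n_ctx=4096,      # context_length
    n_batch=128,     # batch_size
    n_gpu_layers=0,  # CPU-only unless the GPU layers slider is raised
    seed=42,
)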
requirements.txt CHANGED
@@ -5,9 +5,9 @@ beautifulsoup4==4.13.4
 google-generativeai==0.8.5
 pandas==2.2.3
 transformers==4.51.3
-# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
-llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-#-C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # Older version based on wheel if the below line doesn't work
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
 sentence_transformers==4.1.0
 faiss-cpu==1.10.0
requirements_aws.txt ADDED
@@ -0,0 +1,27 @@
+#langchain==0.3.24
+#langchain-huggingface==0.1.2 # Loaded in Dockerfile
+boto3==1.38.0
+python-dotenv==1.1.0
+langchain-community==0.3.22
+beautifulsoup4==4.13.4
+google-generativeai==0.8.5
+pandas==2.2.3
+transformers==4.51.3
+# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # For linux
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu # Loaded in Dockerfile
+#sentence_transformers==4.1.0 # Loaded in Dockerfile
+faiss-cpu==1.10.0
+pypdf==5.4.0
+python-docx==1.1.2
+#keybert==0.9.0 # Loaded in Dockerfile
+#span-marker==1.7.0 # Loaded in Dockerfile
+gradio==5.25.2
+nltk==3.9.1
+bm25s==0.2.12
+PyStemmer==2.2.0.3
+scikit-learn==1.6.1
+scipy==1.15.2
+numpy==1.26.4
+
requirements_gpu.txt CHANGED
@@ -1,4 +1,4 @@
-langchain==0.3.24
+#langchain==0.3.24
 langchain-community==0.3.22
 langchain-huggingface==0.1.2
 beautifulsoup4==4.13.4
@@ -6,8 +6,8 @@ google-generativeai==0.8.5
 pandas==2.2.3
 transformers==4.51.3
 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121
-llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
-#llama-cpp-python==0.3.8 -C cmake.args="-DGGML_CUDA=on"
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_CUDA=on"
 sentence_transformers==4.1.0
 faiss-cpu==1.10.0
 pypdf==5.4.0