Commit 96d818b
Parent(s): b4066c5

Updated Dockerfile so it actually works. Modified config file to include/exclude models and change relevant options as needed. Fixed thumbs up/down feedback.
Files changed:
- Dockerfile +57 -20
- app.py +33 -22
- chatfuncs/aws_functions.py +1 -3
- chatfuncs/chatfuncs.py +27 -21
- chatfuncs/config.py +19 -4
- chatfuncs/model_load.py +3 -3
- requirements.txt +3 -3
- requirements_aws.txt +27 -0
- requirements_gpu.txt +3 -3
Dockerfile
CHANGED
@@ -1,44 +1,81 @@
-FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm
+FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm AS builder
 
-RUN apt-get update \
+RUN apt-get update && \
+    apt-get install -y \
     g++ \
     make \
     cmake \
+    pkg-config \
     unzip \
-    libcurl4-openssl-dev \
+    libcurl4-openssl-dev \
+    build-essential \
+    libopenblas-dev \
+    git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip
+RUN python3 -m pip install --upgrade pip
+
+# Optional: CMake args for BLAS for llama-cpp-python installation
+ENV CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS"
 
 WORKDIR /src
 
-COPY
+COPY requirements_aws.txt .
 
-RUN pip install --
+RUN pip install torch==2.5.1+cpu --target=/install --index-url https://download.pytorch.org/whl/cpu \
+    && pip install --no-cache-dir --target=/install sentence-transformers==4.1.0 --no-deps \
+    && pip install --no-cache-dir --target=/install span-marker==1.7.0 --no-deps \
+    && pip install --no-cache-dir --target=/install langchain-huggingface==0.1.2 --no-deps \
+    && pip install --no-cache-dir --target=/install keybert==0.9.0 --no-deps \
+    && pip install --no-cache-dir --target=/install -r requirements_aws.txt
+
+# Stage 2: Final runtime image
+FROM public.ecr.aws/docker/library/python:3.11.11-slim-bookworm
+
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas0 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user
+
+# Create required directories
+RUN mkdir -p /home/user/app/{output,input,tld,logs,usage,feedback,config} \
+    && chown -R user:user /home/user/app
+
+# Copy installed packages from builder stage
+COPY --from=builder /install /usr/local/lib/python3.11/site-packages/
+
 # Switch to the "user" user
 USER user
+
 # Set home to the user's home directory
-ENV
+ENV APP_HOME=/home/user
+
+ENV PATH=$APP_HOME/.local/bin:$PATH \
+    PYTHONPATH=$APP_HOME/app \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860 \
+    GRADIO_ANALYTICS_ENABLED=False \
+    TLDEXTRACT_CACHE=$APP_HOME/app/tld/.tld_set_snapshot \
+    SYSTEM=spaces \
     LLAMA_CUBLAS=0
 
 # Set the working directory to the user's home directory
-WORKDIR $
+WORKDIR $APP_HOME/app
 
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
-COPY --chown=user . $
+COPY --chown=user . $APP_HOME/app
+
+# Ensure permissions are really user:user again after copying
+RUN chown -R user:user $APP_HOME/app && chmod -R u+rwX $APP_HOME/app
 
 CMD ["python", "app.py"]
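The builder stage installs the Python packages with pip's --target=/install, and the runtime stage copies that directory straight into site-packages, so nothing is compiled in the final image. A quick way to confirm the copy worked is an import smoke test run inside the final container. This is a hypothetical helper, not part of the commit, and the module list is an assumption based on the requirements files.

# check_imports.py - hypothetical smoke test (not part of this commit).
# Run inside the final image to confirm that packages installed in the
# builder stage with --target=/install resolve from site-packages.
import importlib

modules = ["torch", "llama_cpp", "sentence_transformers", "gradio", "faiss"]

for name in modules:
    try:
        mod = importlib.import_module(name)
        print("OK  ", name, getattr(mod, "__version__", ""))
    except ImportError as err:
        print("FAIL", name, err)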
app.py
CHANGED
@@ -4,7 +4,7 @@ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 import gradio as gr
 import pandas as pd
-from torch import float16
+from torch import float16, float32
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
@@ -15,7 +15,7 @@ from chatfuncs.ingest import embed_faiss_save_to_zip
 from chatfuncs.helper_functions import get_connection_params, reveal_feedback_buttons, wipe_logs
 from chatfuncs.aws_functions import upload_file_to_s3
 from chatfuncs.auth import authenticate_user
-from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES
+from chatfuncs.config import FEEDBACK_LOGS_FOLDER, ACCESS_LOGS_FOLDER, USAGE_LOGS_FOLDER, HOST_NAME, COGNITO_AUTH, INPUT_FOLDER, OUTPUT_FOLDER, MAX_QUEUE_SIZE, DEFAULT_CONCURRENCY_LIMIT, MAX_FILE_SIZE, GRADIO_SERVER_PORT, ROOT_PATH, DEFAULT_EMBEDDINGS_LOCATION, EMBEDDINGS_MODEL_NAME, DEFAULT_DATA_SOURCE, HF_TOKEN, LARGE_MODEL_REPO_ID, LARGE_MODEL_GGUF_FILE, LARGE_MODEL_NAME, SMALL_MODEL_NAME, SMALL_MODEL_REPO_ID, DEFAULT_DATA_SOURCE_NAME, DEFAULT_EXAMPLES, DEFAULT_MODEL_CHOICES, RUN_GEMINI_MODELS, LOAD_LARGE_MODEL
 from chatfuncs.model_load import torch_device, gpu_config, cpu_config, context_length
 import chatfuncs.chatfuncs as chatf
 import chatfuncs.ingest as ing
@@ -94,17 +94,17 @@ def create_hf_model(model_name:str, hf_token=HF_TOKEN):
             model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")#, torch_dtype=torch.float16)
         else:
             if hf_token:
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=hf_token, torch_dtype=float32) # , torch_dtype=float16 - not compatible with CPU and Gemma 3
             else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=float32) # , torch_dtype=float16
     else:
         if "flan" in model_name:
            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)#, torch_dtype=torch.float16)
         else:
             if hf_token:
-                model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, torch_dtype=float32) # , torch_dtype=float16
             else:
-                model = AutoModelForCausalLM.from_pretrained(model_name) # , torch_dtype=float16
+                model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=float32) # , torch_dtype=float16
 
     if hf_token:
         tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = context_length, token=hf_token)
@@ -212,6 +212,7 @@ with app:
 
     session_hash_textbox = gr.Textbox(value="", visible=False)
     s3_logs_output_textbox = gr.Textbox(label="S3 logs", visible=False)
+    latest_user_rating_data_path = gr.Textbox(label="output_ratings_textbox", visible=False)
 
     access_logs_state = gr.State(access_logs_data_folder + 'dataset1.csv')
     access_s3_logs_loc_state = gr.State(access_logs_data_folder)
@@ -222,9 +223,8 @@ with app:
 
     gr.Markdown("<h1><center>Lightweight PDF / web page QA bot</center></h1>")
 
-    gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}), can reason a little better, but is much slower (See Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded.If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
-
-
+    gr.Markdown(f"""Chat with PDF, web page or (new) csv/Excel documents. The default is a small model ({SMALL_MODEL_NAME}), that can only answer specific questions that are answered in the text. It cannot give overall impressions of, or summarise the document. The alternative ({LARGE_MODEL_NAME}, if available), can reason a little better, but is much slower (See Advanced settings tab).\n\nBy default '[{DEFAULT_DATA_SOURCE_NAME}]({DEFAULT_DATA_SOURCE})' is loaded.If you want to talk about another document or web page, please select from the second tab. If switching topic, please click the 'Clear chat' button.\n\nCaution: This is a public app. Please ensure that the document you upload is not sensitive is any way as other users may see it! Also, please note that LLM chatbots may give incomplete or incorrect information, so please use with care.""")
+
     with gr.Row():
         current_source = gr.Textbox(label="Current data source(s)", value=DEFAULT_DATA_SOURCE, scale = 10)
         current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)
@@ -233,10 +233,11 @@ with app:
 
     with gr.Row():
         #chat_height = 500
-        chatbot = gr.Chatbot(value=None, avatar_images=('user.jfif', 'bot.jpg'), scale = 1, resizable=True, show_copy_all_button=True, show_copy_button=True, show_share_button=
-        with gr.Accordion("
-        sources = gr.HTML(value = "
+        chatbot = gr.Chatbot(value=None, avatar_images=('user.jfif', 'bot.jpg'), scale = 1, resizable=True, show_copy_all_button=True, show_copy_button=True, show_share_button=None, type='messages', max_height=500)
+        with gr.Accordion("Source paragraphs with the most relevant text will appear here", open = True):
+            sources = gr.HTML(value = "No relevant source paragraphs currently loaded", max_height=500) # , height=chat_height
 
+    gr.Markdown("Make sure that your questions are as specific as possible to allow the search engine to find the most relevant text to your query.")
     with gr.Row():
         message = gr.Textbox(
             label="Enter your question here",
@@ -247,12 +248,11 @@ with app:
         clear = gr.Button(value="Clear chat", variant="secondary", scale=1)
         stop = gr.Button(value="Stop generating", variant="stop", scale=1)
 
-    examples_set = gr.Radio(label="Example questions",
-                            choices=default_examples_set)
+    examples_set = gr.Radio(label="Example questions", choices=default_examples_set)
 
     current_topic = gr.Textbox(label="Feature currently disabled - Keywords related to current conversation topic.", placeholder="Keywords related to the conversation topic will appear here", visible=False)
 
-    with gr.Tab("Load in a different file
+    with gr.Tab("Load in a different file/webpage"):
         with gr.Accordion("PDF file", open = False):
             in_pdf = gr.File(label="Upload pdf", file_count="multiple", file_types=['.pdf'])
             load_pdf = gr.Button(value="Load in file", variant="secondary", scale=0)
@@ -272,15 +272,25 @@ with app:
         ingest_embed_out = gr.Textbox(label="File/web page preparation progress")
         file_out_box = gr.File(file_count='single', file_types=['.zip'])
 
-    with gr.Tab("Advanced
+    with gr.Tab("Advanced settings - change model/model options"):
         out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
         temp_slide = gr.Slider(minimum=0.1, value = 0.5, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
         with gr.Row():
+            with gr.Column(scale=3):
+                model_choice = gr.Radio(label="Choose a chat model", value=SMALL_MODEL_NAME, choices = default_model_choices)
+                if RUN_GEMINI_MODELS == "1":
+                    in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=True)
+                else:
+                    in_api_key = gr.Textbox(value = "", label="Enter Gemini API key (only if using Google API models)", lines=1, type="password",interactive=True, visible=False)
+            with gr.Column(scale=1):
+                change_model_button = gr.Button(value="Load model")
+
+        if LOAD_LARGE_MODEL == "1":
+            with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False, visible=True):
+                gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=True)
+        else:
+            with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False, visible=False):
+                gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=100, step = 1, visible=False)
 
         load_text = gr.Text(label="Load status")
 
@@ -318,7 +328,8 @@ with app:
     clear.click(lambda: None, None, chatbot, queue=False)
 
     # Thumbs up or thumbs down voting function
-    chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state],
+    chatbot.like(chatf.vote, [chat_history_state, instruction_prompt_out, model_type_state], [latest_user_rating_data_path]).\
+        success(fn = upload_file_to_s3, inputs=[latest_user_rating_data_path, latest_user_rating_data_path], outputs=[s3_logs_output_textbox])
 
 
 ###
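The feedback fix above hinges on Gradio's event chaining: chatbot.like() runs chatf.vote, which now returns the path of the CSV it wrote, and .success() only fires the S3 upload once that step completes. Below is a minimal, self-contained sketch of the same pattern with stand-in handlers; it is not the project's real vote or upload_file_to_s3 code.

# Sketch of the .like(...).success(...) chaining used above (stand-in handlers).
import gradio as gr

def record_vote(data: gr.LikeData):
    # gr.LikeData carries .liked (True/False) and .value (the rated message).
    print("liked:", data.liked, "value:", data.value)
    return "feedback/thumbs_up_down_data.csv"  # path a real handler might return

def push_to_s3(path):
    # Stand-in for upload_file_to_s3; just reports what would be uploaded.
    return f"would upload {path}"

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    rating_path = gr.Textbox(visible=False)
    s3_status = gr.Textbox(label="S3 logs", visible=False)
    chatbot.like(record_vote, None, [rating_path]).success(
        push_to_s3, inputs=[rating_path], outputs=[s3_status])

# demo.launch()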
chatfuncs/aws_functions.py
CHANGED
@@ -1,9 +1,7 @@
 from typing import Type, List
 import pandas as pd
 import boto3
-import tempfile
 import os
-from chatfuncs.helper_functions import get_or_create_env_var
 from chatfuncs.config import AWS_REGION, RUN_AWS_FUNCTIONS, QA_CHATBOT_BUCKET
 
 PandasDataFrame = Type[pd.DataFrame]
@@ -17,7 +15,7 @@ if RUN_AWS_FUNCTIONS == "1":
         bucket_name = os.environ['']
         session = boto3.Session() # profile_name="default"
     except Exception as e:
-        print(e)
+        print("Failed to start boto3 session due to:", e)
 
 def get_assumed_role_info():
     sts_endpoint = 'https://sts.' + AWS_REGION + '.amazonaws.com'
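upload_file_to_s3 itself is untouched by this commit and its body is not shown in the diff; for context, the core of such a helper is usually a single boto3 call. A hedged sketch follows, with placeholder bucket and key names that are not values from the repo.

# Hypothetical sketch of an S3 upload helper in the style of upload_file_to_s3;
# the real implementation lives in chatfuncs/aws_functions.py and is not shown here.
import boto3

def upload_file_to_s3_sketch(local_path: str, s3_key: str, bucket: str) -> str:
    s3 = boto3.client("s3")
    s3.upload_file(Filename=local_path, Bucket=bucket, Key=s3_key)
    return f"Uploaded {local_path} to s3://{bucket}/{s3_key}"

# Example with placeholder values:
# upload_file_to_s3_sketch("feedback/thumbs_up_down_data.csv",
#                          "feedback/thumbs_up_down_data.csv", "my-example-bucket")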
chatfuncs/chatfuncs.py
CHANGED
@@ -34,7 +34,7 @@ from langchain.docstore.document import Document
 
 from chatfuncs.prompts import instruction_prompt_template_alpaca, instruction_prompt_mistral_orca, instruction_prompt_phi3, instruction_prompt_llama3, instruction_prompt_qwen, instruction_prompt_template_orca, instruction_prompt_gemma
 from chatfuncs.model_load import temperature, max_new_tokens, sample, repetition_penalty, top_p, top_k, torch_device, CtransGenGenerationConfig, max_tokens
-from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS
+from chatfuncs.config import GEMINI_API_KEY, AWS_DEFAULT_REGION, LARGE_MODEL_NAME, SMALL_MODEL_NAME, RUN_AWS_FUNCTIONS, FEEDBACK_LOGS_FOLDER
 
 model_object = [] # Define empty list for model functions to run
 tokenizer = [] # Define empty list for model functions to run
@@ -1187,35 +1187,41 @@ def hide_block():
 
 # Vote function
 
-def vote(data: gr.LikeData, chat_history, instruction_prompt_out, model_type):
-    import os
-    import pandas as pd
+def vote(data: gr.LikeData, chat_history:list[dict], instruction_prompt_out:str, model_type:str, feedback_folder:str=FEEDBACK_LOGS_FOLDER):
 
+    query_text = next(
+        (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'user'),
+        "")
+
+    response_text = next(
+        (entry['content'] for entry in reversed(chat_history) if entry.get('role') == 'assistant'),
+        "")
+
+    chat_history_latest = str(query_text + " - " + response_text)
+
+    if isinstance(data.value, list): chosen_response = data.value[-1]
+    else: chosen_response = data.value
 
     response_df = pd.DataFrame(data={"thumbs_up":data.liked,
-                                     "chosen_response":
+                                     "chosen_response":chosen_response,
                                      "input_prompt":instruction_prompt_out,
-                                     "chat_history":
+                                     "chat_history":chat_history_latest,
                                      "model_type": model_type,
                                      "date_time": pd.Timestamp.now()}, index=[0])
 
     if data.liked:
-        print("You upvoted this response:
+        print("You upvoted this response:", chosen_response)
+
+    else:
+        print("You downvoted this response:", chosen_response)
 
-            existing_thumbs_up_df = pd.read_csv("thumbs_up_data.csv")
-            thumbs_up_df_concat = pd.concat([existing_thumbs_up_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
-            thumbs_up_df_concat.to_csv("thumbs_up_data.csv")
-        else:
-            response_df.to_csv("thumbs_up_data.csv")
-    else:
+    output_data_path = feedback_folder + "thumbs_up_down_data.csv"
 
-            existing_thumbs_down_df = pd.read_csv("thumbs_down_data.csv")
-            thumbs_down_df_concat = pd.concat([existing_thumbs_down_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
-            thumbs_down_df_concat.to_csv("thumbs_down_data.csv")
-        else:
-            response_df.to_csv("thumbs_down_data.csv")
+    if os.path.isfile(output_data_path):
+        existing_thumbs_down_df = pd.read_csv(output_data_path)
+        thumbs_down_df_concat = pd.concat([existing_thumbs_down_df, response_df], ignore_index=True).drop("Unnamed: 0",axis=1, errors="ignore")
+        thumbs_down_df_concat.to_csv(output_data_path)
+    else:
+        response_df.to_csv(output_data_path)
 
+    return output_data_path
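The rewritten vote() assumes the chat history arrives in Gradio's 'messages' format (a list of role/content dicts, matching type='messages' on the Chatbot in app.py) and pulls the latest user and assistant turns with next() over the reversed list. A standalone sketch of that extraction pattern, using invented history for illustration:

# Sketch of the history-extraction pattern used by the new vote() handler.
# The chat_history contents here are made up for illustration only.
chat_history = [
    {"role": "user", "content": "What does the document say about fees?"},
    {"role": "assistant", "content": "It describes a flat annual fee."},
]

query_text = next(
    (entry["content"] for entry in reversed(chat_history) if entry.get("role") == "user"), "")
response_text = next(
    (entry["content"] for entry in reversed(chat_history) if entry.get("role") == "assistant"), "")

print(query_text + " - " + response_text)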
chatfuncs/config.py
CHANGED
@@ -163,8 +163,12 @@ DISPLAY_FILE_NAMES_IN_LOGS = get_or_create_env_var('DISPLAY_FILE_NAMES_IN_LOGS',
 
 ###
 # RUN CONFIG
+RUN_GEMINI_MODELS = get_or_create_env_var('RUN_GEMINI_MODELS', '1')
+
 GEMINI_API_KEY = get_or_create_env_var('GEMINI_API_KEY', '')
 
+# NOTE THAT THIS IS REQUIRED
+
 HF_TOKEN = get_or_create_env_var('HF_TOKEN', '')
 
@@ -181,17 +185,28 @@ SMALL_MODEL_NAME = get_or_create_env_var("SMALL_MODEL_NAME", "Gemma 3 1B (small,
 
 SMALL_MODEL_REPO_ID = get_or_create_env_var("SMALL_MODEL_REPO_ID", 'google/gemma-3-1b-it') #'Qwen/Qwen2-0.5B-Instruct')
 
+LOAD_LARGE_MODEL = get_or_create_env_var("LOAD_LARGE_MODEL", '0')
+
 LARGE_MODEL_NAME = get_or_create_env_var("LARGE_MODEL_NAME", "Phi 3.5 Mini (larger, slow)")
 
 LARGE_MODEL_REPO_ID = get_or_create_env_var("LARGE_MODEL_REPO_ID", "QuantFactory/Phi-3.5-mini-instruct-GGUF") # "QuantFactory/Phi-3-mini-128k-instruct-GGUF"), # "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2"), #"microsoft/Phi-3-mini-4k-instruct-gguf"),#"TheBloke/Mistral-7B-OpenOrca-GGUF"),
+
 LARGE_MODEL_GGUF_FILE = get_or_create_env_var("LARGE_MODEL_GGUF_FILE", "Phi-3.5-mini-instruct.Q4_K_M.gguf") #"Phi-3-mini-128k-instruct.Q4_K_M.gguf") #"Meta-Llama-3-8B-Instruct-v2.Q6_K.gguf") #"Phi-3-mini-4k-instruct-q4.gguf")#"mistral-7b-openorca.Q4_K_M.gguf"),
 
+# Build up options for models
+default_model_choices = [SMALL_MODEL_NAME]
+
+if LOAD_LARGE_MODEL == "1":
+    default_model_choices.append(LARGE_MODEL_NAME)
+
 if RUN_AWS_FUNCTIONS == "1":
-    default_model_choices
-
-
+    default_model_choices.extend(["anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"])
+
+if RUN_GEMINI_MODELS == "1":
+    default_model_choices.extend(["gemini-2.0-flash-001", "gemini-2.5-flash-preview-04-17", "models/gemini-2.5-pro-exp-03-25"])
 
-DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", default_model_choices)
+DEFAULT_MODEL_CHOICES = get_or_create_env_var("DEFAULT_MODEL_CHOICES", str(default_model_choices))
 
 EMBEDDINGS_MODEL_NAME = get_or_create_env_var('EMBEDDINGS_MODEL_NAME', "BAAI/bge-base-en-v1.5") #"mixedbread-ai/mxbai-embed-xsmall-v1"
 
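Every setting in config.py goes through get_or_create_env_var, which is defined elsewhere in the repo and not shown in this diff. A minimal sketch of the pattern it implies follows; this is an assumption about its behaviour, not the actual implementation, shown with the two flags added by this commit.

# Assumed behaviour of get_or_create_env_var (actual helper not in this diff):
# return the environment variable if set, otherwise register the default.
import os

def get_or_create_env_var(var_name: str, default_value: str) -> str:
    value = os.environ.get(var_name)
    if value is None:
        os.environ[var_name] = default_value
        value = default_value
    return value

# The flags introduced here are plain "0"/"1" strings.
RUN_GEMINI_MODELS = get_or_create_env_var("RUN_GEMINI_MODELS", "1")
LOAD_LARGE_MODEL = get_or_create_env_var("LOAD_LARGE_MODEL", "0")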
chatfuncs/model_load.py
CHANGED
@@ -17,15 +17,15 @@ temperature: float = 0.1
 top_k: int = 3
 top_p: float = 1
 repetition_penalty: float = 1.15
-flan_alpaca_repetition_penalty: float = 1.3
+#flan_alpaca_repetition_penalty: float = 1.3
 last_n_tokens: int = 64
 max_new_tokens: int = 1024
 seed: int = 42
 reset: bool = False
 stream: bool = True
 threads: int = threads
-batch_size:int =
-context_length:int =
+batch_size:int = 128
+context_length:int = 4096
 sample = True
 
 # Bedrock parameters
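batch_size and context_length now have concrete defaults (128 and 4096). In llama-cpp-python these would typically map to the n_batch and n_ctx arguments when the GGUF model is constructed; the sketch below shows that mapping and is an assumption, since the actual model-loading code is outside this diff.

# Hedged sketch: how the new defaults would typically reach llama-cpp-python.
# The parameter mapping is an assumption; the real loading code is not shown in this commit.
from llama_cpp import Llama

context_length = 4096
batch_size = 128

llm = Llama(
    model_path="Phi-3.5-mini-instruct.Q4_K_M.gguf",  # LARGE_MODEL_GGUF_FILE default
    n_ctx=context_length,   # maximum context window
    n_batch=batch_size,     # prompt-processing batch size
    n_gpu_layers=0,         # CPU-only, consistent with LLAMA_CUBLAS=0 in the Dockerfile
)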
requirements.txt
CHANGED
@@ -5,9 +5,9 @@ beautifulsoup4==4.13.4
 google-generativeai==0.8.5
 pandas==2.2.3
 transformers==4.51.3
-# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311
-llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-
+# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # Older version based on wheel if the below line doesn't work
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu
 sentence_transformers==4.1.0
 faiss-cpu==1.10.0
requirements_aws.txt
ADDED
@@ -0,0 +1,27 @@
+#langchain==0.3.24
+#langchain-huggingface==0.1.2 # Loaded in Dockerfile
+boto3==1.38.0
+python-dotenv==1.1.0
+langchain-community==0.3.22
+beautifulsoup4==4.13.4
+google-generativeai==0.8.5
+pandas==2.2.3
+transformers==4.51.3
+# For Windows https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.2/llama_cpp_python-0.3.2-cp311-#cp311-win_amd64.whl -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu # For linux
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
+#torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cpu # Loaded in Dockerfile
+#sentence_transformers==4.1.0 # Loaded in Dockerfile
+faiss-cpu==1.10.0
+pypdf==5.4.0
+python-docx==1.1.2
+#keybert==0.9.0 # Loaded in Dockerfile
+#span-marker==1.7.0 # Loaded in Dockerfile
+gradio==5.25.2
+nltk==3.9.1
+bm25s==0.2.12
+PyStemmer==2.2.0.3
+scikit-learn==1.6.1
+scipy==1.15.2
+numpy==1.26.4
+
requirements_gpu.txt
CHANGED
@@ -1,4 +1,4 @@
-langchain==0.3.24
+#langchain==0.3.24
 langchain-community==0.3.22
 langchain-huggingface==0.1.2
 beautifulsoup4==4.13.4
@@ -6,8 +6,8 @@ google-generativeai==0.8.5
 pandas==2.2.3
 transformers==4.51.3
 torch==2.5.1 --extra-index-url https://download.pytorch.org/whl/cu121
-llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
-
+#llama-cpp-python==0.3.2 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+llama-cpp-python==0.3.8 -C cmake.args="-DGGML_CUDA=on"
 sentence_transformers==4.1.0
 faiss-cpu==1.10.0
 pypdf==5.4.0