nicole-ait committed on
Commit
0658357
·
1 Parent(s): 53be521

init w/ file upload

Browse files
Files changed (3) hide show
  1. .gitignore +5 -0
  2. app.py +97 -0
  3. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .vscode
2
+ __pycache__
3
+
4
+ docker-compose.yml
5
+ Dockerfile
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+
4
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.document_loaders import TextLoader
8
+
9
+
10
def load_embeddings():
    """Build the embedding model named by the environment.

    The model name is read from the ``HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME``
    environment variable and handed to ``HuggingFaceInstructEmbeddings``.

    Returns:
        The ``HuggingFaceInstructEmbeddings`` instance for that model name.

    Raises:
        KeyError: If ``HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME`` is not set.
    """
    # The whole of os.environ is intentionally NOT printed here: the
    # environment may carry API tokens and other secrets, and this function
    # runs on every file-processing / vector-store load call.
    model_name = os.environ['HUGGINGFACEHUB_EMBEDDINGS_MODEL_NAME']
    return HuggingFaceInstructEmbeddings(model_name=model_name)
14
+
15
+
16
def split_file(file):
    """Load an uploaded text file and cut it into chunks.

    Args:
        file: Object whose ``name`` attribute is the path of the file to read.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` applied to
        the documents loaded from that path (chunk size 1000, overlap 20).
    """
    path = file.name
    print(path)
    loaded = TextLoader(path).load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    chunks = splitter.split_documents(loaded)
    return chunks
22
+
23
+
24
def get_persist_directory(file_name):
    """Return the Chroma persistence path used for *file_name*.

    The path is ``file_name`` joined onto the directory given by the
    ``CHROMADB_PERSIST_DIRECTORY`` environment variable. A ``KeyError`` is
    raised when that variable is not set.
    """
    base_directory = os.environ['CHROMADB_PERSIST_DIRECTORY']
    return os.path.join(base_directory, file_name)
26
+
27
+
28
def process_file(file):
    """Embed an uploaded file and persist it as a Chroma collection.

    The file is split into chunks, embedded, and stored in a collection
    named after the file's base name (extension removed), inside the persist
    directory derived from that same name.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or
            ``None`` when the handler fires without an upload.

    Returns:
        Always ``None``; the click handler is wired with no output component.
    """
    # The "Process" button can be clicked before anything is uploaded, in
    # which case the handler receives None. Bail out before loading the
    # embedding model instead of crashing on ``file.name``.
    if file is None:
        print('No file uploaded; nothing to process.')
        return None

    embeddings = load_embeddings()
    print(embeddings)
    docs = split_file(file)
    print(docs)

    # Collection name and persist directory are both keyed on the bare file
    # name, e.g. "/tmp/notes.txt" -> "notes".
    file_name, _ = os.path.splitext(os.path.basename(file.name))
    persist_directory = get_persist_directory(file_name)
    print(persist_directory)
    vectordb = Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        collection_name=file_name,
        persist_directory=persist_directory,
    )
    # Debug output only; note this reaches into a private attribute.
    print(vectordb._client.list_collections())
    vectordb.persist()
    return None
42
+
43
+
44
def load_vectordb(file_name):
    """Open the Chroma collection previously stored for *file_name*.

    Args:
        file_name: Name used both as the collection name and to derive the
            persist directory.

    Returns:
        A ``Chroma`` instance bound to that collection, using the embeddings
        from ``load_embeddings()``.
    """
    embedding_model = load_embeddings()
    return Chroma(
        collection_name=file_name,
        embedding_function=embedding_model,
        persist_directory=get_persist_directory(file_name),
    )
51
+
52
+
53
def add_text(bot_history, text):
    """Append the user's message to the chat history.

    Args:
        bot_history: Existing list of chat turns; it is not modified.
        text: The message the user just submitted.

    Returns:
        A 2-tuple: a new history list ending with ``(text, None)`` (the bot
        reply is still pending), and an empty string for the second output.
    """
    pending_turn = (text, None)
    extended_history = bot_history + [pending_turn]
    return extended_history, ""
56
+
57
+
58
def bot(bot_history):
    """Fill in the bot's reply for the most recent chat turn.

    Args:
        bot_history: List of ``[user_message, bot_message]`` pairs (lists or
            tuples). The list is updated in place.

    Returns:
        The same ``bot_history`` object, with its last turn's reply set to
        the placeholder answer. An empty history is returned unchanged.
    """
    # Nothing to answer yet; indexing [-1] would raise IndexError.
    if not bot_history:
        return bot_history

    # Replace the last turn instead of assigning into it: add_text appends
    # immutable tuples, so item assignment on the pair would raise TypeError.
    user_message = bot_history[-1][0]
    bot_history[-1] = [user_message, 'so cool!']
    return bot_history
61
+
62
+
63
def clear_bot():
    """Return ``None``, the value the "Clear" button writes to the chatbot."""
    cleared_value = None
    return cleared_value
65
+
66
+
67
title = "QnA Chatbot"

# Two-column page: file upload + "Process" on the left, chat on the right.
# NOTE(review): the nesting below is reconstructed from a flattened diff
# view — confirm component placement against the repository.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    with gr.Row():
        with gr.Column(scale=0.5):
            upload = gr.File(file_types=["text"], label="Upload file")
            process = gr.Button("Process")

        with gr.Column(scale=0.5):
            chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=750)
            txt = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter",
            ).style(container=False)
            clear = gr.Button("Clear")

    # Embed and persist the uploaded file; nothing is written back to the UI.
    process.click(fn=process_file, inputs=upload, outputs=None)

    # On submit: first record the user's message and empty the textbox, then
    # let the bot fill in its reply for that turn.
    txt.submit(
        fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]
    ).then(fn=bot, inputs=chatbot, outputs=chatbot)

    # Reset the conversation.
    clear.click(fn=clear_bot, inputs=None, outputs=chatbot)

demo.title = title

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ langchain
2
+ huggingface_hub
3
+ sentence_transformers
4
+ InstructorEmbedding
5
+ chromadb