Spaces:
Running
Running
File size: 7,786 Bytes
01b8e8e f99d6db b8acde7 f99d6db b8acde7 01b8e8e 39503cb 01b8e8e f456ef3 acb72cc f99d6db acb72cc 5692cb3 acb72cc 5692cb3 acb72cc 5692cb3 acb72cc 5692cb3 acb72cc f99d6db acb72cc 710a34d f99d6db 710a34d 39503cb 01b8e8e 5634055 01b8e8e 843bc9e 5634055 dd7488f 39503cb 01b8e8e 5692cb3 6a6afbf 843bc9e 42468fb 101be32 42468fb 5692cb3 01b8e8e 39503cb 843bc9e 01b8e8e 6a6afbf 01b8e8e 6a6afbf 843bc9e 1b47089 843bc9e 1b47089 6a6afbf 1b47089 dbcf2e8 46323da 6a6afbf dbcf2e8 6a6afbf 843bc9e 4107940 843bc9e 4107940 dbcf2e8 710a34d dbcf2e8 f99d6db 4107940 f99d6db 6a6afbf 4107940 dbcf2e8 46323da 6a6afbf dbcf2e8 6a6afbf 843bc9e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import streamlit as st
from interface.draw_pipelines import get_pipeline_graph
from interface.utils import (
extract_text_from_file,
extract_text_from_url,
get_pipelines,
reset_vars_data,
)
def component_select_pipeline(container):
    """Draw the pipeline selector and its parameter widgets, then (re)build the pipeline.

    The available pipelines come from ``get_pipelines()``, which yields three
    parallel sequences: names, builder callables and per-pipeline parameter
    dicts. One widget is rendered per parameter of the selected pipeline,
    chosen by the type of the parameter's current value; values of any other
    type are kept as they are.

    The pipeline is rebuilt and stored in ``st.session_state["pipeline"]``
    when no pipeline is stored yet, when the selection changed, or when any
    parameter value changed. In those cases ``reset_vars_data()`` is called
    afterwards. A "Keyword Search" pipeline is additionally rebuilt on every
    run (without the reset) as a workaround, see the TODO below.

    Args:
        container: Streamlit container (anything usable in a ``with``
            statement) in which the widgets are drawn.

    Note:
        Reads ``st.session_state["pipeline"]`` and
        ``st.session_state["pipeline_func_parameters"]`` by key, so both are
        presumably initialised by the caller before the first call -- TODO
        confirm.
    """
    pipeline_names, pipeline_funcs, pipeline_func_parameters = get_pipelines()
    with st.spinner("Loading Pipeline..."):
        with container:
            default_index = (
                pipeline_names.index("Keyword Search")
                if "Keyword Search" in pipeline_names
                else 0
            )
            selected_pipeline = st.selectbox(
                "Select pipeline",
                pipeline_names,
                index=default_index,
            )
            index_pipe = pipeline_names.index(selected_pipeline)
            parameters = pipeline_func_parameters[index_pipe]

            st.write("---")
            st.header("Pipeline Parameters")

            # Handle audio_output first: when it is enabled, top_k is forced
            # to 3 before the top_k widget is drawn.
            audio_output_value = False
            if "audio_output" in parameters:
                audio_output_value = st.checkbox(
                    "audio_output", parameters["audio_output"]
                )
                parameters["audio_output"] = audio_output_value
                if audio_output_value:
                    parameters["top_k"] = 3

            # Iterate over a snapshot because entries are reassigned in the loop.
            for parameter, value in list(parameters.items()):
                if parameter == "audio_output":
                    continue
                if isinstance(value, str):
                    value = st.text_input(parameter, value)
                elif isinstance(value, bool):
                    # bool must be tested before int: bool is a subclass of int.
                    value = st.checkbox(parameter, value)
                elif isinstance(value, int):
                    if parameter == "top_k" and audio_output_value:
                        value = 3
                    value = int(st.number_input(parameter, value=value))
                elif isinstance(value, float):
                    value = float(st.number_input(parameter, value=value))
                parameters[parameter] = value

            current = st.session_state["pipeline"]
            needs_rebuild = (
                current is None
                or current["name"] != selected_pipeline
                or list(
                    st.session_state["pipeline_func_parameters"][index_pipe].values()
                )
                != list(parameters.values())
            )
            if needs_rebuild:
                _activate_pipeline(
                    selected_pipeline,
                    pipeline_funcs[index_pipe],
                    parameters,
                    pipeline_func_parameters,
                )
                reset_vars_data()
            # TODO: Use elasticsearch and remove this workaround for TFIDF
            # Reload if Keyword Search is selected
            elif current["name"] == "Keyword Search":
                _activate_pipeline(
                    selected_pipeline,
                    pipeline_funcs[index_pipe],
                    parameters,
                    pipeline_func_parameters,
                )


def _activate_pipeline(name, build_pipeline, parameters, all_parameters):
    """Build a pipeline and store it, with all parameter dicts, in the session state.

    Args:
        name: Display name of the pipeline being built.
        build_pipeline: Callable that accepts ``parameters`` as keyword
            arguments and returns a ``(search_pipeline, index_pipeline)`` pair.
        parameters: Keyword arguments for ``build_pipeline``.
        all_parameters: Parameter dicts of every pipeline; stored under
            ``st.session_state["pipeline_func_parameters"]`` so later runs can
            detect changed values.
    """
    st.session_state["pipeline_func_parameters"] = all_parameters
    search_pipeline, index_pipeline = build_pipeline(**parameters)
    st.session_state["pipeline"] = {
        "name": name,
        "search_pipeline": search_pipeline,
        "index_pipeline": index_pipeline,
        "doc": build_pipeline.__doc__,
    }
def component_show_pipeline(pipeline, pipeline_name):
    """Render a collapsible section holding the pipeline description and graph.

    Args:
        pipeline: Mapping with a ``"doc"`` entry (description text or
            ``None``) and an entry named by ``pipeline_name``.
        pipeline_name: Key into ``pipeline`` selecting the object handed to
            ``get_pipeline_graph``.
    """
    description = pipeline["doc"]
    has_description = description is not None

    # A description mentioning "BUG" gets a warning sign in the section title.
    label = "Show pipeline"
    if has_description and "BUG" in description:
        label = label + " ⚠️"

    with st.expander(label):
        if has_description:
            st.markdown(description)
        st.plotly_chart(
            get_pipeline_graph(pipeline[pipeline_name]),
            use_container_width=True,
        )
def component_show_search_result(container, results):
    """Render every search result inside ``container``.

    For each result a numbered heading, its text, its id and the raw document
    (as JSON) are shown. The chunk id, the score and an audio player are
    shown only when the corresponding data is present.

    Args:
        container: Streamlit container (anything usable in a ``with``
            statement) in which the results are drawn.
        results: Iterable of mapping-like documents. ``"text"`` and ``"id"``
            are required; ``"meta"``, ``"score"`` and ``"content_audio"`` are
            optional. ``"content_audio"`` is opened as a file path.
    """
    with container:
        for position, document in enumerate(results, start=1):
            st.markdown(f"### Match {position}")
            st.markdown(f"**Text**: {document['text']}")
            st.markdown(f"**Document**: {document['id']}")
            st.json(document)

            # Treat a missing or empty "meta" entry as "no metadata" so one
            # result without metadata does not abort the whole listing.
            meta = (document["meta"] if "meta" in document else None) or {}
            if "_split_id" in meta:
                st.markdown(f"**Document Chunk**: {meta['_split_id']}")

            if "score" in document:
                st.markdown(f"**Score**: {document['score']:.3f}")

            if "content_audio" in document:
                # Best effort: an unreadable or unplayable audio file is
                # reported in the UI instead of interrupting the listing.
                try:
                    with open(document["content_audio"], "rb") as audio_file:
                        audio_bytes = audio_file.read()
                    st.audio(audio_bytes, format="audio/wav")
                except Exception as e:
                    st.error(f"Error loading audio: {e}")

            st.markdown("---")
def component_text_input(container, doc_id):
    """Collect documents typed into a growing series of text boxes.

    A text box is drawn for the current ``doc_id``. Each non-empty entry is
    recorded, ``doc_id`` is incremented and another box is drawn; the first
    empty box ends the series.

    Args:
        container: Streamlit container in which the widgets are drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        A ``(corpus, doc_id)`` tuple: ``corpus`` is a list of
        ``{"text": ..., "id": ...}`` dicts and ``doc_id`` is the next unused id.
    """
    corpus = []
    with container, st.expander("Enter documents"):
        while True:
            entered = st.text_input(f"Document {doc_id}", key=doc_id)
            if entered == "":
                break
            corpus.append({"text": entered, "id": doc_id})
            doc_id += 1
            st.markdown("---")
    return corpus, doc_id
def component_article_url(container, doc_id):
    """Collect documents from URLs typed into a growing series of text boxes.

    Each non-empty URL is passed to ``extract_text_from_url`` and the result
    is recorded under the current ``doc_id``; the first empty box ends the
    series. Every recorded text is then shown in its own preview section.

    Args:
        container: Streamlit container in which the widgets are drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        A ``(corpus, doc_id)`` tuple: ``corpus`` is a list of
        ``{"text": ..., "id": ...}`` dicts and ``doc_id`` is the next unused id.
    """
    corpus = []
    with container:
        with st.expander("Enter URLs"):
            while True:
                url = st.text_input(f"URL {doc_id}", key=doc_id)
                if url == "":
                    break
                corpus.append({"text": extract_text_from_url(url), "id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered by position in this batch, starting at 0.
        for number, entry in enumerate(corpus):
            with st.expander(f"Preview URL {number}"):
                st.write(entry["text"])
    return corpus, doc_id
def component_file_input(container, doc_id):
    """Collect documents from files uploaded through a growing series of uploaders.

    Each uploaded file is passed to ``extract_text_from_file``. The series
    ends at the first uploader left empty or at the first file for which the
    extraction returns ``None``. Every recorded text is then shown in its own
    preview section.

    Args:
        container: Streamlit container in which the widgets are drawn.
        doc_id: Id given to the first document; also used as the widget key.

    Returns:
        A ``(corpus, doc_id)`` tuple: ``corpus`` is a list of
        ``{"text": ..., "id": ...}`` dicts and ``doc_id`` is the next unused id.
    """
    corpus = []
    with container:
        with st.expander("Enter Files"):
            while True:
                uploaded = st.file_uploader(
                    "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
                )
                if uploaded is None:
                    break
                extracted = extract_text_from_file(uploaded)
                if extracted is None:
                    break
                corpus.append({"text": extracted, "id": doc_id})
                doc_id += 1
                st.markdown("---")

        # Previews are numbered by position in this batch, starting at 0.
        for number, entry in enumerate(corpus):
            with st.expander(f"Preview File {number}"):
                st.write(entry["text"])
    return corpus, doc_id
|