Spaces:
Sleeping
Sleeping
"""app.py | |
Smolagents agent given an SQL tool over a SQLite database built with data files | |
from the Internation Consortium of Investigative Journalism (ICIJ.org). | |
Agentic framework: | |
- smolagents | |
Database: | |
- SQLite | |
Generation: | |
- Mistral | |
:author: Didier Guillevic | |
:date: 2025-01-12 | |
""" | |
import gradio as gr | |
import icij_utils | |
import smolagents | |
import os | |
import pathlib | |
# | |
# Init a SQLite database with the data files from ICIJ.org | |
# | |
ICIJ_LEAKS_DB_NAME = 'icij_leaks.db' | |
ICIJ_LEAKS_DATA_DIR = './icij_data' | |
# Remove existing database (if present), since we will recreate it below. | |
icij_db_path = pathlib.Path(ICIJ_LEAKS_DB_NAME) | |
icij_db_path.unlink(missing_ok=True) | |
# Load ICIJ data files into an SQLite database | |
loader = icij_utils.ICIJDataLoader(ICIJ_LEAKS_DB_NAME) | |
loader.load_all_files(ICIJ_LEAKS_DATA_DIR) | |
# | |
# Init an SQLAchemy instane (over the SQLite database) | |
# | |
db = icij_utils.ICIJDatabaseConnector(ICIJ_LEAKS_DB_NAME) | |
schema = db.get_full_database_schema() | |
# | |
# Build an SQL tool | |
# | |
schema = db.get_full_database_schema() | |
metadata = icij_utils.ICIJDatabaseMetadata() | |
tool_description = ( | |
"Tool for querying the ICIJ offshore database containing financial data leaks. " | |
"This tool can execute SQL queries and return the results. " | |
"Beware that this tool's output is a string representation of the execution output.\n" | |
"It can use the following tables:" | |
) | |
# Add table documentation | |
for table, doc in metadata.TABLE_DOCS.items(): | |
tool_description += f"\n\nTable: {table}\n" | |
tool_description += f"Description: {doc.strip()}\n" | |
tool_description += "Columns:\n" | |
# Add column documentation and types | |
if table in schema: | |
for col_name, col_type in schema[table].items(): | |
col_doc = metadata.COLUMN_DOCS.get(table, {}).get(col_name, "No documentation available") | |
#tool_description += f" - {col_name}: {col_type}: {col_doc}\n" | |
tool_description += f" - {col_name}: {col_type}\n" | |
# Add source documentation | |
#tool_description += "\n\nSource IDs:\n" | |
#for source_id, descrip in metadata.SOURCE_IDS.items(): | |
# tool_description += f"- {source_id}: {descrip}\n" | |
def sql_tool(query: str) -> str: | |
"""Description to be set beloiw... | |
Args: | |
query: The query to perform. This should be correct SQL. | |
""" | |
output = "" | |
with db.get_engine().connect() as con: | |
rows = con.execute(sqlalchemy.text(query)) | |
for row in rows: | |
output += "\n" + str(row) | |
return output | |
sql_tool.description = tool_description | |
# | |
# language models | |
# | |
default_model = smolagents.HfApiModel() | |
mistral_api_key = os.environ["MISTRAL_API_KEY"] | |
mistral_model_id = "mistral/codestral-latest" | |
mistral_model = smolagents.LiteLLMModel( | |
model_id=mistral_model_id, api_key=mistral_api_key) | |
# | |
# Define the agent | |
# | |
agent = smolagents.CodeAgent( | |
tools=[sql_tool], | |
model=mistral_model | |
) | |
def generate_response(query: str) -> str: | |
"""Generate a response given query. | |
Args: | |
Returns: | |
- the response from the agent having access to a database over the ICIJ | |
data and a large language model. | |
""" | |
agent_output = agent.run(query) | |
return agent_output | |
# | |
# User interface | |
# | |
with gr.Blocks() as demo: | |
gr.Markdown(""" | |
# SQL agent | |
Database: ICIJ data on offshore financial data leaks. | |
""") | |
# Inputs: question | |
question = gr.Textbox( | |
label="Question to answer", | |
placeholder="" | |
) | |
# Response | |
response = gr.Textbox( | |
label="Response", | |
placeholder="" | |
) | |
# Button | |
with gr.Row(): | |
response_button = gr.Button("Submit", variant='primary') | |
clear_button = gr.Button("Clear", variant='secondary') | |
# Example questions given default provided PDF file | |
with gr.Accordion("Sample questions", open=False): | |
gr.Examples( | |
[ | |
["",], | |
["",], | |
], | |
inputs=[question,], | |
outputs=[response,], | |
fn=generate_response, | |
cache_examples=False, | |
label="Sample questions" | |
) | |
# Documentation | |
with gr.Accordion("Documentation", open=False): | |
gr.Markdown(""" | |
- Agentic framework: smolagents | |
- Data: icij.org | |
- Database: SQLite, SQLAlchemy | |
- Generation: Mistral | |
- Examples: Generated using Claude.ai | |
""") | |
# Click actions | |
response_button.click( | |
fn=generate_response, | |
inputs=[question,], | |
outputs=[response,] | |
) | |
clear_button.click( | |
fn=lambda: ('', ''), | |
inputs=[], | |
outputs=[question, response] | |
) | |
demo.launch(show_api=False) |