# ataliba / app.py
# amiguel's picture
# Update app.py
# f2855af verified
import streamlit as st
import os
import time
import PyPDF2
from docx import Document
import pandas as pd
from dotenv import load_dotenv
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
# Load environment variables
# Pull settings from a local .env file into the process environment before the
# rest of the script runs.
# NOTE(review): nothing in the visible code reads an environment variable
# afterwards -- confirm whether a downstream library depends on this call.
load_dotenv()
# Avatars and bios
# Images shown next to user and assistant messages in the chat view.
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
# Markdown biography that generate_response() streams when a prompt matches one
# of its bio triggers. The emoji are stored as real Unicode characters so they
# render as pictographs instead of mis-decoded byte sequences.
# NOTE(review): the glyph on the "Current:" line was reconstructed as a
# magnifying glass; its last byte was missing from the garbled text -- confirm.
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** 🤖
**Background:**
- 🎓 Mechanical Engineering (BSc)
- ⛽ Oil & Gas Engineering (MSc Specialization)
- 🔧 17+ years in Oil & Gas Industry
- 🔍 Current: Topside Inspection Methods Engineer @ TotalEnergies
- 🤖 AI Practitioner Specialist
- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)
**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration
Ask me about engineering challenges, AI applications, or industry best practices!
"""
# UI Setup
# Inject a web font and custom padding. Raw <style> markup is only rendered
# when unsafe_allow_html is enabled.
# NOTE(review): `.st-emotion-cache-1y4p8pa` looks like an auto-generated class
# name; it may stop matching after a Streamlit upgrade -- confirm it still applies.
st.markdown("""
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
""", unsafe_allow_html=True)
# Title emoji written as real Unicode (rocket) rather than mis-decoded bytes.
st.title("🚀 Ataliba o Agent Nerdx 🚀")
# Sidebar
# Shows which model the app uses and hosts the document uploader.
# `uploaded_file` stays a module-level name (a `with` block opens no new scope),
# so the file-processing step further down can read it.
with st.sidebar:
    # Header emoji written as real Unicode (high voltage sign) rather than
    # mis-decoded bytes.
    st.header("⚡️ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader(
        "Upload technical documents",
        type=["pdf", "docx", "xlsx", "xlsm"],
    )
# Session state
# Seed the per-session keys the first time the script runs for a session;
# on later reruns the existing values are left untouched.
#   file_context -> text extracted from the uploaded document (None until parsed)
#   chat_history -> list of {"role", "content"} message dicts
for _state_key, _make_default in (
    ("file_context", lambda: None),
    ("chat_history", list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# File parser
# MIME types (lower-cased) that identify each supported upload format.
_PDF_MIME_TYPE = "application/pdf"
_DOCX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
_EXCEL_MIME_TYPES = (
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-excel",
    # Macro-enabled workbooks (.xlsm), which the uploader also accepts.
    "application/vnd.ms-excel.sheet.macroenabled.12",
)


def parse_file(file):
    """Extract plain text from an uploaded PDF, Word or Excel document.

    The format is chosen from ``file.type`` (the MIME type). When that value is
    not recognised, the extension of ``file.name`` is used instead, so uploads
    that arrive with an unexpected or generic MIME type are still parsed.

    Args:
        file: File-like upload exposing ``type`` and ``name`` attributes and
            readable by PyPDF2, python-docx or pandas.

    Returns:
        The extracted text, or ``None`` when the format is unsupported or the
        parser raised. In both cases a message is shown in the Streamlit UI.
    """
    mime = (getattr(file, "type", "") or "").lower()
    ext = os.path.splitext(getattr(file, "name", "") or "")[1].lower()
    try:
        if mime == _PDF_MIME_TYPE or ext == ".pdf":
            reader = PyPDF2.PdfReader(file)
            # Guard against a page yielding no text (None), which would make
            # str.join raise.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if mime == _DOCX_MIME_TYPE or ext == ".docx":
            doc = Document(file)
            return "\n".join(para.text for para in doc.paragraphs)
        if mime in _EXCEL_MIME_TYPES or ext in (".xlsx", ".xlsm"):
            # read_excel with default arguments loads the first worksheet only.
            return pd.read_excel(file).to_string()
    except Exception as e:
        # Top-level UI boundary: report any parser failure instead of crashing
        # the whole app run.
        st.error(f"Error processing file: {str(e)}")
        return None
    # No branch matched: tell the user rather than silently returning nothing.
    st.warning(f"Unsupported file type: {mime or ext or 'unknown'}")
    return None
# Process file
# Keep st.session_state.file_context in step with the uploader widget:
#   * no file selected      -> drop any previously extracted text
#   * new or different file -> parse it and replace the stored text
#   * same file on a rerun  -> reuse the stored text (no re-parse)
# The (name, size) pair identifies the file the stored text came from.
if uploaded_file is None:
    # The user removed the document, so answers must stop using its text.
    st.session_state.file_context = None
    st.session_state["file_context_key"] = None
else:
    _file_key = (uploaded_file.name, uploaded_file.size)
    _is_new_file = st.session_state.get("file_context_key") != _file_key
    if _is_new_file or not st.session_state.get("file_context"):
        # Rewind in case the stream was already consumed on an earlier run.
        uploaded_file.seek(0)
        st.session_state.file_context = parse_file(uploaded_file)
        st.session_state["file_context_key"] = _file_key
    if st.session_state.file_context:
        st.sidebar.success("✅ Document loaded successfully")
# Load model
@st.cache_resource
def load_unsloth_model():
    """Build the inference-ready model and its tokenizer.

    Loads the 4-bit base checkpoint through ``FastLanguageModel``, attaches the
    ``amiguel/unsloth_finetune_test`` adapter, and switches the model to
    inference mode. ``st.cache_resource`` wraps the function so the returned
    objects are cached by Streamlit rather than rebuilt on every call.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    load_kwargs = {
        "model_name": "unsloth/llama-3-8b-Instruct-bnb-4bit",
        "max_seq_length": 2048,
        "dtype": None,
        "load_in_4bit": True,
    }
    llm, tok = FastLanguageModel.from_pretrained(**load_kwargs)
    # Attach the fine-tuned adapter weights on top of the base checkpoint.
    llm.load_adapter("amiguel/unsloth_finetune_test")
    FastLanguageModel.for_inference(llm)
    return llm, tok
# Generate response
def generate_response(prompt):
    """Yield the assistant's reply to *prompt* one line at a time.

    Prompts containing a bio trigger phrase are answered with the canned
    ``ATALIBA_BIO`` text. Every other prompt is sent to the language model
    together with the uploaded document's text (if any) as context.

    Args:
        prompt: The user's message.

    Yields:
        Newline-terminated lines of the reply, or a single
        ``"⚠️ Model Error: ..."`` chunk if loading or running the model raises.
    """
    bio_triggers = ('who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies')
    # Lower-case once instead of once per trigger.
    lowered = prompt.lower()
    if any(trigger in lowered for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            # Short pause so the UI renders the bio progressively.
            time.sleep(0.1)
        return
    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # For a causal LM, generate() returns the prompt tokens followed by the
        # new ones. Decode only the continuation so the instructions and the
        # document context are not echoed back as part of the answer.
        prompt_len = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()
        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)
    except Exception as e:
        # Top-level UI boundary: surface the failure in the chat instead of
        # aborting the Streamlit run.
        yield f"⚠️ Model Error: {str(e)}"
# Chat interface
# Replay the stored conversation so it survives Streamlit's script reruns.
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"], avatar=USER_AVATAR if msg["role"] == "user" else BOT_AVATAR):
        st.markdown(msg["content"])
# Handle a newly submitted message: record it, echo it, then stream the reply.
if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""
        for chunk in generate_response(prompt):
            full_response += chunk
            # Trailing block character acts as a typing cursor while streaming.
            response_placeholder.markdown(full_response + "▌")
        # Final render without the cursor.
        response_placeholder.markdown(full_response)
    st.session_state.chat_history.append({"role": "assistant", "content": full_response})