import streamlit as st
import os
import time
import PyPDF2
from docx import Document
import pandas as pd
from dotenv import load_dotenv
from unsloth import FastLanguageModel
# Load environment variables
load_dotenv()
# Avatars and bios
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** 🤖

**Background:**
- 🎓 Mechanical Engineering (BSc)
- ⛽ Oil & Gas Engineering (MSc Specialization)
- 🔧 17+ years in the Oil & Gas industry
- 📍 Current: Topside Inspection Methods Engineer @ TotalEnergies
- 🤖 AI Practitioner Specialist
- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration

Ask me about engineering challenges, AI applications, or industry best practices!
"""
# UI Setup
st.markdown("""
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
""", unsafe_allow_html=True)
st.title("🚀 Ataliba the Agent Nerdx 🚀")
# Sidebar
with st.sidebar:
    st.header("⚡️ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader("Upload technical documents", type=["pdf", "docx", "xlsx", "xlsm"])
# Session state
if "file_context" not in st.session_state:
st.session_state.file_context = None
if "chat_history" not in st.session_state:
st.session_state.chat_history = []
# File parser
def parse_file(file):
    """Extract plain text from an uploaded PDF, Word, or Excel file."""
    try:
        if file.type == "application/pdf":
            reader = PyPDF2.PdfReader(file)
            # extract_text() can return None on image-only pages
            return "\n".join([page.extract_text() or "" for page in reader.pages])
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = Document(file)
            return "\n".join([para.text for para in doc.paragraphs])
        elif file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"]:
            df = pd.read_excel(file)
            return df.to_string()
        return None  # unsupported file type
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
# Process file
if uploaded_file and not st.session_state.file_context:
    st.session_state.file_context = parse_file(uploaded_file)
    if st.session_state.file_context:
        st.sidebar.success("✅ Document loaded successfully")
# Load model
@st.cache_resource
def load_unsloth_model():
    """Load the 4-bit base model once and attach the fine-tuned LoRA adapter."""
    base_model = "unsloth/llama-3-8b-Instruct-bnb-4bit"
    adapter = "amiguel/unsloth_finetune_test"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=2048,
        dtype=None,          # auto-detect dtype (bfloat16 on supported GPUs)
        load_in_4bit=True,
    )
    model.load_adapter(adapter)
    FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference path
    return model, tokenizer
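
# Note: the uploaded document is pasted verbatim into the prompt below, while the
# model above is loaded with max_seq_length=2048, so a long file can overflow the
# context window. A minimal, optional guard sketched under that assumption
# (truncate_context is our helper name, not part of the original app); it could be
# applied to st.session_state.file_context before the prompt is built.
def truncate_context(tokenizer, text, max_tokens=1500):
    """Keep only the first max_tokens tokens of text."""
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    if len(token_ids) <= max_tokens:
        return text
    return tokenizer.decode(token_ids[:max_tokens])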
# Generate response
def generate_response(prompt):
    """Yield the reply in chunks so the UI can render it incrementally."""
    bio_triggers = ['who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies']

    # Biography questions are answered from the canned bio, no model call needed
    if any(trigger in prompt.lower() for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            time.sleep(0.1)
        return

    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # Slice off the prompt tokens so only the newly generated text is shown
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)
    except Exception as e:
        yield f"⚠️ Model Error: {str(e)}"
# Chat interface
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"], avatar=USER_AVATAR if msg["role"] == "user" else BOT_AVATAR):
        st.markdown(msg["content"])

if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})

    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""
        for chunk in generate_response(prompt):
            full_response += chunk
            response_placeholder.markdown(full_response + "▌")  # blinking-cursor effect
        response_placeholder.markdown(full_response)

    st.session_state.chat_history.append({"role": "assistant", "content": full_response})