import streamlit as st
import os
import time
import PyPDF2
from docx import Document
import pandas as pd
from dotenv import load_dotenv
from unsloth import FastLanguageModel
# Load environment variables
load_dotenv()
# Avatars and bios
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** 🤖

**Background:**
- 🎓 Mechanical Engineering (BSc)
- ⛽ Oil & Gas Engineering (MSc Specialization)
- 🔧 17+ years in the Oil & Gas industry
- 📍 Current: Topside Inspection Methods Engineer @ TotalEnergies
- 🤖 AI Practitioner Specialist
- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration

Ask me about engineering challenges, AI applications, or industry best practices!
"""
# UI Setup
st.markdown("""
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
""", unsafe_allow_html=True)
st.title("🚀 Ataliba the Agent Nerdx 🚀")
# Sidebar
with st.sidebar:
    st.header("⚡️ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader("Upload technical documents", type=["pdf", "docx", "xlsx", "xlsm"])
# Session state
if "file_context" not in st.session_state:
st.session_state.file_context = None
if "chat_history" not in st.session_state:
st.session_state.chat_history = []
# File parser
def parse_file(file):
    """Extract plain text from an uploaded PDF, Word, or Excel file."""
    try:
        if file.type == "application/pdf":
            reader = PyPDF2.PdfReader(file)
            # extract_text() can return None on image-only pages
            return "\n".join([page.extract_text() or "" for page in reader.pages])
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = Document(file)
            return "\n".join([para.text for para in doc.paragraphs])
        elif file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"]:
            df = pd.read_excel(file)
            return df.to_string()
        return None  # unsupported file type
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
# Process file
if uploaded_file and not st.session_state.file_context:
    st.session_state.file_context = parse_file(uploaded_file)
    if st.session_state.file_context:
        st.sidebar.success("✅ Document loaded successfully")
# Load model
@st.cache_resource
def load_unsloth_model():
    """Load the 4-bit base model once and attach the fine-tuned LoRA adapter."""
    base_model = "unsloth/llama-3-8b-Instruct-bnb-4bit"
    adapter = "amiguel/unsloth_finetune_test"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=2048,
        dtype=None,          # auto-detect dtype (bfloat16 on supported GPUs)
        load_in_4bit=True,
    )
    model.load_adapter(adapter)
    FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference path
    return model, tokenizer
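
# Note: the uploaded document is pasted verbatim into the prompt below, while the
# model above is loaded with max_seq_length=2048, so a long file can overflow the
# context window. A minimal, optional guard sketched under that assumption
# (truncate_context is our helper name, not part of the original app); it could be
# applied to st.session_state.file_context before the prompt is built.
def truncate_context(tokenizer, text, max_tokens=1500):
    """Keep only the first max_tokens tokens of text."""
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    if len(token_ids) <= max_tokens:
        return text
    return tokenizer.decode(token_ids[:max_tokens])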
# Generate response
def generate_response(prompt):
    """Yield the reply in chunks so the UI can render it incrementally."""
    bio_triggers = ['who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies']

    # Biography questions are answered from the canned bio, no model call needed
    if any(trigger in prompt.lower() for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            time.sleep(0.1)
        return

    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # Slice off the prompt tokens so only the newly generated text is shown
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)
    except Exception as e:
        yield f"⚠️ Model Error: {str(e)}"
# Chat interface
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"], avatar=USER_AVATAR if msg["role"] == "user" else BOT_AVATAR):
        st.markdown(msg["content"])

if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})

    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""
        for chunk in generate_response(prompt):
            full_response += chunk
            response_placeholder.markdown(full_response + "▌")  # blinking-cursor effect
        response_placeholder.markdown(full_response)

    st.session_state.chat_history.append({"role": "assistant", "content": full_response})