import streamlit as st
import time
import PyPDF2
from docx import Document
import pandas as pd
from dotenv import load_dotenv
from unsloth import FastLanguageModel

# Load environment variables
load_dotenv()

# Avatars and bios
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** πŸ€–

**Background:**
- πŸŽ“ Mechanical Engineering (BSc)
- β›½ Oil & Gas Engineering (MSc Specialization)
- πŸ”§ 17+ years in Oil & Gas Industry
- πŸ” Current: Topside Inspection Methods Engineer @ TotalEnergies
- πŸ€– AI Practitioner Specialist
- πŸš€ Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration

Ask me about engineering challenges, AI applications, or industry best practices!
"""

# UI Setup
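# Note: .st-emotion-cache-1y4p8pa is an auto-generated Streamlit class name;
# it can change between Streamlit releases, so the padding tweak is best-effort.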
st.markdown("""
    <style>
    @import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
    * { font-family: 'Tw Cen MT', sans-serif; }
    .st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
    </style>
""", unsafe_allow_html=True)

st.title("πŸš€ Ataliba o Agent Nerdx πŸš€")

# Sidebar
with st.sidebar:
    st.header("⚑️ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader("Upload technical documents", type=["pdf", "docx", "xlsx", "xlsm"])

# Session state
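# Streamlit reruns this script top-to-bottom on every interaction;
# st.session_state keeps the parsed document and chat history across reruns.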
if "file_context" not in st.session_state:
    st.session_state.file_context = None
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# File parser
def parse_file(file):
    try:
        if file.type == "application/pdf":
            reader = PyPDF2.PdfReader(file)
            return "\n".join([page.extract_text() for page in reader.pages])
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = Document(file)
            return "\n".join([para.text for para in doc.paragraphs])
        elif file.type in ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"]:
            df = pd.read_excel(file)
            return df.to_string()
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None

# Process file
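# Parse once: file_context is only set while empty, so swapping documents
# mid-session requires a page refresh.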
if uploaded_file and not st.session_state.file_context:
    st.session_state.file_context = parse_file(uploaded_file)
    if st.session_state.file_context:
        st.sidebar.success("βœ… Document loaded successfully")

# Load model
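# cache_resource keeps a single model instance per server process, so the
# multi-GB checkpoint is downloaded and loaded only on the first request.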
@st.cache_resource
def load_unsloth_model():
    base_model = "unsloth/llama-3-8b-Instruct-bnb-4bit"
    adapter = "amiguel/unsloth_finetune_test"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True
    )
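    # Attach the fine-tuned LoRA weights, then switch to inference mode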
    model.load_adapter(adapter)
    FastLanguageModel.for_inference(model)
    return model, tokenizer

# Generate response
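# Responses are yielded line by line so the UI can stream them; the
# time.sleep calls add a deliberate typing cadence.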
def generate_response(prompt):
    bio_triggers = ['who are you', 'ataliba', 'yourself', 'skilled at', 
                    'background', 'experience', 'valonylabs', 'totalenergies']

    if any(trigger in prompt.lower() for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            time.sleep(0.1)
        return

    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"

        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # Decode only the newly generated tokens; outputs[0] also echoes the prompt
        response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)

    except Exception as e:
        yield f"⚠️ Model Error: {str(e)}"

# Chat interface
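# Replay stored history first, since the script reruns from the top on each message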
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"], avatar=USER_AVATAR if msg["role"] == "user" else BOT_AVATAR):
        st.markdown(msg["content"])

if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""

        for chunk in generate_response(prompt):
            full_response += chunk
            response_placeholder.markdown(full_response + "β–Œ")

        response_placeholder.markdown(full_response)
        st.session_state.chat_history.append({"role": "assistant", "content": full_response})