Spaces:

amiguel
/

ataliba

Sleeping

App Files Files Community

ataliba / app.py

amiguel

Update app.py

f2855af verified 7 days ago

raw

history blame contribute delete

5.22 kB

	import streamlit as st
	import os
	import time
	import PyPDF2
	from docx import Document
	import pandas as pd
	from dotenv import load_dotenv
	from unsloth import FastLanguageModel
	from transformers import AutoTokenizer

	# Load environment variables from a local .env file into the process environment.
	# NOTE(review): nothing in the visible code reads an environment variable afterwards —
	# presumably this is for library-level settings (e.g. a Hugging Face token); confirm.
	load_dotenv()

	# Chat avatars: both images live in the same GitHub repository, each pinned to a commit hash.
	_AVATAR_REPO = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis"
	USER_AVATAR = f"{_AVATAR_REPO}/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
	BOT_AVATAR = f"{_AVATAR_REPO}/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

	# Canned self-introduction; generate_response streams it line by line instead of
	# calling the model whenever the user's message contains one of its bio triggers.
	ATALIBA_BIO = """
	I am Ataliba Miguel's Digital Twin 🤖

	Background:
	- 🎓 Mechanical Engineering (BSc)
	- ⛽ Oil & Gas Engineering (MSc Specialization)
	- 🔧 17+ years in Oil & Gas Industry
	- 🔍 Current: Topside Inspection Methods Engineer @ TotalEnergies
	- 🤖 AI Practitioner Specialist
	- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

	Capabilities:
	- Technical document analysis
	- Engineering insights
	- AI-powered problem solving
	- Cross-domain knowledge integration

	Ask me about engineering challenges, AI applications, or industry best practices!
	"""

	# UI Setup: inject the custom font/padding stylesheet, then render the page title.
	# NOTE(review): `.st-emotion-cache-1y4p8pa` looks like an auto-generated Streamlit
	# class name — presumably tied to a specific Streamlit version; confirm it still matches.
	_PAGE_CSS = """
	<style>
	@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
	* { font-family: 'Tw Cen MT', sans-serif; }
	.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
	</style>
	"""
	# unsafe_allow_html lets the raw <style> element through instead of being escaped.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	st.title("🚀 Ataliba o Agent Nerdx 🚀")

	# Sidebar: model information plus the document uploader (object notation on st.sidebar).
	st.sidebar.header("⚡️ Hugging Face Model Loaded")
	st.sidebar.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
	# `uploaded_file` is None until the user picks a PDF, Word or Excel document.
	uploaded_file = st.sidebar.file_uploader(
	    "Upload technical documents",
	    type=["pdf", "docx", "xlsx", "xlsm"],
	)

	# Session state: seed the per-session keys the first time the script runs.
	# file_context holds the parsed document text (or None); chat_history the message dicts.
	for state_key, initial_value in (("file_context", None), ("chat_history", [])):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = initial_value

	# File parser
	def parse_file(file):
	    """Extract the text content of an uploaded PDF, Word or Excel document.

	    The format is recognised from the MIME type reported for the upload and,
	    as a fallback, from the file-name extension. The fallback matters because
	    the uploader accepts ``.xlsm`` files, whose MIME type is not the plain
	    xlsx/xls one, and because the reported MIME type may be empty.

	    Args:
	        file: Uploaded file object exposing ``type`` (MIME string) and,
	            when available, ``name``. Presumably a Streamlit ``UploadedFile``
	            coming from the sidebar uploader.

	    Returns:
	        The extracted text as a single string, or ``None`` when the format is
	        unsupported or parsing fails (an error is shown in the UI either way).
	    """
	    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
	    excel_mimes = (
	        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        "application/vnd.ms-excel",
	        "application/vnd.ms-excel.sheet.macroEnabled.12",  # .xlsm
	    )

	    mime = getattr(file, "type", "") or ""
	    extension = os.path.splitext(getattr(file, "name", "") or "")[1].lower()

	    try:
	        if mime == "application/pdf" or extension == ".pdf":
	            reader = PyPDF2.PdfReader(file)
	            # A page without a text layer may yield nothing; `or ""` keeps the join safe.
	            return "\n".join(page.extract_text() or "" for page in reader.pages)
	        if mime == docx_mime or extension == ".docx":
	            doc = Document(file)
	            return "\n".join(para.text for para in doc.paragraphs)
	        if mime in excel_mimes or extension in (".xlsx", ".xlsm"):
	            return pd.read_excel(file).to_string()
	    except Exception as e:
	        # UI boundary: surface any parser failure to the user instead of crashing the app.
	        st.error(f"Error processing file: {str(e)}")
	        return None

	    # Previously an unrecognised type fell through and returned None without any feedback.
	    st.error(f"Error processing file: unsupported file type '{mime or extension or 'unknown'}'")
	    return None

	# Process file: (re)parse whenever the selected upload changes.
	# The previous guard (`not file_context`) meant a second upload was never parsed and a
	# removed upload kept feeding stale text to the model, so the upload's identity is tracked.
	if uploaded_file is None:
	    # Nothing selected (or the file was removed): drop any stale document context.
	    st.session_state.file_context = None
	    st.session_state.file_context_key = None
	else:
	    # Prefer Streamlit's per-upload id when the installed version exposes it.
	    upload_key = getattr(uploaded_file, "file_id", None) or (uploaded_file.name, uploaded_file.size)
	    if st.session_state.get("file_context_key") != upload_key:
	        st.session_state.file_context = parse_file(uploaded_file)
	        if st.session_state.file_context:
	            # The key is remembered only on success, so a failed parse is retried on the next rerun.
	            st.session_state.file_context_key = upload_key
	            st.sidebar.success("✅ Document loaded successfully")

	# Load model
	@st.cache_resource
	def load_unsloth_model():
	    """Load the 4-bit base model, attach the fine-tuned adapter and prepare it for inference.

	    Decorated with ``st.cache_resource`` so the loaded objects are reused
	    rather than rebuilt on every script rerun.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    loaded = FastLanguageModel.from_pretrained(
	        model_name="unsloth/llama-3-8b-Instruct-bnb-4bit",
	        max_seq_length=2048,
	        dtype=None,  # presumably lets Unsloth pick the dtype — confirm
	        load_in_4bit=True,
	    )
	    llm, tok = loaded
	    # Adapter weights published on the Hugging Face Hub.
	    llm.load_adapter("amiguel/unsloth_finetune_test")
	    FastLanguageModel.for_inference(llm)
	    return llm, tok

	# Generate response
	def generate_response(prompt):
	    """Yield the assistant's reply to *prompt* one line at a time.

	    If the prompt contains one of the bio trigger phrases (case-insensitive
	    substring match), the canned ``ATALIBA_BIO`` text is streamed and the
	    model is not called. Otherwise the cached model answers a prompt built
	    from the uploaded document context and the question.

	    Args:
	        prompt: The user's chat message.

	    Yields:
	        Reply chunks, each a line terminated by a newline. If the model call
	        raises, a single "⚠️ Model Error: ..." message is yielded instead.
	    """
	    bio_triggers = ('who are you', 'ataliba', 'yourself', 'skilled at',
	                    'background', 'experience', 'valonylabs', 'totalenergies')

	    lowered = prompt.lower()  # lower-case once, not once per trigger
	    if any(trigger in lowered for trigger in bio_triggers):
	        for line in ATALIBA_BIO.split('\n'):
	            yield line + '\n'
	            time.sleep(0.1)  # small delay to give a typing effect
	        return

	    try:
	        model, tokenizer = load_unsloth_model()
	        context = st.session_state.file_context or ""
	        # NOTE(review): the whole document text is inlined here; a long document can
	        # exceed the 2048-token max_seq_length the model is loaded with — confirm
	        # whether the context should be truncated or chunked.
	        full_prompt = f"You are an expert in life balance and general knowledge. Use the context to answer precisely.\nContext: {context}\n\nQuestion: {prompt}"

	        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
	        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
	        # For a decoder-only model the generated sequence starts with the prompt tokens.
	        # Decode only what follows them, otherwise the reply echoes the instructions
	        # and the full document context back to the user.
	        prompt_length = inputs["input_ids"].shape[-1]
	        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

	        for line in response.split('\n'):
	            yield line + '\n'
	            time.sleep(0.05)

	    except Exception as e:
	        # UI boundary: report the failure in the chat instead of crashing the app.
	        yield f"⚠️ Model Error: {str(e)}"

	# Chat interface: replay the stored conversation so it is redrawn on every rerun.
	for entry in st.session_state.chat_history:
	    speaker = entry["role"]
	    avatar = USER_AVATAR if speaker == "user" else BOT_AVATAR
	    with st.chat_message(speaker, avatar=avatar):
	        st.markdown(entry["content"])

	# Handle a newly submitted message: record and show it, then stream the reply.
	prompt = st.chat_input("Ask about documents or technical matters...")
	if prompt:
	    st.session_state.chat_history.append({"role": "user", "content": prompt})
	    with st.chat_message("user", avatar=USER_AVATAR):
	        st.markdown(prompt)

	    with st.chat_message("assistant", avatar=BOT_AVATAR):
	        placeholder = st.empty()
	        pieces = []

	        for chunk in generate_response(prompt):
	            pieces.append(chunk)
	            # Trailing block character acts as a cursor while the reply is still streaming.
	            placeholder.markdown("".join(pieces) + "▌")

	        full_response = "".join(pieces)
	        placeholder.markdown(full_response)

	    # Persist the finished reply so the history replay shows it on the next rerun.
	    st.session_state.chat_history.append({"role": "assistant", "content": full_response})