demo_1 / app.py
Steph254's picture
Update app.py
3ecadea verified
raw
history blame
8.32 kB
import os
import gradio as gr
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
# Hugging Face access token, read from the HUGGINGFACE_TOKEN environment
# variable; None when the variable is unset.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
# Base model the QLoRA adapter is applied to. The 1B instruct checkpoint is
# part of the Llama 3.2 family (same prefix as the adapter id below);
# "meta-llama/Llama-3-1B-Instruct" is not a published repo id.
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
# QLoRA adapter repo id applied on top of BASE_MODEL.
QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
# Function to load Llama model
def load_llama_model(model_name=None):
    """Load a tokenizer and a causal-LM checkpoint on CPU.

    Args:
        model_name: Hugging Face repo id to load. When omitted, or when it
            equals ``QLORA_ADAPTER``, ``BASE_MODEL`` is loaded and the QLoRA
            adapter is applied and merged into it. Any other id is loaded
            directly, with no adapter step.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # The adapter cannot be loaded on its own; it needs BASE_MODEL underneath.
    use_adapter = model_name is None or model_name == QLORA_ADAPTER
    checkpoint = BASE_MODEL if use_adapter else model_name

    # `torch.has_bfloat16` is not a documented torch attribute, so a plain
    # attribute access can raise AttributeError. Read it defensively and fall
    # back to float32 when the flag is absent.
    if getattr(torch, "has_bfloat16", False):
        dtype = torch.bfloat16
    else:
        dtype = torch.float32

    print("Loading base model...")
    model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=dtype,
        device_map="cpu",  # Ensure it runs on CPU
        token=HUGGINGFACE_TOKEN,
    )
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(
        checkpoint, use_fast=False, token=HUGGINGFACE_TOKEN
    )
    if use_adapter:
        print("Loading QLoRA adapter...")
        model = PeftModel.from_pretrained(
            model,
            QLORA_ADAPTER,
            token=HUGGINGFACE_TOKEN,
        )
        print("Merging LoRA weights...")
        model = model.merge_and_unload()  # Merge LoRA weights for inference
    return tokenizer, model
# Load Llama 3.2 model
# These statements run at import time. MODEL_NAME is the same repo id string
# as QLORA_ADAPTER.
# NOTE(review): the repo id is passed as a single positional argument —
# confirm load_llama_model's signature accepts it.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"
tokenizer, model = load_llama_model(MODEL_NAME)
# Load Llama Guard for content moderation
# guard_tokenizer / guard_model are the module globals read by moderate_input.
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME)
# Define Prompt Templates
# Template for the "project_analysis" prompt; placeholder: {project_description}.
_PROJECT_ANALYSIS_TEMPLATE = """<|begin_of_text|><|prompt|>Analyze this project description and generate:
1. Project timeline with milestones
2. Required technology stack
3. Potential risks
4. Team composition
5. Cost estimation
Project: {project_description}<|completion|>"""

# Template for the "code_generation" prompt; placeholders:
# {feature_description}, {programming_language}, {coding_standards}.
_CODE_GENERATION_TEMPLATE = """<|begin_of_text|><|prompt|>Generate implementation code for this feature:
{feature_description}
Considerations:
- Use {programming_language}
- Follow {coding_standards}
- Include error handling
- Add documentation<|completion|>"""

# Template for the "risk_analysis" prompt; placeholder: {project_data}.
_RISK_ANALYSIS_TEMPLATE = """<|begin_of_text|><|prompt|>Predict potential risks for this project plan:
{project_data}
Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>"""

# Prompt templates keyed by prompt type; each is filled in with str.format.
PROMPTS = {
    "project_analysis": _PROJECT_ANALYSIS_TEMPLATE,
    "code_generation": _CODE_GENERATION_TEMPLATE,
    "risk_analysis": _RISK_ANALYSIS_TEMPLATE,
}
# Function: Content Moderation using Llama Guard
def moderate_input(user_input):
    """Screen *user_input* with the Llama Guard model.

    Args:
        user_input: Text to check.

    Returns:
        A warning string when the guard's reply marks the input as
        problematic, otherwise ``None``.
    """
    # Llama Guard specific prompt format
    # NOTE(review): the Llama Guard model card documents building the prompt
    # with the tokenizer's chat template — confirm this hand-written format
    # produces reliable verdicts.
    prompt = f"""<|begin_of_text|><|user|>
Input: {user_input}
Please verify that this input doesn't violate any content policies.
<|assistant|>"""
    inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True)
    prompt_len = inputs.input_ids.shape[-1]
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = guard_model.generate(
            **inputs,  # input_ids plus the attention mask
            # Budget only the reply: max_length would also count the prompt
            # tokens, leaving no room for a verdict on long inputs.
            max_new_tokens=64,
            # A classification verdict should be deterministic; temperature
            # has no effect unless sampling is enabled.
            do_sample=False,
        )
    # Decode only the newly generated tokens. The full sequence starts with
    # the prompt, so user text containing a marker word would otherwise be
    # flagged regardless of what the guard answered.
    verdict = guard_tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).lower()
    # "unsafe" is Llama Guard's documented verdict; the other markers are kept
    # from the previous implementation.
    markers = ("unsafe", "flagged", "violated", "policy violation")
    if any(marker in verdict for marker in markers):
        return "⚠️ Content flagged by Llama Guard. Please modify your input."
    return None  # Safe input, proceed normally
# Function: Generate AI responses
def generate_response(prompt_type, **kwargs):
    """Fill a prompt template, moderate it, and generate a completion.

    Args:
        prompt_type: Key into ``PROMPTS`` selecting the template.
        **kwargs: Values for the template's ``{placeholders}``.

    Returns:
        The moderation warning when ``moderate_input`` flags the prompt,
        otherwise the generated text (without the prompt).

    Raises:
        KeyError: If ``prompt_type`` is not in ``PROMPTS`` or a placeholder
            value is missing from ``kwargs``.
    """
    prompt = PROMPTS[prompt_type].format(**kwargs)
    moderation_warning = moderate_input(prompt)
    if moderation_warning:
        return moderation_warning  # Stop processing if flagged
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    prompt_len = inputs.input_ids.shape[-1]
    with torch.no_grad():  # Disable gradient calculation for inference
        outputs = model.generate(
            **inputs,  # input_ids plus the attention mask
            # max_length would count the prompt tokens too, so a long prompt
            # could leave no budget for the answer.
            max_new_tokens=1024,
            temperature=0.7 if prompt_type == "project_analysis" else 0.5,
            top_p=0.9,
            do_sample=True,
        )
    # The output sequence begins with the prompt tokens; return only the
    # continuation so callers do not receive the template echoed back.
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
# Function: Analyze project
def analyze_project(project_desc):
    """Return generate_response's result for the "project_analysis" prompt."""
    analysis = generate_response(
        "project_analysis",
        project_description=project_desc,
    )
    return analysis
# Function: Generate code
def generate_code(feature_desc, lang="Python", standards="PEP8"):
    """Return generate_response's result for the "code_generation" prompt.

    Args:
        feature_desc: Text substituted for ``{feature_description}``.
        lang: Text substituted for ``{programming_language}``.
        standards: Text substituted for ``{coding_standards}``.
    """
    template_fields = {
        "feature_description": feature_desc,
        "programming_language": lang,
        "coding_standards": standards,
    }
    return generate_response("code_generation", **template_fields)
# Function: Predict risks
def predict_risks(project_data):
    """Generate a risk analysis and return the first JSON object in it.

    Args:
        project_data: Project plan text substituted into the "risk_analysis"
            prompt.

    Returns:
        The decoded JSON object, or ``{"error": ...}`` when the response
        contains no ``{`` or no decodable object.
    """
    risks = generate_response("risk_analysis", project_data=project_data)
    start = risks.find("{")
    if start == -1:
        return {"error": "Could not parse JSON response"}
    # Try each "{" as a candidate start. raw_decode stops at the end of the
    # first complete JSON value, so trailing prose or stray braces after the
    # object do not break parsing (a greedy "{.*}" match would include them).
    decoder = json.JSONDecoder()
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(risks, start)
        except json.JSONDecodeError:
            start = risks.find("{", start + 1)
        else:
            return parsed
    return {"error": "Invalid JSON response. Please refine your input."}
# Gradio UI
def create_gradio_interface():
    """Build the Gradio Blocks UI and return it (not yet launched).

    The UI has four tabs: project analysis, code generation, risk analysis,
    and a chat tab whose handler calls the model directly.

    Returns:
        The ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")

        # Project Analysis Tab
        with gr.Tab("Project Setup"):
            project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
            project_output = gr.Textbox(label="Project Analysis", lines=15)  # Changed from JSON to Textbox for better formatting
            analyze_btn = gr.Button("Analyze Project")
            analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)

        # Code Generation Tab
        with gr.Tab("Code Assistant"):
            code_input = gr.Textbox(label="Feature Description", lines=3)
            lang_select = gr.Dropdown(["Python", "JavaScript", "Java", "C++"], label="Language", value="Python")
            standards_select = gr.Dropdown(["PEP8", "Google", "Airbnb"], label="Coding Standard", value="PEP8")
            code_output = gr.Code(label="Generated Code")
            code_btn = gr.Button("Generate Code")
            code_btn.click(generate_code, inputs=[code_input, lang_select, standards_select], outputs=code_output)

        # Risk Analysis Tab
        with gr.Tab("Risk Analysis"):
            risk_input = gr.Textbox(label="Project Plan", lines=5)
            risk_output = gr.JSON(label="Risk Predictions")
            risk_btn = gr.Button("Predict Risks")
            risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)

        # Real-time Chatbot for Collaboration
        with gr.Tab("Live Collaboration"):
            gr.Markdown("## Real-time Project Collaboration")
            chat = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Chat with AI PM")
            clear = gr.Button("Clear Chat")

            def respond(message, chat_history):
                """Append the model's reply to the chat and clear the textbox.

                Returns ``("", chat_history)``: the empty string resets the
                input box, the list is the updated conversation.
                """
                # NOTE(review): assumes chat_history is a list of
                # (user, ai) pairs — confirm against the Chatbot format in use.
                # Guard against a None history (the clear button sets it).
                chat_history = chat_history or []
                moderation_warning = moderate_input(message)
                if moderation_warning:
                    chat_history.append((message, moderation_warning))
                    return "", chat_history
                # Format chat history for context: the last 3 exchanges.
                history_text = "".join(
                    f"User: {usr}\nAI: {ai}\n" for usr, ai in chat_history[-3:]
                )
                prompt = f"""<|begin_of_text|><|prompt|>Project Management Chat:
Context: {message}
Chat History: {history_text}
User: {message}<|completion|>"""
                inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
                prompt_len = inputs.input_ids.shape[-1]
                with torch.no_grad():
                    outputs = model.generate(
                        **inputs,  # input_ids plus the attention mask
                        # Budget the reply only; max_length would also count
                        # the prompt, which grows with the history.
                        max_new_tokens=1024,
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True,
                    )
                # Decode only the continuation; otherwise the whole prompt
                # (including prior history) is echoed into every reply and
                # then fed back in as history on the next turn.
                response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
                chat_history.append((message, response))
                return "", chat_history

            msg.submit(respond, [msg, chat], [msg, chat])
            clear.click(lambda: None, None, chat, queue=False)
    return demo
# Run Gradio App
if __name__ == "__main__":
    # Build the UI and launch it with share=True.
    create_gradio_interface().launch(share=True)