Spaces:

Steph254
/

demo_1

Runtime error

App Files Files Community

demo_1 / app.py

Steph254

Update app.py

61d529e verified about 1 month ago

raw

history blame

8.06 kB

	import os
	import gradio as gr
	import torch
	import json
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel

	# Set Hugging Face Token for Authentication
	HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN") # Ensure this is set in your environment

	# Add this at the beginning of your script
	token_value = os.getenv("HUGGINGFACE_TOKEN")
	if token_value:
	print("HUGGINGFACE_TOKEN is set")
	# Print first few characters to verify it's not empty
	print(f"Token starts with: {token_value[:5]}...")
	else:
	print("HUGGINGFACE_TOKEN is not set")

	# Correct model paths (replace with your actual paths)
	BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" # Ensure this is the correct identifier
	QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8" # Ensure this is correct
	LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4" # Ensure this is correct

	# Function to load Llama model
	def load_llama_model():
	print(f"🔄 Loading Base Model: {BASE_MODEL}")

	tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_auth_token=HUGGINGFACE_TOKEN)
	model = AutoModelForCausalLM.from_pretrained(
	BASE_MODEL,
	use_auth_token=HUGGINGFACE_TOKEN,
	torch_dtype=torch.float16,
	low_cpu_mem_usage=True
	)

	print(f"✅ Base Model Loaded Successfully")

	# Load QLoRA adapter if available
	print(f"🔄 Loading QLoRA Adapter: {QLORA_ADAPTER}")
	model = PeftModel.from_pretrained(model, QLORA_ADAPTER, use_auth_token=HUGGINGFACE_TOKEN)
	print("🔄 Merging LoRA Weights...")
	model = model.merge_and_unload()
	print("✅ QLoRA Adapter Loaded Successfully")

	model.eval()
	return tokenizer, model

	# Function to load Llama Guard Model for content moderation
	def load_llama_guard():
	print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")

	tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME, use_auth_token=HUGGINGFACE_TOKEN)
	model = AutoModelForCausalLM.from_pretrained(
	LLAMA_GUARD_NAME,
	use_auth_token=HUGGINGFACE_TOKEN,
	torch_dtype=torch.float16,
	low_cpu_mem_usage=True
	)

	model.eval()
	print("✅ Llama Guard Model Loaded Successfully")
	return tokenizer, model

	except Exception as e:
	print(f"❌ Error loading model {model_path}: {e}")
	raise

	# Load Llama 3.2 model
	tokenizer, model = load_llama_model(QLORA_ADAPTER)

	# Load Llama Guard for content moderation
	guard_tokenizer, guard_model = load_llama_model(LLAMA_GUARD_NAME, is_guard=True)

	# Define Prompt Templates (same as before)
	PROMPTS = {
	"project_analysis": """<\|begin_of_text\|><\|prompt\|>Analyze this project description and generate:
	1. Project timeline with milestones
	2. Required technology stack
	3. Potential risks
	4. Team composition
	5. Cost estimation
	Project: {project_description}<\|completion\|>""",

	"code_generation": """<\|begin_of_text\|><\|prompt\|>Generate implementation code for this feature:
	{feature_description}
	Considerations:
	- Use {programming_language}
	- Follow {coding_standards}
	- Include error handling
	- Add documentation<\|completion\|>""",

	"risk_analysis": """<\|begin_of_text\|><\|prompt\|>Predict potential risks for this project plan:
	{project_data}
	Format output as JSON with risk types, probabilities, and mitigation strategies<\|completion\|>"""
	}

	# Function: Content Moderation using Llama Guard (same as before)
	def moderate_input(user_input):
	prompt = f"""<\|begin_of_text\|><\|user\|>
	Input: {user_input}
	Please verify that this input doesn't violate any content policies.
	<\|assistant\|>"""

	inputs = guard_tokenizer(prompt, return_tensors="pt", truncation=True)

	with torch.no_grad():
	outputs = guard_model.generate(
	inputs.input_ids,
	max_length=256,
	temperature=0.1
	)

	response = guard_tokenizer.decode(outputs[0], skip_special_tokens=True)

	if "flagged" in response.lower() or "violated" in response.lower() or "policy violation" in response.lower():
	return "⚠️ Content flagged by Llama Guard. Please modify your input."
	return None

	# Function: Generate AI responses (same as before)
	def generate_response(prompt_type, **kwargs):
	prompt = PROMPTS[prompt_type].format(**kwargs)

	moderation_warning = moderate_input(prompt)
	if moderation_warning:
	return moderation_warning

	inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

	with torch.no_grad():
	outputs = model.generate(
	inputs.input_ids,
	max_length=1024,
	temperature=0.7 if prompt_type == "project_analysis" else 0.5,
	top_p=0.9,
	do_sample=True
	)

	return tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Gradio UI (same as before)
	def create_gradio_interface():
	with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
	gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")

	with gr.Tab("Project Setup"):
	project_input = gr.Textbox(label="Project Description", lines=5, placeholder="Describe your project...")
	project_output = gr.Textbox(label="Project Analysis", lines=15)
	analyze_btn = gr.Button("Analyze Project")
	analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)

	with gr.Tab("Code Assistant"):
	code_input = gr.Textbox(label="Feature Description", lines=3)
	lang_select = gr.Dropdown(["Python", "JavaScript", "Java", "C++"], label="Language", value="Python")
	standards_select = gr.Dropdown(["PEP8", "Google", "Airbnb"], label="Coding Standard", value="PEP8")
	code_output = gr.Code(label="Generated Code")
	code_btn = gr.Button("Generate Code")
	code_btn.click(generate_code, inputs=[code_input, lang_select, standards_select], outputs=code_output)

	with gr.Tab("Risk Analysis"):
	risk_input = gr.Textbox(label="Project Plan", lines=5)
	risk_output = gr.JSON(label="Risk Predictions")
	risk_btn = gr.Button("Predict Risks")
	risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)

	with gr.Tab("Live Collaboration"):
	gr.Markdown("## Real-time Project Collaboration")
	chat = gr.Chatbot(height=400)
	msg = gr.Textbox(label="Chat with AI PM")
	clear = gr.Button("Clear Chat")

	def respond(message, chat_history):
	moderation_warning = moderate_input(message)
	if moderation_warning:
	chat_history.append((message, moderation_warning))
	return "", chat_history

	history_text = ""
	for i, (usr, ai) in enumerate(chat_history[-3:]):
	history_text += f"User: {usr}\nAI: {ai}\n"

	prompt = f"""<\|begin_of_text\|><\|prompt\|>Project Management Chat:
	Context: {message}
	Chat History: {history_text}
	User: {message}<\|completion\|>"""

	inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

	with torch.no_grad():
	outputs = model.generate(
	inputs.input_ids,
	max_length=1024,
	temperature=0.7,
	top_p=0.9,
	do_sample=True
	)

	response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	chat_history.append((message, response))
	return "", chat_history

	msg.submit(respond, [msg, chat], [msg, chat])
	clear.click(lambda: None, None, chat, queue=False)

	return demo

	# Run Gradio App
	if __name__ == "__main__":
	interface = create_gradio_interface()
	interface.launch(share=True)