import json
import os

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hugging Face access token, read once from the environment so the secret
# never lives in the source tree. It is forwarded to the Hub download calls
# made by this module.
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

# Alias kept because the module has always exposed this name as well.
token_value = HUGGINGFACE_TOKEN

# Report only whether the token is present. No part of the secret is printed:
# start-up logs are often readable by people who must not see credentials.
if token_value:
    print("HUGGINGFACE_TOKEN is set")
else:
    print("HUGGINGFACE_TOKEN is not set")
# Hub repository id of the checkpoint loaded as the assistant's base model.
BASE_MODEL = "meta-llama/Llama-3.2-1B-Instruct"

# Hub repository id applied on top of BASE_MODEL through PeftModel.
# NOTE(review): confirm this repository ships PEFT adapter files
# (adapter_config.json + adapter weights); PeftModel.from_pretrained needs them.
QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"

# Hub repository id of the safety classifier used to screen user input.
# NOTE(review): confirm this INT4 repository can be loaded with
# AutoModelForCausalLM before relying on it.
LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
def load_llama_model(model_path=None, is_guard=False):
    """Load a tokenizer/model pair and put the model in inference mode.

    The function accepts the arguments used by the module's existing call
    sites (``load_llama_model(QLORA_ADAPTER)`` and
    ``load_llama_model(LLAMA_GUARD_NAME, is_guard=True)``) and still works
    when called with no arguments.

    Args:
        model_path: Hub repository id. With ``is_guard=False`` it names the
            adapter applied on top of ``BASE_MODEL`` (defaults to
            ``QLORA_ADAPTER``). With ``is_guard=True`` it names the checkpoint
            to load directly (defaults to ``LLAMA_GUARD_NAME``).
        is_guard: When true, load ``model_path`` as a plain causal LM and skip
            the adapter step.

    Returns:
        A ``(tokenizer, model)`` tuple; ``model.eval()`` has been called.

    Raises:
        Exception: Whatever the underlying loaders raise. The error is printed
            together with the repository id being loaded, then re-raised.
    """
    if is_guard:
        checkpoint = model_path or LLAMA_GUARD_NAME
        adapter = None
        label = "Llama Guard Model"
    else:
        checkpoint = BASE_MODEL
        adapter = model_path or QLORA_ADAPTER
        label = "Base Model"

    # Tracks which repository is being fetched so the error message names it.
    current = checkpoint
    try:
        print(f"🔄 Loading {label}: {checkpoint}")

        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, token=HUGGINGFACE_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
        print(f"✅ {label} Loaded Successfully")

        if adapter is not None:
            current = adapter
            print(f"🔄 Loading QLoRA Adapter: {adapter}")
            model = PeftModel.from_pretrained(model, adapter, token=HUGGINGFACE_TOKEN)

            # Fold the adapter weights into the base model so inference runs
            # on a plain transformers model without the PEFT wrapper.
            print("🔄 Merging LoRA Weights...")
            model = model.merge_and_unload()
            print("✅ QLoRA Adapter Loaded Successfully")
    except Exception as e:
        print(f"❌ Error loading model {current}: {e}")
        raise

    model.eval()
    return tokenizer, model
def load_llama_guard():
    """Load the Llama Guard tokenizer and model named by ``LLAMA_GUARD_NAME``.

    Returns:
        A ``(tokenizer, model)`` tuple; ``model.eval()`` has been called.

    Raises:
        Exception: Whatever the underlying loaders raise. The error is printed
            together with the repository id, then re-raised.
    """
    print(f"🔄 Loading Llama Guard Model: {LLAMA_GUARD_NAME}")

    try:
        # `token` replaces the deprecated `use_auth_token` keyword.
        tokenizer = AutoTokenizer.from_pretrained(LLAMA_GUARD_NAME, token=HUGGINGFACE_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            LLAMA_GUARD_NAME,
            token=HUGGINGFACE_TOKEN,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
    except Exception as e:
        # The handler previously had no matching `try` and referenced an
        # undefined `model_path`; it now reports the repository being loaded.
        print(f"❌ Error loading model {LLAMA_GUARD_NAME}: {e}")
        raise

    model.eval()
    print("✅ Llama Guard Model Loaded Successfully")
    return tokenizer, model
# Assistant tokenizer/model. Called without arguments: the loader reads the
# base checkpoint and adapter ids from the module-level constants.
tokenizer, model = load_llama_model()

# Safety classifier, loaded through its dedicated loader rather than the
# assistant loader (which would try to apply the LoRA adapter to it).
guard_tokenizer, guard_model = load_llama_guard()
# Prompt templates keyed by task name. Each value is a ``str.format`` template;
# the placeholders in braces are the keyword arguments a caller must supply.
# Lines are joined with explicit "\n" so source indentation can never leak
# into the text sent to the model.
# NOTE(review): `<|prompt|>` / `<|completion|>` are not markers of the
# documented Llama 3 chat format; confirm the fine-tuned adapter expects them.
PROMPTS = {
    # Placeholder: project_description
    "project_analysis": (
        "<|begin_of_text|><|prompt|>Analyze this project description and generate:\n"
        "1. Project timeline with milestones\n"
        "2. Required technology stack\n"
        "3. Potential risks\n"
        "4. Team composition\n"
        "5. Cost estimation\n"
        "Project: {project_description}<|completion|>"
    ),
    # Placeholders: feature_description, programming_language, coding_standards
    "code_generation": (
        "<|begin_of_text|><|prompt|>Generate implementation code for this feature:\n"
        "{feature_description}\n"
        "Considerations:\n"
        "- Use {programming_language}\n"
        "- Follow {coding_standards}\n"
        "- Include error handling\n"
        "- Add documentation<|completion|>"
    ),
    # Placeholder: project_data
    "risk_analysis": (
        "<|begin_of_text|><|prompt|>Predict potential risks for this project plan:\n"
        "{project_data}\n"
        "Format output as JSON with risk types, probabilities, and mitigation strategies<|completion|>"
    ),
}
def moderate_input(user_input):
    """Screen ``user_input`` with the Llama Guard model.

    The text is wrapped as a single user turn and rendered with the guard
    tokenizer's own chat template, which is the prompt format the classifier
    is documented to expect. Per the Llama Guard 3 model card the model
    answers ``safe`` or ``unsafe`` (followed by category codes), so the
    verdict is read from the newly generated tokens only.

    Args:
        user_input: Text to classify.

    Returns:
        A user-facing warning string when the guard answers ``unsafe``,
        otherwise ``None``. Empty or whitespace-only input returns ``None``
        without running the model.
    """
    if user_input is None or not str(user_input).strip():
        return None

    conversation = [
        {
            "role": "user",
            "content": [{"type": "text", "text": str(user_input)}],
        }
    ]
    encoded = guard_tokenizer.apply_chat_template(
        conversation,
        return_tensors="pt",
        return_dict=True,
    ).to(guard_model.device)
    prompt_len = encoded["input_ids"].shape[1]

    with torch.no_grad():
        outputs = guard_model.generate(
            **encoded,
            # The verdict is a handful of tokens; greedy decoding keeps the
            # classification deterministic.
            max_new_tokens=20,
            do_sample=False,
        )

    # Drop the echoed prompt: it contains the user's own words, which must
    # not influence the keyword check below.
    verdict = guard_tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    # Test for "unsafe" specifically; "safe" is a substring of it.
    if "unsafe" in verdict.lower():
        return "⚠️ Content flagged by Llama Guard. Please modify your input."
    return None
def generate_response(prompt_type, **kwargs):
    """Fill a ``PROMPTS`` template, moderate the input, and generate a reply.

    Args:
        prompt_type: Key into ``PROMPTS`` selecting the template.
        **kwargs: Values for the template's ``{placeholders}``.

    Returns:
        The moderation warning string if ``moderate_input`` returns one,
        otherwise the text generated by the model (without the prompt).

    Raises:
        KeyError: If ``prompt_type`` is not a key of ``PROMPTS`` or a template
            placeholder is missing from ``kwargs``.
    """
    if prompt_type not in PROMPTS:
        raise KeyError(
            f"Unknown prompt_type {prompt_type!r}; expected one of {sorted(PROMPTS)}"
        )
    prompt = PROMPTS[prompt_type].format(**kwargs)

    # Screen only what the caller supplied; the fixed template text and its
    # literal special-token markers are not user content.
    user_text = "\n".join(str(value) for value in kwargs.values())
    moderation_warning = moderate_input(user_text)
    if moderation_warning:
        return moderation_warning

    # The template already begins with <|begin_of_text|>, so the tokenizer
    # must not prepend a second beginning-of-sequence token.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            # Unpacking passes the attention mask along with the input ids.
            **inputs,
            # Budget for generated tokens only; `max_length` also counted the
            # prompt and left no room for long inputs.
            max_new_tokens=1024,
            temperature=0.7 if prompt_type == "project_analysis" else 0.5,
            top_p=0.9,
            do_sample=True,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
def create_gradio_interface():
    """Build the Gradio Blocks UI and return it without launching.

    The UI has four tabs: project analysis, code generation, risk analysis,
    and a chat tab. The first three delegate to ``generate_response``; the
    chat tab builds its own prompt from the last three exchanges.

    Returns:
        The ``gr.Blocks`` instance.
    """

    # The three button handlers below are referenced by the tabs but are not
    # defined anywhere in the visible module, so they are provided here as
    # thin wrappers over generate_response. The keyword names match the
    # placeholders of the corresponding PROMPTS templates.
    def analyze_project(project_description):
        return generate_response(
            "project_analysis", project_description=project_description
        )

    def generate_code(feature_description, programming_language, coding_standards):
        return generate_response(
            "code_generation",
            feature_description=feature_description,
            programming_language=programming_language,
            coding_standards=coding_standards,
        )

    def predict_risks(project_data):
        raw = generate_response("risk_analysis", project_data=project_data)
        # The output component is gr.JSON; wrap anything that is not valid
        # JSON (free text, a moderation warning) so it can still be shown.
        try:
            return json.loads(raw)
        except (TypeError, ValueError):
            return {"raw_output": raw}

    def respond(message, chat_history):
        """Answer one chat message; returns (cleared textbox, new history)."""
        chat_history = chat_history or []
        if not message or not message.strip():
            return "", chat_history

        moderation_warning = moderate_input(message)
        if moderation_warning:
            chat_history.append((message, moderation_warning))
            return "", chat_history

        # Only the three most recent exchanges are replayed to the model.
        history_text = "".join(
            f"User: {usr}\nAI: {ai}\n" for usr, ai in chat_history[-3:]
        )

        prompt = (
            "<|begin_of_text|><|prompt|>Project Management Chat:\n"
            f"Context: {message}\n"
            f"Chat History: {history_text}\n"
            f"User: {message}<|completion|>"
        )

        # The prompt already begins with <|begin_of_text|>, so the tokenizer
        # must not prepend a second beginning-of-sequence token.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            add_special_tokens=False,
        ).to(model.device)
        prompt_len = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=1024,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
            )

        # Keep only the continuation; the full sequence would echo the prompt
        # (including the replayed history) back into the chat window.
        response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
        chat_history.append((message, response))
        return "", chat_history

    with gr.Blocks(title="AI Project Manager", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚀 AI-Powered Project Manager & Code Assistant")

        with gr.Tab("Project Setup"):
            project_input = gr.Textbox(
                label="Project Description",
                lines=5,
                placeholder="Describe your project...",
            )
            project_output = gr.Textbox(label="Project Analysis", lines=15)
            analyze_btn = gr.Button("Analyze Project")
            analyze_btn.click(analyze_project, inputs=project_input, outputs=project_output)

        with gr.Tab("Code Assistant"):
            code_input = gr.Textbox(label="Feature Description", lines=3)
            lang_select = gr.Dropdown(
                ["Python", "JavaScript", "Java", "C++"], label="Language", value="Python"
            )
            standards_select = gr.Dropdown(
                ["PEP8", "Google", "Airbnb"], label="Coding Standard", value="PEP8"
            )
            code_output = gr.Code(label="Generated Code")
            code_btn = gr.Button("Generate Code")
            code_btn.click(
                generate_code,
                inputs=[code_input, lang_select, standards_select],
                outputs=code_output,
            )

        with gr.Tab("Risk Analysis"):
            risk_input = gr.Textbox(label="Project Plan", lines=5)
            risk_output = gr.JSON(label="Risk Predictions")
            risk_btn = gr.Button("Predict Risks")
            risk_btn.click(predict_risks, inputs=risk_input, outputs=risk_output)

        with gr.Tab("Live Collaboration"):
            gr.Markdown("## Real-time Project Collaboration")
            chat = gr.Chatbot(height=400)
            msg = gr.Textbox(label="Chat with AI PM")
            clear = gr.Button("Clear Chat")

            msg.submit(respond, [msg, chat], [msg, chat])
            # Returning None resets the Chatbot component.
            clear.click(lambda: None, None, chat, queue=False)

    return demo
if __name__ == "__main__":
    # Build the UI and start serving it. share=True asks Gradio to create a
    # public share link in addition to the local server.
    interface = create_gradio_interface()
    interface.launch(share=True)