import gradio as gr
import os
import torch
import numpy as np
import random
from huggingface_hub import login, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import logging
import spaces
from threading import Thread
from collections.abc import Iterator
import csv
from llama_cpp import Llama
# Increase CSV field size limit
csv.field_size_limit(1000000)
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
# Set a seed for reproducibility
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
# Login to Hugging Face
token = os.getenv("hf_token")
HfFolder.save_token(token)
login(token)
model_paths = [
    'karths/binary_classification_train_port',
    'karths/binary_classification_train_perf',
    "karths/binary_classification_train_main",
    "karths/binary_classification_train_secu",
    "karths/binary_classification_train_reli",
    "karths/binary_classification_train_usab",
    "karths/binary_classification_train_comp",
]
quality_mapping = {
    'binary_classification_train_port': 'Portability',
    'binary_classification_train_main': 'Maintainability',
    'binary_classification_train_secu': 'Security',
    'binary_classification_train_reli': 'Reliability',
    'binary_classification_train_usab': 'Usability',
    'binary_classification_train_perf': 'Performance',
    'binary_classification_train_comp': 'Compatibility',
}
# Pre-load models and tokenizer for quality prediction
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
models = {path: AutoModelForSequenceClassification.from_pretrained(path) for path in model_paths}
def get_quality_name(model_name):
    # Map a classifier repo path to its human-readable quality label.
    return quality_mapping.get(model_name.split('/')[-1], "Unknown Quality")
def model_prediction(model, text, device):
    # Score the text with one classifier head and return the mean positive-class probability.
    model.to(device)
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    probs = softmax(logits.cpu().numpy(), axis=1)
    avg_prob = np.mean(probs[:, 1])
    model.to("cpu")  # Move the model back to CPU to free GPU memory between heads
    return avg_prob
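# Illustrative usage (a sketch, not part of the app flow): score a single issue on one head, e.g.
#   prob = model_prediction(models[model_paths[0]], "The login page times out under heavy load.", "cpu")
# The returned value is the mean probability of the positive ("quality issue present") class.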
# --- Llama CPP Model Setup with GPU ---
LLAMA_MAX_MAX_NEW_TOKENS = 512
LLAMA_DEFAULT_MAX_NEW_TOKENS = 512
LLAMA_MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "1024"))
# Check if a GPU is available
gpu_layers = 0  # CPU-only by default; llama-cpp expects an integer layer count
if torch.cuda.is_available():
    # Use all GPU layers - you can adjust this number based on your GPU memory
    gpu_layers = -1
    logging.info("GPU is available. Using GPU acceleration for llama-cpp.")
else:
    logging.info("GPU is not available. Using CPU for llama-cpp.")
# Initialize the Llama model with GPU acceleration when available
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    filename="*q8_0.gguf",       # Use the q8_0 quantization
    n_gpu_layers=gpu_layers,     # Offload layers to the GPU if one is available
    verbose=False,
)
def llama_generate(
    message: str,
    max_new_tokens: int = LLAMA_DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.3,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> str:
    try:
        output = llm(
            message,
            max_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            repeat_penalty=repetition_penalty,
            echo=False,  # Don't include the prompt in the output
        )
        # Extract the generated text from the output
        return output['choices'][0]['text']
    except Exception as e:
        logging.error(f"Error during Llama generation: {e}")
        return f"Error generating text: {str(e)}"
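# Illustrative sanity check (not invoked by the app): llama_generate can be called directly, e.g.
#   print(llama_generate("Explain why a timeout bug is a Reliability issue.", max_new_tokens=128))
# On failure it returns an "Error generating text: ..." string instead of raising.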
def generate_explanation(issue_text, top_quality):
    """Generates an explanation for the *single* top quality above the threshold."""
    if not top_quality:
        return "<div style='color: red;'>No explanation available as no quality tags met the threshold.</div>"
    quality_name = top_quality[0][0]  # top_quality is a list of (name, probability) tuples
    prompt = f"""
Given the following issue description:
---
{issue_text}
---
Explain why this issue might be classified as a **{quality_name}** issue. Provide a concise explanation, relating it back to the issue description. Keep the explanation short and concise, and don't include anything else.
"""
    try:
        explanation = llama_generate(prompt)
        # Format for better readability, directly including the quality name.
        formatted_explanation = f"<p>{explanation}</p>"
        return f"<div style='overflow-y: scroll; max-height: 400px;'>{formatted_explanation}</div>"
    except Exception as e:
        logging.error(f"Error during Llama generation: {e}")
        return "<div style='color: red;'>An error occurred while generating the explanation.</div>"
# @spaces.GPU(duration=60)
def main_interface(text):
    if not text.strip():
        return "<div style='color: red;'>No text provided. Please enter a valid issue description.</div>", "", ""
    if len(text) < 30:
        return "<div style='color: red;'>Text is less than 30 characters.</div>", "", ""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    results = []
    for model_path, model in models.items():
        quality_name = get_quality_name(model_path)
        avg_prob = model_prediction(model, text, device)
        if avg_prob >= 0.95:  # Keep *all* results above the threshold
            results.append((quality_name, avg_prob))
        logging.info(f"Model: {model_path}, Quality: {quality_name}, Average Probability: {avg_prob:.3f}")
    if not results:
        return "<div style='color: red;'>No recommendation. Prediction probability is below the threshold.</div>", "", ""
    # Sort and get the top result (if any met the threshold)
    top_result = sorted(results, key=lambda x: x[1], reverse=True)
    if top_result:
        top_quality = top_result[:1]  # Select only the top result
        output_html = render_html_output(top_quality)
        explanation = generate_explanation(text, top_quality)
    else:  # Handle the case where no prediction reached 0.95
        output_html = "<div style='color: red;'>No quality tag met the prediction probability threshold (>= 0.95).</div>"
        explanation = ""
    return output_html, "", explanation
def render_html_output(top_qualities):
    # Simplified to show only the top prediction
    styles = """
    <style>
        .quality-container {
            font-family: Arial, sans-serif;
            text-align: center;
            margin-top: 20px;
        }
        .quality-label, .ranking {
            display: inline-block;
            padding: 0.5em 1em;
            font-size: 18px;
            font-weight: bold;
            color: white;
            background-color: #007bff;
            border-radius: 0.5rem;
            margin-right: 10px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
        }
    </style>
    """
    if not top_qualities:  # Handle the empty case
        return styles + "<div class='quality-container'>No Top Prediction</div>"
    quality, _ = top_qualities[0]  # Only one entry by construction
    html_content = f"""
    <div class="quality-container">
        <span class="ranking">Top Prediction</span>
        <span class="quality-label">{quality}</span>
    </div>
    """
    return styles + html_content
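# Illustrative example: render_html_output expects a list of (quality_name, probability) tuples
# as built in main_interface, e.g. render_html_output([("Reliability", 0.97)]) returns the
# styled "Top Prediction" badge for Reliability.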
example_texts = [
    ["The algorithm does not accurately distinguish between the positive and negative classes during edge cases.\n\nEnvironment: Production\nReproduction: Run the classifier on the test dataset with known edge cases."],
    ["The regression tests do not cover scenarios involving concurrent user sessions.\n\nEnvironment: Test automation suite\nReproduction: Update the test scripts to include tests for concurrent sessions."],
    ["There is frequent miscommunication between the development and QA teams regarding feature specifications.\n\nEnvironment: Inter-team meetings\nReproduction: Audit recent communication logs and meeting notes between the teams."],
    ["The service-oriented architecture does not effectively isolate failures, leading to cascading failures across services.\n\nEnvironment: Microservices architecture\nReproduction: Simulate a service failure and observe the impact on other services."],
]
# Improved CSS for better layout and appearance
css = """
.quality-container {
font-family: Arial, sans-serif;
text-align: center;
margin-top: 20px;
padding: 10px;
border: 1px solid #ddd; /* Added border */
border-radius: 8px; /* Rounded corners */
background-color: #f9f9f9; /* Light background */
}
.quality-label, .ranking {
display: inline-block;
padding: 0.5em 1em;
font-size: 18px;
font-weight: bold;
color: white;
background-color: #007bff;
border-radius: 0.5rem;
margin-right: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
}
#explanation {
border: 1px solid #ccc;
padding: 10px;
margin-top: 10px;
border-radius: 4px;
background-color: #fff; /* White background for explanation */
overflow-y: auto; /* Ensure scrollbar appears if needed */
}
"""
interface = gr.Interface(
    fn=main_interface,
    inputs=gr.Textbox(lines=7, label="Issue Description", placeholder="Enter your issue text here"),
    outputs=[
        gr.HTML(label="Prediction Output"),
        gr.Textbox(label="Predictions", visible=False),
        gr.Markdown(label="Explanation"),
    ],
    title="QualityTagger",
    description="This tool classifies text into different quality domains such as Security, Usability, etc., and provides explanations.",
    examples=example_texts,
    css=css,  # Apply the custom CSS
)
interface.launch(share=True)