import gradio as gr
import os
import json
from openai import OpenAI
# Load sensitive information from environment variables
RUNPOD_API_KEY = os.getenv('RUNPOD_API_KEY')
RUNPOD_ENDPOINT_ID = os.getenv('RUNPOD_ENDPOINT_ID')
BASE_URL = f"https://api.runpod.ai/v2/{RUNPOD_ENDPOINT_ID}/openai/v1"
MODEL_NAME = "karths/coder_commit_32B"  # The specific model hosted on RunPod
MAX_TOKENS = 4096  # Max tokens for the model response
# --- OpenAI Client Initialization ---
# Check if the API key is provided
if not RUNPOD_API_KEY:
    raise ValueError("RunPod API key not found. Please set the RUNPOD_API_KEY environment variable or add it directly in the script.")
# Initialize the OpenAI client to connect to the RunPod endpoint
client = OpenAI(
    api_key=RUNPOD_API_KEY,
    base_url=BASE_URL,
)
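# Optional sanity check (sketch, left commented out): listing models verifies that the RunPod
# endpoint is reachable and the API key is accepted before the app starts serving requests.
# Whether the /models route is exposed depends on the RunPod worker configuration.
# try:
#     print("Available models:", [m.id for m in client.models.list().data])
# except Exception as e:
#     print(f"Warning: could not reach the RunPod endpoint: {e}")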
# --- Gradio App Configuration ---
title = "Python Maintainability Refactoring (RunPod)"
description = """
## Instructions for Using the Model
### Model Loading Time:
- Please allow time for the model on RunPod to initialize if it's starting fresh ("Cold Start").
### Code Submission:
- Enter or paste the Python code you wish to refactor, or use one of the provided examples.
### Python Code Constraints:
- Keep the code reasonably sized. The 120-line limit from the previous setup no longer applies, but very large code blocks may still hit limits of the RunPod instance and the model's context window. The maximum response length is set to {} tokens.
### Understanding Changes:
- Read the "Changes made" section (if the model provides one) in the refactored code response; it explains what modifications were made.
### Usage Recommendation:
- Intended for research and evaluation purposes.
""".format(MAX_TOKENS)
system_prompt = """### Instruction:
Refactor the provided Python code to improve its maintainability and efficiency and to reduce its complexity. Include the refactored code along with comments explaining the changes made to improve these metrics.
### Input:
"""
css = """.toast-wrap { display: none !important } """
examples = [
["""def analyze_sales_data(sales_records):
active_sales = filter(lambda record: record['status'] == 'active', sales_records)
sales_by_category = {}
for record in active_sales:
category = record['category']
total_sales = record['units_sold'] * record['price_per_unit']
if category not in sales_by_category:
sales_by_category[category] = {'total_sales': 0, 'total_units': 0}
sales_by_category[category]['total_sales'] += total_sales
sales_by_category[category]['total_units'] += record['units_sold']
average_sales_data = []
for category, data in sales_by_category.items():
average_sales = data['total_sales'] / data['total_units'] if data['total_units'] > 0 else 0 # Avoid division by zero
sales_by_category[category]['average_sales'] = average_sales
average_sales_data.append((category, average_sales))
average_sales_data.sort(key=lambda x: x[1], reverse=True)
for rank, (category, _) in enumerate(average_sales_data, start=1):
sales_by_category[category]['rank'] = rank
return sales_by_category"""],
["""import pandas as pd
import re
import ast
from code_bert_score import score # Assuming this library is available in the environment
import numpy as np
def preprocess_code(source_text):
def remove_comments_and_docstrings(source_code):
# Remove single-line comments
source_code = re.sub(r'#.*', '', source_code)
# Remove multi-line strings (docstrings)
source_code = re.sub(r'(\'\'\'(.*?)\'\'\'|\"\"\"(.*?)\"\"\")', '', source_code, flags=re.DOTALL)
return source_code.strip() # Added strip
# Pattern to extract code specifically from markdown blocks if present
pattern = r"```python\s+(.+?)\s+```"
matches = re.findall(pattern, source_text, re.DOTALL)
code_to_process = '\n'.join(matches) if matches else source_text
cleaned_code = remove_comments_and_docstrings(code_to_process)
return cleaned_code
def evaluate_dataframe(df):
results = {'P': [], 'R': [], 'F1': [], 'F3': []}
for index, row in df.iterrows():
try:
# Ensure inputs are lists of strings
cands = [preprocess_code(str(row['generated_text']))] # Added str() conversion
refs = [preprocess_code(str(row['output']))] # Added str() conversion
# Ensure code_bert_score.score returns four values
score_results = score(cands, refs, lang='python')
if len(score_results) == 4:
P, R, F1, F3 = score_results
results['P'].append(P.item() if hasattr(P, 'item') else P) # Handle potential tensor output
results['R'].append(R.item() if hasattr(R, 'item') else R)
results['F1'].append(F1.item() if hasattr(F1, 'item') else F1)
results['F3'].append(F3.item() if hasattr(F3, 'item') else F3) # Assuming F3 is returned
else:
print(f"Warning: Unexpected number of return values from score function for row {index}. Got {len(score_results)} values.")
for key in results.keys():
results[key].append(np.nan) # Append NaN for unexpected format
except Exception as e:
print(f"Error processing row {index}: {e}")
for key in results.keys():
results[key].append(np.nan) # Use NaN for errors
df_metrics = pd.DataFrame(results)
return df_metrics
def evaluate_dataframe_multiple_runs(df, runs=3):
all_results = []
print(f"Starting evaluation for {runs} runs...")
for run in range(runs):
print(f"Run {run + 1}/{runs}")
df_metrics = evaluate_dataframe(df.copy()) # Use a copy to avoid side effects if df is modified
all_results.append(df_metrics)
print(f"Run {run + 1} completed.")
if not all_results:
print("No results collected.")
return pd.DataFrame(), pd.DataFrame()
# Concatenate results and calculate statistics
try:
concatenated_results = pd.concat(all_results)
df_metrics_mean = concatenated_results.groupby(level=0).mean()
df_metrics_std = concatenated_results.groupby(level=0).std()
print("Mean and standard deviation calculated.")
except Exception as e:
print(f"Error calculating statistics: {e}")
# Return empty DataFrames or handle as appropriate
return pd.DataFrame(), pd.DataFrame()
return df_metrics_mean, df_metrics_std"""]
]
# --- Core Logic ---
def gen_solution(prompt):
"""
Generates a solution for a given problem prompt by calling the LLM via RunPod.
Parameters:
- prompt (str): The problem prompt including the system message and user input.
Returns:
- str: The generated solution text, or an error message.
"""
try:
# Call the OpenAI compatible endpoint on RunPod
completion = client.chat.completions.create(
model=MODEL_NAME,
messages=[{"role": "user", "content": prompt}],
temperature=0.1, # Keep temperature low for more deterministic refactoring
top_p=1.0,
max_tokens=MAX_TOKENS,
# stream=False # Explicitly setting stream to False (default)
)
# Extract the response content
response_content = completion.choices[0].message.content
return response_content
except Exception as e:
print(f"Error calling RunPod API: {e}")
# Provide a user-friendly error message
return f"Error: Could not get response from the model. Details: {str(e)}"
# --- Gradio Interface Function ---
def predict(message, history):
    """
    Handles the user input, calls the backend model, and returns the response.
    The 'history' parameter is required by gr.ChatInterface but is not used here.
    """
    # Prepend the system prompt to the user's code to build the full prompt
    input_prompt = system_prompt + str(message)
    # Get the refactored code from the backend
    refactored_code_response = gen_solution(input_prompt)
    # The response is returned directly to the ChatInterface
    return refactored_code_response
# --- Launch Gradio Interface ---
# Use gr.ChatInterface for a chat-like experience
gr.ChatInterface(
    predict,
    chatbot=gr.Chatbot(height=500, label="Refactored Code and Explanation"),
    textbox=gr.Textbox(lines=10, label="Python Code", placeholder="Enter or paste your Python code here..."),
    title=title,
    description=description,
    theme="abidlabs/Lime",  # Or choose another theme, e.g. gr.themes.Default()
    examples=examples,
    cache_examples=False,  # Consider enabling caching if the examples don't change often
    submit_btn="Submit Code",
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    css=css  # Apply the custom CSS defined above
).queue().launch(share=True)  # share=True creates a public link (use with caution)