# Spaces: Running
import joblib | |
import json | |
import re | |
# Load the trained artifacts once, at import time.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code embedded in it -- only load artifacts from a trusted source.
rf_model = joblib.load("personality_rf_model.joblib")
vectorizer = joblib.load("tfidf_vectorizer.joblib")
label_encoder = joblib.load("label_encoder.joblib")

# Load personality profiles. The encoding is pinned because the default used
# by open() depends on the platform/locale, while JSON files are UTF-8.
with open("personality_profiles.json", "r", encoding="utf-8") as f:
    PERSONALITY_MAP = json.load(f)
# Profiles reported when one trait clearly dominates, keyed by the trait's
# one-letter code. Each entry carries a display "label" and a "description".
DOMINANT_TYPES = {
    code: {"label": label, "description": description}
    for code, label, description in (
        (
            "O",
            "High Openness",
            "Curious, imaginative, and open to new experiences. Thrives in creative and unconventional environments.",
        ),
        (
            "C",
            "High Conscientiousness",
            "Organized, dependable, and goal-oriented. Strong sense of duty and self-discipline.",
        ),
        (
            "E",
            "High Extraversion",
            "Energetic, outgoing, and thrives on social interaction. Feeds off external stimulation.",
        ),
        (
            "A",
            "High Agreeableness",
            "Kind-hearted, empathetic, and cooperative. Seeks harmony and avoids conflict.",
        ),
        (
            "N",
            "High Neuroticism",
            "Emotionally sensitive, reactive, and prone to mood swings. Deeply introspective.",
        ),
    )
}
def preprocess_text(text):
    """Normalize text for prediction.

    Lowercases the input, removes every character that is neither an ASCII
    letter nor whitespace, collapses each whitespace run to one space and
    trims both ends.
    """
    lowered = text.lower()
    # Characters are deleted, not replaced by a space, so "well-known"
    # becomes "wellknown".
    letters_only = re.sub(r'[^a-zA-Z\s]', '', lowered)
    single_spaced = re.sub(r'\s+', ' ', letters_only)
    return single_spaced.strip()
def predict_personality(choices_text):
    """Predict a personality profile from recorded quiz choices.

    The text is scanned for entries of the form ``Step <n>: <k>.`` and each
    option number ``k`` is mapped to a trait: 1 -> O, 2 -> C, 3 -> E (plus
    half a point for A), 4 -> N. Other option numbers add to no trait but
    still count toward the total used as the denominator.

    Args:
        choices_text: Text containing the ``Step <n>: <k>.`` entries.

    Returns:
        A dict with keys ``type``, ``category``, ``label``, ``description``
        and ``traits`` (trait code -> count divided by number of choices).

        * "Dominant Trait": the top trait scores at least 0.5 and leads every
          other trait by at least 0.2; ``type`` is the trait code and the
          label/description come from ``DOMINANT_TYPES``.
        * "Mixed Profile": otherwise; ``type`` is a five-letter H/L code in
          O, C, E, A, N order (H when the score is above 0.3), looked up in
          ``PERSONALITY_MAP`` with a generic fallback.
        * "Error": any exception is printed and reported as a dict with
          ``type`` "ERROR" and empty ``traits`` instead of being raised.
    """
    trait_order = ("O", "C", "E", "A", "N")  # Fixed order for the binary code
    option_traits = {
        "1": "O",  # Openness
        "2": "C",  # Conscientiousness
        "3": "E",  # Extraversion/Agreeableness
        "4": "N",  # Neuroticism
    }
    try:
        # Extract choice numbers from the text
        choices = re.findall(r'Step \d+: (\d+)\.', choices_text)
        total_choices = len(choices)

        trait_counts = dict.fromkeys(trait_order, 0)
        for choice in choices:
            trait = option_traits.get(choice)
            if trait is None:
                continue
            trait_counts[trait] += 1
            # Special case: option 3 contributes to both E and A
            if choice == "3":
                trait_counts["A"] += 0.5  # Reduced weight for secondary trait

        # With no choices the raw (all-zero) counts double as the scores.
        trait_scores = {
            trait: (count / total_choices if total_choices else count)
            for trait, count in trait_counts.items()
        }

        # Dominance thresholds (score >= 0.5, lead >= 0.2) are checked on the
        # raw counts. Counts are multiples of 0.5, so this arithmetic is
        # exact, whereas subtracting the float scores is not: 0.6 - 0.4
        # evaluates to 0.19999999999999996 and would miss an exact 0.2 lead.
        # A tie for first place gives a lead of zero and is rejected here too.
        dominant_trait = max(trait_order, key=trait_counts.__getitem__)
        top_count = trait_counts[dominant_trait]
        runner_up_count = max(
            count for trait, count in trait_counts.items()
            if trait != dominant_trait
        )
        is_dominant = (
            total_choices > 0
            and 2 * top_count >= total_choices
            and 5 * (top_count - runner_up_count) >= total_choices
        )

        if is_dominant:
            profile = DOMINANT_TYPES[dominant_trait]
            return {
                "type": dominant_trait,
                "category": "Dominant Trait",
                "label": profile["label"],
                "description": profile["description"],
                "traits": trait_scores
            }

        # If no clear dominant trait, use binary code
        binary_code = "".join(
            "H" if trait_scores[trait] > 0.3 else "L" for trait in trait_order
        )

        # Get profile from personality map. An entry that lacks one of the
        # fields falls back to the generic text for that field only.
        # NOTE(review): assumes map values are dicts -- confirm against
        # personality_profiles.json.
        fallback = {
            "label": "Mixed Profile",
            "description": "A balanced combination of different personality traits."
        }
        profile = PERSONALITY_MAP.get(binary_code, fallback)
        return {
            "type": binary_code,
            "category": "Mixed Profile",
            "label": profile.get("label", fallback["label"]),
            "description": profile.get("description", fallback["description"]),
            "traits": trait_scores
        }
    except Exception as e:
        # Boundary handler: callers receive an error payload, not an exception.
        print(f"Error during prediction: {e}")
        return {
            "type": "ERROR",
            "category": "Error",
            "label": "Prediction Error",
            "description": str(e),
            "traits": {}
        }
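# if __name__ == "__main__":
#     # NOTE: predict_personality() only reads "Step <n>: <k>." entries, so this
#     # free-text sample contains no choices and produces all-zero trait scores.
#     example_text = "I enjoy meeting new people and trying new experiences"
#     result = predict_personality(example_text)
#     print(json.dumps(result, indent=2))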