# PSYCHEPLOT / model.py
# kousthubh's picture
# Final Commit
# 9801786 verified
import joblib
import json
import re
# Load the serialized model artifacts once, at import time. The paths are
# relative, so they are resolved against the process's current working
# directory.
# NOTE(review): rf_model, vectorizer and label_encoder are not referenced by
# the code visible in this file -- presumably other modules import them;
# confirm before removing.
# NOTE(security): joblib.load unpickles its input and can therefore execute
# arbitrary code; only load artifact files that come from a trusted source.
rf_model = joblib.load("personality_rf_model.joblib")
vectorizer = joblib.load("tfidf_vectorizer.joblib")
label_encoder = joblib.load("label_encoder.joblib")

# Load the personality profiles used by predict_personality() for mixed
# results. The encoding is pinned to UTF-8 so decoding does not depend on
# the platform's locale default.
with open("personality_profiles.json", "r", encoding="utf-8") as f:
    PERSONALITY_MAP = json.load(f)
# Profiles reported when one trait clearly dominates, keyed by the trait's
# single-letter code. Each value holds a display "label" and a "description".
DOMINANT_TYPES = {
    code: {"label": label, "description": description}
    for code, label, description in (
        (
            "O",
            "High Openness",
            "Curious, imaginative, and open to new experiences. Thrives in creative and unconventional environments.",
        ),
        (
            "C",
            "High Conscientiousness",
            "Organized, dependable, and goal-oriented. Strong sense of duty and self-discipline.",
        ),
        (
            "E",
            "High Extraversion",
            "Energetic, outgoing, and thrives on social interaction. Feeds off external stimulation.",
        ),
        (
            "A",
            "High Agreeableness",
            "Kind-hearted, empathetic, and cooperative. Seeks harmony and avoids conflict.",
        ),
        (
            "N",
            "High Neuroticism",
            "Emotionally sensitive, reactive, and prone to mood swings. Deeply introspective.",
        ),
    )
}
def preprocess_text(text):
    """Normalize free text before it is used for prediction.

    The text is lower-cased, every character that is neither an ASCII
    letter nor whitespace is deleted (not replaced by a space, so
    "hello,world" becomes "helloworld"), runs of whitespace are collapsed
    to a single space, and leading/trailing whitespace is trimmed.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    single_spaced = re.sub(r'\s+', ' ', letters_only)
    return single_spaced.strip()
def predict_personality(choices_text):
    """Predict a personality profile from a transcript of numbered choices.

    The text is scanned for entries of the form ``Step <n>: <k>.`` where
    ``<k>`` is the chosen option number. Options score traits as follows:
    1 -> O, 2 -> C, 3 -> E (plus half a point for A), 4 -> N. Any other
    option number adds no trait points but still counts toward the total
    that scores are divided by.

    A single trait is reported as dominant when its score is at least 0.5
    and it leads every other trait by at least 0.2. Otherwise the result is
    a five-letter H/L code in O, C, E, A, N order ("H" when the score is
    above 0.3), looked up in ``PERSONALITY_MAP`` with a generic fallback
    profile. Text with no recognizable choices therefore yields all-zero
    scores and the code "LLLLL".

    Args:
        choices_text: Transcript containing the "Step <n>: <k>." entries.

    Returns:
        A dict with the keys "type", "category", "label", "description"
        and "traits" (trait letter -> score). If anything raises, the error
        is printed and a dict with type "ERROR", the exception message as
        "description" and empty "traits" is returned instead.
    """
    trait_order = ("O", "C", "E", "A", "N")
    # Option number -> primary trait. Option 3 also feeds Agreeableness.
    option_to_trait = {"1": "O", "2": "C", "3": "E", "4": "N"}
    # Slack for binary floating-point error in the lead comparison below:
    # 0.6 - 0.4 evaluates to 0.19999999999999996, so without it a lead of
    # exactly 0.2 would wrongly fail the ">= 0.2" requirement.
    float_slack = 1e-9

    try:
        # Extract the chosen option number of every step.
        choices = re.findall(r'Step \d+: (\d+)\.', choices_text)
        total_choices = len(choices)

        trait_counts = dict.fromkeys(trait_order, 0)
        for choice in choices:
            trait = option_to_trait.get(choice)
            if trait is None:
                continue  # Unmapped option: no trait points.
            trait_counts[trait] += 1
            if choice == "3":
                # Reduced weight for the secondary trait of option 3.
                trait_counts["A"] += 0.5

        # Normalize by the number of parsed choices; with none parsed the
        # raw all-zero counts are used as the scores.
        if total_choices > 0:
            trait_scores = {
                trait: count / total_choices
                for trait, count in trait_counts.items()
            }
        else:
            trait_scores = trait_counts

        dominant_trait = max(trait_order, key=trait_scores.__getitem__)
        top_score = trait_scores[dominant_trait]
        runner_up = max(
            score for trait, score in trait_scores.items()
            if trait != dominant_trait
        )

        # A lead of at least 0.2 also rules out ties and near-ties, so no
        # separate tie check is needed.
        if top_score >= 0.5 and top_score - runner_up >= 0.2 - float_slack:
            profile = DOMINANT_TYPES[dominant_trait]
            return {
                "type": dominant_trait,
                "category": "Dominant Trait",
                "label": profile["label"],
                "description": profile["description"],
                "traits": trait_scores
            }

        # No clear dominant trait: encode each trait as High/Low in a fixed
        # order and look the code up in the profile map.
        binary_code = "".join(
            "H" if trait_scores[trait] > 0.3 else "L"
            for trait in trait_order
        )
        profile = PERSONALITY_MAP.get(binary_code, {
            "label": "Mixed Profile",
            "description": "A balanced combination of different personality traits."
        })
        return {
            "type": binary_code,
            "category": "Mixed Profile",
            "label": profile["label"],
            "description": profile["description"],
            "traits": trait_scores
        }
    except Exception as e:
        # Boundary handler: callers always get a result dict, never a raise.
        print(f"Error during prediction: {e}")
        return {
            "type": "ERROR",
            "category": "Error",
            "label": "Prediction Error",
            "description": str(e),
            "traits": {}
        }
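# NOTE: predict_personality() only parses "Step N: K." entries, so the
# free-text example below contains no recognized choices and yields all-zero
# trait scores.
# if __name__ == "__main__":
#     example_text = "I enjoy meeting new people and trying new experiences"
#     result = predict_personality(example_text)
#     print(json.dumps(result, indent=2))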