Feelings_to_Emoji / generate_embeddings.py
Dan Mo
Add script to generate and save embeddings for models
cfb0d15
"""
Utility script to pre-generate embedding pickle files for all models.
This script will:
1. Load each embedding model
2. Generate embeddings for both emotion and event dictionaries
3. Save the embeddings as pickle files in the 'embeddings' directory
Run this script once locally to create all pickle files before uploading to the repository.
"""
import os
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
from config import CONFIG, EMBEDDING_MODELS
from utils import (logger, kitchen_txt_to_dict,
save_embeddings_to_pickle, get_embeddings_pickle_path)
def generate_embeddings_for_model(model_key, model_info):
    """Encode every emoji description with one model and pickle the results.

    Args:
        model_key: Key of the model in EMBEDDING_MODELS (used in messages).
        model_info: Model information dictionary; its 'id' and 'size'
            entries are read here.

    Returns:
        Tuple of (success_emotion, success_event), i.e. the values returned
        by save_embeddings_to_pickle for the emotion and event files.
        (False, False) is returned when either dictionary comes back empty
        or when any exception is raised while loading, encoding or saving.
    """
    model_id = model_info['id']
    print(f"\nProcessing model: {model_key} ({model_id}) - {model_info['size']}")

    try:
        print(f"Loading {model_key} model...")
        model = SentenceTransformer(model_id)

        def encode_all(descriptions):
            # One encode call per entry, wrapped in a tqdm progress bar.
            return {
                symbol: model.encode(text)
                for symbol, text in tqdm(descriptions.items())
            }

        print("Loading emoji dictionaries...")
        emotion_dict = kitchen_txt_to_dict(CONFIG["emotion_file"])
        event_dict = kitchen_txt_to_dict(CONFIG["item_file"])

        # Both dictionaries must be non-empty before any encoding starts.
        if not (emotion_dict and event_dict):
            print("Error: Failed to load emoji dictionaries")
            return False, False

        print(f"Generating {len(emotion_dict)} emotion embeddings...")
        emotion_vectors = encode_all(emotion_dict)

        print(f"Generating {len(event_dict)} event embeddings...")
        event_vectors = encode_all(event_dict)

        # Resolve both target paths first, then write emotion before event.
        emotion_path = get_embeddings_pickle_path(model_id, "emotion")
        event_path = get_embeddings_pickle_path(model_id, "event")
        return (
            save_embeddings_to_pickle(emotion_vectors, emotion_path),
            save_embeddings_to_pickle(event_vectors, event_path),
        )
    except Exception as err:
        # Best-effort per model: report the failure and let the caller
        # continue with the remaining models.
        print(f"Error generating embeddings for model {model_key}: {err}")
        return False, False
def main():
    """Generate embeddings for all models and print a summary.

    Ensures the 'embeddings' directory exists, calls
    generate_embeddings_for_model for every entry in EMBEDDING_MODELS, and
    then prints one summary line per model showing whether the emotion and
    event embeddings were saved successfully.
    """
    # Create embeddings directory if it doesn't exist
    os.makedirs('embeddings', exist_ok=True)

    print(f"Generating embeddings for {len(EMBEDDING_MODELS)} models...")

    # Generate embeddings for each model, recording both outcomes per key.
    results = {}
    for model_key, model_info in EMBEDDING_MODELS.items():
        success_emotion, success_event = generate_embeddings_for_model(model_key, model_info)
        results[model_key] = {
            'emotion': success_emotion,
            'event': success_event
        }

    # Print summary
    print("\n=== Embedding Generation Summary ===")
    for model_key, result in results.items():
        # Check mark (U+2713) / ballot X (U+2717) status markers.
        status_emotion = "✓ Success" if result['emotion'] else "✗ Failed"
        status_event = "✓ Success" if result['event'] else "✗ Failed"
        print(f"{model_key:<10}: Emotion: {status_emotion}, Event: {status_event}")

    print("\nDone! Embedding pickle files are stored in the 'embeddings' directory.")
    print("You can now upload these files to your repository.")
# Run the generation only when this file is executed as a script, so that
# importing the module does not trigger it.
if __name__ == "__main__":
    main()