# Spaces: Running
import subprocess
import sys

import gradio as gr
import pandas as pd
import spacy
from sentence_transformers import SentenceTransformer
# Load the spaCy pipeline used for named-entity recognition.
# spacy.load raises OSError when the model package is not installed, so the
# slow, network-bound download only runs when it is actually needed instead
# of on every start-up.
try:
    nlp = spacy.load("en_core_web_trf")
except OSError:
    # Run the download with the interpreter that is executing this script so
    # the model lands in the same environment; check=True makes a failed
    # download raise here rather than surfacing later as a confusing
    # load error.
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_trf"],
        check=True,
    )
    nlp = spacy.load("en_core_web_trf")

# Sentence-embedding model used to encode incoming queries.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

# Hotel table read once at start-up; the functions below query it.
df_new = pd.read_csv('last_df.csv')
# Use a single spelling for the country name.
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')
# | |
# | |
# Pull the first place name out of a free-text query
def get_city_name(query):
    """Return the lower-cased text of the first "GPE" entity in ``query``.

    The query is parsed with the module-level ``nlp`` pipeline and its
    entities are scanned in order. ``None`` is returned when no entity is
    labelled "GPE".
    """
    gpe_mentions = (
        entity.text.lower()
        for entity in nlp(query).ents
        if entity.label_ == "GPE"
    )
    return next(gpe_mentions, None)
# Function to filter DataFrame by location
def filter_by_loc(query):
    """Restrict the hotel table to the locality mentioned in ``query``.

    Args:
        query: Free-text user query, passed to ``get_city_name``.

    Returns:
        The rows of ``df_new`` whose ``locality`` equals the extracted place
        name, compared case-insensitively. When no place name is found, or
        it matches no locality, ``df_new`` itself is returned (not a copy).
    """
    city_name = get_city_name(query)
    # Handle "nothing found" explicitly rather than relying on how a
    # ``None in ndarray`` membership test happens to evaluate.
    if city_name is None:
        return df_new

    # Lower-case the column once and reuse the mask for both the existence
    # check and the selection.
    matches = df_new['locality'].str.lower() == city_name.lower()
    if not matches.any():
        return df_new
    return df_new[matches]
import torch.nn as nn | |
import torch | |
import ast | |
# Function to calculate similarity score
def get_similarity_score(row, query_embedding):
    """Sum the cosine similarities between a query and a row's embeddings.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'rating_value_embedding', 'hotel_combined_embedding' and
            'review_embedding'. Each value may be the string representation
            of a list of numbers, or an already-parsed sequence, array or
            tensor.
        query_embedding: 1-D ``torch.Tensor`` to compare against.

    Returns:
        The sum of the three cosine similarities as a Python float.

    Raises:
        ValueError, SyntaxError: If a string value is not a valid Python
            literal (raised by ``ast.literal_eval``).
    """
    similarity = nn.CosineSimilarity(dim=0)  # dim=0: inputs are 1-D vectors

    def _as_vector(value):
        # literal_eval only accepts Python literals, so a stored string is
        # parsed without executing arbitrary code.
        if isinstance(value, str):
            value = ast.literal_eval(value)
        # Match the query's dtype so both operands of the comparison agree.
        return torch.as_tensor(value, dtype=query_embedding.dtype)

    embedding_columns = (
        'rating_value_embedding',
        'hotel_combined_embedding',
        'review_embedding',
    )
    return sum(
        similarity(_as_vector(row[column]), query_embedding).item()
        for column in embedding_columns
    )
# Main function to process the query and return results
def process_query(query):
    """Find the hotel most similar to ``query`` and describe it as text.

    The query is embedded with the module-level ``model``, the hotel table
    is narrowed with ``filter_by_loc``, every remaining row is scored with
    ``get_similarity_score``, and the highest-scoring row is formatted.

    Args:
        query: Free-text user query.

    Returns:
        A multi-line string with the hotel's name, city, country, star
        rating and price range, or a short message when there are no
        candidate hotels to rank.
    """
    query_embedding_tensor = torch.tensor(model.encode(query))

    # Filter DataFrame by location
    candidates = filter_by_loc(query)
    if candidates.empty:
        # Nothing to rank; indexing the first result would raise IndexError.
        return "No hotels were found for this query.\n"

    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding_tensor),
        axis=1,
    )
    # ``candidates`` is either the shared ``df_new`` or a selection from it,
    # so the scores are attached with ``assign`` (which returns a new frame)
    # instead of being written into it in place.
    top_similar = (
        candidates.assign(similarity_score=scores)
        .sort_values('similarity_score', ascending=False)
        .head(1)
    )

    hotel_name = top_similar['hotel_name'].iloc[0]
    hotel_rate = top_similar['rate'].iloc[0]
    hotel_price_range = top_similar['price_range'].iloc[0]
    hotel_city = top_similar['locality'].iloc[0]
    hotel_country = top_similar['country'].iloc[0]

    # Format the output
    lines = [
        "Here's the most similar hotel we found:",
        "-" * 30,
        f"Hotel Name: {hotel_name}",
        f"City: {hotel_city}",
        f"Country: {hotel_country}",
        f"Star Rating: {hotel_rate}",
        f"Price Range: {hotel_price_range}",
    ]
    return "\n".join(lines) + "\n"
# Gradio front end: a single text box feeds process_query and the returned
# string is shown as plain text.
ui = gr.Interface(
    title="Hotel Similarity Finder",
    description="Enter a query to find similar hotels.",
    fn=process_query,
    inputs=gr.Textbox(placeholder="Enter your query", label="Query"),
    outputs="text",
)

# Start serving the interface.
ui.launch()