# Import necessary modules
import re
import faiss
from sentence_transformers import SentenceTransformer
# Clean text function
def clean_text(text):
"""
Cleans text by removing unnecessary symbols and whitespace.
"""
text = re.sub(r"\s+", " ", text) # Replace multiple spaces with one
text = re.sub(r"[^ء-يa-zA-Z0-9.,!?؛:\-\(\)\n ]+", "", text) # Keep Arabic, English, and punctuation
return text.strip()
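
# Illustrative example (hypothetical input, not from the original file):
#   clean_text("  Hello,\n\n  world!  ")  ->  "Hello, world!"
# Runs of whitespace collapse to a single space and stray symbols are dropped.
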
# Create FAISS index
def create_faiss_index(texts):
"""
Create a FAISS index from the provided list of texts.
"""
# Clean the text before indexing
texts = [clean_text(t) for t in texts]
# Load pre-trained SentenceTransformer model
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = model.encode(texts)
# Create the FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)
return index, texts
# Search the FAISS index
def search_faiss(faiss_index, stored_texts, query, top_k=3):
"""
Search FAISS for the most relevant texts.
"""
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
query_embedding = model.encode([query])
distances, indices = faiss_index.search(query_embedding, top_k)
results = [stored_texts[i] for i in indices[0] if i < len(stored_texts)]
return results |
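

# Example usage: a minimal sketch showing how the two functions fit together.
# The sample texts and query below are hypothetical and only for illustration.
if __name__ == "__main__":
    sample_texts = [
        "FAISS makes similarity search over dense vectors fast.",
        "SentenceTransformer models map sentences to fixed-size embeddings.",
        "Cleaning text before indexing removes stray symbols and extra spaces.",
    ]
    index, stored = create_faiss_index(sample_texts)
    for match in search_faiss(index, stored, "How can I search embeddings quickly?", top_k=2):
        print(match)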