Spaces:
Sleeping
Sleeping
File size: 1,641 Bytes
0f1938f 535a3a5 0f1938f 535a3a5 0f1938f 535a3a5 0f1938f 535a3a5 0f1938f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import random
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional
from prompts import CATEGORY_SUGGESTION_PROMPT, TEXT_CLASSIFICATION_PROMPT
class BaseClassifier:
    """Common parent for text classifiers.

    Provides the ``classify`` entry point that concrete classifiers are
    expected to override, plus a helper that produces placeholder
    category names.
    """

    def __init__(self) -> None:
        """Create the classifier; the base class keeps no state."""

    def classify(self, texts: List[str], categories: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Assign each text to a category.

        Args:
            texts: Text strings to classify.
            categories: Names of the candidate categories. When ``None``,
                implementations are expected to detect categories themselves.

        Returns:
            One result per classification, carrying the category, a
            confidence score, and an explanation.

        Raises:
            NotImplementedError: Always, in this base class; subclasses
                supply the actual behaviour.
        """
        raise NotImplementedError("Subclasses must implement this method")

    def _generate_default_categories(self, texts: List[str], num_clusters: int = 5) -> List[str]:
        """Produce generic, numbered category names.

        Args:
            texts: Text strings to derive categories for. The current
                placeholder implementation does not inspect them.
            num_clusters: How many category names to produce.

        Returns:
            The names ``"Category 1"`` through ``"Category <num_clusters>"``.
        """
        # Placeholder naming only - a real system would derive categories
        # from the texts instead of numbering them.
        labels: List[str] = []
        for i in range(num_clusters):
            labels.append(f"Category {i+1}")
        return labels
|