File size: 1,774 Bytes
979c7a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import fasttext
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np

# FastAPI application instance. The route decorators further down in this
# module (`@app.post`, `@app.get`) register their handlers on this object.
# title / description / version are passed to the FastAPI constructor as
# the API's descriptive metadata.
app = FastAPI(
    title="Language Detection API",
    description="Language detection API using FastText v0.9.2 and lid.176.bin model",
    version="1.0.0"
)

# Load the language identification model.
# This runs as a module-level statement, i.e. once when the module is
# imported, and the resulting object is shared by the handlers below.
# Model: lid.176.bin (v1.0)
# - Trained on Wikipedia, Tatoeba and SETimes
# - Supports 176 languages
# - Uses character n-grams (minn=3, maxn=6 by default)
#   NOTE(review): the minn/maxn figures cannot be verified from this file;
#   confirm them against the loaded model's training arguments.
# - Vector dimension: 16
# The model path is absolute and hard-coded, so the file must exist at
# exactly this location in the runtime environment.
model = fasttext.load_model("/app/lid.176.bin")

# Monkey patch: swap the loaded model's `predict` attribute for a thin wrapper
# that forwards to the original bound method and passes both returned
# sequences through np.asarray, so callers always index NumPy arrays.
#
# NOTE(review): the wrapper's default is k=-1, which differs from the k=1
# default documented for fastText's own predict; callers that omit k get
# whatever fastText returns for k=-1 (presumably every label) - confirm this
# is intended.
# NOTE(review): the wrapper still delegates to the original predict, so any
# error raised inside that method is not avoided by this patch - confirm the
# patch achieves what it was added for.
original_predict = model.predict


def safe_predict(text, k=-1, threshold=0.0):
    """Run the unpatched predict and return ``(labels, probs)`` as NumPy arrays.

    Args:
        text: Input forwarded unchanged to the original predict.
        k: Number of labels requested, forwarded positionally.
        threshold: Minimum probability, forwarded positionally.

    Returns:
        A 2-tuple ``(labels, probs)`` where each element is the
        ``np.asarray`` conversion of the corresponding original result.
    """
    raw_labels, raw_probs = original_predict(text, k, threshold)
    label_array = np.asarray(raw_labels)
    prob_array = np.asarray(raw_probs)
    return label_array, prob_array


model.predict = safe_predict

# Request body schema for POST /detect: a single required string field.
class TextRequest(BaseModel):
    text: str  # the text whose language should be identified

# Response body schema for POST /detect: the top predicted language and the
# score the model assigned to it.
class PredictionResponse(BaseModel):
    language: str  # predicted label with the "__label__" prefix removed
    confidence: float  # model score for that label, converted to a Python float

@app.post("/detect", response_model=PredictionResponse)
async def detect_language(request: TextRequest):
    """Detect the most likely language of the submitted text.

    Multi-line text is accepted: line breaks are treated as spaces before
    the text is passed to the model.

    Args:
        request: Request body carrying the ``text`` to classify.

    Returns:
        PredictionResponse with the top language label (``__label__`` prefix
        removed) and its confidence as a float.

    Raises:
        HTTPException: 500 if the model raises while predicting; 422 if the
            model returns no label for the given text.
    """
    # fastText's predict processes one line at a time and raises ValueError
    # on input containing "\n"; fold newlines into spaces so ordinary
    # multi-line text is not turned into a server error.
    text = request.text.replace("\n", " ")

    # Keep the try body limited to the model call so that the HTTPException
    # raised below for "no prediction" is not re-wrapped as a 500.
    try:
        # Only the top label is used, so request exactly one instead of
        # relying on the predict wrapper's default k.
        labels, probs = model.predict(text, k=1)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Guard against an empty result instead of failing with an IndexError.
    if len(labels) == 0 or len(probs) == 0:
        raise HTTPException(
            status_code=422,
            detail="No language could be detected for the given text.",
        )

    return PredictionResponse(
        language=str(labels[0]).replace("__label__", ""),
        confidence=float(probs[0]),
    )

@app.get("/")
async def root():
    # Landing endpoint: returns a fixed status message that points clients
    # at the interactive documentation path.
    status_message = "Language Detection API is running. Use /docs for the API documentation."
    return {"message": status_message}