Commit
·
0c9becc
1
Parent(s):
97cd79a
Update code
Browse files
- routers/soundex.py +2 -1
- routers/spell.py +3 -2
- routers/tokenize.py +4 -3
- routers/util.py +5 -4
routers/soundex.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
|
|
4 |
from pythainlp.soundex import (
|
5 |
soundex as py_soundex
|
6 |
)
|
@@ -26,7 +27,7 @@ def soundex(word: str, engine: SoundexEngine = "udom83"):
|
|
26 |
- **word**: A word that want into phonetic code.
|
27 |
- **engine**: Soundex Engine (default is udom83)
|
28 |
"""
|
29 |
-
return Response(
|
30 |
json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
|
31 |
media_type="application/json; charset=utf-8",
|
32 |
)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
from pythainlp.soundex import (
|
6 |
soundex as py_soundex
|
7 |
)
|
|
|
27 |
- **word**: A word that want into phonetic code.
|
28 |
- **engine**: Soundex Engine (default is udom83)
|
29 |
"""
|
30 |
+
return JSONResponse(
|
31 |
json.dumps({"soundex": py_soundex(text=word, engine=engine)}, ensure_ascii=False),
|
32 |
media_type="application/json; charset=utf-8",
|
33 |
)
|
routers/spell.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
|
|
4 |
from pythainlp.spell import (
|
5 |
correct as py_correct,
|
6 |
spell as py_spell
|
@@ -34,7 +35,7 @@ def correct(word: float, engine: CorrectEngine = "pn"):
|
|
34 |
- **word**: A word that want corrects the spelling of the given word.
|
35 |
- **engine**: Correct Engine (default is pn)
|
36 |
"""
|
37 |
-
return Response(
|
38 |
json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
|
39 |
media_type="application/json; charset=utf-8",
|
40 |
)
|
@@ -50,7 +51,7 @@ def spell(word: float, engine: SpellEngine = "pn"):
|
|
50 |
- **word**: A word that want to check spell.
|
51 |
- **engine**: Spell Engine (default is pn)
|
52 |
"""
|
53 |
-
return Response(
|
54 |
json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
|
55 |
media_type="application/json; charset=utf-8",
|
56 |
)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
from pythainlp.spell import (
|
6 |
correct as py_correct,
|
7 |
spell as py_spell
|
|
|
35 |
- **word**: A word that want corrects the spelling of the given word.
|
36 |
- **engine**: Correct Engine (default is pn)
|
37 |
"""
|
38 |
+
return JSONResponse(
|
39 |
json.dumps({"word": py_correct(word, engine=engine)}, ensure_ascii=False),
|
40 |
media_type="application/json; charset=utf-8",
|
41 |
)
|
|
|
51 |
- **word**: A word that want to check spell.
|
52 |
- **engine**: Spell Engine (default is pn)
|
53 |
"""
|
54 |
+
return JSONResponse(
|
55 |
json.dumps({"word": py_spell(word, engine=engine)}, ensure_ascii=False),
|
56 |
media_type="application/json; charset=utf-8",
|
57 |
)
|
routers/tokenize.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
|
|
4 |
from pythainlp.tokenize import (
|
5 |
word_tokenize as py_word_tokenize,
|
6 |
subword_tokenize as py_subword_tokenize,
|
@@ -50,7 +51,7 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
|
|
50 |
- **text**: Text that want to tokenize.
|
51 |
- **engine**: Word Tokenize Engine (default is newmm)
|
52 |
"""
|
53 |
-
return Response(
|
54 |
json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
55 |
media_type="application/json; charset=utf-8",
|
56 |
)
|
@@ -66,7 +67,7 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
|
|
66 |
- **text**: Text that want to tokenize.
|
67 |
- **engine**: Sub word Tokenize Engine (default is tcc)
|
68 |
"""
|
69 |
-
return Response(
|
70 |
json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
71 |
media_type="application/json; charset=utf-8",
|
72 |
)
|
@@ -82,7 +83,7 @@ def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
|
|
82 |
- **text**: Text that want to tokenize.
|
83 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
84 |
"""
|
85 |
-
return Response(
|
86 |
json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
87 |
media_type="application/json; charset=utf-8",
|
88 |
)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
from pythainlp.tokenize import (
|
6 |
word_tokenize as py_word_tokenize,
|
7 |
subword_tokenize as py_subword_tokenize,
|
|
|
51 |
- **text**: Text that want to tokenize.
|
52 |
- **engine**: Word Tokenize Engine (default is newmm)
|
53 |
"""
|
54 |
+
return JSONResponse(
|
55 |
json.dumps({"words": py_word_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
56 |
media_type="application/json; charset=utf-8",
|
57 |
)
|
|
|
67 |
- **text**: Text that want to tokenize.
|
68 |
- **engine**: Sub word Tokenize Engine (default is tcc)
|
69 |
"""
|
70 |
+
return JSONResponse(
|
71 |
json.dumps({"subwords": py_subword_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
72 |
media_type="application/json; charset=utf-8",
|
73 |
)
|
|
|
83 |
- **text**: Text that want to tokenize.
|
84 |
- **engine**: Sentence Tokenize Engine (default is crfcut)
|
85 |
"""
|
86 |
+
return JSONResponse(
|
87 |
json.dumps({"sents": py_sent_tokenize(text=text, engine=engine)}, ensure_ascii=False),
|
88 |
media_type="application/json; charset=utf-8",
|
89 |
)
|
routers/util.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
|
|
4 |
from pythainlp.util import (
|
5 |
bahttext as py_bahttext,
|
6 |
normalize as py_normalize,
|
@@ -14,7 +15,7 @@ def bahttext(number: float):
|
|
14 |
"""
|
15 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
16 |
"""
|
17 |
-
return Response(
|
18 |
json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
|
19 |
media_type="application/json; charset=utf-8",
|
20 |
)
|
@@ -25,7 +26,7 @@ def normalize(text: str):
|
|
25 |
"""
|
26 |
Normalize and clean Thai text
|
27 |
"""
|
28 |
-
return Response(
|
29 |
json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
|
30 |
media_type="application/json; charset=utf-8",
|
31 |
)
|
@@ -36,7 +37,7 @@ def tone_detector(syllable: str):
|
|
36 |
"""
|
37 |
Thai tone detector for word.
|
38 |
"""
|
39 |
-
return Response(
|
40 |
json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
|
41 |
media_type="application/json; charset=utf-8",
|
42 |
)
|
@@ -53,7 +54,7 @@ def thaiword_to_num(text: str):
|
|
53 |
|
54 |
- **text**: Spelled-out numerals in Thai scripts
|
55 |
"""
|
56 |
-
return Response(
|
57 |
json.dumps({"number": py_thaiword_to_num(text)}, ensure_ascii=False),
|
58 |
media_type="application/json; charset=utf-8",
|
59 |
)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import json
|
3 |
from fastapi import APIRouter, Response
|
4 |
+
from fastapi.responses import JSONResponse
|
5 |
from pythainlp.util import (
|
6 |
bahttext as py_bahttext,
|
7 |
normalize as py_normalize,
|
|
|
15 |
"""
|
16 |
This api converts a number to Thai text and adds a suffix “บาท” (Baht).
|
17 |
"""
|
18 |
+
return JSONResponse(
|
19 |
json.dumps({"bahttext": py_bahttext(number)}, ensure_ascii=False),
|
20 |
media_type="application/json; charset=utf-8",
|
21 |
)
|
|
|
26 |
"""
|
27 |
Normalize and clean Thai text
|
28 |
"""
|
29 |
+
return JSONResponse(
|
30 |
json.dumps({"text": py_normalize(text)}, ensure_ascii=False),
|
31 |
media_type="application/json; charset=utf-8",
|
32 |
)
|
|
|
37 |
"""
|
38 |
Thai tone detector for word.
|
39 |
"""
|
40 |
+
return JSONResponse(
|
41 |
json.dumps({"tone": py_tone_detector(syllable)}, ensure_ascii=False),
|
42 |
media_type="application/json; charset=utf-8",
|
43 |
)
|
|
|
54 |
|
55 |
- **text**: Spelled-out numerals in Thai scripts
|
56 |
"""
|
57 |
+
return JSONResponse(
|
58 |
json.dumps({"number": py_thaiword_to_num(text)}, ensure_ascii=False),
|
59 |
media_type="application/json; charset=utf-8",
|
60 |
)
|