wannaphong commited on
Commit
b9def7b
·
1 Parent(s): f7003d5

Update docs

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. routers/soundex.py +5 -0
  3. routers/tokenize.py +16 -1
app.py CHANGED
@@ -26,7 +26,7 @@ app = FastAPI(
26
  # },
27
  license_info={
28
  "name": "Apache 2.0",
29
- "identifier": "Apache 2.0",
30
  },
31
  )
32
 
 
26
  # },
27
  license_info={
28
  "name": "Apache 2.0",
29
+ "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
30
  },
31
  )
32
 
routers/soundex.py CHANGED
@@ -19,5 +19,10 @@ class SoundexEngine(str, Enum):
19
  def soundex(text: str, engine: SoundexEngine = "udom83"):
20
  """
21
  This api converts Thai text into phonetic code.
 
 
 
 
 
22
  """
23
  return {"soundex": py_soundex(text=text, engine=engine)}
 
19
  def soundex(text: str, engine: SoundexEngine = "udom83"):
20
  """
21
  This api converts Thai text into phonetic code.
22
+
23
+ ## Input
24
+
25
+ - **text**: A word to convert into a phonetic code.
26
+ - **engine**: Soundex Engine (default is udom83)
27
  """
28
  return {"soundex": py_soundex(text=text, engine=engine)}
routers/tokenize.py CHANGED
@@ -43,6 +43,11 @@ class SentTokenizeEngine(BaseModel):
43
  def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
44
  """
45
  Word tokenize or word segmentation for Thai language
 
 
 
 
 
46
  """
47
  return {"words": py_word_tokenize(text=text, engine=engine)}
48
 
@@ -50,7 +55,12 @@ def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
50
  @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
51
  def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
52
  """
53
- subword tokenize or subword segmentation for Thai language
 
 
 
 
 
54
  """
55
  return {"subwords": py_subword_tokenize(text=text, engine=engine)}
56
 
@@ -59,5 +69,10 @@ def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
59
  def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
60
  """
61
  Thai sentence segmentation
 
 
 
 
 
62
  """
63
  return {"sents": py_sent_tokenize(text=text, engine=engine)}
 
43
  def word_tokenize(text: str, engine: WordTokenizeEngine = "newmm"):
44
  """
45
  Word tokenize or word segmentation for Thai language
46
+
47
+ ## Input
48
+
49
+ - **text**: Text to tokenize.
50
+ - **engine**: Word Tokenize Engine (default is newmm)
51
  """
52
  return {"words": py_word_tokenize(text=text, engine=engine)}
53
 
 
55
  @router.post('/subword_tokenize', response_model=SubwordTokenizeResponse)
56
  def subword_tokenize(text: str, engine: SubwordTokenizeEngine = "tcc"):
57
  """
58
+ Subword tokenize or subword segmentation for Thai language
59
+
60
+ ## Input
61
+
62
+ - **text**: Text to tokenize.
63
+ - **engine**: Subword Tokenize Engine (default is tcc)
64
  """
65
  return {"subwords": py_subword_tokenize(text=text, engine=engine)}
66
 
 
69
  def sent_tokenize(text: str, engine: SentTokenizeEngine = "crfcut"):
70
  """
71
  Thai sentence segmentation
72
+
73
+ ## Input
74
+
75
+ - **text**: Text to tokenize.
76
+ - **engine**: Sentence Tokenize Engine (default is crfcut)
77
  """
78
  return {"sents": py_sent_tokenize(text=text, engine=engine)}