Spaces:
Sleeping
Sleeping
File size: 1,414 Bytes
e55a136 2c2081e dbfaf91 e55a136 66fcc65 2c2081e 66fcc65 2c2081e 66fcc65 2c2081e 66fcc65 2c2081e e55a136 2c2081e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
"""Define mlbee."""
# pylint: disable=invalid-name
from typing import List
from cmat2aset310 import cmat2aset
from logzero import logger
from st_mlbee.gen_cmat import gen_cmat
def st_mlbee(
    text1: List[str],
    text2: List[str],
    eps: float = 10,
    min_samples: int = 6,
):
    """Align multilingual texts.

    Builds a correlation matrix for the two texts with ``gen_cmat`` and
    converts it to an alignment set with ``cmat2aset``. The intermediate
    matrix and the result are also stored on the function object as
    ``st_mlbee.cmat`` and ``st_mlbee.aset`` so callers can inspect them
    after the call.

    Args:
        text1: list of strings
        text2: list of strings
        eps: epsilon, forwarded to ``cmat2aset``
        min_samples: minimum number of points to be considered as a
            cluster, forwarded to ``cmat2aset``

    Returns:
        The value returned by ``cmat2aset`` (the alignment set), unchanged.

    Raises:
        ValueError: if either input contains no non-blank string.
        Exception: anything raised by ``gen_cmat`` or ``cmat2aset`` is
            logged and re-raised unchanged.

    Note:
        The former ``show_plot`` option was removed; plotting was refactored
        to cmat2html's ``show_plot``.
    """
    # any() short-circuits on the first non-blank entry instead of building
    # a filtered copy of each input just to test its truthiness.
    has_text1 = any(elm.strip() for elm in text1)
    has_text2 = any(elm.strip() for elm in text2)
    if not (has_text1 and has_text2):
        logger.warning("One or both inputs are empty")
        # ValueError is still caught by callers using `except Exception`.
        raise ValueError("Nothing to do...exiting")

    try:
        cmat = gen_cmat(text1, text2)
        # logger.level is reset to 20 in fastlid
        st_mlbee.cmat = cmat  # expose the matrix for later inspection
    except Exception as exc:
        logger.exception(exc)
        raise

    try:
        aset = cmat2aset(cmat, eps=eps, min_samples=min_samples)
        st_mlbee.aset = aset  # expose the alignment set for later inspection
    except Exception as exc:
        logger.exception(exc)
        raise

    # TODO: decide whether to return paired texts instead of the aset.
    return aset
|