meisaicheck-api / prepare.py
vumichien's picture
update quant 8int config
28bdc3c
from sentence_transformers import (
SentenceTransformer,
export_static_quantized_openvino_model,
export_dynamic_quantized_onnx_model,
)
# Model identifier used both as the source passed to SentenceTransformer and
# as the destination handed to the push/export calls below.
# NOTE(review): presumably a Hugging Face Hub repo id — confirm.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
def export_model(backend="onnx", use_qint8=False):
    """Load MODEL_NAME with the given backend and publish it.

    Args:
        backend: Either "openvino" or "onnx". Selects the backend passed to
            SentenceTransformer and which quantized exporter is used.
        use_qint8: When truthy, run the backend's quantized export helper
            with push_to_hub=True; otherwise call model.push_to_hub directly.

    Raises:
        ValueError: If backend is neither "openvino" nor "onnx".
    """
    if backend == "openvino":
        ov_model = SentenceTransformer(MODEL_NAME, backend="openvino")
        if not use_qint8:
            ov_model.push_to_hub(MODEL_NAME)
            return
        # quantization_config=None is forwarded unchanged.
        # NOTE(review): presumably selects the library's default config — confirm.
        export_static_quantized_openvino_model(
            ov_model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
        return

    if backend == "onnx":
        onnx_model = SentenceTransformer(MODEL_NAME, backend="onnx")
        if not use_qint8:
            onnx_model.push_to_hub(MODEL_NAME)
            return
        # "avx512_vnni" is passed positionally as the quantization config.
        export_dynamic_quantized_onnx_model(
            onnx_model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )
        return

    # Neither known backend matched.
    raise ValueError(f"Invalid backend: {backend}")
# Run the export for every backend / quantization pairing, in the order
# openvino+qint8, openvino plain, onnx+qint8, onnx plain.
for backend, use_qint8 in [
    (name, quantized)
    for name in ("openvino", "onnx")
    for quantized in (True, False)
]:
    print(f"Exporting {backend} model with QINT8={use_qint8}")
    export_model(backend=backend, use_qint8=use_qint8)