Spaces:
Running
Running
from sentence_transformers import ( | |
SentenceTransformer, | |
export_static_quantized_openvino_model, | |
export_dynamic_quantized_onnx_model, | |
) | |
# Model identifier used both to load the model and as the push_to_hub target.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"
def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the given backend and push it to the hub.

    Args:
        backend: Either ``"openvino"`` or ``"onnx"``; forwarded to
            ``SentenceTransformer`` as its ``backend`` argument.
        use_qint8: When true, the model is handed to the backend-specific
            quantized export helper (with ``push_to_hub=True``) instead of
            being pushed directly with ``model.push_to_hub``.

    Raises:
        ValueError: If ``backend`` is neither ``"openvino"`` nor ``"onnx"``.
    """
    # Reject unknown backends before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Non-quantized path is the same for both backends.
    if not use_qint8:
        model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        # quantization_config=None: presumably selects the library default;
        # confirm against the sentence-transformers documentation.
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        # "avx512_vnni" is passed positionally as the second argument
        # (the quantization configuration name).
        export_dynamic_quantized_onnx_model(
            model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )
# Run every backend/precision pairing in order: openvino (quantized, then
# plain), followed by onnx (quantized, then plain).
for backend, use_qint8 in (
    (name, quantized)
    for name in ("openvino", "onnx")
    for quantized in (True, False)
):
    print(f"Exporting {backend} model with QINT8={use_qint8}")
    export_model(backend=backend, use_qint8=use_qint8)