from sentence_transformers import (
    SentenceTransformer,
    export_static_quantized_openvino_model,
    export_dynamic_quantized_onnx_model,
)

# Hub repository id: the model is loaded from it and every export is pushed
# back to it.
MODEL_NAME = "Detomo/cl-nagoya-sup-simcse-ja-nss-v_1_0_3"


def export_model(backend="onnx", use_qint8=False):
    """Load ``MODEL_NAME`` with the requested backend and push it to the Hub.

    Args:
        backend: Either ``"openvino"`` or ``"onnx"``; forwarded to
            ``SentenceTransformer`` as its ``backend`` argument.
        use_qint8: When true, the backend-specific quantized export helper
            is called with ``push_to_hub=True``. When false, the loaded
            model is pushed as-is via ``model.push_to_hub``.

    Raises:
        ValueError: If ``backend`` is neither ``"openvino"`` nor ``"onnx"``.
    """
    # Reject unknown backends up front, before any model is loaded.
    if backend not in ("openvino", "onnx"):
        raise ValueError(f"Invalid backend: {backend}")

    model = SentenceTransformer(MODEL_NAME, backend=backend)

    # Non-quantized path: upload the freshly loaded model unchanged.
    if not use_qint8:
        model.push_to_hub(MODEL_NAME)
        return

    if backend == "openvino":
        # NOTE(review): quantization_config=None presumably selects the
        # library's default configuration -- confirm against the docs.
        export_static_quantized_openvino_model(
            model,
            quantization_config=None,
            model_name_or_path=MODEL_NAME,
            push_to_hub=True,
        )
    else:
        # "avx512_vnni" is passed positionally as the helper's second
        # argument (presumably a quantization preset name -- confirm).
        export_dynamic_quantized_onnx_model(
            model, "avx512_vnni", MODEL_NAME, push_to_hub=True
        )


# Export all combinations: each backend, first quantized and then plain.
# NOTE: this loop runs at module top level, so it also executes on import.
for backend in ("openvino", "onnx"):
    for use_qint8 in (True, False):
        print(f"Exporting {backend} model with QINT8={use_qint8}")
        export_model(backend=backend, use_qint8=use_qint8)