Inference with SentenceTransformer raises IndexError: map::at

#20
by AshokRaja

Code:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v2-moe",
    backend="torch",
    trust_remote_code=True
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)

StackTrace:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[4], line 13
      5 model = SentenceTransformer(
      6     "nomic-ai/nomic-embed-text-v2-moe",
      8     backend="torch",
      9     trust_remote_code=True
     10 )
     12 # Verify that everything works as expected
---> 13 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
     14 print(embeddings.shape)
     16 similarities = model.similarity(embeddings, embeddings)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:685, in SentenceTransformer.encode(self, sentences, prompt_name, prompt, batch_size, show_progress_bar, output_value, precision, convert_to_numpy, convert_to_tensor, device, normalize_embeddings, **kwargs)
    682 features.update(extra_features)
    684 with torch.no_grad():
--> 685     out_features = self.forward(features, **kwargs)
    686     if self.device.type == "hpu":
    687         out_features = copy.deepcopy(out_features)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:758, in SentenceTransformer.forward(self, input, **kwargs)
    756     module_kwarg_keys = self.module_kwargs.get(module_name, [])
    757     module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
--> 758     input = module(input, **module_kwargs)
    759 return input

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py:442, in Transformer.forward(self, features, **kwargs)
    435 """Returns token_embeddings, cls_token"""
    436 trans_features = {
    437     key: value
    438     for key, value in features.items()
    439     if key in ["input_ids", "attention_mask", "token_type_ids", "inputs_embeds"]
    440 }
--> 442 outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
    443 token_embeddings = outputs[0]
    444 features["token_embeddings"] = token_embeddings

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1910, in NomicBertModel.forward(self, input_ids, attention_mask, position_ids, token_type_ids, return_dict, matryoshka_dim, inputs_embeds)
   1907 hidden_states = self.emb_drop(hidden_states)
   1909 attention_mask = self.get_extended_attention_mask(attention_mask, hidden_states.shape[:-1])
-> 1910 sequence_output = self.encoder(hidden_states, attention_mask=attention_mask, return_dict=return_dict)
   1912 pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
   1914 if matryoshka_dim:

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1789, in NomicBertEncoder.forward(self, hidden_states, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, is_padded_inputs, rope)
   1768         hidden_states, hidden_states2, residual = torch.utils.checkpoint.checkpoint(
   1769             create_custom_forward(layer),
   1770             hidden_states,
   (...)
   1785             use_reentrant=False,
   1786         )
   1788     else:
-> 1789         hidden_states, hidden_states2, residual = layer(
   1790             hidden_states,
   1791             hidden_states2,
   1792             residual,
   1793             attention_mask,
   1794             position_ids,
   1795             None,
   1796             is_padded_inputs,
   1797             output_attentions,
   1798             use_cache,
   1799             rope=rope,
   1800         )
   1801 return hidden_states

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1718, in NomicBertBlock.forward(self, hidden_states, hidden_states2, residual, attention_mask, position_ids, past_key_value, is_padded_inputs, output_attentions, use_cache, cu_seqlens, max_seq_len, rope)
   1716 hidden_states = self.norm1((self.dropout1(attn_outputs) + hidden_states).to(dtype=self.norm1.weight.dtype))
   1717 if self.moe:
-> 1718     mlp_out = self.mlp(hidden_states, torch.where(attention_mask.squeeze() == 0, 1, 0))
   1719 else:
   1720     mlp_out = self.mlp(hidden_states)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:512, in MoE.forward(self, x, attention_mask)
    509     x_valid = x.view(-1, hidden_dim)
    511 scores, expert_weights, top_experts = self.router(x_valid)
--> 512 out = self.experts(x_valid, scores, expert_weights, top_experts)
    514 out = out.to(x.dtype)
    515 # Compute the experts.
    516 
    517 # Reconstruct the full sequence with padding

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:458, in ParallelMLP.forward(self, x, scores, expert_weights, top_experts)
    455 in_shape = x.size()
    457 # Compute the experts.
--> 458 x, tokens_per_expert = self.forward_fn(
    459     x, expert_weights, top_experts)
    460 if self.training and self.args.moe_loss_weight > 0.0:
    461     save_load_balancing_loss((tokens_per_expert, scores))

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:270, in ParallelDroplessMLP.forward_once(self, x, expert_weights, top_experts)
    268 def forward_once(self, x, expert_weights, top_experts):
    269     if self.args.mlp_impl == 'sparse':
--> 270         return self.sparse_forward_once(
    271             x, expert_weights, top_experts)
    272     else:
    273         return self.grouped_forward_once(
    274             x, expert_weights, top_experts)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:155, in ParallelDroplessMLP.sparse_forward_once(self, x, expert_weights, top_experts)
    152     topo = self.topology(x, padded_bins)
    154 # Perform the expert computation.
--> 155 x = self.mlp(x, topo)
    157 # Un-route the data for the MoE output.
    158 x = ops.padded_scatter(
    159     x,
    160     indices,
   (...)
    164     padded_bins,
    165     self.top_k)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/mlp.py:357, in SparseMLP.forward(self, x, topo)
    353     return memory_optimized_mlp(
    354         x, w1, w2, topo, self.args.activation_fn)
    356 # Compute the MLP.
--> 357 x = stk.ops.sdd(x, w1.t(), topo)
    358 activation_fn_out = act_fn(x, self.args.activation_fn)
    359 return stk.ops.dsd(activation_fn_out, w2)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/ops/linear_ops.py:42, in sdd(a, b, topo)
     40 assert isinstance(topo, Matrix)
     41 assert topo.is_contiguous()
---> 42 out = sputnik.sdd(
     43     a, b,
     44     topo.size(),
     45     topo.data,
     46     topo.offsets,
     47     topo.row_indices,
     48     topo.column_indices,
     49     topo.offsets_t,
     50     topo.column_indices_t,
     51     topo.block_offsets_t)
     52 return Matrix(topo.size(),
     53               out,
     54               topo.row_indices,
   (...)
     58               topo.offsets_t,
     59               topo.block_offsets_t)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/autograd/function.py:574, in Function.apply(cls, *args, **kwargs)
    571 if not torch._C._are_functorch_transforms_active():
    572     # See NOTE: [functorch vjp and autograd interaction]
    573     args = _functorch.utils.unwrap_dead_wrappers(args)
--> 574     return super().apply(*args, **kwargs)  # type: ignore[misc]
    576 if not is_setup_ctx_defined:
    577     raise RuntimeError(
    578         "In order to use an autograd.Function with functorch transforms "
    579         "(vmap, grad, jvp, jacrev, ...), it must override the setup_context "
    580         "staticmethod. For more details, please see "
    581         "https://pytorch.org/docs/main/notes/extending.func.html"
    582     )

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/autocast.py:28, in custom_fwd.<locals>.decorate_fwd(*args, **kwargs)
     26         dtype = torch.get_autocast_gpu_dtype()
     27         return fwd(*_cast(args, dtype), **_cast(kwargs, dtype))
---> 28 return fwd(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/sputnik.py:263, in SDD.forward(ctx, lhs, rhs, shape, data, offsets, row_indices, column_indices, offsets_t, column_indices_t, block_offsets_t)
    258 ctx.shape = shape
    259 out = torch.empty(
    260     data.shape,
    261     dtype=lhs.dtype,
    262     device=lhs.device)
--> 263 backend.sdd(lhs,
    264             rhs,
    265             shape,
    266             out,
    267             offsets,
    268             row_indices,
    269             column_indices)
    270 return out

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/triton_kernels.py:372, in sdd(lhs, rhs, shape, out, offsets, row_indices, column_indices)
    369 if trans_B:
    370     stride_bk, stride_bn = rhs.stride(1), rhs.stride(0)
--> 372 _sdd_kernel[grid](
    373     lhs, rhs, out, M, N, K,
    374     stride_am, stride_ak,
    375     stride_bk, stride_bn,
    376     out.stride(1), out.stride(2),
    377     row_indices, column_indices,
    378     GROUP_M=128, ACC_TYPE=ACC_TYPE
    379     )

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:345, in KernelInterface.__getitem__.<locals>.<lambda>(*args, **kwargs)
    339 def __getitem__(self, grid) -> T:
    340     """
    341     A JIT function is launched with: fn[grid](*args, **kwargs).
    342     Hence JITFunction.__getitem__ returns a callable proxy that
    343     memorizes the grid.
    344     """
--> 345     return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/autotuner.py:171, in Autotuner.run(self, *args, **kwargs)
    169 if config.pre_hook is not None:
    170     config.pre_hook({**self.nargs, **kwargs, **config.all_kwargs()})
--> 171 ret = self.fn.run(
    172     *args,
    173     **kwargs,
    174     **config.all_kwargs(),
    175 )
    176 self.nargs = None
    177 return ret

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:662, in JITFunction.run(self, grid, warmup, *args, **kwargs)
    660     # compile the kernel
    661     src = self.ASTSource(self, signature, constants, configs[0])
--> 662     kernel = self.compile(
    663         src,
    664         target=target,
    665         options=options.__dict__,
    666     )
    667     self.cache[device][key] = kernel
    669 # Check that used global values have not changed.

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/compiler/compiler.py:282, in compile(src, target, options)
    280 use_ttgir_loc = os.environ.get("USE_TTGIR_LOC", "0") == "1"
    281 for ext, compile_ir in list(stages.items())[first_stage:]:
--> 282     next_module = compile_ir(module, metadata)
    283     ir_filename = f"{src.name}.{ext}"
    284     metadata_group[ir_filename] = fn_cache_manager.put(next_module, ir_filename)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:318, in CUDABackend.add_stages.<locals>.<lambda>(src, metadata)
    316 stages["ttir"] = lambda src, metadata: self.make_ttir(src, metadata, options)
    317 stages["ttgir"] = lambda src, metadata: self.make_ttgir(src, metadata, options, self.capability)
--> 318 stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options, self.capability)
    319 stages["ptx"] = lambda src, metadata: self.make_ptx(src, metadata, options, self.capability)
    320 stages["cubin"] = lambda src, metadata: self.make_cubin(src, metadata, options, self.capability)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:216, in CUDABackend.make_llir(src, metadata, options, capability)
    214 if os.environ.get("TRITON_DISABLE_LINE_INFO", "0") == "0":
    215     passes.llvmir.add_di_scope(pm)
--> 216 pm.run(mod)
    217 # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
    218 llvm.init_targets()

IndexError: map::at

Dependencies installed in the virtual environment:

aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
astroid==2.11.7
asttokens==3.0.0
async-timeout==4.0.3
attrs==25.3.0
Authlib==1.5.2
black==25.1.0
blinker==1.9.0
Brotli==1.1.0
certifi==2025.1.31
cffi==1.17.1
cfgv==3.4.0
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
coloredlogs==15.0.1
comm==0.2.2
ConfigArgParse==1.7
coverage==7.8.0
cryptography==44.0.2
datasets==3.5.0
debugpy==1.8.14
decorator==5.2.1
Deprecated==1.2.18
dill==0.3.8
distlib==0.3.9
einops==0.8.1
evaluate==0.4.3
exceptiongroup==1.2.2
executing==2.2.0
filelock==3.18.0
Flask==3.1.0
flask-cors==5.0.1
Flask-Limiter==1.4
Flask-Login==0.6.3
flatbuffers==25.2.10
frozenlist==1.6.0
fsspec==2024.12.0
gevent==25.4.1
geventhttpclient==2.3.3
greenlet==3.2.1
grpcio==1.71.0
grpcio-health-checking==1.71.0
grpcio-tools==1.71.0
gunicorn==20.1.0
h11==0.14.0
hf-xet==1.0.3
httpcore==1.0.8
httpx==0.27.0
huggingface-hub==0.30.2
humanfriendly==10.0
identify==2.6.10
idna==3.10
iniconfig==2.1.0
ipykernel==6.29.5
ipython==8.35.0
isort==5.13.2
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.6
joblib==1.4.2
jsonformatter==0.2.3
jupyter_client==8.6.3
jupyter_core==5.7.2
lazy-object-proxy==1.11.0
limits==5.0.0
locust==2.35.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mccabe==0.7.0
mdurl==0.1.2
megablocks @ git+https://github.com/nomic-ai/megablocks.git@4806f68091896fe4288c90d6bd7969188f291bad
mpmath==1.3.0
msgpack==1.1.0
multidict==6.4.3
multiprocess==0.70.16
mypy==1.15.0
mypy_extensions==1.1.0
nest-asyncio==1.6.0
networkx==3.4.2
nodeenv==1.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvtx-cu12==12.1.105
onnx==1.17.0
onnxruntime-gpu==1.21.1
optimum==1.24.0
overrides==3.1.0
packaging==24.2
pandas==2.0.3
parso==0.8.4
pathspec==0.12.1
pexpect==4.9.0
pillow==11.2.1
platformdirs==4.3.7
pluggy==1.5.0
pre_commit==4.2.0
prompt_toolkit==3.0.51
propcache==0.3.1
protobuf==5.29.4
psutil==5.9.8
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pycparser==2.22
pycryptodome==3.22.0
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
pylint==2.13.9
pytest==8.3.5
pytest-cov==6.1.1
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
redis==3.5.3
regex==2024.11.6
requests==2.32.3
rich==13.9.4
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.2
sentence-transformers==4.1.0
setproctitle==1.3.5
six==1.17.0
sniffio==1.3.1
stack-data==0.6.3
stanford-stk==0.7.1
sympy==1.13.3
taskipy==1.14.1
threadpoolctl==3.6.0
tokenizers==0.21.1
tomli==2.2.1
torch==2.4.1
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
transformers==4.48.3
triton==3.0.0
types-requests==2.32.0.20250328
types-setuptools==79.0.0.20250422
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
validators==0.22.0
virtualenv==20.30.0
wcwidth==0.2.13
weaviate-client==4.5.2
Werkzeug==3.1.3
wrapt==1.17.2
xxhash==3.5.0
yarl==1.20.0
zope.event==5.0
zope.interface==7.2

Update:
This issue is resolved when megablocks is uninstalled.

So the problem appears to lie within the megablocks library and needs to be investigated.
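
As a quick check, here is a minimal sketch of the workaround in Python, assuming (not confirmed here) that the nomic-bert remote code falls back to a pure-PyTorch MoE path when megablocks is not importable:

# Uninstall megablocks first, e.g.:  pip uninstall -y megablocks
import importlib.util

# If megablocks is absent, the stk/Triton sparse kernels in the trace above
# cannot be reached; the model is assumed to use a fallback MoE path instead.
if importlib.util.find_spec("megablocks") is None:
    print("megablocks not installed")
else:
    print("megablocks still installed; the stk/Triton code path may be used")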

Nomic AI org

What GPU are you running this on? IIRC Megablocks only runs on certain GPUs.
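
(For reference, the GPU and its CUDA compute capability can be checked from the same environment with standard torch.cuda calls:)

import torch

# Report the CUDA device PyTorch sees and its compute capability.
# An L40S (Ada Lovelace) reports (8, 9), i.e. sm_89.
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_capability(0))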

@zpn I'm running it on an NVIDIA L40S GPU.

Nomic AI org

I think this might be due to a Triton version issue; does this work if you upgrade Triton?
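
For reference, a quick way to check the installed torch/Triton pairing before upgrading (note: triton 3.0.0 is the version pinned by torch 2.4.1, so upgrading Triton alone may conflict with the installed torch build):

import torch
import triton

# Print the installed versions; the dependency list above shows
# torch 2.4.1 and triton 3.0.0.
print("torch:", torch.__version__)
print("triton:", triton.__version__)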
