Inference with SentenceTransformer raises IndexError: map::at

#20
by AshokRaja

Code:

from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v2-moe",
    backend="torch",
    trust_remote_code=True
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)

StackTrace:

---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[4], line 13
      5 model = SentenceTransformer(
      6     "nomic-ai/nomic-embed-text-v2-moe",
      8     backend="torch",
      9     trust_remote_code=True
     10 )
     12 # Verify that everything works as expected
---> 13 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
     14 print(embeddings.shape)
     16 similarities = model.similarity(embeddings, embeddings)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:685, in SentenceTransformer.encode(self, sentences, prompt_name, prompt, batch_size, show_progress_bar, output_value, precision, convert_to_numpy, convert_to_tensor, device, normalize_embeddings, **kwargs)
    682 features.update(extra_features)
    684 with torch.no_grad():
--> 685     out_features = self.forward(features, **kwargs)
    686     if self.device.type == "hpu":
    687         out_features = copy.deepcopy(out_features)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:758, in SentenceTransformer.forward(self, input, **kwargs)
    756     module_kwarg_keys = self.module_kwargs.get(module_name, [])
    757     module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
--> 758     input = module(input, **module_kwargs)
    759 return input

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py:442, in Transformer.forward(self, features, **kwargs)
    435 """Returns token_embeddings, cls_token"""
    436 trans_features = {
    437     key: value
    438     for key, value in features.items()
    439     if key in ["input_ids", "attention_mask", "token_type_ids", "inputs_embeds"]
    440 }
--> 442 outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
    443 token_embeddings = outputs[0]
    444 features["token_embeddings"] = token_embeddings

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1910, in NomicBertModel.forward(self, input_ids, attention_mask, position_ids, token_type_ids, return_dict, matryoshka_dim, inputs_embeds)
   1907 hidden_states = self.emb_drop(hidden_states)
   1909 attention_mask = self.get_extended_attention_mask(attention_mask, hidden_states.shape[:-1])
-> 1910 sequence_output = self.encoder(hidden_states, attention_mask=attention_mask, return_dict=return_dict)
   1912 pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
   1914 if matryoshka_dim:

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1789, in NomicBertEncoder.forward(self, hidden_states, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, is_padded_inputs, rope)
   1768         hidden_states, hidden_states2, residual = torch.utils.checkpoint.checkpoint(
   1769             create_custom_forward(layer),
   1770             hidden_states,
   (...)
   1785             use_reentrant=False,
   1786         )
   1788     else:
-> 1789         hidden_states, hidden_states2, residual = layer(
   1790             hidden_states,
   1791             hidden_states2,
   1792             residual,
   1793             attention_mask,
   1794             position_ids,
   1795             None,
   1796             is_padded_inputs,
   1797             output_attentions,
   1798             use_cache,
   1799             rope=rope,
   1800         )
   1801 return hidden_states

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1718, in NomicBertBlock.forward(self, hidden_states, hidden_states2, residual, attention_mask, position_ids, past_key_value, is_padded_inputs, output_attentions, use_cache, cu_seqlens, max_seq_len, rope)
   1716 hidden_states = self.norm1((self.dropout1(attn_outputs) + hidden_states).to(dtype=self.norm1.weight.dtype))
   1717 if self.moe:
-> 1718     mlp_out = self.mlp(hidden_states, torch.where(attention_mask.squeeze() == 0, 1, 0))
   1719 else:
   1720     mlp_out = self.mlp(hidden_states)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:512, in MoE.forward(self, x, attention_mask)
    509     x_valid = x.view(-1, hidden_dim)
    511 scores, expert_weights, top_experts = self.router(x_valid)
--> 512 out = self.experts(x_valid, scores, expert_weights, top_experts)
    514 out = out.to(x.dtype)
    515 # Compute the experts.
    516 
    517 # Reconstruct the full sequence with padding

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:458, in ParallelMLP.forward(self, x, scores, expert_weights, top_experts)
    455 in_shape = x.size()
    457 # Compute the experts.
--> 458 x, tokens_per_expert = self.forward_fn(
    459     x, expert_weights, top_experts)
    460 if self.training and self.args.moe_loss_weight > 0.0:
    461     save_load_balancing_loss((tokens_per_expert, scores))

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:270, in ParallelDroplessMLP.forward_once(self, x, expert_weights, top_experts)
    268 def forward_once(self, x, expert_weights, top_experts):
    269     if self.args.mlp_impl == 'sparse':
--> 270         return self.sparse_forward_once(
    271             x, expert_weights, top_experts)
    272     else:
    273         return self.grouped_forward_once(
    274             x, expert_weights, top_experts)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:155, in ParallelDroplessMLP.sparse_forward_once(self, x, expert_weights, top_experts)
    152     topo = self.topology(x, padded_bins)
    154 # Perform the expert computation.
--> 155 x = self.mlp(x, topo)
    157 # Un-route the data for the MoE output.
    158 x = ops.padded_scatter(
    159     x,
    160     indices,
   (...)
    164     padded_bins,
    165     self.top_k)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
   1551     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1552 else:
-> 1553     return self._call_impl(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
   1557 # If we don't have any hooks, we want to skip the rest of the logic in
   1558 # this function, and just call forward.
   1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1560         or _global_backward_pre_hooks or _global_backward_hooks
   1561         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562     return forward_call(*args, **kwargs)
   1564 try:
   1565     result = None

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/mlp.py:357, in SparseMLP.forward(self, x, topo)
    353     return memory_optimized_mlp(
    354         x, w1, w2, topo, self.args.activation_fn)
    356 # Compute the MLP.
--> 357 x = stk.ops.sdd(x, w1.t(), topo)
    358 activation_fn_out = act_fn(x, self.args.activation_fn)
    359 return stk.ops.dsd(activation_fn_out, w2)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/ops/linear_ops.py:42, in sdd(a, b, topo)
     40 assert isinstance(topo, Matrix)
     41 assert topo.is_contiguous()
---> 42 out = sputnik.sdd(
     43     a, b,
     44     topo.size(),
     45     topo.data,
     46     topo.offsets,
     47     topo.row_indices,
     48     topo.column_indices,
     49     topo.offsets_t,
     50     topo.column_indices_t,
     51     topo.block_offsets_t)
     52 return Matrix(topo.size(),
     53               out,
     54               topo.row_indices,
   (...)
     58               topo.offsets_t,
     59               topo.block_offsets_t)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/autograd/function.py:574, in Function.apply(cls, *args, **kwargs)
    571 if not torch._C._are_functorch_transforms_active():
    572     # See NOTE: [functorch vjp and autograd interaction]
    573     args = _functorch.utils.unwrap_dead_wrappers(args)
--> 574     return super().apply(*args, **kwargs)  # type: ignore[misc]
    576 if not is_setup_ctx_defined:
    577     raise RuntimeError(
    578         "In order to use an autograd.Function with functorch transforms "
    579         "(vmap, grad, jvp, jacrev, ...), it must override the setup_context "
    580         "staticmethod. For more details, please see "
    581         "https://pytorch.org/docs/main/notes/extending.func.html"
    582     )

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/autocast.py:28, in custom_fwd.<locals>.decorate_fwd(*args, **kwargs)
     26         dtype = torch.get_autocast_gpu_dtype()
     27         return fwd(*_cast(args, dtype), **_cast(kwargs, dtype))
---> 28 return fwd(*args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/sputnik.py:263, in SDD.forward(ctx, lhs, rhs, shape, data, offsets, row_indices, column_indices, offsets_t, column_indices_t, block_offsets_t)
    258 ctx.shape = shape
    259 out = torch.empty(
    260     data.shape,
    261     dtype=lhs.dtype,
    262     device=lhs.device)
--> 263 backend.sdd(lhs,
    264             rhs,
    265             shape,
    266             out,
    267             offsets,
    268             row_indices,
    269             column_indices)
    270 return out

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/triton_kernels.py:372, in sdd(lhs, rhs, shape, out, offsets, row_indices, column_indices)
    369 if trans_B:
    370     stride_bk, stride_bn = rhs.stride(1), rhs.stride(0)
--> 372 _sdd_kernel[grid](
    373     lhs, rhs, out, M, N, K,
    374     stride_am, stride_ak,
    375     stride_bk, stride_bn,
    376     out.stride(1), out.stride(2),
    377     row_indices, column_indices,
    378     GROUP_M=128, ACC_TYPE=ACC_TYPE
    379     )

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:345, in KernelInterface.__getitem__.<locals>.<lambda>(*args, **kwargs)
    339 def __getitem__(self, grid) -> T:
    340     """
    341     A JIT function is launched with: fn[grid](*args, **kwargs).
    342     Hence JITFunction.__getitem__ returns a callable proxy that
    343     memorizes the grid.
    344     """
--> 345     return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/autotuner.py:171, in Autotuner.run(self, *args, **kwargs)
    169 if config.pre_hook is not None:
    170     config.pre_hook({**self.nargs, **kwargs, **config.all_kwargs()})
--> 171 ret = self.fn.run(
    172     *args,
    173     **kwargs,
    174     **config.all_kwargs(),
    175 )
    176 self.nargs = None
    177 return ret

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:662, in JITFunction.run(self, grid, warmup, *args, **kwargs)
    660     # compile the kernel
    661     src = self.ASTSource(self, signature, constants, configs[0])
--> 662     kernel = self.compile(
    663         src,
    664         target=target,
    665         options=options.__dict__,
    666     )
    667     self.cache[device][key] = kernel
    669 # Check that used global values have not changed.

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/compiler/compiler.py:282, in compile(src, target, options)
    280 use_ttgir_loc = os.environ.get("USE_TTGIR_LOC", "0") == "1"
    281 for ext, compile_ir in list(stages.items())[first_stage:]:
--> 282     next_module = compile_ir(module, metadata)
    283     ir_filename = f"{src.name}.{ext}"
    284     metadata_group[ir_filename] = fn_cache_manager.put(next_module, ir_filename)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:318, in CUDABackend.add_stages.<locals>.<lambda>(src, metadata)
    316 stages["ttir"] = lambda src, metadata: self.make_ttir(src, metadata, options)
    317 stages["ttgir"] = lambda src, metadata: self.make_ttgir(src, metadata, options, self.capability)
--> 318 stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options, self.capability)
    319 stages["ptx"] = lambda src, metadata: self.make_ptx(src, metadata, options, self.capability)
    320 stages["cubin"] = lambda src, metadata: self.make_cubin(src, metadata, options, self.capability)

File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:216, in CUDABackend.make_llir(src, metadata, options, capability)
    214 if os.environ.get("TRITON_DISABLE_LINE_INFO", "0") == "0":
    215     passes.llvmir.add_di_scope(pm)
--> 216 pm.run(mod)
    217 # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
    218 llvm.init_targets()

IndexError: map::at

Dependencies installed in the virtual environment:

aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
astroid==2.11.7
asttokens==3.0.0
async-timeout==4.0.3
attrs==25.3.0
Authlib==1.5.2
black==25.1.0
blinker==1.9.0
Brotli==1.1.0
certifi==2025.1.31
cffi==1.17.1
cfgv==3.4.0
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
coloredlogs==15.0.1
comm==0.2.2
ConfigArgParse==1.7
coverage==7.8.0
cryptography==44.0.2
datasets==3.5.0
debugpy==1.8.14
decorator==5.2.1
Deprecated==1.2.18
dill==0.3.8
distlib==0.3.9
einops==0.8.1
evaluate==0.4.3
exceptiongroup==1.2.2
executing==2.2.0
filelock==3.18.0
Flask==3.1.0
flask-cors==5.0.1
Flask-Limiter==1.4
Flask-Login==0.6.3
flatbuffers==25.2.10
frozenlist==1.6.0
fsspec==2024.12.0
gevent==25.4.1
geventhttpclient==2.3.3
greenlet==3.2.1
grpcio==1.71.0
grpcio-health-checking==1.71.0
grpcio-tools==1.71.0
gunicorn==20.1.0
h11==0.14.0
hf-xet==1.0.3
httpcore==1.0.8
httpx==0.27.0
huggingface-hub==0.30.2
humanfriendly==10.0
identify==2.6.10
idna==3.10
iniconfig==2.1.0
ipykernel==6.29.5
ipython==8.35.0
isort==5.13.2
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.6
joblib==1.4.2
jsonformatter==0.2.3
jupyter_client==8.6.3
jupyter_core==5.7.2
lazy-object-proxy==1.11.0
limits==5.0.0
locust==2.35.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mccabe==0.7.0
mdurl==0.1.2
megablocks @ git+https://github.com/nomic-ai/megablocks.git@4806f68091896fe4288c90d6bd7969188f291bad
mpmath==1.3.0
msgpack==1.1.0
multidict==6.4.3
multiprocess==0.70.16
mypy==1.15.0
mypy_extensions==1.1.0
nest-asyncio==1.6.0
networkx==3.4.2
nodeenv==1.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvtx-cu12==12.1.105
onnx==1.17.0
onnxruntime-gpu==1.21.1
optimum==1.24.0
overrides==3.1.0
packaging==24.2
pandas==2.0.3
parso==0.8.4
pathspec==0.12.1
pexpect==4.9.0
pillow==11.2.1
platformdirs==4.3.7
pluggy==1.5.0
pre_commit==4.2.0
prompt_toolkit==3.0.51
propcache==0.3.1
protobuf==5.29.4
psutil==5.9.8
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pycparser==2.22
pycryptodome==3.22.0
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
pylint==2.13.9
pytest==8.3.5
pytest-cov==6.1.1
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
redis==3.5.3
regex==2024.11.6
requests==2.32.3
rich==13.9.4
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.2
sentence-transformers==4.1.0
setproctitle==1.3.5
six==1.17.0
sniffio==1.3.1
stack-data==0.6.3
stanford-stk==0.7.1
sympy==1.13.3
taskipy==1.14.1
threadpoolctl==3.6.0
tokenizers==0.21.1
tomli==2.2.1
torch==2.4.1
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
transformers==4.48.3
triton==3.0.0
types-requests==2.32.0.20250328
types-setuptools==79.0.0.20250422
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
validators==0.22.0
virtualenv==20.30.0
wcwidth==0.2.13
weaviate-client==4.5.2
Werkzeug==3.1.3
wrapt==1.17.2
xxhash==3.5.0
yarl==1.20.0
zope.event==5.0
zope.interface==7.2

Update:
This issue is resolved when megablocks is uninstalled.

So the problem appears to lie within the megablocks library and needs to be investigated.
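
As a quick check, here is a minimal sketch of the workaround in Python, assuming (not confirmed here) that the nomic-bert remote code falls back to a pure-PyTorch MoE path when megablocks is not importable:

# Uninstall megablocks first, e.g.:  pip uninstall -y megablocks
import importlib.util

# If megablocks is absent, the stk/Triton sparse kernels in the trace above
# cannot be reached; the model is assumed to use a fallback MoE path instead.
if importlib.util.find_spec("megablocks") is None:
    print("megablocks not installed")
else:
    print("megablocks still installed; the stk/Triton code path may be used")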

Nomic AI org

What GPU are you running this on? IIRC Megablocks only runs on certain GPUs.
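
(For reference, the GPU and its CUDA compute capability can be checked from the same environment with standard torch.cuda calls:)

import torch

# Report the CUDA device PyTorch sees and its compute capability.
# An L40S (Ada Lovelace) reports (8, 9), i.e. sm_89.
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_capability(0))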

@zpn I'm running it on an NVIDIA L40S GPU.

Nomic AI org

I think this might be due to a Triton version issue; does this work if you upgrade Triton?
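
For reference, a quick way to check the installed torch/Triton pairing before upgrading (note: triton 3.0.0 is the version pinned by torch 2.4.1, so upgrading Triton alone may conflict with the installed torch build):

import torch
import triton

# Print the installed versions; the dependency list above shows
# torch 2.4.1 and triton 3.0.0.
print("torch:", torch.__version__)
print("triton:", triton.__version__)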
