Inference with SentenceTransformer leads to "IndexError: map::at"
Code:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v2-moe",
    backend="torch",
    trust_remote_code=True,
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
Stack trace:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[4], line 13
5 model = SentenceTransformer(
6 "nomic-ai/nomic-embed-text-v2-moe",
8 backend="torch",
9 trust_remote_code=True
10 )
12 # Verify that everything works as expected
---> 13 embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
14 print(embeddings.shape)
16 similarities = model.similarity(embeddings, embeddings)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:685, in SentenceTransformer.encode(self, sentences, prompt_name, prompt, batch_size, show_progress_bar, output_value, precision, convert_to_numpy, convert_to_tensor, device, normalize_embeddings, **kwargs)
682 features.update(extra_features)
684 with torch.no_grad():
--> 685 out_features = self.forward(features, **kwargs)
686 if self.device.type == "hpu":
687 out_features = copy.deepcopy(out_features)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/SentenceTransformer.py:758, in SentenceTransformer.forward(self, input, **kwargs)
756 module_kwarg_keys = self.module_kwargs.get(module_name, [])
757 module_kwargs = {key: value for key, value in kwargs.items() if key in module_kwarg_keys}
--> 758 input = module(input, **module_kwargs)
759 return input
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/sentence_transformers/models/Transformer.py:442, in Transformer.forward(self, features, **kwargs)
435 """Returns token_embeddings, cls_token"""
436 trans_features = {
437 key: value
438 for key, value in features.items()
439 if key in ["input_ids", "attention_mask", "token_type_ids", "inputs_embeds"]
440 }
--> 442 outputs = self.auto_model(**trans_features, **kwargs, return_dict=True)
443 token_embeddings = outputs[0]
444 features["token_embeddings"] = token_embeddings
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1910, in NomicBertModel.forward(self, input_ids, attention_mask, position_ids, token_type_ids, return_dict, matryoshka_dim, inputs_embeds)
1907 hidden_states = self.emb_drop(hidden_states)
1909 attention_mask = self.get_extended_attention_mask(attention_mask, hidden_states.shape[:-1])
-> 1910 sequence_output = self.encoder(hidden_states, attention_mask=attention_mask, return_dict=return_dict)
1912 pooled_output = self.pooler(sequence_output) if self.pooler is not None else None
1914 if matryoshka_dim:
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1789, in NomicBertEncoder.forward(self, hidden_states, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, is_padded_inputs, rope)
1768 hidden_states, hidden_states2, residual = torch.utils.checkpoint.checkpoint(
1769 create_custom_forward(layer),
1770 hidden_states,
(...)
1785 use_reentrant=False,
1786 )
1788 else:
-> 1789 hidden_states, hidden_states2, residual = layer(
1790 hidden_states,
1791 hidden_states2,
1792 residual,
1793 attention_mask,
1794 position_ids,
1795 None,
1796 is_padded_inputs,
1797 output_attentions,
1798 use_cache,
1799 rope=rope,
1800 )
1801 return hidden_states
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/.cache/huggingface/modules/transformers_modules/nomic-ai/nomic-bert-2048/e5042dce39060cc34bc223455f25cf1d26db4655/modeling_hf_nomic_bert.py:1718, in NomicBertBlock.forward(self, hidden_states, hidden_states2, residual, attention_mask, position_ids, past_key_value, is_padded_inputs, output_attentions, use_cache, cu_seqlens, max_seq_len, rope)
1716 hidden_states = self.norm1((self.dropout1(attn_outputs) + hidden_states).to(dtype=self.norm1.weight.dtype))
1717 if self.moe:
-> 1718 mlp_out = self.mlp(hidden_states, torch.where(attention_mask.squeeze() == 0, 1, 0))
1719 else:
1720 mlp_out = self.mlp(hidden_states)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:512, in MoE.forward(self, x, attention_mask)
509 x_valid = x.view(-1, hidden_dim)
511 scores, expert_weights, top_experts = self.router(x_valid)
--> 512 out = self.experts(x_valid, scores, expert_weights, top_experts)
514 out = out.to(x.dtype)
515 # Compute the experts.
516
517 # Reconstruct the full sequence with padding
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/moe.py:458, in ParallelMLP.forward(self, x, scores, expert_weights, top_experts)
455 in_shape = x.size()
457 # Compute the experts.
--> 458 x, tokens_per_expert = self.forward_fn(
459 x, expert_weights, top_experts)
460 if self.training and self.args.moe_loss_weight > 0.0:
461 save_load_balancing_loss((tokens_per_expert, scores))
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:270, in ParallelDroplessMLP.forward_once(self, x, expert_weights, top_experts)
268 def forward_once(self, x, expert_weights, top_experts):
269 if self.args.mlp_impl == 'sparse':
--> 270 return self.sparse_forward_once(
271 x, expert_weights, top_experts)
272 else:
273 return self.grouped_forward_once(
274 x, expert_weights, top_experts)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/dmoe.py:155, in ParallelDroplessMLP.sparse_forward_once(self, x, expert_weights, top_experts)
152 topo = self.topology(x, padded_bins)
154 # Perform the expert computation.
--> 155 x = self.mlp(x, topo)
157 # Un-route the data for the MoE output.
158 x = ops.padded_scatter(
159 x,
160 indices,
(...)
164 padded_bins,
165 self.top_k)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/megablocks/layers/mlp.py:357, in SparseMLP.forward(self, x, topo)
353 return memory_optimized_mlp(
354 x, w1, w2, topo, self.args.activation_fn)
356 # Compute the MLP.
--> 357 x = stk.ops.sdd(x, w1.t(), topo)
358 activation_fn_out = act_fn(x, self.args.activation_fn)
359 return stk.ops.dsd(activation_fn_out, w2)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/ops/linear_ops.py:42, in sdd(a, b, topo)
40 assert isinstance(topo, Matrix)
41 assert topo.is_contiguous()
---> 42 out = sputnik.sdd(
43 a, b,
44 topo.size(),
45 topo.data,
46 topo.offsets,
47 topo.row_indices,
48 topo.column_indices,
49 topo.offsets_t,
50 topo.column_indices_t,
51 topo.block_offsets_t)
52 return Matrix(topo.size(),
53 out,
54 topo.row_indices,
(...)
58 topo.offsets_t,
59 topo.block_offsets_t)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/torch/autograd/function.py:574, in Function.apply(cls, *args, **kwargs)
571 if not torch._C._are_functorch_transforms_active():
572 # See NOTE: [functorch vjp and autograd interaction]
573 args = _functorch.utils.unwrap_dead_wrappers(args)
--> 574 return super().apply(*args, **kwargs) # type: ignore[misc]
576 if not is_setup_ctx_defined:
577 raise RuntimeError(
578 "In order to use an autograd.Function with functorch transforms "
579 "(vmap, grad, jvp, jacrev, ...), it must override the setup_context "
580 "staticmethod. For more details, please see "
581 "https://pytorch.org/docs/main/notes/extending.func.html"
582 )
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/autocast.py:28, in custom_fwd.<locals>.decorate_fwd(*args, **kwargs)
26 dtype = torch.get_autocast_gpu_dtype()
27 return fwd(*_cast(args, dtype), **_cast(kwargs, dtype))
---> 28 return fwd(*args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/sputnik.py:263, in SDD.forward(ctx, lhs, rhs, shape, data, offsets, row_indices, column_indices, offsets_t, column_indices_t, block_offsets_t)
258 ctx.shape = shape
259 out = torch.empty(
260 data.shape,
261 dtype=lhs.dtype,
262 device=lhs.device)
--> 263 backend.sdd(lhs,
264 rhs,
265 shape,
266 out,
267 offsets,
268 row_indices,
269 column_indices)
270 return out
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/stk/backend/triton_kernels.py:372, in sdd(lhs, rhs, shape, out, offsets, row_indices, column_indices)
369 if trans_B:
370 stride_bk, stride_bn = rhs.stride(1), rhs.stride(0)
--> 372 _sdd_kernel[grid](
373 lhs, rhs, out, M, N, K,
374 stride_am, stride_ak,
375 stride_bk, stride_bn,
376 out.stride(1), out.stride(2),
377 row_indices, column_indices,
378 GROUP_M=128, ACC_TYPE=ACC_TYPE
379 )
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:345, in KernelInterface.__getitem__.<locals>.<lambda>(*args, **kwargs)
339 def __getitem__(self, grid) -> T:
340 """
341 A JIT function is launched with: fn[grid](*args, **kwargs).
342 Hence JITFunction.__getitem__ returns a callable proxy that
343 memorizes the grid.
344 """
--> 345 return lambda *args, **kwargs: self.run(grid=grid, warmup=False, *args, **kwargs)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/autotuner.py:171, in Autotuner.run(self, *args, **kwargs)
169 if config.pre_hook is not None:
170 config.pre_hook({**self.nargs, **kwargs, **config.all_kwargs()})
--> 171 ret = self.fn.run(
172 *args,
173 **kwargs,
174 **config.all_kwargs(),
175 )
176 self.nargs = None
177 return ret
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/runtime/jit.py:662, in JITFunction.run(self, grid, warmup, *args, **kwargs)
660 # compile the kernel
661 src = self.ASTSource(self, signature, constants, configs[0])
--> 662 kernel = self.compile(
663 src,
664 target=target,
665 options=options.__dict__,
666 )
667 self.cache[device][key] = kernel
669 # Check that used global values have not changed.
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/compiler/compiler.py:282, in compile(src, target, options)
280 use_ttgir_loc = os.environ.get("USE_TTGIR_LOC", "0") == "1"
281 for ext, compile_ir in list(stages.items())[first_stage:]:
--> 282 next_module = compile_ir(module, metadata)
283 ir_filename = f"{src.name}.{ext}"
284 metadata_group[ir_filename] = fn_cache_manager.put(next_module, ir_filename)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:318, in CUDABackend.add_stages.<locals>.<lambda>(src, metadata)
316 stages["ttir"] = lambda src, metadata: self.make_ttir(src, metadata, options)
317 stages["ttgir"] = lambda src, metadata: self.make_ttgir(src, metadata, options, self.capability)
--> 318 stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options, self.capability)
319 stages["ptx"] = lambda src, metadata: self.make_ptx(src, metadata, options, self.capability)
320 stages["cubin"] = lambda src, metadata: self.make_cubin(src, metadata, options, self.capability)
File ~/miniconda3/envs/virtual_env/lib/python3.10/site-packages/triton/backends/nvidia/compiler.py:216, in CUDABackend.make_llir(src, metadata, options, capability)
214 if os.environ.get("TRITON_DISABLE_LINE_INFO", "0") == "0":
215 passes.llvmir.add_di_scope(pm)
--> 216 pm.run(mod)
217 # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
218 llvm.init_targets()
IndexError: map::at
Dependencies installed in the virtual env:
aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
astroid==2.11.7
asttokens==3.0.0
async-timeout==4.0.3
attrs==25.3.0
Authlib==1.5.2
black==25.1.0
blinker==1.9.0
Brotli==1.1.0
certifi==2025.1.31
cffi==1.17.1
cfgv==3.4.0
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6
coloredlogs==15.0.1
comm==0.2.2
ConfigArgParse==1.7
coverage==7.8.0
cryptography==44.0.2
datasets==3.5.0
debugpy==1.8.14
decorator==5.2.1
Deprecated==1.2.18
dill==0.3.8
distlib==0.3.9
einops==0.8.1
evaluate==0.4.3
exceptiongroup==1.2.2
executing==2.2.0
filelock==3.18.0
Flask==3.1.0
flask-cors==5.0.1
Flask-Limiter==1.4
Flask-Login==0.6.3
flatbuffers==25.2.10
frozenlist==1.6.0
fsspec==2024.12.0
gevent==25.4.1
geventhttpclient==2.3.3
greenlet==3.2.1
grpcio==1.71.0
grpcio-health-checking==1.71.0
grpcio-tools==1.71.0
gunicorn==20.1.0
h11==0.14.0
hf-xet==1.0.3
httpcore==1.0.8
httpx==0.27.0
huggingface-hub==0.30.2
humanfriendly==10.0
identify==2.6.10
idna==3.10
iniconfig==2.1.0
ipykernel==6.29.5
ipython==8.35.0
isort==5.13.2
itsdangerous==2.2.0
jedi==0.19.2
Jinja2==3.1.6
joblib==1.4.2
jsonformatter==0.2.3
jupyter_client==8.6.3
jupyter_core==5.7.2
lazy-object-proxy==1.11.0
limits==5.0.0
locust==2.35.0
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib-inline==0.1.7
mccabe==0.7.0
mdurl==0.1.2
megablocks @ git+https://github.com/nomic-ai/megablocks.git@4806f68091896fe4288c90d6bd7969188f291bad
mpmath==1.3.0
msgpack==1.1.0
multidict==6.4.3
multiprocess==0.70.16
mypy==1.15.0
mypy_extensions==1.1.0
nest-asyncio==1.6.0
networkx==3.4.2
nodeenv==1.9.1
numpy==1.26.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.20.5
nvidia-nvjitlink-cu12==12.8.93
nvidia-nvtx-cu12==12.1.105
onnx==1.17.0
onnxruntime-gpu==1.21.1
optimum==1.24.0
overrides==3.1.0
packaging==24.2
pandas==2.0.3
parso==0.8.4
pathspec==0.12.1
pexpect==4.9.0
pillow==11.2.1
platformdirs==4.3.7
pluggy==1.5.0
pre_commit==4.2.0
prompt_toolkit==3.0.51
propcache==0.3.1
protobuf==5.29.4
psutil==5.9.8
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==19.0.1
pycparser==2.22
pycryptodome==3.22.0
pydantic==2.11.3
pydantic_core==2.33.1
Pygments==2.19.1
pylint==2.13.9
pytest==8.3.5
pytest-cov==6.1.1
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
pyzmq==26.4.0
redis==3.5.3
regex==2024.11.6
requests==2.32.3
rich==13.9.4
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.2
sentence-transformers==4.1.0
setproctitle==1.3.5
six==1.17.0
sniffio==1.3.1
stack-data==0.6.3
stanford-stk==0.7.1
sympy==1.13.3
taskipy==1.14.1
threadpoolctl==3.6.0
tokenizers==0.21.1
tomli==2.2.1
torch==2.4.1
tornado==6.4.2
tqdm==4.67.1
traitlets==5.14.3
transformers==4.48.3
triton==3.0.0
types-requests==2.32.0.20250328
types-setuptools==79.0.0.20250422
typing-inspection==0.4.0
typing_extensions==4.13.2
tzdata==2025.2
urllib3==2.4.0
validators==0.22.0
virtualenv==20.30.0
wcwidth==0.2.13
weaviate-client==4.5.2
Werkzeug==3.1.3
wrapt==1.17.2
xxhash==3.5.0
yarl==1.20.0
zope.event==5.0
zope.interface==7.2
Update:
The issue goes away when megablocks is uninstalled.
So the problem appears to be in the megablocks library and needs to be investigated there.
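For anyone hitting the same error, a minimal sketch of the workaround: the pip command is standard, and the fact that the model still loads and encodes without megablocks is taken from the update above, not verified independently here.

# Workaround per the update above: remove megablocks, restart the Python process, re-run the snippet.
#   pip uninstall -y megablocks
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "nomic-ai/nomic-embed-text-v2-moe",
    backend="torch",
    trust_remote_code=True,
)
embeddings = model.encode(["The weather is lovely today."])
print(embeddings.shape)  # succeeds once megablocks is removed (per the update above)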
What GPU are you running this on? IIRC, Megablocks only runs on certain GPUs.
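In case it helps narrow this down, something like the following (plain torch/triton introspection calls, nothing model-specific) prints the GPU and version info being asked about:

import torch
import triton

# Report the versions of the compile stack involved in the failing kernel launch.
print("torch:", torch.__version__, "| CUDA:", torch.version.cuda, "| triton:", triton.__version__)

# Report the GPU and its compute capability (the thing Megablocks support usually depends on).
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("compute capability:", torch.cuda.get_device_capability(0))
else:
    print("No CUDA device visible")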
I think this might be due to a Triton version issue; does this work if you upgrade Triton?
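One hedged way to test that suggestion (not run in this environment; note that torch wheels pin a matching triton version, so bumping triton alone can create a mismatch, and upgrading torch, which pulls its own triton, may be the safer route):

# Hypothetical experiment, not verified here:
#   pip install --upgrade torch triton
import triton

print(triton.__version__)  # the environment list above shows triton==3.0.0
# then re-run the encode() snippet from the top of the issue with megablocks installed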