Hey, thank you for the repo!
I'm trying to fine-tune this model but I get the following error during a forward pass:
```
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-30-7c37553b6273> in <cell line: 3>()
      2 model.to(device)
      3 for b in train_loader_mini:
----> 4     model(b['input_ids'].to(device), b['attention_mask'].to(device))

29 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _wrapped_call_impl(self, *args, **kwargs)
   1516             return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1517         else:
-> 1518             return self._call_impl(*args, **kwargs)
   1519
   1520     def _call_impl(self, *args, **kwargs):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1525                 or _global_backward_pre_hooks or _global_backward_hooks
   1526                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1527             return forward_call(*args, **kwargs)
   1528
   1529         try:

(the same two torch.nn.Module dispatch frames repeat between each of the calls below and are omitted)

<ipython-input-28-b127d93a6869> in forward(self, input_ids, attention_mask)
     13     def forward(self, input_ids, attention_mask):
     14         # Get the BERT embeddings
---> 15         outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
     16         pooled_output = outputs.hidden_states[-1][:,0,:]  # [CLS] embedding
     17

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict)
    860         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
    861
--> 862         outputs = self.bert(
    863             input_ids,
    864             attention_mask=attention_mask,

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, input_ids, token_type_ids, attention_mask, position_ids, output_all_encoded_layers, masked_tokens_mask, **kwargs)
    606             subset_mask = masked_tokens_mask | first_col_mask
    607
--> 608         encoder_outputs = self.encoder(
    609             embedding_output,
    610             attention_mask,

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, hidden_states, attention_mask, output_all_encoded_layers, subset_mask)
    444         if subset_mask is None:
    445             for layer_module in self.layer:
--> 446                 hidden_states = layer_module(hidden_states,
    447                                              cu_seqlens,
    448                                              seqlen,

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, hidden_states, cu_seqlens, seqlen, subset_idx, indices, attn_mask, bias)
    325             bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
    326         """
--> 327         attention_output = self.attention(hidden_states, cu_seqlens, seqlen,
    328                                           subset_idx, indices, attn_mask, bias)
    329         layer_output = self.mlp(attention_output)

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, input_tensor, cu_seqlens, max_s, subset_idx, indices, attn_mask, bias)
    238             bias: None or (batch, heads, max_seqlen_in_batch, max_seqlen_in_batch)
    239         """
--> 240         self_output = self.self(input_tensor, cu_seqlens, max_s, indices,
    241                                 attn_mask, bias)
    242         if subset_idx is not None:

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/bert_layers.py in forward(self, hidden_states, cu_seqlens, max_seqlen_in_batch, indices, attn_mask, bias)
    179                 bias_dtype = bias.dtype
    180                 bias = bias.to(torch.float16)
--> 181                 attention = flash_attn_qkvpacked_func(qkv, bias)
    182                 attention = attention.to(orig_dtype)
    183                 bias = bias.to(bias_dtype)

/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py in apply(cls, *args, **kwargs)
    537         # See NOTE: [functorch vjp and autograd interaction]
    538         args = _functorch.utils.unwrap_dead_wrappers(args)
--> 539         return super().apply(*args, **kwargs)  # type: ignore[misc]
    540
    541     if cls.setup_context == _SingleLevelFunction.setup_context:

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/flash_attn_triton.py in forward(ctx, qkv, bias, causal, softmax_scale)
   1019         if qkv.stride(-1) != 1:
   1020             qkv = qkv.contiguous()
-> 1021         o, lse, ctx.softmax_scale = _flash_attn_forward(
   1022             qkv[:, :, 0],
   1023             qkv[:, :, 1],

~/.cache/huggingface/modules/transformers_modules/zhihan1996/DNABERT-2-117M/1d020b803b871a976f5f3d5565f0eac8f2c7bb81/flash_attn_triton.py in _flash_attn_forward(q, k, v, bias, causal, softmax_scale)
    824     # num_warps = 4 if d <= 64 else 8
    825     grid = lambda META: (triton.cdiv(seqlen_q, META['BLOCK_M']), batch * nheads)
--> 826     _fwd_kernel[grid](  # type: ignore
    827         q,
    828         k,

/usr/local/lib/python3.10/dist-packages/triton/runtime/autotuner.py in run(self, *args, **kwargs)
    112             full_nargs = {**self.nargs, **kwargs, **self.best_config.kwargs}
    113             config.pre_hook(full_nargs)
--> 114         ret = self.fn.run(*args, num_warps=config.num_warps, num_stages=config.num_stages, **kwargs, **config.kwargs)
    115         self.nargs = None
    116         return ret

/usr/local/lib/python3.10/dist-packages/triton/runtime/autotuner.py in run(self, *args, **kwargs)
    230         for v, heur in self.values.items():
    231             kwargs[v] = heur({**dict(zip(self.arg_names, args)), **kwargs})
--> 232         return self.fn.run(*args, **kwargs)
    233
    234

<string> in _fwd_kernel(Q, K, V, Bias, Out, Lse, TMP, softmax_scale, stride_qb, stride_qh, stride_qm, stride_kb, stride_kh, stride_kn, stride_vb, stride_vh, stride_vn, stride_bb, stride_bh, stride_bm, stride_ob, stride_oh, stride_om, nheads, seqlen_q, seqlen_k, seqlen_q_rounded, headdim, CACHE_KEY_SEQLEN_Q, CACHE_KEY_SEQLEN_K, BIAS_TYPE, IS_CAUSAL, BLOCK_HEADDIM, EVEN_M, EVEN_N, EVEN_HEADDIM, BLOCK_M, BLOCK_N, grid, num_warps, num_stages, extern_libs, stream, warmup, device, device_type)

/usr/local/lib/python3.10/dist-packages/triton/compiler/compiler.py in compile(fn, **kwargs)
    423     # cache manager
    424     if is_cuda or is_hip:
--> 425         so_path = make_stub(name, signature, constants)
    426     else:
    427         so_path = _device_backend.make_launcher_stub(name, signature, constants)

/usr/local/lib/python3.10/dist-packages/triton/compiler/make_launcher.py in make_stub(name, signature, constants)
     37     with open(src_path, "w") as f:
     38         f.write(src)
---> 39     so = _build(name, src_path, tmpdir)
     40     with open(so, "rb") as f:
     41         return so_cache_manager.put(f.read(), so_name, binary=True)

/usr/local/lib/python3.10/dist-packages/triton/common/build.py in _build(name, src, srcdir)
     59         hip_include_dir = os.path.join(rocm_path_dir(), "include")
     60     else:
---> 61         cuda_lib_dirs = libcuda_dirs()
     62         cu_include_dir = cuda_include_dir()
     63         suffix = sysconfig.get_config_var('EXT_SUFFIX')

/usr/local/lib/python3.10/dist-packages/triton/common/build.py in libcuda_dirs()
     28         msg += 'Possible files are located at %s.' % str(locs)
     29         msg += 'Please create a symlink of libcuda.so to any of the file.'
---> 30     assert any(os.path.exists(os.path.join(path, 'libcuda.so')) for path in dirs), msg
     31     return dirs
     32

AssertionError: libcuda.so cannot found!
```
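For context, here is a minimal sketch of the setup that produces the call pattern above. Only `train_loader_mini`, the custom `forward`, and the `model(...)` call appear in the traceback; the wrapper class name, the classifier head, and the dummy batch are assumptions added so the snippet is self-contained:

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Reconstructed wrapper; the class name and classifier head are assumptions.
# The self.bert(...) call and the [CLS] pooling match the traceback frames.
class DNABertClassifier(torch.nn.Module):
    def __init__(self, num_labels=2):
        super().__init__()
        self.bert = AutoModel.from_pretrained(
            "zhihan1996/DNABERT-2-117M", trust_remote_code=True
        )
        self.classifier = torch.nn.Linear(768, num_labels)  # 768 = assumed hidden size

    def forward(self, input_ids, attention_mask):
        # Get the BERT embeddings (kept as in the traceback; depending on the
        # config, hidden_states may require output_hidden_states=True)
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.hidden_states[-1][:, 0, :]  # [CLS] embedding
        return self.classifier(pooled_output)

# Tiny stand-in batch so the snippet runs on its own (hypothetical sequences).
tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
batch = tokenizer(["ACGTACGTACGT", "TGCATGCATGCA"], return_tensors="pt", padding=True)
train_loader_mini = [{"input_ids": batch["input_ids"], "attention_mask": batch["attention_mask"]}]

device = "cuda" if torch.cuda.is_available() else "cpu"
model = DNABertClassifier()
model.to(device)
for b in train_loader_mini:
    model(b["input_ids"].to(device), b["attention_mask"].to(device))  # AssertionError raised here
```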
Thank you for any help!
Do you have CUDA installed in your environment? This does not seem to be a problem with this repo.
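For anyone who lands here: the assertion is raised by Triton when it cannot find `libcuda.so` while compiling the flash-attention kernel, which usually means no NVIDIA driver is visible to the process (for example, a CPU-only runtime). A minimal diagnostic sketch, using only standard `torch` and `ctypes` calls, to confirm whether that is the case:

```python
import ctypes.util
import torch

# Environment check only; not part of DNABERT-2 itself.
print("torch.cuda.is_available():", torch.cuda.is_available())   # False => no usable GPU/driver
print("torch.version.cuda:", torch.version.cuda)                  # CUDA version PyTorch was built with
print("loader sees libcuda:", ctypes.util.find_library("cuda"))   # None => driver library not on the search path
```

If a GPU is present but the loader still cannot see `libcuda.so`, the assertion message itself lists candidate locations and suggests creating a `libcuda.so` symlink to one of them.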