If my input is a seq rather than a tensor in my model, how should I use IG?
My model:
class Longformer_Net(nn.Module):
    def __init__(self, model_name, device, add_special_tokens=False, bidirectional=False, num_layers=1):
        super().__init__()
        self.device = device
        self.add_special_tokens = add_special_tokens
        self.model_name = model_name
        # model_name = 'pre-model/' + 'longformer-base-4096'
        self.config = LongformerConfig.from_pretrained(self.model_name)
        self.tokenizer = LongformerTokenizer.from_pretrained(self.model_name)
        self.longformer = LongformerModel.from_pretrained(self.model_name, config=self.config)  # (B,S,256)
        ... ... ... ...
        self.dense1 = nn.Linear(in_features=96, out_features=64)  # first 512
        self.dense1_dp = nn.Dropout(0.5)
        self.dense2 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):  # look here!!! the format of x is "AC CT TG GG AC ...."
        encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=self.add_special_tokens,
                                        padding=True)
        encoded_inputs.to(self.device)
        x = self.longformer(**encoded_inputs)[0]
        # print("self.electra", x.shape)  # (B,S,H)
        x = x.permute(0, 2, 1)
        ... ...
        x = x.reshape([x.shape[0], -1])
        x = self.dense1(x)
        # print("dense1(x)", x.shape)
        x = F.relu(x)
        # x = self.dense1_dp(x)
        x = self.dense2(x)
        x = torch.sigmoid(x)
        x = x.view(-1)
        return x
How I use IG:
# Create the DNA sequence recognition model instance
# Load the pretrained model weights
model = Longformer_Net(config.model_name, config.device,
                       config.add_special_tokens,
                       bidirectional=config.bidirectional,
                       num_layers=config.num_layers)
model.load_state_dict(torch.load("../model/model_137.pkl"))

# Prepare the input DNA sequences
sequence = ["agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper()]
# encoded_sequence = encode_sequence(sequence)
kmer = Kmer(k=[config.kmer], stride=1, return_type='seq')
print(f"PBC[{config.pc_name}] --- kmer = {config.kmer}")
train_kmer_seq = kmer.run_fea(sequence)  # to get the inputs, like "AC CT CG TA ..."
tokens = model.tokenizer(train_kmer_seq, return_tensors='pt', add_special_tokens=False,
                         padding=True)
# Extract the input tensors
input_ids = tokens['input_ids'].to(model.device)
encoded_sequence = model.longformer(**tokens)[0]

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)
# Compute attributions for the input DNA sequences
attributions = ig.attribute(input_ids, target=0)
# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")
HELP
The first parameter of ig.attribute needs to be a tensor, but my model's input is a sequence of strings, like "AC CT CG TA ...".
What should I do?
The newest error is:
ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
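As far as I understand, IntegratedGradients perturbs its inputs, so the tokenization has to happen outside forward and the model has to accept tensors directly. A rough sketch of the idea (hypothetical wrapper name, head layers omitted, not my real code):

import torch.nn as nn
from captum.attr import IntegratedGradients

class TensorOnlyWrapper(nn.Module):
    """Hypothetical wrapper: tokenize outside, forward only sees tensors."""
    def __init__(self, base_model):
        super().__init__()
        self.base = base_model

    def forward(self, input_ids, attention_mask):
        x = self.base.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]  # (B, S, H)
        # ... the rest of base_model's head (CNN / dense / sigmoid) would go here ...
        return x

# tokens = model.tokenizer(train_kmer_seq, return_tensors='pt', add_special_tokens=False, padding=True)
# ig = IntegratedGradients(TensorOnlyWrapper(model))
# attributions = ig.attribute(tokens['input_ids'],
#                             additional_forward_args=(tokens['attention_mask'],))

(I am not sure yet whether IG can interpolate integer input_ids directly; see below.)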
A detailed demo
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients
from transformers import LongformerModel, LongformerTokenizer, LongformerConfig

# Define the DNA sequence recognition model
class DNARecognitionModel(nn.Module):
    def __init__(self, model_name):
        super(DNARecognitionModel, self).__init__()
        self.config = LongformerConfig.from_pretrained(model_name)
        self.tokenizer = LongformerTokenizer.from_pretrained(model_name)
        self.longformer = LongformerModel.from_pretrained(model_name, config=self.config)
        # define the rest of the model structure here

    def forward(self, x):
        encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=False, padding=True, truncation=True)
        input_ids = encoded_inputs['input_ids'].to(self.device)  # convert to torch.Tensor and move to the device
        attention_mask = encoded_inputs['attention_mask'].to(self.device)  # convert to torch.Tensor and move to the device
        # forward-pass logic goes here
        x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
        # rest of the model ...
        return x

# Create the DNA sequence recognition model instance
model = DNARecognitionModel(model_name='pre-model/longformer-base-4096')
# Load the pretrained model weights
model.load_state_dict(torch.load('model_weights.pt'))

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)

# Prepare the input DNA sequence
sequence = "ACGTGACTAGCTGATCG"
# Convert the input sequence to a tensor
inputs = torch.tensor([sequence])

# Compute attributions for the input DNA sequence
attributions = ig.attribute(inputs, target=0)

# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")
A new error
I have spent many days trying to solve this issue, but nothing works... who can help me... I'm close to despair.
from captum.attr import remove_interpretable_embedding_layer
import numpy as np
from feature_extraction import Kmer

# Create the DNA sequence recognition model instance
# Load the pretrained model weights
model = Longformer_base_lstm_Net(config.model_name, config.device,
                                 config.add_special_tokens,
                                 bidirectional=config.bidirectional,
                                 num_layers=config.num_layers)
model.load_state_dict(torch.load("../model/model_137.pkl"))

# Prepare the input DNA sequences
sequence = ["agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper()]
# encoded_sequence = encode_sequence(sequence)
kmer = Kmer(k=[config.kmer], stride=1, return_type='seq')
print(f"PBC[{config.pc_name}] --- kmer = {config.kmer}")
train_kmer_seq = kmer.run_fea(sequence)
tokens = model.tokenizer(train_kmer_seq, return_tensors='pt', add_special_tokens=False,
                         padding=True)
encoded_sequence = model.longformer(**tokens)[0]

from captum.attr import configure_interpretable_embedding_layer
interpretable_emb = configure_interpretable_embedding_layer(model, "longformer")
input_emb = interpretable_emb.indices_to_embeddings(**tokens)[0]

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)
# Compute attributions for the input DNA sequences
attributions = ig.attribute(input_emb)
remove_interpretable_embedding_layer(model, interpretable_emb)
# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")
Error:
ValueError Traceback (most recent call last)
Input In [5], in <cell line: 41>()
38 ig = IntegratedGradients(model)
40 # Compute attributions for the input DNA sequences
---> 41 attributions = ig.attribute(input_emb)
42 remove_interpretable_embedding_layer(model, interpretable_emb)
43 # Print the importance score of each position
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
40 @wraps(func)
41 def wrapper(*args, **kwargs):
---> 42 return func(*args, **kwargs)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
274 attributions = _batch_attribution(
275 self,
276 num_examples,
(...)
283 method=method,
284 )
285 else:
--> 286 attributions = self._attribute(
287 inputs=inputs,
288 baselines=baselines,
289 target=target,
290 additional_forward_args=additional_forward_args,
291 n_steps=n_steps,
292 method=method,
293 )
295 if return_convergence_delta:
296 start_point, end_point = baselines, inputs
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
348 expanded_target = _expand_target(target, n_steps)
350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
352 forward_fn=self.forward_func,
353 inputs=scaled_features_tpl,
354 target_ind=expanded_target,
355 additional_forward_args=input_additional_args,
356 )
358 # flattening grads so that we can multilpy it with step-size
359 # calling contiguous to avoid `memory whole` problems
360 scaled_grads = [
361 grad.contiguous().view(n_steps, -1)
362 * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
363 for grad in grads
364 ]
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
94 r"""
95 Computes gradients of the output with respect to inputs for an
96 arbitrary forward function.
(...)
108 arguments) if no additional arguments are required
109 """
110 with torch.autograd.set_grad_enabled(True):
111 # runs forward pass
--> 112 outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
113 assert outputs[0].numel() == 1, (
114 "Target not provided when necessary, cannot"
115 " take gradient with respect to multiple outputs."
116 )
117 # torch.unbind(forward_out) is a list of scalar tensor tuples and
118 # contains batch_size * #steps elements
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
479 inputs = _format_inputs(inputs)
480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
483 *(*inputs, *additional_forward_args)
484 if additional_forward_args is not None
485 else inputs
486 )
487 return _select_targets(output, target)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~/Pycharm Projects/myNewPro/src/module/longformer_model.py:152, in Longformer_base_lstm_Net.forward(self, x)
151 def forward(self, x):
--> 152 encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=self.add_special_tokens,
153 padding=True)
154 encoded_inputs.to(self.device)
155 x = self.longformer(**encoded_inputs)[0]
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:2377, in PreTrainedTokenizerBase.__call__(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
2374 return False
2376 if not _is_valid_text_input(text):
-> 2377 raise ValueError(
2378 "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
2379 "or `List[List[str]]` (batch of pretokenized examples)."
2380 )
2382 if text_pair is not None and not _is_valid_text_input(text_pair):
2383 raise ValueError(
2384 "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
2385 "or `List[List[str]]` (batch of pretokenized examples)."
2386 )
ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
This is because the input (named x) of my model requires sequences rather than tensors.
A new error
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
I made a wrapper for my model, just like this:
class Model4IG(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.device = model.device
        self.add_special_tokens = model.add_special_tokens
        self.model_name = model.model_name
        # model_name = 'pre-model/' + 'longformer-base-4096'
        self.config = model.config
        self.tokenizer = model.tokenizer
        self.longformer = model.longformer  # (B,S,256)
        ....

    def forward(self, input_ids, attention_mask):
        x = self.longformer(input_ids, attention_mask)[0]
        .........
Error details:
RuntimeError Traceback (most recent call last)
Input In [12], in <cell line: 10>()
4 # Compute attributions for the input DNA sequences
5 """Output indices for
6 which gradients are computed (for classification cases,
7 this is usually the target class).
8 If the network returns a scalar value per example,
9 no target index is necessary."""
---> 10 attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
11 # Print the importance score of each position
12 for i, attr in enumerate(attributions.squeeze()):
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
40 @wraps(func)
41 def wrapper(*args, **kwargs):
---> 42 return func(*args, **kwargs)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
274 attributions = _batch_attribution(
275 self,
276 num_examples,
(...)
283 method=method,
284 )
285 else:
--> 286 attributions = self._attribute(
287 inputs=inputs,
288 baselines=baselines,
289 target=target,
290 additional_forward_args=additional_forward_args,
291 n_steps=n_steps,
292 method=method,
293 )
295 if return_convergence_delta:
296 start_point, end_point = baselines, inputs
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
348 expanded_target = _expand_target(target, n_steps)
350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
352 forward_fn=self.forward_func,
353 inputs=scaled_features_tpl,
354 target_ind=expanded_target,
355 additional_forward_args=input_additional_args,
356 )
358 # flattening grads so that we can multilpy it with step-size
359 # calling contiguous to avoid `memory whole` problems
360 scaled_grads = [
361 grad.contiguous().view(n_steps, -1)
362 * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
363 for grad in grads
364 ]
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
94 r"""
95 Computes gradients of the output with respect to inputs for an
96 arbitrary forward function.
(...)
108 arguments) if no additional arguments are required
109 """
110 with torch.autograd.set_grad_enabled(True):
111 # runs forward pass
--> 112 outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
113 assert outputs[0].numel() == 1, (
114 "Target not provided when necessary, cannot"
115 " take gradient with respect to multiple outputs."
116 )
117 # torch.unbind(forward_out) is a list of scalar tensor tuples and
118 # contains batch_size * #steps elements
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
479 inputs = _format_inputs(inputs)
480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
483 *(*inputs, *additional_forward_args)
484 if additional_forward_args is not None
485 else inputs
486 )
487 return _select_targets(output, target)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
Input In [9], in Model4IG.forward(self, input_ids, attention_mask)
40 attention_mask.to(self.device)
41 # x.to(self.device)
---> 42 x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
43 x = x.permute(0, 2, 1)
45 x = self.cnn(x)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py:1673, in LongformerModel.forward(self, input_ids, attention_mask, global_attention_mask, head_mask, token_type_ids, position_ids, inputs_embeds, output_attentions, output_hidden_states, return_dict)
1667 # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
1668 # ourselves in which case we just need to make it broadcastable to all heads.
1669 extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device)[
1670 :, 0, 0, :
1671 ]
-> 1673 embedding_output = self.embeddings(
1674 input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
1675 )
1677 encoder_outputs = self.encoder(
1678 embedding_output,
1679 attention_mask=extended_attention_mask,
(...)
1683 return_dict=return_dict,
1684 )
1685 sequence_output = encoder_outputs[0]
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py:491, in LongformerEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds)
488 token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
490 if inputs_embeds is None:
--> 491 inputs_embeds = self.word_embeddings(input_ids)
492 position_embeddings = self.position_embeddings(position_ids)
493 token_type_embeddings = self.token_type_embeddings(token_type_ids)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/sparse.py:158, in Embedding.forward(self, input)
157 def forward(self, input: Tensor) -> Tensor:
--> 158 return F.embedding(
159 input, self.weight, self.padding_idx, self.max_norm,
160 self.norm_type, self.scale_grad_by_freq, self.sparse)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/functional.py:2199, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2193 # Note [embedding_renorm set_grad_enabled]
2194 # XXX: equivalent to
2195 # with torch.no_grad():
2196 # torch.embedding_renorm_
2197 # remove once script supports set_grad_enabled
2198 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2199 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)
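From what I can tell, this happens because IntegratedGradients interpolates between the baseline and the input, so the integer input_ids become float tensors before they reach the embedding lookup. One alternative I am considering is LayerIntegratedGradients, which computes attributions with respect to the output of a chosen layer, so input_ids stay Long tensors. A sketch, assuming my Model4IG wrapper from above and that the word embedding layer sits at longformer.embeddings.word_embeddings:

from captum.attr import LayerIntegratedGradients

modelIG = Model4IG(model)   # my wrapper from above
modelIG.eval()

# attribute with respect to the word-embedding outputs; input_ids stay integer tensors
lig = LayerIntegratedGradients(modelIG, modelIG.longformer.embeddings.word_embeddings)
attributions = lig.attribute(inputs=tokens["input_ids"],
                             additional_forward_args=(tokens["attention_mask"],))
# expected shape: [batch_size, seq_len, hidden_size]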
New try and new error
I found some tips in the FAQ, so I used configure_interpretable_embedding_layer for my longformer embedding layer and ran the code on the CPU (because my model uses a BiLSTM). However, I get a new ERROR:
RuntimeError Traceback (most recent call last)
Input In [6], in <cell line: 10>()
7 ig = IntegratedGradients(modelIG)
9 # Compute attributions for the input DNA sequences
---> 10 attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
11 remove_interpretable_embedding_layer(modelIG, interpretable_emb)
12 # Print the importance score of each position
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
40 @wraps(func)
41 def wrapper(*args, **kwargs):
---> 42 return func(*args, **kwargs)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
274 attributions = _batch_attribution(
275 self,
276 num_examples,
(...)
283 method=method,
284 )
285 else:
--> 286 attributions = self._attribute(
287 inputs=inputs,
288 baselines=baselines,
289 target=target,
290 additional_forward_args=additional_forward_args,
291 n_steps=n_steps,
292 method=method,
293 )
295 if return_convergence_delta:
296 start_point, end_point = baselines, inputs
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
348 expanded_target = _expand_target(target, n_steps)
350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
352 forward_fn=self.forward_func,
353 inputs=scaled_features_tpl,
354 target_ind=expanded_target,
355 additional_forward_args=input_additional_args,
356 )
358 # flattening grads so that we can multilpy it with step-size
359 # calling contiguous to avoid `memory whole` problems
360 scaled_grads = [
361 grad.contiguous().view(n_steps, -1)
362 * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
363 for grad in grads
364 ]
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
94 r"""
95 Computes gradients of the output with respect to inputs for an
96 arbitrary forward function.
(...)
108 arguments) if no additional arguments are required
109 """
110 with torch.autograd.set_grad_enabled(True):
111 # runs forward pass
--> 112 outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
113 assert outputs[0].numel() == 1, (
114 "Target not provided when necessary, cannot"
115 " take gradient with respect to multiple outputs."
116 )
117 # torch.unbind(forward_out) is a list of scalar tensor tuples and
118 # contains batch_size * #steps elements
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
479 inputs = _format_inputs(inputs)
480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
483 *(*inputs, *additional_forward_args)
484 if additional_forward_args is not None
485 else inputs
486 )
487 return _select_targets(output, target)
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
1126 # If we don't have any hooks, we want to skip the rest of the logic in
1127 # this function, and just call forward.
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
Input In [3], in Model4IG.forward(self, input_ids, attention_mask)
42 x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
43 print("self.electra", x.shape) # (B,S,H)
---> 44 x = x.permute(0, 2, 1)
46 x = self.cnn(x)
47 # print("cnn(x)", x.shape)
RuntimeError: number of dims don't match in permute
The dimension of longformer's output changed to 80-D; the desired dimension is [batch_size, 80, 768].
Here is my new source code:
from captum.attr import configure_interpretable_embedding_layer
interpretable_emb = configure_interpretable_embedding_layer(modelIG, "longformer")
input_emb = interpretable_emb.indices_to_embeddings(**tokens)[0]

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(modelIG)
# Compute attributions for the input DNA sequences
attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
remove_interpretable_embedding_layer(modelIG, interpretable_emb)
# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")
I have tried many ways and spent many days trying to solve this; even when going to bed, I keep thinking about how to make it work.
Is there anybody out there? I will appreciate everyone who provides advice.
One more finding, towards a conclusion
I found the difference in the dimensions after ig.attribute.
First, the plain forward pass is correct: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], and the output of longformer(input_ids, attention_mask)[0] is [batch_size, 80, 768].
Second, inside ig.attribute the dimensions change because of the parameter n_steps (default value 50): input_ids becomes [batch_size*50, 80] and attention_mask becomes [batch_size*50, 80].
Now the error can be explained: the interpolated values are float tensors rather than int or long, so they cannot be used as embedding indices!!!!
Therefore, the FAQ says we need to use configure_interpretable_embedding_layer to replace our embedding layer!!!
God help me !!!
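If I read the FAQ correctly, configure_interpretable_embedding_layer should wrap the actual embedding sub-layer (something like "longformer.embeddings.word_embeddings" rather than the whole "longformer"), and what gets passed to ig.attribute should then be the precomputed float embeddings, not the integer ids. My understanding as a sketch (untested):

interpretable_emb = configure_interpretable_embedding_layer(modelIG, "longformer.embeddings.word_embeddings")
input_emb = interpretable_emb.indices_to_embeddings(tokens["input_ids"])   # float, [batch_size, 80, 768]

ig = IntegratedGradients(modelIG)
attributions = ig.attribute(inputs=input_emb,
                            additional_forward_args=tokens["attention_mask"])
remove_interpretable_embedding_layer(modelIG, interpretable_emb)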
after using "configure_interpretable_embedding_layer", new ERROR
Now I have used configure_interpretable_embedding_layer, but the output dimension is not the same as in the plain forward pass.
Forward: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], and the output of longformer(input_ids, attention_mask)[0] is [batch_size, 80, 768].
ig.attribute: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], but the output of longformer(input_ids, attention_mask)[0] is [80, 768]. Where is the 1???
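And for whenever the attribute call finally works: since embedding-level attributions come back with a hidden dimension (something like [batch_size, 80, 768]), my plan for printing one score per position is roughly this (sketch):

import torch

# collapse the hidden dimension so each token position gets a single importance score
scores = attributions.sum(dim=-1)        # [batch_size, 80]
scores = scores / torch.norm(scores)     # optional normalisation
for i, attr in enumerate(scores[0]):     # scores for the first sequence in the batch
    print(f"Position {i + 1}: {attr.item():.6f}")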