
If my model's input is a sequence rather than a tensor, how should I use IG?

Open BinchaoPeng opened this issue 2 years ago • 6 comments

my model

class Longformer_Net(nn.Module):
    
    def __init__(self, model_name, device, add_special_tokens=False, bidirectional=False, num_layers=1):
        super().__init__()
        self.device = device
        self.add_special_tokens = add_special_tokens
        self.model_name = model_name
        # model_name = 'pre-model/' + 'longformer-base-4096'
        self.config = LongformerConfig.from_pretrained(self.model_name)
        self.tokenizer = LongformerTokenizer.from_pretrained(self.model_name)
        self.longformer = LongformerModel.from_pretrained(self.model_name, config=self.config)  # (B,S,256)

        ... ... ... ...
        
        self.dense1 = nn.Linear(in_features=96, out_features=64)  # first 512
        self.dense1_dp = nn.Dropout(0.5)

        self.dense2 = nn.Linear(in_features=64, out_features=1)

    def forward(self, x):  # look here!!! the format of x is "AC CT TG GG AC ...."
        encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=self.add_special_tokens,
                                        padding=True)
        encoded_inputs.to(self.device)
        x = self.longformer(**encoded_inputs)[0]
        # print("self.electra", x.shape)  # (B,S,H)

        x = x.permute(0, 2, 1)

        ... ...

        x = x.reshape([x.shape[0], -1])

        x = self.dense1(x)
        # print("dense1(x)", x.shape)
        x = F.relu(x)
        # x = self.dense1_dp(x)

        x = self.dense2(x)
        x = torch.sigmoid(x)
        x = x.view(-1)
        return x

use IG

# Create the DNA sequence recognition model instance
# Load the pretrained model weights
model = Longformer_Net(config.model_name, config.device,
                                 config.add_special_tokens,
                                 bidirectional=config.bidirectional,
                                 num_layers=config.num_layers)
model.load_state_dict(torch.load("../model/model_137.pkl"))



# Prepare the input DNA sequences
sequence = ["agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper()]
# encoded_sequence = encode_sequence(sequence)

kmer = Kmer(k=[config.kmer], stride=1, return_type='seq') 
print(f"PBC[{config.pc_name}] --- kmer = {config.kmer}")

train_kmer_seq = kmer.run_fea(sequence)  # to get the inputs, like "AC CT CG TA ..."

tokens = model.tokenizer(train_kmer_seq, return_tensors='pt', add_special_tokens=False,
                                        padding=True)
# Extract the input tensors
input_ids = tokens['input_ids'].to(model.device)

encoded_sequence = model.longformer(**tokens)[0]

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)

# Compute attributions for the input DNA sequences
attributions = ig.attribute(input_ids, target=0)

# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")

HELP

The first parameter of ig.attribute needs a tensor, but my model's input is a sequence of strings, like "AC CT CG TA ...". What should I do?
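
One way to bridge this gap is to tokenize outside the model, wrap the forward so it only accepts input_ids and attention_mask tensors, and attribute with LayerIntegratedGradients on the word-embedding layer, so the integer token ids are never interpolated. The sketch below is a minimal, assumption-laden illustration rather than code from this thread: TensorInputWrapper is hypothetical, and its mean-pool head is only a placeholder for the real CNN/dense head of Longformer_Net.

import torch
import torch.nn as nn
from captum.attr import LayerIntegratedGradients

class TensorInputWrapper(nn.Module):
    """Hypothetical wrapper: Captum only ever sees tensors, never raw strings."""
    def __init__(self, base_model):
        super().__init__()
        self.base = base_model

    def forward(self, input_ids, attention_mask):
        hidden = self.base.longformer(input_ids=input_ids,
                                      attention_mask=attention_mask)[0]  # (B, S, H)
        # Placeholder head: the real CNN/dense head of Longformer_Net would go here.
        score = hidden.mean(dim=(1, 2))
        return torch.sigmoid(score)  # one scalar per example, so no target index is needed

# Tokenize OUTSIDE the model, so Captum receives integer tensors.
tokens = model.tokenizer(train_kmer_seq, return_tensors='pt',
                         add_special_tokens=False, padding=True)

wrapped = TensorInputWrapper(model)
# Attribute with respect to the word-embedding layer; the token ids themselves stay integers.
lig = LayerIntegratedGradients(wrapped, model.longformer.embeddings.word_embeddings)
attributions = lig.attribute(inputs=tokens['input_ids'],
                             additional_forward_args=(tokens['attention_mask'],))
per_token = attributions.sum(dim=-1)  # one importance score per token position (padding included)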

BinchaoPeng avatar Jun 07 '23 12:06 BinchaoPeng

The latest error is:

ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

BinchaoPeng avatar Jun 07 '23 12:06 BinchaoPeng

A detailed demo

import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients
from transformers import LongformerModel, LongformerTokenizer, LongformerConfig

# Define the DNA sequence recognition model
class DNARecognitionModel(nn.Module):
    def __init__(self, model_name):
        super(DNARecognitionModel, self).__init__()
        self.config = LongformerConfig.from_pretrained(model_name)
        self.tokenizer = LongformerTokenizer.from_pretrained(model_name)
        self.longformer = LongformerModel.from_pretrained(model_name, config=self.config)
        # Define the rest of your model architecture here

    def forward(self, x):
        encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=False, padding=True, truncation=True)
        input_ids = encoded_inputs['input_ids'].to(self.device)  # convert to a torch.Tensor on the right device
        attention_mask = encoded_inputs['attention_mask'].to(self.device)  # convert to a torch.Tensor on the right device

        # Forward pass logic goes here
        x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
        # rest of the model architecture...

        return x

# Create the DNA sequence recognition model instance
model = DNARecognitionModel(model_name='pre-model/longformer-base-4096')

# Load the pretrained model weights
model.load_state_dict(torch.load('model_weights.pt'))

# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)

# Prepare the input DNA sequence
sequence = "ACGTGACTAGCTGATCG"

# Convert the input sequence to a tensor
inputs = torch.tensor([sequence])

# Compute attributions for the input DNA sequence
attributions = ig.attribute(inputs, target=0)

# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")

BinchaoPeng avatar Jun 07 '23 13:06 BinchaoPeng

a new error

I have spent many days trying to solve this issue, but nothing works... Who can help me... I'm close to despair.

from captum.attr import remove_interpretable_embedding_layer
import numpy as np
from feature_extraction import Kmer

# Create the DNA sequence recognition model instance
# Load the pretrained model weights
model = Longformer_base_lstm_Net(config.model_name, config.device,
                                 config.add_special_tokens,
                                 bidirectional=config.bidirectional,
                                 num_layers=config.num_layers)
model.load_state_dict(torch.load("../model/model_137.pkl"))



# Prepare the input DNA sequences
sequence = ["agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper(),
            "agcaagagaacatctggcctgcgggcaccactgctacagtggtggtcactggcaaacaagAtcgcgacgaaagccaggatt".upper()]
# encoded_sequence = encode_sequence(sequence)

kmer = Kmer(k=[config.kmer], stride=1, return_type='seq')
print(f"PBC[{config.pc_name}] --- kmer = {config.kmer}")

train_kmer_seq = kmer.run_fea(sequence)

tokens = model.tokenizer(train_kmer_seq, return_tensors='pt', add_special_tokens=False,
                                        padding=True)


encoded_sequence = model.longformer(**tokens)[0]

from captum.attr import configure_interpretable_embedding_layer

interpretable_emb = configure_interpretable_embedding_layer(model, "longformer")

input_emb = interpretable_emb.indices_to_embeddings(**tokens)[0]
# Create Captum's IntegratedGradients object
ig = IntegratedGradients(model)

# Compute attributions for the input DNA sequences
attributions = ig.attribute(input_emb)
remove_interpretable_embedding_layer(model, interpretable_emb)
# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")

error

ValueError                                Traceback (most recent call last)
Input In [5], in <cell line: 41>()
     38 ig = IntegratedGradients(model)
     40 # 计算输入DNA序列的梯度
---> 41 attributions = ig.attribute(input_emb)
     42 remove_interpretable_embedding_layer(model, interpretable_emb)
     43 # 输出各个位置的重要性分数

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
     40 @wraps(func)
     41 def wrapper(*args, **kwargs):
---> 42     return func(*args, **kwargs)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
    274     attributions = _batch_attribution(
    275         self,
    276         num_examples,
   (...)
    283         method=method,
    284     )
    285 else:
--> 286     attributions = self._attribute(
    287         inputs=inputs,
    288         baselines=baselines,
    289         target=target,
    290         additional_forward_args=additional_forward_args,
    291         n_steps=n_steps,
    292         method=method,
    293     )
    295 if return_convergence_delta:
    296     start_point, end_point = baselines, inputs
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
    348 expanded_target = _expand_target(target, n_steps)
    350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
    352     forward_fn=self.forward_func,
    353     inputs=scaled_features_tpl,
    354     target_ind=expanded_target,
    355     additional_forward_args=input_additional_args,
    356 )
    358 # flattening grads so that we can multilpy it with step-size
    359 # calling contiguous to avoid `memory whole` problems
    360 scaled_grads = [
    361     grad.contiguous().view(n_steps, -1)
    362     * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
    363     for grad in grads
    364 ]

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
     94 r"""
     95 Computes gradients of the output with respect to inputs for an
     96 arbitrary forward function.
   (...)
    108                 arguments) if no additional arguments are required
    109 """
    110 with torch.autograd.set_grad_enabled(True):
    111     # runs forward pass
--> 112     outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
    113     assert outputs[0].numel() == 1, (
    114         "Target not provided when necessary, cannot"
    115         " take gradient with respect to multiple outputs."
    116     )
    117     # torch.unbind(forward_out) is a list of scalar tensor tuples and
    118     # contains batch_size * #steps elements
File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
    479 inputs = _format_inputs(inputs)
    480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
    483     *(*inputs, *additional_forward_args)
    484     if additional_forward_args is not None
    485     else inputs
    486 )
    487 return _select_targets(output, target)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~/Pycharm Projects/myNewPro/src/module/longformer_model.py:152, in Longformer_base_lstm_Net.forward(self, x)
    151 def forward(self, x):
--> 152     encoded_inputs = self.tokenizer(x, return_tensors='pt', add_special_tokens=self.add_special_tokens,
    153                                     padding=True)
    154     encoded_inputs.to(self.device)
    155     x = self.longformer(**encoded_inputs)[0]

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:2377, in PreTrainedTokenizerBase.__call__(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
   2374         return False
   2376 if not _is_valid_text_input(text):
-> 2377     raise ValueError(
   2378         "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
   2379         "or `List[List[str]]` (batch of pretokenized examples)."
   2380     )
   2382 if text_pair is not None and not _is_valid_text_input(text_pair):
   2383     raise ValueError(
   2384         "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
   2385         "or `List[List[str]]` (batch of pretokenized examples)."
   2386     )

ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

This happens because the input (named x) of my model expects sequences rather than a tensor.

BinchaoPeng avatar Jun 08 '23 14:06 BinchaoPeng

a new Error

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)

I made a wrapper for my model, just like this:

class Model4IG(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.device = model.device
        self.add_special_tokens = model.add_special_tokens
        self.model_name = model.model_name
        # model_name = 'pre-model/' + 'longformer-base-4096'
        self.config = model.config
        self.tokenizer = model.tokenizer
        self.longformer = model.longformer  # (B,S,256)

        ....

    def forward(self, input_ids, attention_mask):
        x = self.longformer(input_ids, attention_mask)[0]
        .........

error details

RuntimeError                              Traceback (most recent call last)
Input In [12], in <cell line: 10>()
      4 # 计算输入DNA序列的梯度
      5 """Output indices for
      6 which gradients are computed (for classification cases,
      7 this is usually the target class).
      8 If the network returns a scalar value per example,
      9 no target index is necessary."""
---> 10 attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
     11 # 输出各个位置的重要性分数
     12 for i, attr in enumerate(attributions.squeeze()):

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
     40 @wraps(func)
     41 def wrapper(*args, **kwargs):
---> 42     return func(*args, **kwargs)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
    274     attributions = _batch_attribution(
    275         self,
    276         num_examples,
   (...)
    283         method=method,
    284     )
    285 else:
--> 286     attributions = self._attribute(
    287         inputs=inputs,
    288         baselines=baselines,
    289         target=target,
    290         additional_forward_args=additional_forward_args,
    291         n_steps=n_steps,
    292         method=method,
    293     )
    295 if return_convergence_delta:
    296     start_point, end_point = baselines, inputs

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
    348 expanded_target = _expand_target(target, n_steps)
    350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
    352     forward_fn=self.forward_func,
    353     inputs=scaled_features_tpl,
    354     target_ind=expanded_target,
    355     additional_forward_args=input_additional_args,
    356 )
    358 # flattening grads so that we can multilpy it with step-size
    359 # calling contiguous to avoid `memory whole` problems
    360 scaled_grads = [
    361     grad.contiguous().view(n_steps, -1)
    362     * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
    363     for grad in grads
    364 ]

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
     94 r"""
     95 Computes gradients of the output with respect to inputs for an
     96 arbitrary forward function.
   (...)
    108                 arguments) if no additional arguments are required
    109 """
    110 with torch.autograd.set_grad_enabled(True):
    111     # runs forward pass
--> 112     outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
    113     assert outputs[0].numel() == 1, (
    114         "Target not provided when necessary, cannot"
    115         " take gradient with respect to multiple outputs."
    116     )
    117     # torch.unbind(forward_out) is a list of scalar tensor tuples and
    118     # contains batch_size * #steps elements

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
    479 inputs = _format_inputs(inputs)
    480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
    483     *(*inputs, *additional_forward_args)
    484     if additional_forward_args is not None
    485     else inputs
    486 )
    487 return _select_targets(output, target)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [9], in Model4IG.forward(self, input_ids, attention_mask)
     40 attention_mask.to(self.device)
     41 # x.to(self.device)
---> 42 x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
     43 x = x.permute(0, 2, 1)
     45 x = self.cnn(x)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py:1673, in LongformerModel.forward(self, input_ids, attention_mask, global_attention_mask, head_mask, token_type_ids, position_ids, inputs_embeds, output_attentions, output_hidden_states, return_dict)
   1667 # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
   1668 # ourselves in which case we just need to make it broadcastable to all heads.
   1669 extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device)[
   1670     :, 0, 0, :
   1671 ]
-> 1673 embedding_output = self.embeddings(
   1674     input_ids=input_ids, position_ids=position_ids, token_type_ids=token_type_ids, inputs_embeds=inputs_embeds
   1675 )
   1677 encoder_outputs = self.encoder(
   1678     embedding_output,
   1679     attention_mask=extended_attention_mask,
   (...)
   1683     return_dict=return_dict,
   1684 )
   1685 sequence_output = encoder_outputs[0]

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/transformers/models/longformer/modeling_longformer.py:491, in LongformerEmbeddings.forward(self, input_ids, token_type_ids, position_ids, inputs_embeds)
    488     token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
    490 if inputs_embeds is None:
--> 491     inputs_embeds = self.word_embeddings(input_ids)
    492 position_embeddings = self.position_embeddings(position_ids)
    493 token_type_embeddings = self.token_type_embeddings(token_type_ids)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/sparse.py:158, in Embedding.forward(self, input)
    157 def forward(self, input: Tensor) -> Tensor:
--> 158     return F.embedding(
    159         input, self.weight, self.padding_idx, self.max_norm,
    160         self.norm_type, self.scale_grad_by_freq, self.sparse)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/functional.py:2199, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2193     # Note [embedding_renorm set_grad_enabled]
   2194     # XXX: equivalent to
   2195     # with torch.no_grad():
   2196     #   torch.embedding_renorm_
   2197     # remove once script supports set_grad_enabled
   2198     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2199 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)

BinchaoPeng avatar Jun 12 '23 13:06 BinchaoPeng

new try and new error

I found some hints in the FAQ, so I used configure_interpretable_embedding_layer on my Longformer embedding layer and ran the code on the CPU (because my model uses a BiLSTM). However, I get a new error:

RuntimeError                              Traceback (most recent call last)
Input In [6], in <cell line: 10>()
      7 ig = IntegratedGradients(modelIG)
      9 # 计算输入DNA序列的梯度
---> 10 attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
     11 remove_interpretable_embedding_layer(modelIG, interpretable_emb)
     12 # 输出各个位置的重要性分数

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/log/__init__.py:42, in log_usage.<locals>._log_usage.<locals>.wrapper(*args, **kwargs)
     40 @wraps(func)
     41 def wrapper(*args, **kwargs):
---> 42     return func(*args, **kwargs)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:286, in IntegratedGradients.attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, internal_batch_size, return_convergence_delta)
    274     attributions = _batch_attribution(
    275         self,
    276         num_examples,
   (...)
    283         method=method,
    284     )
    285 else:
--> 286     attributions = self._attribute(
    287         inputs=inputs,
    288         baselines=baselines,
    289         target=target,
    290         additional_forward_args=additional_forward_args,
    291         n_steps=n_steps,
    292         method=method,
    293     )
    295 if return_convergence_delta:
    296     start_point, end_point = baselines, inputs

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/attr/_core/integrated_gradients.py:351, in IntegratedGradients._attribute(self, inputs, baselines, target, additional_forward_args, n_steps, method, step_sizes_and_alphas)
    348 expanded_target = _expand_target(target, n_steps)
    350 # grads: dim -> (bsz * #steps x inputs[0].shape[1:], ...)
--> 351 grads = self.gradient_func(
    352     forward_fn=self.forward_func,
    353     inputs=scaled_features_tpl,
    354     target_ind=expanded_target,
    355     additional_forward_args=input_additional_args,
    356 )
    358 # flattening grads so that we can multilpy it with step-size
    359 # calling contiguous to avoid `memory whole` problems
    360 scaled_grads = [
    361     grad.contiguous().view(n_steps, -1)
    362     * torch.tensor(step_sizes).view(n_steps, 1).to(grad.device)
    363     for grad in grads
    364 ]

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/gradient.py:112, in compute_gradients(forward_fn, inputs, target_ind, additional_forward_args)
     94 r"""
     95 Computes gradients of the output with respect to inputs for an
     96 arbitrary forward function.
   (...)
    108                 arguments) if no additional arguments are required
    109 """
    110 with torch.autograd.set_grad_enabled(True):
    111     # runs forward pass
--> 112     outputs = _run_forward(forward_fn, inputs, target_ind, additional_forward_args)
    113     assert outputs[0].numel() == 1, (
    114         "Target not provided when necessary, cannot"
    115         " take gradient with respect to multiple outputs."
    116     )
    117     # torch.unbind(forward_out) is a list of scalar tensor tuples and
    118     # contains batch_size * #steps elements

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/captum/_utils/common.py:482, in _run_forward(forward_func, inputs, target, additional_forward_args)
    479 inputs = _format_inputs(inputs)
    480 additional_forward_args = _format_additional_forward_args(additional_forward_args)
--> 482 output = forward_func(
    483     *(*inputs, *additional_forward_args)
    484     if additional_forward_args is not None
    485     else inputs
    486 )
    487 return _select_targets(output, target)

File ~/anaconda3/envs/myPro/lib/python3.8/site-packages/torch/nn/modules/module.py:1130, in Module._call_impl(self, *input, **kwargs)
   1126 # If we don't have any hooks, we want to skip the rest of the logic in
   1127 # this function, and just call forward.
   1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1129         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130     return forward_call(*input, **kwargs)
   1131 # Do not call functions when jit is used
   1132 full_backward_hooks, non_full_backward_hooks = [], []

Input In [3], in Model4IG.forward(self, input_ids, attention_mask)
     42 x = self.longformer(input_ids=input_ids, attention_mask=attention_mask)[0]
     43 print("self.electra", x.shape)  # (B,S,H)
---> 44 x = x.permute(0, 2, 1)
     46 x = self.cnn(x)
     47 # print("cnn(x)", x.shape)

RuntimeError: number of dims don't match in permute

The dimensionality of the Longformer output changed to 80-D; the desired shape is [batch_size, 80, 768].

Here is my new source code:

from captum.attr import configure_interpretable_embedding_layer

interpretable_emb = configure_interpretable_embedding_layer(modelIG, "longformer")

input_emb = interpretable_emb.indices_to_embeddings(**tokens)[0]
# Create Captum's IntegratedGradients object
ig = IntegratedGradients(modelIG)

# Compute attributions for the input DNA sequences
attributions = ig.attribute(inputs=tokens["input_ids"], additional_forward_args=tokens["attention_mask"])
remove_interpretable_embedding_layer(modelIG, interpretable_emb)
# Print the importance score of each position
for i, attr in enumerate(attributions.squeeze()):
    print(f"Position {i + 1}: {attr.item()}")

I have tried many approaches and spent many days trying to solve this; even when I go to bed, I keep thinking about how to make it work.

Is anybody there? I would appreciate any advice.

One more observation toward a conclusion

I found where the dimensions differ after ig.attribute. First, the plain forward pass is correct: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], and the output of longformer(input_ids, attention_mask)[0] is [batch_size, 80, 768]. Second, inside ig.attribute the dimensions change because of the n_steps parameter (default value 50): input_ids becomes [batch_size * 50, 80] and attention_mask becomes [batch_size * 50, 80]. Now the error can be explained: the interpolated values are float tensors rather than int or long, so they cannot be used as embedding indices!
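
A tiny self-contained illustration of that explanation (the numbers below are made up, not taken from the model above): IG evaluates the forward on points interpolated between a baseline and the input, and interpolating integer ids produces float values that nn.Embedding cannot use as indices.

import torch
import torch.nn as nn

ids = torch.tensor([[12, 7, 431]])             # integer token ids, dtype torch.long
baseline = torch.zeros_like(ids)
alpha = 0.5                                    # one of the n_steps interpolation coefficients
scaled = baseline + alpha * (ids - baseline)   # tensor([[  6.0,   3.5, 215.5]]), dtype float32
emb = nn.Embedding(1000, 8)
emb(ids)       # works: indices are Long
# emb(scaled)  # RuntimeError: Expected ... scalar types: Long, Int; but got ... Float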

That is why the FAQ says we need to use configure_interpretable_embedding_layer to replace our embedding layer!
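
As an alternative to replacing the embedding layer, Hugging Face models also accept pre-computed embeddings through the inputs_embeds argument, which side-steps the integer-index problem with plain IntegratedGradients. Below is a minimal sketch under that assumption; EmbedsInputWrapper and its mean-pool placeholder head are hypothetical (not the real model head), and model / train_kmer_seq refer to the objects defined earlier in this thread.

import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

class EmbedsInputWrapper(nn.Module):
    """Hypothetical wrapper: attributions are computed w.r.t. the word embeddings."""
    def __init__(self, base_model):
        super().__init__()
        self.longformer = base_model.longformer

    def forward(self, inputs_embeds, attention_mask):
        hidden = self.longformer(inputs_embeds=inputs_embeds,
                                 attention_mask=attention_mask)[0]  # (B, S, H)
        return torch.sigmoid(hidden.mean(dim=(1, 2)))  # placeholder head, one scalar per example

tokens = model.tokenizer(train_kmer_seq, return_tensors='pt',
                         add_special_tokens=False, padding=True)
# Look the token ids up once, outside the model, so IG interpolates embeddings, not ids.
input_embeds = model.longformer.embeddings.word_embeddings(tokens['input_ids'])

ig = IntegratedGradients(EmbedsInputWrapper(model))
attributions = ig.attribute(inputs=input_embeds,
                            additional_forward_args=(tokens['attention_mask'],))
per_token = attributions.sum(dim=-1)  # (B, S): one importance score per token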

God help me !!!

BinchaoPeng avatar Jun 13 '23 06:06 BinchaoPeng

after using "configure_interpretable_embedding_layer", new ERROR

Now I have used configure_interpretable_embedding_layer, but the output shape is not the same as in a normal forward pass:

forward: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], and the output of longformer(input_ids, attention_mask)[0] is [batch_size, 80, 768]

ig.attribute: input_ids is [batch_size, 80], attention_mask is [batch_size, 80], but the output of longformer(input_ids, attention_mask)[0] is [80, 768]

Where did the batch dimension of 1 go?

BinchaoPeng avatar Jun 13 '23 08:06 BinchaoPeng