Using captum with nn.Embedding getting RuntimeError
I am using the captum library and getting the error below. Here is the complete code to reproduce the error.
RuntimeError: One of the differentiated Tensors appears to not have been used in the graph. Set allow_unused=True if this is the desired behavior.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

vocab_size = 1024
embedding_dim = 32
seq_len = 128
num_classes = 5
hidden_dim = 256

class predictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.seq_len = seq_len
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.vocab_size, self.embedding_dim = vocab_size, embedding_dim
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.linear = nn.Linear(self.seq_len*self.embedding_dim, self.num_classes)

    def forward(self, x):
        x = self.embedding(x.long())
        x = x.reshape(-1, self.seq_len*self.embedding_dim)
        x = F.relu(self.linear(x))
        return x

class wrapper_predictor(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = F.softmax(x, dim=1)
        return x

indexes = torch.Tensor(np.random.randint(0, vocab_size, (seq_len))).to(device)
model = predictor().to(device)
wrapper_model = wrapper_predictor(model).to(device)
ig = IntegratedGradients(wrapper_model)
attributions, delta = ig.attribute(inputs=indexes, target=0, n_steps=1, return_convergence_delta=True)
I solved the problem using LayerIntegratedGradients.

Here is a link to read more about other possible solutions: https://captum.ai/tutorials/IMDB_TorchText_Interpret

The RuntimeError happens because IntegratedGradients differentiates the output with respect to the inputs, but the inputs here are integer token indices that are only used to look up rows of nn.Embedding, so no gradient ever reaches them. LayerIntegratedGradients instead computes attributions with respect to the output of the embedding layer, using the model's forward function and the embedding layer as in the example given in the link.

Here is sample code using LayerIntegratedGradients with nn.Embedding:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from captum.attr import IntegratedGradients, LayerIntegratedGradients
from torchsummary import summary

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

vocab_size = 1024
embedding_dim = 1
seq_len = 128
num_classes = 5
hidden_dim = 256

class predictor(nn.Module):
    def __init__(self):
        super(predictor, self).__init__()
        self.seq_len = seq_len
        self.num_classes = num_classes
        self.hidden_dim = hidden_dim
        self.vocab_size, self.embedding_dim = vocab_size, embedding_dim
        self.embedding = nn.Sequential(
            nn.Embedding(self.vocab_size, self.embedding_dim),
        )
        # Re-initialise the embedding weights; index [0] reaches the nn.Embedding
        # inside the nn.Sequential, and nn.Parameter keeps the weight trainable.
        self.embedding[0].weight = nn.Parameter(
            torch.randn((self.vocab_size, self.embedding_dim))
        )
        self.fc = nn.Sequential(
            nn.Linear(self.seq_len*self.embedding_dim, self.hidden_dim, device=device, bias=False),
            nn.Linear(self.hidden_dim, self.num_classes, device=device, bias=False),
        )

    def forward(self, x):
        x = self.embedding(x.long())
        x = x.view(-1, self.seq_len*self.embedding_dim)
        x = self.fc(x)
        return x

class wrapper_predictor(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = F.softmax(x, dim=1)  # keep softmax out of forward if attribution scores are too low
        return x

model = predictor().to(device)
indexes = torch.Tensor(np.random.randint(0, vocab_size, (seq_len))).to(device)
input_size = indexes.shape
summary(model=model, input_size=input_size, batch_size=-1, device='cuda')

wrapper_model = wrapper_predictor(model).to(device)
lig = LayerIntegratedGradients(model, model.embedding)
attributions, delta = lig.attribute(inputs=indexes, target=0, n_steps=1, return_convergence_delta=True)
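For reference, here is a minimal sketch of the smallest change that avoids the RuntimeError in the original setup, assuming the predictor and wrapper_predictor classes and the globals (vocab_size, seq_len, device) from the question are already defined: keep the nn.Embedding model unchanged and attribute through LayerIntegratedGradients, so gradients are taken with respect to the embedding layer's float output rather than the integer indices. The explicit batch dimension of 1 on indexes is my own addition for clarity.

import numpy as np
import torch
from captum.attr import LayerIntegratedGradients

# Integer token ids with an explicit batch dimension of 1.
indexes = torch.tensor(
    np.random.randint(0, vocab_size, (1, seq_len)), dtype=torch.long, device=device
)

model = predictor().to(device)
wrapper_model = wrapper_predictor(model).to(device)

# Attribute with respect to the output of the embedding layer instead of the raw indices.
lig = LayerIntegratedGradients(wrapper_model, model.embedding)
attributions, delta = lig.attribute(
    inputs=indexes, target=0, n_steps=50, return_convergence_delta=True
)
print(attributions.shape)  # (1, seq_len, embedding_dim): one score per embedding element

Summing the attributions over the embedding dimension, e.g. attributions.sum(dim=-1), gives a single score per token position, which is how the linked IMDB tutorial reports per-token attributions.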