【发布时间】:2018-03-24 01:56:43
【问题描述】:
按照https://github.com/spro/practical-pytorch/blob/master/seq2seq-translation/seq2seq-translation.ipynb的教程进行操作
有一个USE_CUDA 标志,用于控制 CPU(当为 False)到 GPU(当为 True)类型之间的变量和张量类型。
使用来自en-fr.tsv 的数据并将句子转换为变量:
import unicodedata
import string
import re
import random
import time
import math
from gensim.corpora.dictionary import Dictionary
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import LongTensor, FloatTensor
from torch import optim
import torch.nn.functional as F
import numpy as np
MAX_LENGTH = 10
USE_CUDA = False
# Turn a Unicode string to plain ASCII, thanks to http://stackoverflow.com/a/518232/2809427
def unicode_to_ascii(s):
return ''.join(
c for c in unicodedata.normalize('NFD', s)
if unicodedata.category(c) != 'Mn'
)
# Lowercase, trim, and remove non-letter characters
def normalize_string(s):
s = unicode_to_ascii(s.lower().strip())
s = re.sub(r"([.!?])", r" \1", s)
s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
return s
SOS_IDX, SOS_TOKEN = 0, '<s>'
EOS_IDX, EOS_TOKEN = 1, '</s>'
UNK_IDX, UNK_TOKEN = 2, '<unk>'
PAD_IDX, PAD_TOKEN = 3, '<blank>'
lines = open('en-fr.tsv').read().strip().split('\n')
pairs = [[normalize_string(s).split() for s in l.split('\t')] for l in lines]
src_sents, trg_sents = zip(*pairs)
src_dict = Dictionary([[SOS_TOKEN, EOS_TOKEN, UNK_TOKEN, PAD_TOKEN]])
src_dict.add_documents(src_sents)
trg_dict = Dictionary([[SOS_TOKEN, EOS_TOKEN, UNK_TOKEN, PAD_TOKEN]])
trg_dict.add_documents(trg_sents)
def variablize_sentences(sentence, dictionary):
indices = [dictionary.token2id[tok] for tok in sentence] + [dictionary.token2id[EOS_TOKEN]]
var = Variable(LongTensor(indices).view(-1, 1))
return var.cuda() if USE_CUDA else var
input_variables = [variablize_sentences(sent, src_dict) for sent in src_sents]
output_variables = [variablize_sentences(sent, trg_dict) for sent in trg_sents]
并使用 Encoder-Attn-Decoder 网络:
class EncoderRNN(nn.Module):
def __init__(self, input_size, hidden_size, n_layers=1):
super(EncoderRNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.n_layers = n_layers
self.embedding = nn.Embedding(input_size, hidden_size)
self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
self.embedding = self.embedding.cuda() if USE_CUDA else self.embedding
self.gru = self.gru.cuda() if USE_CUDA else self.gru
def forward(self, word_inputs, hidden):
seq_len = len(word_inputs)
embedded = self.embedding(word_inputs).view(seq_len, 1, -1)
embedded = embedded.cuda() if USE_CUDA else embedded
output, hidden = self.gru(embedded, hidden)
output = output.cuda() if USE_CUDA else output
hiddne = hidden.cuda() if USE_CUDA else hidden
return output, hidden
def init_hidden(self):
hidden = Variable(torch.zeros(self.n_layers, 1, self.hidden_size))
return hidden.cuda() if USE_CUDA else hidden
class Attn(nn.Module):
def __init__(self, method, hidden_size, max_length=MAX_LENGTH):
super(Attn, self).__init__()
self.method = method
self.hidden_size = hidden_size
if self.method == 'general':
self.attn = nn.Linear(self.hidden_size, hidden_size)
elif self.method == 'concat':
self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
self.other = nn.Parameter(FloatTensor(1, hidden_size))
def forward(self, hidden, encoder_outputs):
seq_len = len(encoder_outputs)
# Create variable to store attention energies
attn_energies = Variable(torch.zeros(seq_len)) # B x 1 x S
attn_energies = attn_energies.cuda() if USE_CUDA else attn_energies
# Calculate energies for each encoder output
for i in range(seq_len):
attn_energies[i] = self.score(hidden, encoder_outputs[i])
# Normalize energies to weights in range 0 to 1, resize to 1 x 1 x seq_len
return F.softmax(attn_energies).unsqueeze(0).unsqueeze(0)
def score(self, hidden, encoder_output):
if self.method == 'dot':
energy =torch.dot(hidden.view(-1), encoder_output.view(-1))
elif self.method == 'general':
energy = self.attn(encoder_output)
energy = torch.dot(hidden.view(-1), energy.view(-1))
elif self.method == 'concat':
energy = self.attn(torch.cat((hidden, encoder_output), 1))
energy = torch.dot(self.v.view(-1), energy.view(-1))
return energy
class AttnDecoderRNN(nn.Module):
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout_p=0.1):
super(AttnDecoderRNN, self).__init__()
# Keep parameters for reference
self.attn_model = attn_model
self.hidden_size = hidden_size
self.output_size = output_size
self.n_layers = n_layers
self.dropout_p = dropout_p
# Define layers
self.embedding = nn.Embedding(output_size, hidden_size)
self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p)
self.out = nn.Linear(hidden_size * 2, output_size)
self.embedding = self.embedding.cuda() if USE_CUDA else self.embedding
self.gru = self.gru.cuda() if USE_CUDA else self.gru
self.out = self.out.cuda() if USE_CUDA else self.out
# Choose attention model
if attn_model != 'none':
self.attn = Attn(attn_model, hidden_size)
self.attn = self.attn.cuda() if USE_CUDA else self.attn
def forward(self, word_input, last_context, last_hidden, encoder_outputs):
# Note: we run this one step at a time
# Get the embedding of the current input word (last output word)
word_embedded = self.embedding(word_input).view(1, 1, -1) # S=1 x B x N
# Combine embedded input word and last context, run through RNN
rnn_input = torch.cat((word_embedded, last_context.unsqueeze(0)), 2)
rnn_output, hidden = self.gru(rnn_input, last_hidden)
# Calculate attention from current RNN state and all encoder outputs; apply to encoder outputs
attn_weights = self.attn(rnn_output.squeeze(0), encoder_outputs)
context = attn_weights.bmm(encoder_outputs.transpose(0, 1)) # B x 1 x N
# Final output layer (next word prediction) using the RNN hidden state and context vector
rnn_output = rnn_output.squeeze(0) # S=1 x B x N -> B x N
context = context.squeeze(1) # B x S=1 x N -> B x N
output = F.log_softmax(self.out(torch.cat((rnn_output, context), 1)))
if USE_CUDA:
return output.cuda(), context.cuda(), hidden.cuda(), attn_weights.cuda()
else:
return output, context, hidden, attn_weights
并测试网络:
encoder_test = EncoderRNN(10, 10, 2) # I, H , L
decoder_test = AttnDecoderRNN('general', 10, 10, 2) # A, H, O, L
encoder_hidden = encoder_test.init_hidden()
if USE_CUDA:
word_inputs = Variable(torch.LongTensor([1, 2, 3]).cuda())
else:
word_inputs = Variable(torch.LongTensor([1, 2, 3]))
encoder_outputs, encoder_hidden = encoder_test(word_inputs, encoder_hidden)
decoder_attns = torch.zeros(1, 3, 3)
decoder_hidden = encoder_hidden
decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))
decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(word_inputs[0], decoder_context, decoder_hidden, encoder_outputs)
print(decoder_output)
print(decoder_hidden)
print(decoder_attn)
代码在 CPU 上运行良好,
[出]:
EncoderRNN (
(embedding): Embedding(10, 10)
(gru): GRU(10, 10, num_layers=2)
)
AttnDecoderRNN (
(embedding): Embedding(10, 10)
(gru): GRU(20, 10, num_layers=2, dropout=0.1)
(out): Linear (20 -> 10)
(attn): Attn (
(attn): Linear (10 -> 10)
)
)
Variable containing:
-2.4378 -2.3556 -2.3391 -2.5070 -2.3439 -2.3415 -2.3976 -2.1832 -1.9976 -2.2213
[torch.FloatTensor of size 1x10]
Variable containing:
(0 ,.,.) =
Columns 0 to 8
-0.2325 0.0775 0.5415 0.4876 -0.5771 -0.0687 0.1832 -0.5285 0.2508
Columns 9 to 9
-0.1837
(1 ,.,.) =
Columns 0 to 8
-0.1389 -0.2605 -0.0518 0.3405 0.0774 0.1815 0.0297 -0.1304 -0.1015
Columns 9 to 9
0.2602
[torch.FloatTensor of size 2x1x10]
Variable containing:
(0 ,.,.) =
0.3334 0.3291 0.3374
[torch.FloatTensor of size 1x1x3]
但是将标志更改为USE_GPU=True时,会在初始化decoder_test对象时抛出错误,它会抛出TypeError:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-76-b3c660013934> in <module>()
12 decoder_context = Variable(torch.zeros(1, decoder_test.hidden_size))
13
---> 14 decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(word_inputs[0], decoder_context, decoder_hidden, encoder_outputs)
15 print(decoder_output)
16 print(decoder_hidden)
~/.local/lib/python3.5/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
222 for hook in self._forward_pre_hooks.values():
223 hook(self, input)
--> 224 result = self.forward(*input, **kwargs)
225 for hook in self._forward_hooks.values():
226 hook_result = hook(self, input, result)
<ipython-input-75-34ecfe9b3112> in forward(self, word_input, last_context, last_hidden, encoder_outputs)
32
33 # Combine embedded input word and last context, run through RNN
---> 34 rnn_input = torch.cat((word_embedded, last_context.unsqueeze(0)), 2)
35 rnn_output, hidden = self.gru(rnn_input, last_hidden)
36
~/.local/lib/python3.5/site-packages/torch/autograd/variable.py in cat(iterable, dim)
895 @staticmethod
896 def cat(iterable, dim=0):
--> 897 return Concat.apply(dim, *iterable)
898
899 @staticmethod
~/.local/lib/python3.5/site-packages/torch/autograd/_functions/tensor.py in forward(ctx, dim, *inputs)
315 ctx.dim = dim
316 ctx.input_sizes = [i.size(dim) for i in inputs]
--> 317 return torch.cat(inputs, dim)
318
319 @staticmethod
TypeError: cat received an invalid combination of arguments - got (tuple, int), but expected one of:
* (sequence[torch.cuda.FloatTensor] seq)
* (sequence[torch.cuda.FloatTensor] seq, int dim)
didn't match because some of the arguments have invalid types: (tuple, int)
问题是为什么这些类型在 CUDA 中不匹配但它在 CPU 上工作以及如何解决这个问题?
PyTorch 是否有一个全局标志,可以只将所有类型更改为 CUDA 类型,而不是搞乱 CPU/GPU 类型?
【问题讨论】:
-
也许您在模型
encoder_test和decoder_test以及Variabledecoder_context中忘记了.cuda() -
谢谢@MauelLagunas!事实上,
encoder_hidden和decoder_context没有.cuda()
标签: python torch pytorch tensor