I'm following the Attention-based Sequence to Sequence tutorial from the official PyTorch tutorials. (Link: NLP From Scratch: Translation with a Sequence to Sequence Network and Attention, from the PyTorch tutorials in Korean.)
The model itself seems to be built correctly, but when I run backpropagation I get the message below saying the tensor has no autograd function (grad_fn).
- Error message: "RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn"
I can't figure out which part of the code is causing this. Any help would be appreciated.
The full code is below.
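For reference, as far as I understand, this error usually appears when .backward() is called on a tensor that is not attached to the autograd graph. A minimal snippet (made up just for illustration, not from my code) raises the same message:

import torch

x = torch.tensor([1.0, 2.0])   # requires_grad defaults to False
loss = (x * 2).sum()           # result has no grad_fn, detached from autograd
loss.backward()                # -> RuntimeError: element 0 of tensors does not require grad ...

But I can't see where the same thing is happening in my code.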
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from typing import List
SOS_TOKEN = 0
EOS_TOKEN = 1
MAX_LENGTH = 10
eng_prefixes = (
    "i am ", "i m ",
    "he is", "he s ",
    "she is", "she s ",
    "you are", "you re ",
    "we are", "we re ",
    "they are", "they re "
)
class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}  # index <-> word mapping
        self.n_words = 2  # running counter used to assign an index to each new word

    def addSentence(self, sentence):
        for word in sentence.split(" "):
            self.addWord(word)

    def addWord(self, word):
        # if the word appears for the first time
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1  # track how often the word appears
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
# convert unicode -> ascii
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s
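# For reference, an example of what normalizeString produces (illustrative input, not from the dataset):
# normalizeString("Je suis fatigué !") -> "je suis fatigue !"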
def readLangs(lang1, lang2, reverse=False):
    print("Reading lines...")
    # read the file and split it into lines
    lines = open('dataset/data-pytorch-tutorial-seq2seq-attention/%s-%s.txt' % (lang1, lang2), encoding='utf-8').\
        read().strip().split('\n')
    # split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    # reverse pairs if requested, then create the Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)
    return input_lang, output_lang, pairs
def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and \
        len(p[1].split(' ')) < MAX_LENGTH and \
        p[1].startswith(eng_prefixes)

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]
def prepareData(lang1, lang2, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
def indexesFromSentence(lang, sentence):
    # return the index of each word in the sentence as a list
    return [lang.word2index[word] for word in sentence.split(' ')]

def tensorFromSentence(lang, sentence, is_input=False):
    indexes: List[int] = indexesFromSentence(lang, sentence)
    indexes.append(EOS_TOKEN)  # append a token marking the end of the sentence
    tensor = torch.Tensor(indexes).to(torch.long)
    if is_input:
        return F.pad(tensor, pad=(0, MAX_LENGTH - tensor.size()[0])).view(1, -1)
    else:
        return tensor.view(1, -1, 1)

def tensorsFromPair(pair):
    global input_lang, output_lang
    input_tensor = tensorFromSentence(input_lang, pair[0], is_input=True)    # french
    target_tensor = tensorFromSentence(output_lang, pair[1], is_input=True)  # english
    return input_tensor, target_tensor
tensors = (tensorsFromPair(p) for p in pairs)
fren, engs = [], []
for t in tensors:
    fren.append(t[0])
    engs.append(t[1])
x_french = torch.cat(fren, dim=0)
y_english = torch.cat(engs, dim=0)
x_french.shape, y_english.shape
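# Expected here: both tensors have shape (num_pairs, MAX_LENGTH), since every sentence is
# padded to MAX_LENGTH=10; the exact row count depends on how many pairs survive filterPairs.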
from torch.utils.data import Dataset, DataLoader
class FrenchEnglishDataset(Dataset):
    def __init__(self, x_french: torch.Tensor, y_english: List[torch.Tensor]):
        self.x_fren = x_french
        self.y_eng = y_english

    def __len__(self):
        return len(self.x_fren)

    def __getitem__(self, idx):
        x_fren: torch.Tensor = self.x_fren[idx]
        y_eng: torch.Tensor = self.y_eng[idx]
        return x_fren, y_eng
def verbose_shape(*tensors):
    for t in tensors:
        print(t.shape, end=' ')
class Encoder(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        super(Encoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.embed_layer = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=self.embedding_size)
        self.lstm = nn.LSTM(input_size=self.embedding_size, hidden_size=self.hidden_size, num_layers=1, batch_first=True)

    def forward(self, x):
        x = self.embed_layer(x)    # (B, T) -> (B, T, embedding_size)
        hs, (h, c) = self.lstm(x)  # hs: (B, T, hidden_size), h/c: (1, B, hidden_size)
        return hs, h, c
class Decoder(nn.Module):
    def __init__(self, vocab_size, embedding_size, hidden_size):
        super(Decoder, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.embed_layer = nn.Embedding(num_embeddings=self.vocab_size, embedding_dim=self.embedding_size)
        self.lstm = nn.LSTM(input_size=self.embedding_size, hidden_size=self.hidden_size, num_layers=1, batch_first=True)
        self.affine = nn.Linear(in_features=self.hidden_size * 2, out_features=self.vocab_size)

    def forward(self, enc_hs, enc_h, enc_c, y):
        """Written assuming a single decoding step, i.e. one input token per sequence."""
        x = self.embed_layer(y)                                 # (B, 1) -> (B, 1, embedding_size)
        dec_hs, (dec_h, dec_c) = self.lstm(x, (enc_h, enc_c))   # dec_hs: (B, 1, hidden_size)
        #==========
        # Attention
        #==========
        # 1. compute attention weights
        s = torch.bmm(enc_hs, dec_hs.transpose(1, 2))           # (B, T_in, 1)
        a = F.softmax(s, dim=1)
        # 2. weighted selection (context vector)
        c = torch.sum(enc_hs * a, dim=1, keepdim=True)          # (B, 1, hidden_size)
        ch = torch.cat((c, dec_hs), dim=-1)                     # (B, 1, hidden_size * 2)
        z = self.affine(ch).squeeze()                           # (B, vocab_size)
        a = torch.argmax(z, dim=1, keepdim=True)                # (B, 1), predicted token ids
        return a
from torch.optim import Adam
# dataset
dataset = FrenchEnglishDataset(x_french, y_english)
dataloader = DataLoader(dataset, batch_size=128)
# params
french_vocab_size = len(input_lang.word2index)
english_vocab_size = len(output_lang.word2index)
embedding_size = 128
hidden_size = 256
learning_rate = 0.001
# model
encoder = Encoder(french_vocab_size, embedding_size, hidden_size)
decoder = Decoder(english_vocab_size, embedding_size, hidden_size)
# Loss
criterion = nn.CrossEntropyLoss()
# optimizer
encoder_optimizer = Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = Adam(decoder.parameters(), lr=learning_rate)
# train
n_batch = 1
for x_fren, y_eng in dataloader:
    # init gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    # Encoder-forward
    enc_hs, enc_h, enc_c = encoder(x_fren)
    # Decoder-forward (one step at a time)
    B, T = y_eng.size()
    decoder_input = torch.zeros(B, 1, dtype=torch.long)
    decoder_outputs = []
    for t in range(T):
        decoder_output = decoder(enc_hs, enc_h, enc_c, decoder_input)
        decoder_outputs.append(decoder_output)
        decoder_input = y_eng[:, t].view(-1, 1)  # Teacher-forcing
    decoder_outputs = torch.cat(decoder_outputs, dim=1).to(torch.float32)
    # loss
    decoder_labels = y_eng.to(torch.float32)
    loss = criterion(decoder_outputs, decoder_labels)
    print(loss.item())
    loss.backward()
    # update params
    encoder_optimizer.step()
    decoder_optimizer.step()
    print(f"Epoch | N({n_batch})-batch Loss: {loss.item()}")
    n_batch += 1
    break