【问题标题】:Accuracy per epoch in PyTorch（PyTorch 中每个时期的准确度）
【发布时间】:2020-11-07 01:03:14
【问题描述】:

我使用 pytorch 制作了一个聊天机器人,并希望在每个时期都显示准确性。我不太了解如何做到这一点。我可以显示损失但不知道如何显示我的准确性

这是我的代码:-

from nltk_utils import tokenize, stem, bag_of_words
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
from torch.autograd import Variable

# Accumulators filled while parsing new.json below.
all_words = []   # every token seen across all responses
tags = []        # unique intent tags, in first-seen order
xy = []          # (tokenized_response, tag) training pairs

# Question lists bucketed by proficiency level P1..P4.
questionsP1, questionsP2, questionsP3, questionsP4 = [], [], [], []
questionTag = {}  # question text -> its tag

# Load the intents file and flatten it into training structures.
# Expected schema (inferred from the accesses below — confirm against new.json):
#   {"intents": [{"proficiency": [{"level": "P1".."P4",
#                                  "questions": [{"tag", "question", "responses": [...]}]}]}]}
with open('new.json', encoding="utf8") as file:
        data = json.load(file)

for intent in data["intents"]:
    for proficiency in intent["proficiency"]:
        for questions in proficiency["questions"]:
            # NOTE(review): training pairs are built from *responses*, not
            # questions — this is why the asker reports the bot "works in
            # reverse" (answers are mapped to tags, then questions are
            # printed back at inference time).
            for responses in questions["responses"]:
                wrds = tokenize(responses)
                all_words.extend(wrds)
                xy.append((wrds, questions["tag"]))

            # Duplicate tags are printed as a diagnostic, not skipped.
            if questions["tag"] in tags:
                print(questions["tag"])

            if questions["tag"] not in tags:
                tags.append(questions["tag"])

            # Bucket the question text by proficiency level and remember
            # which tag each question belongs to.
            if proficiency["level"] == "P1":
                questionsP1.append(questions["question"])
                questionTag[questions["question"]]=questions["tag"]

            if proficiency["level"] == "P2":
                questionsP2.append(questions["question"])
                questionTag[questions["question"]]=questions["tag"]

            if proficiency["level"] == "P3":
                questionsP3.append(questions["question"])
                questionTag[questions["question"]]=questions["tag"]

            if proficiency["level"] == "P4":
                questionsP4.append(questions["question"])
                questionTag[questions["question"]]=questions["tag"]

# Drop bare punctuation tokens, stem what remains, then dedupe and sort so
# every run produces the same vocabulary ordering (stable feature indices).
ignore_words = ['?', '!', '.', ',']
stemmed = (stem(token) for token in all_words if token not in ignore_words)
all_words = sorted(set(stemmed))
tags = sorted(set(tags))


# Vectorize each (tokens, tag) pair into a bag-of-words feature vector and
# the tag's index in the sorted tag list.
X_train = []
y_train = []

for tokens, tag in xy:
    features = bag_of_words(tokens, all_words)
    print(features)  # debug: show each feature vector as it is built
    X_train.append(features)
    y_train.append(tags.index(tag))
print(y_train)  # debug: full label list

X_train = np.array(X_train)
y_train = np.array(y_train)

class ChatDataset(Dataset):
    """Thin Dataset wrapper over the module-level X_train / y_train arrays."""

    def __init__(self):
        # Keep references to the already-built training arrays; no copying.
        self.x_data = X_train
        self.y_data = y_train
        self.n_samples = len(X_train)

    def __getitem__(self, index):
        # One (features, label) pair, as expected by DataLoader.
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples

# Hyperparameters
batch_size = 8
hidden_size = 8
output_size = len(tags)        # one logit per intent tag
input_size = len(X_train[0])   # bag-of-words vector length == vocabulary size
learning_rate = 0.001
num_epochs = 994

dataset = ChatDataset()
# shuffle=True reorders samples each epoch; num_workers=2 loads batches in
# background processes.
train_loader = DataLoader(dataset = dataset, batch_size=batch_size, shuffle = True, num_workers = 2)


device = 'cpu'
# NeuralNet is project-defined in model.py (imported above).
model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer: CrossEntropyLoss expects raw logits + integer labels.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# Training loop. Fixes two issues in the original: it printed the loss once
# per *batch* (994 epochs x batches = console spam), and it never computed
# the per-epoch accuracy the author asked for. A prediction counts as
# correct when the argmax over the output logits equals the label index.
for epoch in range(num_epochs):
    correct = 0
    total = 0
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate accuracy stats; .item() keeps `correct` a plain int.
        total += labels.size(0)
        correct += (torch.argmax(outputs, dim=1) == labels).sum().item()

    # Guard against an empty loader so we never divide by zero.
    accuracy = correct / total if total else 0.0
    print(f'epoch {epoch + 1}/ {num_epochs}, loss={loss.item(): .4f}, accuracy={accuracy: .4f}')

print(f'final loss, loss={loss.item(): .4f}')

# Bundle everything needed to reload and run the model later: weights plus
# the layer sizes and the vocabulary/tag lists that fix feature indices.
data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    output_size=output_size,
    hidden_size=hidden_size,
    all_words=all_words,
    tags=tags,
)

FILE = "data.pth"
torch.save(data, FILE)

with open('new.json', 'r') as f:
    intents = json.load(f)

bot_name = "Sam"
# Fix: inference was running with the model still in training mode and with
# autograd tracking every forward pass. eval() switches train/eval-sensitive
# layers (e.g. dropout, batchnorm — if the project's NeuralNet has any) to
# inference behavior, and no_grad() skips building the autograd graph.
model.eval()
while True:
    sentence = input("You: ")
    if sentence == 'quit':
        break

    # Vectorize the user input exactly like the training data.
    sentence = tokenize(sentence)
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])
    X = torch.from_numpy(X)

    with torch.no_grad():
        output = model(X)
    _, predicted = torch.max(output, dim=1)
    tag = tags[predicted.item()]
    print(tag)

    # Confidence of the predicted class.
    probs = torch.softmax(output, dim=1)
    probs = probs[0][predicted.item()]
    print( probs.item() )

    # Only answer when reasonably confident; print every question that
    # carries the predicted tag.
    if probs.item() > 0.75:
        for intent in intents["intents"]:
            for proficiency in intent["proficiency"]:
                for questions in proficiency["questions"]:
                    if questions["tag"] == tag:
                        print(f'{bot_name}: {questions["question"]}')
    else:
        print(f'{bot_name}: Probability Too Low')

print(f'Training Complete. File saved to {FILE}')

我的聊天机器人正在反向工作...我正在尝试将答案映射到正确的问题。 任何帮助将不胜感激。

【问题讨论】:

    标签: machine-learning nlp pytorch


    【解决方案1】:

    根据您的代码，labels 中保存的是每个样本在 outputs 中应取最大值的类别索引；当模型输出的最大值恰好落在该索引上时，该样本即被计为一次正确预测。

    所以要计算验证准确度:

    # Compute validation accuracy: a sample is correct when the argmax of
    # the model's logits equals its label index.
    correct = 0
    total = 0
    model.eval()               # switch train/eval-sensitive layers to eval behavior
    with torch.no_grad():      # no autograd graph needed for evaluation
        for (words, labels) in validation_loader:
            words = words.to(device)
            labels = labels.to(device)
            total += labels.shape[0]

            outputs = model(words)

            # .item() keeps `correct` a plain Python int; the original
            # `torch.sum(...)` made `correct` a 0-dim tensor, so the final
            # division produced a tensor instead of a float.
            correct += (labels == outputs.argmax(dim=-1)).sum().item()

    accuracy = correct / total
    model.train()              # restore training mode before further training
    

    【讨论】:

    • 在再次训练之前不要忘记将模型设置回model.train()。您的模型可能不使用在训练和评估之间具有不同行为的层,但如果使用(例如 dropout 或 batchnorm),那么这是一个令人讨厌的错误。
    猜你喜欢
    • 1970-01-01
    • 2019-01-01
    • 2023-02-16
    • 2017-10-13
    • 2020-04-04
    • 1970-01-01
    • 2020-11-07
    • 1970-01-01
    • 2021-11-15
    相关资源
    最近更新 更多