【Question title】: Pytorch-GPU what am I forgetting to move over to the GPU?
【Posted】: 2021-11-24 10:41:42
【Question description】:

I am getting this error. What am I missing? I feel like I have tried everything. Also, is there a simple way to use only the GPU rather than the CPU? I feel like I have tried all of those options too, such as not using .cuda() anywhere.

This is one of my first neural networks, so please go easy on me. (Most of it comes from a class.)

 RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument target in method wrapper_nll_loss_forward)


import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt 
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

torch.cuda.set_device(0)

def load_data():

    num_workers = 0

    load_data.batch_size = 20

    transform = transforms.ToTensor()


    train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
    test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)


    load_data.train_loader = torch.utils.data.DataLoader(train_data, 
                                batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data, 
                                batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)



def visualize():

    dataiter = iter(load_data.train_loader)
    visualize.images, labels = next(dataiter)
    visualize.images = visualize.images.numpy()

    fig = plt.figure(figsize=(25, 4))
    for idx in np.arange(load_data.batch_size):
        ax = fig.add_subplot(2, load_data.batch_size // 2, idx + 1, xticks=[], yticks=[])
        ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
        ax.set_title(str(labels[idx].item()))
    #plt.show()

def fig_values():
    img = np.squeeze(visualize.images[1])

    fig = plt.figure(figsize = (12,12))
    ax = fig.add_subplot(111)
    ax.imshow(img, cmap='gray')
    width, height = img.shape
    thresh = img.max()/2.5
    for x in range(width):
        for y in range(height):
            val = round(img[x][y],2) if img[x][y] !=0 else 0
            ax.annotate(str(val), xy=(y,x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        color='white' if img[x][y]<thresh else 'black')
    #plt.show()


load_data()
visualize()
fig_values()

class NeuralNet(nn.Module):
    def __init__(self, gpu = True):
        super(NeuralNet, self ).__init__()
        self.fc1 = nn.Linear(28 * 28, 16).cuda()
        self.fc2 = nn.Linear(16, 10).cuda()

    def forward(self, x):
        x = x.view(-1, 28 * 28).cuda()
        x = F.relu(self.fc1(x)).cuda()

        x = self.fc2(x).cuda()

        return x


def training():
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    n_epochs = 100 

    model.train().cuda()

    for epoch in range(n_epochs):
        train_loss = 0.0
        for data, target in load_data.train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()*data.size(0)

        train_loss = train_loss/len(load_data.train_loader.dataset)

        print('Epoch: {} \tTraining Loss: {:.6f}'.format(
            epoch+1, 
            train_loss
            ))


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()

【Question discussion】:

    Tags: neural-network pytorch gpu


    【Solution 1】:

    Your data and target are not on the GPU (and consider removing the duplicated cuda calls).

    You are also making unnecessary cuda() calls that are not needed. Simply put, look at where your data and your model live: move the model to the GPU, move the data and labels to the GPU, and only then feed the data to the model.

    Rather than cuda(), use .to(device); it is safer in the long run and easy to adapt in multi-GPU setups.
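
    In short, the pattern looks like this (model and train_loader here stand in for your own objects):

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)            # parameters live on the GPU when one is available
    for data, target in train_loader:
        data = data.to(device)          # batch on the same device as the model
        target = target.to(device)      # labels too, so the loss can be computed
        output = model(data)

    The full corrected script: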

    import torch.cuda
    import numpy as np
    import time
    from torchvision import datasets
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt 
    import torch.nn as nn
    import torch.nn.functional as F
    from torchsummary import summary
    
    torch.cuda.set_device(0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    def load_data():
    
        num_workers = 0
    
        load_data.batch_size = 20
    
        transform = transforms.ToTensor()
    
    
        train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
        test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
    
    
        load_data.train_loader = torch.utils.data.DataLoader(train_data, 
                                    batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
        test_loader = torch.utils.data.DataLoader(test_data, 
                                    batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
    
    
    
    def visualize():
    
        dataiter = iter(load_data.train_loader)
        visualize.images, labels = next(dataiter)
        visualize.images = visualize.images.numpy()
    
        fig = plt.figure(figsize=(25, 4))
        for idx in np.arange(load_data.batch_size):
            ax = fig.add_subplot(2, load_data.batch_size // 2, idx + 1, xticks=[], yticks=[])
            ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
            ax.set_title(str(labels[idx].item()))
        #plt.show()
    
    def fig_values():
        img = np.squeeze(visualize.images[1])
    
        fig = plt.figure(figsize = (12,12))
        ax = fig.add_subplot(111)
        ax.imshow(img, cmap='gray')
        width, height = img.shape
        thresh = img.max()/2.5
        for x in range(width):
            for y in range(height):
                val = round(img[x][y],2) if img[x][y] !=0 else 0
                ax.annotate(str(val), xy=(y,x),
                            horizontalalignment='center',
                            verticalalignment='center',
                            color='white' if img[x][y]<thresh else 'black')
        #plt.show()
    
    
    load_data()
    visualize()
    fig_values()
    
    class NeuralNet(nn.Module):
        def __init__(self, gpu = True):
            super(NeuralNet, self ).__init__()
            self.fc1 = nn.Linear(28 * 28, 16)
            self.fc2 = nn.Linear(16, 10)
    
        def forward(self, x):
            x = x.view(-1, 28 * 28)
            x = F.relu(self.fc1(x))
    
            x = self.fc2(x)
    
            return x
    
    
    def training():
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
        n_epochs = 100 
    
        model.train()
    
        for epoch in range(n_epochs):
            train_loss = 0.0
            for data, target in load_data.train_loader:
                optimizer.zero_grad()
                ###################################
                data = data.to(device)
                target = target.to(device)
                ###################################
                output = model(data)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()*data.size(0)
    
            train_loss = train_loss/len(load_data.train_loader.dataset)
    
            print('Epoch: {} \tTraining Loss: {:.6f}'.format(
                epoch+1, 
                train_loss
                ))
    
    
    
    
    model = NeuralNet().to(device)
    summary(model, input_size=(1, 28, 28))
    training()
    

    【Discussion】:

      【Solution 2】:

      Apparently your target variable is not on the GPU.

      Also, calling .cuda() inside forward() is a bad idea:

      def forward(self, x):
          x = x.view(-1, 28 * 28).cuda() # BAD
          x = F.relu(self.fc1(x)).cuda() # BAD
          x = self.fc2(x).cuda() # BAD
          return x
      

      Instead, remove all the .cuda() calls from forward() and move the tensors in the main loop:

      for data, target in load_data.train_loader:
          data = data.cuda()
          target = target.cuda()
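
      As a side note: since the DataLoaders above were built with pin_memory=True, the copies can also be made asynchronous. A small sketch of that variant (same loop, only the transfer changes):

      for data, target in load_data.train_loader:
          # non_blocking=True lets the host-to-device copy overlap with compute;
          # it only takes effect because the DataLoader pins its memory
          data = data.cuda(non_blocking=True)
          target = target.cuda(non_blocking=True)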
      

      【Discussion】:
