修改 Caffe VGG 16 以在 PyTorch 上处理 1 通道图像答案

【问题标题】：Modification to Caffe VGG 16 to handle 1 channel images on PyTorch（修改 Caffe VGG 16 以在 PyTorch 上处理 1 通道图像）
【发布时间】：2019-01-23 19:06:41
【问题描述】：

我正在将 VGG16 网络转换为全卷积网络，并修改输入层以接受单通道图像。用于复现问题的完整代码如下。

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

import numpy as np
import torchvision.datasets as datasets
import copy

from torch.utils import model_zoo
from torchvision import models
from collections import OrderedDict

def convolutionalize(modules, input_size):
    """
    Recast `modules` into fully convolutional form.

    Each `nn.Linear` in `modules` is replaced by an `nn.Conv2d` whose kernel
    covers the full spatial extent of the feature map that reaches it and
    whose parameters are copied from the linear layer. Every other module is
    returned as the same object (not a copy).

    Args:
        modules: iterable of modules, applied in the given order.
        input_size: (channels, height, width) of one input, without the batch
            dimension. A zero tensor of this size is pushed through the
            modules to discover the shape seen by each linear layer.

    Returns:
        A list with the modules in their original order, linear layers
        replaced by equivalent convolutions.
    """
    fully_conv_modules = []
    # Only the shapes of the dummy activations matter, so skip autograd.
    with torch.no_grad():
        x = torch.zeros((1,) + tuple(input_size))
        for m in modules:
            if isinstance(m, nn.Linear):
                has_bias = m.bias is not None
                conv = nn.Conv2d(x.size(1), m.out_features,
                                 kernel_size=(x.size(2), x.size(3)),
                                 bias=has_bias)
                # (out, C*H*W) and (out, C, H, W) share the same flat layout.
                conv.weight.copy_(m.weight.view_as(conv.weight))
                if has_bias:
                    conv.bias.copy_(m.bias)
                m = conv
            fully_conv_modules.append(m)
            x = m(x)
    return fully_conv_modules



def vgg16(is_caffe=True):
    """
    Load the VGG-16 net for use as a fully convolutional backbone.

    The torchvision VGG-16 feature and classifier layers are chained, the
    linear layers are turned into convolutions for a 3x224x224 input, and the
    layers are named like the original paper. The final classifier `fc8` is
    then removed and every max-pooling layer is switched to `ceil_mode`.

    Args:
        is_caffe: if true, overwrite the parameters with the ones downloaded
            from the jcjohnson/pytorch-vgg release. Parameters are matched
            purely by position (iteration order of both collections).

    Returns:
        An `nn.Sequential` whose children are named `conv1_1` ... `drop7`.
    """
    net = models.vgg16(pretrained=True)
    # cast into fully convolutional form (as list of layers)
    layers = convolutionalize(list(net.features) + list(net.classifier),
                              (3, 224, 224))
    # name layers like the original paper
    names = [
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3', 'relu4_3', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3', 'relu5_3', 'pool5',
        'fc6', 'relu6', 'drop6', 'fc7', 'relu7', 'drop7', 'fc8',
    ]
    net = nn.Sequential(OrderedDict(zip(names, layers)))

    if is_caffe:
        # substitute original Caffe weights for improved fine-tuning accuracy
        # see https://github.com/jcjohnson/pytorch-vgg
        caffe_params = model_zoo.load_url('https://s3-us-west-2.amazonaws.com/'
                                          'jcjohns-models/vgg16-00b39a1b.pth')
        for new_p, old_p in zip(net.parameters(), caffe_params.values()):
            # view_as reshapes the stored fc weights to the conv layout
            new_p.data.copy_(old_p.view_as(new_p))

    # surgery: decapitate final classifier (the remaining names are kept)
    del net.fc8
    # surgery: keep fuller spatial dims by including incomplete pooling regions
    for m in net.modules():
        if isinstance(m, nn.MaxPool2d):
            m.ceil_mode = True
    return net


class Learner(nn.Module):
    """
    Fully convolutional VGG-16 encoder, optionally taking 1-channel input.

    The encoder is the backbone returned by `vgg16(is_caffe=True)` with its
    last six layers removed.
    """

    def __init__(self, num_classes, singleChannel=False):
        """
        Args:
            num_classes: stored on the instance as `self.num_classes`; it is
                not used anywhere else in this class.
            singleChannel: if true, replace `conv1_1` with a convolution
                that accepts a single input channel.
        """
        super().__init__()

        backbone = vgg16(is_caffe=True)
        # Strip the six trailing layers of the backbone.
        for k in list(backbone._modules)[-6:]:
            delattr(backbone, k)

        # Modify conv1_1 to have 1 input channel.
        # Init the weights of the new channel to the channel-wise mean
        # of the pre-trained conv1_1 weights.
        if singleChannel:
            old_conv1 = backbone._modules['conv1_1']
            new_conv1 = nn.Conv2d(1, old_conv1.out_channels,
                                  kernel_size=old_conv1.kernel_size,
                                  stride=old_conv1.stride,
                                  padding=old_conv1.padding)
            with torch.no_grad():
                new_conv1.weight.copy_(
                    torch.mean(old_conv1.weight, dim=1, keepdim=True))
                new_conv1.bias.copy_(old_conv1.bias)
            backbone._modules['conv1_1'] = new_conv1

        self.encoder = backbone
        self.num_classes = num_classes

    def forward(self, im):
        """
        Encode `im` and return the resulting feature map.

        The encoder is convolutional, so `im` must be a 4D tensor
        (batch, channel, height, width).
        """
        return self.encoder(im)




 # Single-channel learner, kept on the CPU.
 model = Learner(num_classes=2, singleChannel=True)
 model = model.cpu()
 mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=None)
 # Take element [0] of the sample at index 1 and prepend one axis.
 im2arr = np.expand_dims(np.array(mnist_trainset[1][0]), axis=0)  # shape (1, 28, 28)

 model.train()
 # NOTE: the array passed here is 3D; the text after this snippet reports
 # that this call fails because a 4D tensor is expected.
 x = model(torch.from_numpy(im2arr))

我希望 x 是一个 torch 张量输出，但在最后一行收到了错误消息：“ValueError：预期输入为 4D 张量，但得到的是 3D 张量。”

【问题讨论】：

您需要添加一个批量（batch）维度。请参考这个问题：stackoverflow.com/questions/53852355/… ，它和您的问题似乎很相似。

标签： deep-learning caffe pytorch mnist vgg-net

【解决方案1】：

您需要输入形状为 Batch-Channel-Height-Width，即 4D。在您的情况下，您只有一个通道，因此您“挤出”了这个单例维度，但 pytorch 不喜欢它！

试试

im2arr = im2arr[np.newaxis, np.newaxis, :, :]  # add a singleton dimension for the channels as well

【讨论】：

另外一个问题是，我还需要把输入转换成 torch 变量（Variable）。
@AbhijeetParida 根据我的经验，简单地将灰度通道复制到三个并将这个“假彩色”图像输入网络会更方便。