这个 CNN 的降维似乎违背了我对理论的理解答案

【问题标题】：Dimensionality reduction in this CNN seems to be going against my understanding of the theory（这个 CNN 的降维似乎违背了我对理论的理解）
【发布时间】：2017-05-06 15:48:51
【问题描述】：

我有一个具有以下架构的两层 CNN：

这是用 tensorflow 表示的架构：

import os
import tensorflow as tf
import sys
import urllib
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import glob
# Load every CK+ image as a flat 1-D pixel vector; `train` is turned into an
# ndarray further down in the script.
# NOTE(review): glob.glob() returns paths in arbitrary, OS-dependent order while
# ckp_labels is positional -- confirm the two line up (e.g. by sorting the paths).
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'):
    # Open inside a context manager so the file handle is released right away
    # instead of lingering until garbage collection.
    with Image.open(filename) as image_file:
        img = np.asarray(image_file)
    # reshape(img.size) collapses all axes into one.
    train.append(img.reshape(img.size))

if sys.version_info[0] >= 3:
  from urllib.request import urlretrieve
else:
  from urllib import urlretrieve

# Path prefix for the summary writer's output directory and for the projector
# sprite / metadata files.
LOGDIR = 'log3/'
# Raw-content base URL of the TensorBoard-TF-Dev-Summit-Tutorial repository.
# NOTE(review): not referenced anywhere in the visible code -- confirm it is still needed.
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'

### CK+ LABELS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
# Arrange the integer class ids as a single column, i.e. one row per sample.
labels_test = np.asarray(ckp_labels)[:, np.newaxis]

# Learn the set of distinct ids, then expand every label into a dense
# one-hot row.
enc = OneHotEncoder().fit(labels_test)
labels_final = enc.transform(labels_test).toarray()

# Stack the list of flattened images into a single array.
train = np.asarray(train)

# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
  """Build a conv -> ReLU -> 2x2 max-pool stage inside the name scope `name`.

  Args:
    input: tensor handed to tf.nn.conv2d.
    size_in: number of input channels (third dimension of the kernel).
    size_out: number of output channels (fourth dimension of the kernel).
    name: name scope under which the ops are created.

  Returns:
    The max-pooled activations.
  """
  with tf.name_scope(name):
    # 17x17 kernels drawn from a truncated normal; biases start at 0.1.
    kernel_shape = [17, 17, size_in, size_out]
    weights = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1), name="W")
    biases = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")

    # Stride-1 convolution with SAME padding, followed by ReLU.
    feature_maps = tf.nn.conv2d(input, weights, strides=[1, 1, 1, 1], padding="SAME")
    activated = tf.nn.relu(feature_maps + biases)

    # Histogram summaries, emitted in the order weights, biases, activations.
    for tag, tensor in (("weights", weights), ("biases", biases), ("activations", activated)):
      tf.summary.histogram(tag, tensor)

    # 2x2 window moved with stride 2, SAME padding.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(activated, ksize=window, strides=window, padding="SAME")

# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
  """Build a fully connected layer with ReLU inside the name scope `name`.

  Args:
    input: tensor multiplied by the [size_in, size_out] weight matrix.
    size_in: number of input features.
    size_out: number of output units.
    name: name scope under which the ops are created.

  Returns:
    relu(input @ W + B).
  """
  with tf.name_scope(name):
    # Weights come from a truncated normal; biases start at 0.1.
    weights = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
    biases = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")

    pre_activation = tf.matmul(input, weights) + biases
    output = tf.nn.relu(pre_activation)

    # Histogram summaries, emitted in the order weights, biases, activations.
    for tag, tensor in (("weights", weights), ("biases", biases), ("activations", output)):
      tf.summary.histogram(tag, tensor)

    return output


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):
  """Build the CNN graph and train it for 300 steps, writing summaries to LOGDIR + hparam.

  Args:
    learning_rate: step size passed to tf.train.AdamOptimizer.
    use_two_conv: if truthy, stack two conv_layer stages (1 -> 32 -> 64
      channels); otherwise use one conv_layer stage (1 -> 64) followed by an
      extra max-pool.
    use_two_fc: if truthy, put a 40-unit fc layer in front of the 7-unit output
      layer; otherwise feed the flattened features to a single fc layer.
    hparam: string appended to LOGDIR to form this run's summary directory.

  Returns:
    None. Batches are read from the module-level `train` and `labels_final`.
  """

  # Start from an empty default graph with a fixed graph-level seed.
  # NOTE(review): the session created here is never closed in this function.
  tf.reset_default_graph()
  tf.set_random_seed(1)
  sess = tf.Session()

  # Setup placeholders, and reshape the data
  x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
  x_image = tf.reshape(x, [-1, 256, 256, 1])
  tf.summary.image('input', x_image, 3)
  y = tf.placeholder(tf.float32, shape=[None, 7], name="labels")

  if use_two_conv:
    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv_out = conv_layer(conv1, 32, 64, "conv2")
  else:
    # conv_layer already ends in a max-pool, so this branch pools twice.
    conv1 = conv_layer(x_image, 1, 64, "conv")
    conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") # padding="VALID" would mean no padding
  # NOTE(review): each conv_layer stage convolves with stride 1 / SAME padding
  # (spatial size unchanged) and then pools 2x2 with stride 2 (size halved), so a
  # 256x256 input leaves two stages as 64x64x64 per example. 55 * 55 * 64 does
  # not match that, and this reshape fails at run time.
  flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])


  if use_two_fc:
    fc1 = fc_layer(flattened, 55 * 55 * 64, 40, "fc1")
    embedding_input = fc1
    embedding_size = 40
    # fc_layer applies ReLU, so the values used as logits are non-negative.
    logits = fc_layer(fc1, 40, 7, "fc2")
  else:
    # NOTE(review): 7*7*64 and the 10 output units look like MNIST leftovers --
    # they match neither `flattened` above nor the 7-column label placeholder.
    embedding_input = flattened
    embedding_size = 7*7*64
    logits = fc_layer(flattened, 7*7*64, 10, "fc")

  # Mean softmax cross-entropy over the batch.
  with tf.name_scope("xent"):
    xent = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=y), name="xent")
    tf.summary.scalar("xent", xent)

  with tf.name_scope("train"):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

  # Fraction of examples whose arg-max prediction equals the arg-max label.
  with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

  summ = tf.summary.merge_all()


  # Variable backing the projector embedding (1024 rows).
  # NOTE(review): `assignment` and `saver` are created but never run or used in
  # this function, so the embedding stays at its zero initial value here.
  embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
  assignment = embedding.assign(embedding_input)
  saver = tf.train.Saver()

  sess.run(tf.global_variables_initializer())
  writer = tf.summary.FileWriter(LOGDIR + hparam)
  writer.add_graph(sess.graph)

  # Point the TensorBoard projector at the embedding variable and its
  # sprite / metadata files.
  config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
  embedding_config = config.embeddings.add()
  embedding_config.tensor_name = embedding.name
  embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
  embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
  # Specify the width and height of a single thumbnail.
  embedding_config.sprite.single_image_dim.extend([256, 256])
  tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)

  for i in range(300):
    # 25 distinct indices drawn from the first 100 examples only.
    batch_index = random.sample(range(0,100),25)

    # Every 5th step: evaluate accuracy and summaries on the batch before
    # training on it.
    if i % 5 == 0:
      [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train[batch_index], y: labels_final[batch_index]})
      writer.add_summary(s, i)
      print ("train accuracy:", train_accuracy)
    sess.run(train_step, feed_dict={x: train[batch_index], y: labels_final[batch_index]})

def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
  """Return the run tag "lr_<rate><conv tag><fc tag>" for one configuration.

  The rate is rendered in zero-decimal scientific notation (1E-4 -> "1E-04");
  the conv tag is "conv2" or "conv1" and the fc tag is "fc2" or "fc1",
  depending on the truthiness of the matching flag.
  """
  if use_two_conv:
    conv_tag = "conv2"
  else:
    conv_tag = "conv1"

  if use_two_fc:
    fc_tag = "fc2"
  else:
    fc_tag = "fc1"

  rate_tag = "lr_%.0E" % (learning_rate,)
  return rate_tag + conv_tag + fc_tag

def main():
  """Run mnist_model once for every configured hyperparameter combination."""
  # Widen any of these lists to sweep more settings, e.g.
  # [1E-3, 1E-4, 1E-5] for the learning rates or [True, False] for the
  # architecture flags.
  learning_rates = [1E-4]
  fc_options = [True]
  conv_options = [True]

  for learning_rate in learning_rates:
    for use_two_fc in fc_options:
      for use_two_conv in conv_options:
        # Construct a hyperparameter string for each one (example: "lr_1E-03conv2fc2")
        hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
        print('Starting run for %s' % hparam)
        sys.stdout.flush() # this forces print-ed lines to show up.

        # Actually run with the new settings. The flags go by keyword because
        # mnist_model declares use_two_conv *before* use_two_fc; passing them
        # positionally in (fc, conv) order would silently swap the two.
        mnist_model(learning_rate,
                    use_two_conv=use_two_conv,
                    use_two_fc=use_two_fc,
                    hparam=hparam)


if __name__ == '__main__':
  main()

按照我的计算，一切都对得上，但是当我运行代码时，却收到以下错误：

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 6553600 values, but the requested shape requires a multiple of 193600

我在这行代码中遇到了错误：

flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

我真的不确定为什么会这样。是我的计算有问题吗？因为从报错信息来看，conv_out 展平后的大小应该是 [-1, 64*64*64]。

任何帮助将不胜感激。如果您需要更多信息，请告诉我。

【问题讨论】：

您能否更清楚地说明此错误发生的位置以及您期望的尺寸是多少？考虑到大量代码，我很难解决这个问题。
当然，我已经补充了引发此错误的那行代码。我期望的尺寸就是该行代码中的输入：我原以为尺寸是 [55,55,64]，但实际上 [64,64,64] 才能正常运行。不知道为什么。

标签： python tensorflow conv-neural-network dimensionality-reduction

【解决方案1】：

在我看来，您算错了每个卷积/池化层的输出大小。下面说明如何排查这类问题。我把您的代码精简成了这样：

import tensorflow as tf
import numpy as np

def conv_layer(input, size_in, size_out, name="conv"):
  """Build a conv -> ReLU -> 2x2 max-pool stage inside the name scope `name`.

  Args:
    input: tensor handed to tf.nn.conv2d.
    size_in: number of input channels (third dimension of the kernel).
    size_out: number of output channels (fourth dimension of the kernel).
    name: name scope under which the ops are created.

  Returns:
    The max-pooled activations.
  """
  with tf.name_scope(name):
    w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
    # SAME padding, matching the layer this snippet is distilled from: a
    # stride-1 SAME convolution keeps the spatial size, so only the pool
    # below shrinks it (256 -> 128). VALID would give 240 -> 120 instead.
    conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
    act = tf.nn.relu(conv + b)
    # 2x2 window, stride 2: halves height and width.
    return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])

conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")

# Reshape kept exactly as in the question; it is only added to the graph and is
# never evaluated by the run call below.
flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

# A session must exist before anything can be run; without this line the
# snippet stops with a NameError on `sess`.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Feed one all-zero image and print the run-time shape of the first stage's output.
print(sess.run(tf.shape(conv1), {x: np.zeros([1, 256*256])}))

此代码输入一个形状正确的全零张量，并使用 tf.shape() 计算 conv1 输出的形状。我得到的结果是：

[ 1 128 128 32]

这与您计算的数字不符。

我怀疑您把填充算错了，但在不知道您是如何得出顶部表格中那些数字的情况下，很难确定。至少可以确定的是，第一个卷积使用 SAME 填充且步长为 1，因此它的输入和输出具有相同的空间维度。

希望这会有所帮助！

【讨论】：