【问题标题】:Dimensionality reduction in this CNN seems to be going against my understanding of the theory这个 CNN 的降维似乎违背了我对理论的理解
【发布时间】:2017-05-06 15:48:51
【问题描述】:

我有一个具有以下架构的两层 CNN:

这是用 tensorflow 表示的架构:

import os
import tensorflow as tf
import sys
import urllib
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from PIL import Image
import glob
train = []
for filename in glob.glob('/Users/madhavthaker/Documents/CSCI63/Final Project/face-emoticon-master/data/ck+_scaled/*.png'): #assuming gif
    img=np.asarray(Image.open(filename))
    img_flat = img.reshape(img.size)
    train.append(img_flat)

if sys.version_info[0] >= 3:
  from urllib.request import urlretrieve
else:
  from urllib import urlretrieve

LOGDIR = 'log3/'
GITHUB_URL ='https://raw.githubusercontent.com/mamcgrath/TensorBoard-TF-Dev-Summit-Tutorial/master/'

### MNIST EMBEDDINGS ###
ckp_labels = [5, 0, 3, 5, 4, 0, 1, 3, 5, 4, 0, 3, 5, 0, 1, 5, 4, 0, 0, 0, 2, 1, 3, 5, 0, 3, 5, 1, 3, 5, 0, 3, 5, 4, 0, 3, 5, 3, 1, 1, 0, 4, 5, 2, 1, 5, 3, 5, 1, 5, 3, 1, 5, 1, 5, 0, 1, 5, 3, 5, 1, 3, 0, 1, 5, 2, 3, 1, 5, 3, 1, 3, 1, 5, 3, 2, 5, 3, 1, 5, 3, 4, 0, 5, 0, 3, 1, 3, 2, 5, 1, 3, 5, 1, 5, 4, 0, 3, 1, 5, 1, 2, 5, 1, 3, 5, 3, 5, 1, 3, 5, 5, 3, 1, 1, 3, 4, 1, 5, 4, 1, 5, 0, 1, 3, 5, 2, 3, 5, 5, 3, 5, 1, 0, 1, 5, 3, 0, 5, 1, 0, 3, 5, 0, 3, 5, 3, 1, 4, 5, 1, 3, 5, 1, 3, 1, 3, 5, 1, 5, 0, 3, 5, 1, 1, 4, 1, 5, 1, 4, 1, 0, 1, 3, 5, 5, 0, 1, 0, 5, 4, 0, 5, 3, 5, 3, 5, 1, 3, 5, 2, 0, 5, 2, 0, 5, 2, 3, 4, 3, 2, 5, 1, 5, 0, 3, 0, 1, 3, 5, 0, 1, 3, 5, 0, 4, 3, 3, 1, 4, 2, 1, 3, 5, 5, 3, 0, 3, 1, 5, 5, 0, 3, 5, 3, 2, 5, 3, 4, 7, 7, 7, 7, 7, 7, 7, 7, 0, 2, 4, 0, 7, 2, 0, 7, 0, 7, 2, 4, 4, 0, 2, 4, 7, 2]
labels_test = np.array(ckp_labels).reshape(-1,1)

enc = OneHotEncoder()
enc.fit(labels_test)
labels_final = enc.transform(labels_test).toarray()

train = np.asarray(train)

# Add convolution layer
def conv_layer(input, size_in, size_out, name="conv"):
  with tf.name_scope(name):
    #w = tf.Variable(tf.zeros([5, 5, size_in, size_out]), name="W")
    #b = tf.Variable(tf.zeros([size_out]), name="B")
    w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
    conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="SAME")
    act = tf.nn.relu(conv + b)
    tf.summary.histogram("weights", w)
    tf.summary.histogram("biases", b)
    tf.summary.histogram("activations", act)
    return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

# Add fully connected layer
def fc_layer(input, size_in, size_out, name="fc"):
  with tf.name_scope(name):
    w = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
    act = tf.nn.relu(tf.matmul(input, w) + b)
    tf.summary.histogram("weights", w)
    tf.summary.histogram("biases", b)
    tf.summary.histogram("activations", act)
    return act


def mnist_model(learning_rate, use_two_conv, use_two_fc, hparam):

  tf.reset_default_graph()
  tf.set_random_seed(1)
  sess = tf.Session()

  # Setup placeholders, and reshape the data
  x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
  x_image = tf.reshape(x, [-1, 256, 256, 1])
  tf.summary.image('input', x_image, 3)
  y = tf.placeholder(tf.float32, shape=[None, 7], name="labels")

  if use_two_conv:
    conv1 = conv_layer(x_image, 1, 32, "conv1")
    conv_out = conv_layer(conv1, 32, 64, "conv2")
  else:
    conv1 = conv_layer(x_image, 1, 64, "conv")
    conv_out = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") #adding padding "VALID" means no padding
  flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])


  if use_two_fc:
    fc1 = fc_layer(flattened, 55 * 55 * 64, 40, "fc1")
    embedding_input = fc1
    embedding_size = 40
    logits = fc_layer(fc1, 40, 7, "fc2")
  else:
    embedding_input = flattened
    embedding_size = 7*7*64
    logits = fc_layer(flattened, 7*7*64, 10, "fc")

  with tf.name_scope("xent"):
    xent = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=y), name="xent")
    tf.summary.scalar("xent", xent)

  with tf.name_scope("train"):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(xent)

  with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(y, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

  summ = tf.summary.merge_all()


  embedding = tf.Variable(tf.zeros([1024, embedding_size]), name="test_embedding")
  assignment = embedding.assign(embedding_input)
  saver = tf.train.Saver()

  sess.run(tf.global_variables_initializer())
  writer = tf.summary.FileWriter(LOGDIR + hparam)
  writer.add_graph(sess.graph)

  config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
  embedding_config = config.embeddings.add()
  embedding_config.tensor_name = embedding.name
  embedding_config.sprite.image_path = LOGDIR + 'sprite_1024.png'
  embedding_config.metadata_path = LOGDIR + 'labels_1024.tsv'
  # Specify the width and height of a single thumbnail.
  embedding_config.sprite.single_image_dim.extend([256, 256])
  tf.contrib.tensorboard.plugins.projector.visualize_embeddings(writer, config)

  for i in range(300):
    batch_index = random.sample(range(0,100),25)

    if i % 5 == 0:
      [train_accuracy, s] = sess.run([accuracy, summ], feed_dict={x: train[batch_index], y: labels_final[batch_index]})
      writer.add_summary(s, i)
      print ("train accuracy:", train_accuracy)
    sess.run(train_step, feed_dict={x: train[batch_index], y: labels_final[batch_index]})

def make_hparam_string(learning_rate, use_two_fc, use_two_conv):
  conv_param = "conv2" if use_two_conv else "conv1"
  fc_param = "fc2" if use_two_fc else "fc1"
  return "lr_%.0E%s%s" % (learning_rate, conv_param, fc_param)

def main():
  # You can try adding some more learning rates
  #for learning_rate in [1E-3, 1E-4, 1E-5]:
  for learning_rate in [1E-4]:

    # Include "False" as a value to try different model architectures
    #for use_two_fc in [True, False]:
    for use_two_fc in [True]:
      #for use_two_conv in [True, False]:
      for use_two_conv in [True]:
        # Construct a hyperparameter string for each one (example: "lr_1E-3fc2conv2")
        hparam = make_hparam_string(learning_rate, use_two_fc, use_two_conv)
        print('Starting run for %s' % hparam)
        sys.stdout.flush() # this forces print-ed lines to show up.

        # Actually run with the new settings
        mnist_model(learning_rate, use_two_fc, use_two_conv, hparam)


if __name__ == '__main__':
  main()

根据我的数学,一切都检查了,但是当我运行代码时,我收到以下错误:

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 6553600 values, but the requested shape requires a multiple of 193600

我在这行代码中遇到了错误:

flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

我真的不确定为什么会这样。我的数学有问题吗,因为他们假设 conv_out 的扁平大小应该是[-1, 64*64*64]

任何帮助将不胜感激。如果您需要更多信息,请告诉我。

【问题讨论】:

  • 您能否更清楚地说明此错误发生的位置以及您期望的尺寸是多少?考虑到大量代码,我很难解决这个问题。
  • 当然,我已经添加了引发此错误的行。预期尺寸是该代码行中的输入。我预计 [55,55,64] 是尺寸,但 [64,64,64] 有效。不知道为什么

标签: python tensorflow conv-neural-network dimensionality-reduction


【解决方案1】:

在我看来,您计算错了每个卷积/池化层的输出大小。以下是如何解决这个问题。我将您的代码提炼成这样:

import tensorflow as tf
import numpy as np

def conv_layer(input, size_in, size_out, name="conv"):
  with tf.name_scope(name):
    w = tf.Variable(tf.truncated_normal([17, 17, size_in, size_out], stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[size_out]), name="B")
    conv = tf.nn.conv2d(input, w, strides=[1, 1, 1, 1], padding="VALID")
    act = tf.nn.relu(conv + b)
    return tf.nn.max_pool(act, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")


# Setup placeholders, and reshape the data
x = tf.placeholder(tf.float32, shape=[None, 256*256], name="x")
x_image = tf.reshape(x, [-1, 256, 256, 1])

conv1 = conv_layer(x_image, 1, 32, "conv1")
conv_out = conv_layer(conv1, 32, 64, "conv2")

flattened = tf.reshape(conv_out, [-1, 55 * 55 * 64])

sess.run(tf.global_variables_initializer())
print(sess.run(tf.shape(conv1), {x: np.zeros([1, 256*256])}))

此代码提供正确形状的零输入,并使用tf.shape() 计算conv1 输出的形状。我回来了:

[ 1 128 128 32]

这与您计算的数字不符。

我怀疑您错误地计算了填充,但是如果不知道您是如何得出顶部表格中的数字的,就很难说。如果不出意外,第一个卷积具有填充 SAME 和步长 1,因此输入和输出将具有相同的空间维度。

希望这会有所帮助!

【讨论】:

    猜你喜欢
    • 2016-09-26
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2016-09-18
    • 1970-01-01
    • 2010-11-15
    • 1970-01-01
    相关资源
    最近更新 更多