【Posted】: 2018-11-05 00:23:44
【Problem description】:
I'm a bit stuck here. I'm not new to TensorFlow, even though I haven't done much work with it yet. Here is my problem:
I want to classify texts into 7 different categories, and to do so I convert the strings to numbers with the tf-idf method. From there I convert the sparse matrix to a dense one, which gives me a 2D matrix of shape (179, 482). I split it into a training set of 150 observations and a test set of 29 observations. I then set up my model as follows:
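For reference, the preprocessing behind the getData() call used below looks roughly like this (only a sketch with a dummy corpus; I use sklearn's TfidfVectorizer, and on my real data the dense matrix comes out as (179, 482) with one-hot labels over the 7 classes):

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

def getData():
    # Placeholder corpus: my real data is 179 raw text strings and 7 classes
    documents = ["first example text", "another short document", "one more sample"]
    labels = [0, 3, 6]                                  # placeholder class indices in 0..6
    vectorizer = TfidfVectorizer()
    tfidf_sparse = vectorizer.fit_transform(documents)  # scipy sparse matrix, shape (n_docs, vocab_size)
    data = tfidf_sparse.toarray()                       # dense 2D float matrix, (179, 482) on my corpus
    targets = np.eye(7)[labels]                         # one-hot labels, shape (n_docs, 7)
    return data, targets

The exact vectorizer settings shouldn't matter for the question; the only thing the network sees is the dense (179, 482) float matrix and the (179, 7) one-hot targets.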
import numpy as np
import tensorflow as tf

'''
-------------------------------------------------------------------------
Convolution 2D avec RELU
-------------------------------------------------------------------------
'''
def conv1d(x, W, b, name_conv, name_bias, name_relu, strides=1):
    tensor = tf.nn.conv1d(x, W, strides, padding="SAME", name=name_conv)
    tensor = tf.nn.bias_add(tensor, b, name=name_bias)
    return tf.nn.relu(tensor, name=name_relu)
'''
-------------------------------------------------------------------------
Pooling max
-------------------------------------------------------------------------
'''
def maxpool2d(x, name, k=2):
    return tf.nn.max_pool(x, [1, k, k, 1], [1, k, k, 1], padding="SAME", name=name)
'''
-------------------------------------------------------------------------
Modele du reseau convolutif :
CONV1-RELU-CONV2-RELU-FCL-Prediction
-------------------------------------------------------------------------
'''
def conv_net(x, poids, biais):
    # Mise en forme de l'image d'entree
    x = tf.reshape(x, shape=[-1, 482, 1])
    conv1 = conv1d(x, poids['wconv1'], biais['bconv1'], name_conv='conv1', name_bias='bias1', name_relu='relu1')
    #conv1 = maxpool2d(conv1, k=2, name='pool1')
    conv2 = conv1d(conv1, poids['wconv2'], biais['bconv2'], name_conv='conv2', name_bias='bias2', name_relu='relu2')
    #conv2 = maxpool2d(conv2, k=2, name='pool2')
    # Mise en forme des activations de la seconde couche cachee pour l'entree de la couche completement connectee
    fcl = tf.reshape(conv2, [-1, poids['wfcl'].get_shape().as_list()[0]], name='reshape')
    fcl = tf.add(tf.matmul(fcl, poids['wfcl']), biais['bfcl'], name='fcl')
    fcl = tf.nn.relu(fcl, name='relu3')
    # Dropout
    #fcl = tf.nn.dropout(fcl, dropout, name='dropout')
    # Couche de sortie
    out = tf.add(tf.matmul(fcl, poids['out']), biais['out'], name='out')
    return out
if __name__ == '__main__':
    '''
    -------------------------------------------------------------------------
    Parametres du reseau
    -------------------------------------------------------------------------
    '''
    learning_rate = 0.00001
    num_epochs = 10
    batch_size = 1
    dropout = 0.75  # ici probabilite de garder le neurone
    logs_path = "tensorflow_logs"

    data, targets = getData()
    data = np.float32(data)
    targets = np.float32(targets)
    x_train = data[:150]
    y_train = targets[:150]
    x_test = data[150:]
    y_test = targets[150:]
    num_examples = x_train.shape[0]
    num_input = x_train.shape[1]
    num_classes = y_train.shape[1]

    # Affichage des informations par pas de temps
    display_step = 10

    # Espaces reserves qui vont etre remplis par les tenseurs representant l'ensemble des images et des labels lors de l'apprentissage
    x = tf.placeholder(tf.float32, [None, num_input], name='data')
    y = tf.placeholder(tf.float32, [None, num_classes], name='labels')
    #keep_prob = tf.placeholder(tf.float32, name='keep_prob')

    # Stockage des poids et biais dans des variables TF
    '''
    TODO : initialiser avec une loi normale des variables tensorFlow :
    - wconv1 et bcconv1 pour CONV1. wconv1 est un banc de 32 filtres 5*5*1. bconv1 a une taille adaptee
    - wconv2 et bconv2 pour CONV2. wcconv2 est un banc de 32 filtres 5*5*32*64. bconv2 a une taille adaptee
    - wfcl et bfcl pour la couche completement connectee : wfcl est un banc de filtres 7*7*64 a 1024 sorties. bfcl a une taille adaptee
    - out (poids et biais) ont une taille adaptee pour la classification des donnees MNIST
    '''
    poids = {
        'wconv1': tf.Variable(tf.random_normal([5, 1, 32])),
        'wconv2': tf.Variable(tf.random_normal([5, 32, 64])),
        'wfcl': tf.Variable(tf.random_normal([64, 1024])),
        'out': tf.Variable(tf.random_normal([1024, num_classes]))
    }
    biais = {
        'bconv1': tf.Variable(tf.random_normal([32])),
        'bconv2': tf.Variable(tf.random_normal([64])),
        'bfcl': tf.Variable(tf.random_normal([1024])),
        'out': tf.Variable(tf.random_normal([num_classes]))
    }
    # Construction du modele
    pred = conv_net(x, poids, biais)
    print(pred.shape)

    # Fonction de perte et procedure d'optimisation
    # TODO : entropie croisee avec logits, algorithme d'optimisation ADAM
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Evaluation du modele
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Initialisation des variables
    init = tf.global_variables_initializer()

    tf.summary.scalar("cost", cost)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary_op = tf.summary.merge_all()
    # Creation d'une session TF pour executer le programme
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(logs_path + '/train', graph=tf.get_default_graph())
        test_writer = tf.summary.FileWriter(logs_path + '/test', graph=tf.get_default_graph())

        # Entrainement
        total_batch = int(num_examples / batch_size)
        for epoch in range(num_epochs):
            # Entrainement sur les batchs d'images
            for step in range(total_batch):
                perm = np.arange(num_examples)
                np.random.shuffle(perm)
                indices = perm[0:batch_size]
                batch_x = x_train[indices]
                batch_y = y_train[indices]
                _, summary = sess.run([optimizer, merged_summary_op],
                                      feed_dict={x: batch_x, y: batch_y})
                train_writer.add_summary(summary, epoch * total_batch + step)
                if step % display_step == 0:
                    loss, acc, summaryt = sess.run([cost, accuracy, merged_summary_op],
                                                   feed_dict={x: batch_x, y: batch_y})
                    test_writer.add_summary(summaryt, epoch * total_batch + step)
                    print("Iteration " + str(epoch * total_batch + step) + ", Precision = " + "{:.5f}".format(acc))

        # Test
        print("Test:", sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
When I keep my summaries, I get this error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'data' with dtype float and shape [?,482]
So I am a bit lost, because when I remove the summaries like this:
for step in range(total_batch):
    perm = np.arange(num_examples)
    np.random.shuffle(perm)
    indices = perm[0:batch_size]
    batch_x = x_train[indices]
    batch_y = y_train[indices]
    sess.run([optimizer], feed_dict={x: batch_x, y: batch_y})
    #train_writer.add_summary(summary, epoch*total_batch+step)
    if step % display_step == 0:
        loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
        #test_writer.add_summary(summaryt, epoch*total_batch+step)
        print("Iteration " + str(epoch * total_batch + step) + ", Precision = " + "{:.5f}".format(acc))
my model learns (it learns almost nothing, but it is still doing something).
Can anyone see what I am doing wrong? I have to admit I have always worked with 4D arrays, mostly for image recognition, so this case is new to me.
Thanks to anyone kind enough to answer my question.
P.S.: Sorry for the French comments, but I believe you can still follow the code.
【Comments】: