无法在 TensorFlow Estimator 中训练 Keras 预训练模型答案

【问题标题】：Cannot Train Keras Pre-trained Model in Tensorflow Estimator无法在 TensorFlow Estimator 中训练 Keras 预训练模型
【发布时间】：2019-01-20 12:02:19
【问题描述】：

在使用 Estimator 类实现具有自定义数据的 Tensorflow keras VGG16 预训练模型时，它会抛出错误“ValueError：在 Keras 模型中找不到名称为“image”的输入。它需要匹配以下之一以下内容：input_30"。

在这段代码中，我没有将输入张量重新整形为 (-1, 224,224,3)，而是形状为 (224,224,3)。我在解析器函数中尝试了这两种形状 - 要在数据集 API 部分中映射到的函数。

谁能指出代码中的错误在哪里。如果有任何不必要的错误，请随时更改代码。

这是在 Colab 中完成的，所以我给它一个 link 来检查错误，以防你想检查它。

import tensorflow as 
from keras.utils import to_categorical
import cv2 as cv
import glob
import sys
import os
import numpy as np
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from sklearn.preprocessing import LabelEncoder

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

#Function to load image 
def load_image(addr):
    img = cv.imread(addr)
    if img is None:
        return None
    img = cv.resize(img, (224, 224), interpolation=cv.INTER_CUBIC)
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    return img

#Function to create TFRecords
def create_tfrecords(filename, address, labels):
    writer = tf.python_io.TFRecordWriter(filename)
    for i in range(len(address)):
        img = load_image(address[i])
        label = labels[i]

        if img is None:
            continue
        feature = {
        'image_raw': _bytes_feature(img.tostring()),
        'label': _int64_feature(label)}

        example = 
        tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())

    writer.close()
    sys.stdout.flush()

#Creating labels from custom data
def create_labels():
    labels = []
    for i in os.listdir('training'):
        for l in enumerate(os.listdir('training/{}'.format(i))):
            labels.append(i)
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    labels = to_categorical(np.array(labels))
    return labels

labels = create_labels()
#Generating image locations
train_path = 'training/*/*.jpg' #training/class/images
address = glob.glob(train_path)

#Splitting train and test data
x_train = address[0:int(0.8*len(address))]
y_train = labels[0:int(0.8*len(labels))]

x_test = address[int(0.8*len(address)):]
y_test = labels[int(0.8*len(labels)):]

create_tfrecords('train.tfrecords', x_train, y_train)
create_tfrecords('test.tfrecords', x_test, y_test)

#Keras pre-trainied model
base_model = VGG16(weights='imagenet', input_shape=(224,224,3), 
                                               include_top=False)

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
output = Dense(10, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

for layer in base_model.layers:
    layer.trainable = False

optimizer = tf.keras.optimizers.Adam(lr=1e-5)  

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics= 
                                                               ['accuracy'])

keras_model = tf.keras.estimator.model_to_estimator(keras_model=model)

#Initiating session
sess = tf.Session()
sess.run(tf.global_variables_initializer())

#Defining parser function to extract from the TFRecord files
def parser(record):
    keys_to_features = {
                    'image_raw': tf.FixedLenFeature([], tf.string),
                    'label': tf.FixedLenFeature([], tf.int64)}

    parsed = tf.parse_single_example(record, keys_to_features)
    image = tf.decode_raw(parsed['image_raw'], tf.uint8)
    image = tf.cast(image, tf.float32)
    image = tf.reshape(image, shape = [224, 224, 3])
    labels = tf.cast(parsed['label'], tf.int32)
    return image, labels

#Input Function
 def inp_fn(filename, train, batch_size=16, buffer_size=100):
     dataset = tf.data.TFRecordDataset(filenames=filename)
     dataset = dataset.map(parser)
     if train:
         dataset = dataset.shuffle(buffer_size=buffer_size)
         num_repeat = None
     else:
         num_repeat = 1
     dataset = dataset.repeat(num_repeat)
     dataset = dataset.batch(batch_size=batch_size)
     iterator = dataset.make_one_shot_iterator()
     images_batch, labels_batch = iterator.get_next()
     x = {'image': images_batch}
     y = labels_batch
     return x, y

#Train Input Function
def train_input_fn():
    return inp_fn(filename='train.tfrecords' , train=True)
#Test Input Function
def test_input_fn():
    return inp_fn(filename='test.tfrecords', train=False)

#Training and testing
keras_model.train(input_fn=train_input_fn, steps=1000)

result = keras_model.evaluate(input_fn=test_input_fn)

print('Result:', result)
print('Classification Accuracy : {:4f}'.format(result['accuracy']*100)) 
print('Classification loss: {:.4f}'.format(result['loss']))
sys.stdout.flush()

【问题讨论】：

标签： python tensorflow keras deep-learning training-data

【解决方案1】：

您将以下字典作为输入传递给您的模型：

x = {'image': images_batch}

Keras 尝试将 images_batch 传递给名为 image 的输入张量。对于未命名的base_model.input，情况并非如此。这就是导致您的错误的原因。尝试将 images_batch 这样传递给模型，而不将其包装在字典中（就像您已经在为标签做一样）：

x = images_batch

【讨论】：