【发布时间】:2018-04-11 10:21:57
【问题描述】:
有没有比下面这种做法更高效的方法,可以从数据集中提取特征:
def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Images are read with ``ImageDataGenerator.flow_from_directory``, rescaled
    by 1/255, resized to ``Image_Size`` x ``Image_Size`` and fed to
    ``conv_base.predict`` one batch at a time.

    Args:
        directory: Path passed to ``flow_from_directory``.
        sample_count: Number of samples to extract; sizes the returned arrays.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays whose first dimension is
        ``sample_count``. ``features`` holds the ``conv_base`` outputs and
        ``labels`` the matching one-hot rows produced by the generator.
    """
    generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        directory,
        target_size=(Image_Size, Image_Size),
        batch_size=batch_size,
        class_mode='categorical')

    # Size the buffers from the model and the generator instead of hard-coding
    # (6, 6, 512) and 6, so a different Image_Size or class count still works.
    features = np.zeros(
        shape=(sample_count,) + tuple(conv_base.output_shape[1:]))
    labels = np.zeros(shape=(sample_count, len(generator.class_indices)))

    written = 0
    print('Entering for loop...')
    for batch_number, (inputs_batch, labels_batch) in enumerate(generator,
                                                                start=1):
        features_batch = conv_base.predict(inputs_batch)
        # Use the real batch length rather than a fixed stride of 20: the
        # batch may be shorter than batch_size, and the last write must not
        # run past sample_count.
        take = min(len(features_batch), sample_count - written)
        features[written:written + take] = features_batch[:take]
        labels[written:written + take] = labels_batch[:take]
        written += take
        print(batch_number)
        # The generator yields batches indefinitely, so stop explicitly.
        if written >= sample_count:
            break
    return features, labels
由于我的数据集的大小,这个过程需要相当长的时间,我想知道是否有更好的方法来做到这一点?
提前致谢:)
完整代码:
from keras import layers
from keras import models
from keras import losses
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
import matplotlib.pyplot as plt
import numpy as np
# Directories for the three dataset splits.
Train_DIR = '/Users/eoind/food/train'
Test_DIR = '/Users/eoind/food/test'
Validation_DIR = '/Users/eoind/food/validation'

Image_Size = 200  # Size of input images to be scaled to

# Number of samples to extract from each split.
Train_Samples = 6000
Validation_Samples = 3000
Test_Samples = 3000

num_epochs = 30
batch_size = 20
# Floor division keeps this an integer step count (plain "/" gives 300.0).
steps_per_epoch = Train_Samples // batch_size

# VGG16 with ImageNet weights and without its top layers, used as a feature
# extractor for Image_Size x Image_Size RGB inputs.
conv_base = VGG16(weights='imagenet', include_top=False,
                  input_shape=(Image_Size, Image_Size, 3))
conv_base.summary()
print('Conv_Base Summary')
def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Images are read with ``ImageDataGenerator.flow_from_directory``, rescaled
    by 1/255, resized to ``Image_Size`` x ``Image_Size`` and fed to
    ``conv_base.predict`` one batch at a time.

    Args:
        directory: Path passed to ``flow_from_directory``.
        sample_count: Number of samples to extract; sizes the returned arrays.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays whose first dimension is
        ``sample_count``. ``features`` holds the ``conv_base`` outputs and
        ``labels`` the matching one-hot rows produced by the generator.
    """
    generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        directory,
        target_size=(Image_Size, Image_Size),
        batch_size=batch_size,
        class_mode='categorical')

    # Size the buffers from the model and the generator instead of hard-coding
    # (6, 6, 512) and 6, so a different Image_Size or class count still works.
    features = np.zeros(
        shape=(sample_count,) + tuple(conv_base.output_shape[1:]))
    labels = np.zeros(shape=(sample_count, len(generator.class_indices)))

    written = 0
    print('Entering for loop...')
    for batch_number, (inputs_batch, labels_batch) in enumerate(generator,
                                                                start=1):
        features_batch = conv_base.predict(inputs_batch)
        # Use the real batch length rather than a fixed stride of 20: the
        # batch may be shorter than batch_size, and the last write must not
        # run past sample_count.
        take = min(len(features_batch), sample_count - written)
        features[written:written + take] = features_batch[:take]
        labels[written:written + take] = labels_batch[:take]
        written += take
        print(batch_number)
        # The generator yields batches indefinitely, so stop explicitly.
        if written >= sample_count:
            break
    return features, labels
# Pre-compute the conv_base outputs once for every split so the dense
# classifier below trains on plain arrays.
train_features, train_labels = extract_features(Train_DIR, Train_Samples)
validation_features, validation_labels = extract_features(Validation_DIR,
                                                          Validation_Samples)
test_features, test_labels = extract_features(Test_DIR, Test_Samples)
print('Extracting Features')

# Flatten each feature map into a vector for the Dense layers. The width is
# taken from the extracted array rather than repeating 6 * 6 * 512.
feature_dim = int(np.prod(train_features.shape[1:]))
train_features = np.reshape(train_features, (Train_Samples, feature_dim))
validation_features = np.reshape(validation_features,
                                 (Validation_Samples, feature_dim))
test_features = np.reshape(test_features, (Test_Samples, feature_dim))
print('Reshaping Features')

# The labels are one-hot rows (class_mode='categorical'), so the output layer
# needs one softmax unit per class to match categorical_crossentropy; a single
# sigmoid unit does not match the label shape.
num_classes = train_labels.shape[1]

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=feature_dim))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(num_classes, activation='softmax'))
model.summary()
print('Model Summary')

model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss=losses.categorical_crossentropy,
              metrics=['acc'])
print('Compiling Model')

# With in-memory arrays, batch_size alone determines the number of steps per
# epoch; steps_per_epoch is not passed alongside it.
hist = model.fit(train_features, train_labels,
                 epochs=num_epochs,
                 batch_size=batch_size,
                 verbose=1,
                 validation_data=(validation_features, validation_labels))
print('Fitting Model')

train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_acc = hist.history['acc']
val_acc = hist.history['val_acc']
xc = range(num_epochs)

# Select the style before any figure is created so both plots share it.
plt.style.use(['classic'])

fig1 = plt.figure(1, figsize=(7, 5))
plt.plot(xc, train_loss)
plt.plot(xc, val_loss)
plt.xlabel('Number of Epochs')
plt.ylabel('Loss')
plt.title('Training Loss Vs. Validation Loss')
plt.grid(True)
plt.legend(['Training', 'Validation'])
fig1.savefig('loss.png')

fig2 = plt.figure(2, figsize=(7, 5))
plt.plot(xc, train_acc)
plt.plot(xc, val_acc)
plt.xlabel('Number of Epochs')
plt.ylabel('Accuracy')
plt.title('Training Accuracy Vs. Validation Accuracy')
plt.grid(True)
plt.legend(['Training', 'Validation'], loc='upper left')
fig2.savefig('acc.png')

model.save('food_pretrained.h5')  # Save model
IPython 控制台输出:
Layer (type) Output Shape Param #
=================================================================
input_19 (InputLayer) (None, 200, 200, 3) 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 200, 200, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 200, 200, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 100, 100, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 100, 100, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 100, 100, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 50, 50, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 50, 50, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 50, 50, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 50, 50, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 25, 25, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 25, 25, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 25, 25, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 25, 25, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 12, 12, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 12, 12, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 6, 6, 512) 0
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
Conv_Base Summary
Found 6000 images belonging to 6 classes.
Entering for loop...
1
2
3
4
5
6
7
8
9
10
11
12...
【问题讨论】:
-
我不确定您的期望,如果数据集很大,并且您使用的是 GPU,那么除了等待整个数据集被处理之外,没有什么可做的。无论如何,这比在数据集上训练模型花费的时间更少。
标签: python deep-learning keras anaconda conv-neural-network