【发布时间】:2021-11-24 20:52:08
【问题描述】:
我有 2833 行、6 个特征和 8 个标签的数据 然后我根据标签将数据分为训练和测试。 在 LSTM 建模时,我收到以下错误:
ValueError: cannot reshape array of size 11874 into shape (1979,64,6)
这是我的代码:
df_raw_primer = pd.read_excel(path_data)
# Get unique label
unique_labels = df_raw_primer['label'].unique()
split_factor=0.7
train_data = pd.DataFrame(columns=df_raw_primer.columns)
test_data = pd.DataFrame(columns=df_raw_primer.columns)
unique_lengths = {}
for uni in unique_labels:
unique_lengths[uni] = int(len(df_raw_primer[df_raw_primer.label == uni]) * split_factor)
for uni in unique_labels:
for _, row in df_raw_primer.iterrows():
if(row['label'] == uni):
if(unique_lengths[uni]): # if unique klength is not equal to 0
train_data = train_data.append({'label': row['label'],
'gyro x': row['gyro x'],
'gyro y': row['gyro y'],
'gyro z': row['gyro z'],
'acc x': row['acc x'],
'acc y': row['acc y'],
'acc z': row['acc z']},
ignore_index=True)
unique_lengths[uni] = unique_lengths[uni] - 1 # minus unique lengths value
else:
test_data = test_data.append({'label': row['label'],
'gyro x': row['gyro x'],
'gyro y': row['gyro y'],
'gyro z': row['gyro z'],
'acc x': row['acc x'],
'acc y': row['acc y'],
'acc z': row['acc z']},
ignore_index=True)
sliceInput_train_data = train_data.iloc[:, 0:-1]
sliceTarget_train_label = train_data['label']
sliceInput_test_data = test_data.iloc[:, 0:-1]
sliceTarget_test_label = test_data['label']
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
verbose, epochs, batch_size = 0, 15, 64
n_timesteps, n_features, n_outputs = 64, 6, 8
model = Sequential()
model.add(LSTM(32, input_shape=(n_timesteps,n_features), return_sequences=True))
model.add(Dropout(0.1))
model.add(attention(return_sequences=False)) # receive 3D and output 2D
model.add(Dense(n_outputs, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
loss, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
attention_weights = model.layers[3].get_weights()[0]
heat_map = sb.heatmap(attention_weights)
pyplot.show()
return accuracy
# summarize scores
def summarize_results(scores):
print(scores)
m, s = mean(scores), std(scores)
print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))
# run an experiment
def run_experiment(repeats=10):
trainX, trainy, testX, testy = sliceInput_train_data, sliceTarget_train_label, sliceInput_test_data, sliceTarget_test_label
trainX = np.array(trainX)
trainX = np.reshape(trainX, (trainX.shape[0], 64, trainX.shape[1]))
scores = list()
for r in range(repeats):
score = evaluate_model(trainX, trainy, testX, testy)
score = score * 100.0
print('>#%d: %.3f' % (r+1, score))
scores.append(score)
# summarize results
summarize_results(scores)
run_experiment()
我真的很困惑,我不知道 11874 来自哪里,而我的 trainX 形状数据是 print(trainX.shape) = (1979, 6) 我的数据有问题吗?
【问题讨论】:
标签: python dataframe deep-learning lstm reshape