【发布时间】:2021-10-05 15:50:54
【问题描述】:
我有一段简单的 Python 代码,它从摄像头捕获视频并预测面部情绪(代码取自 here,以便您需要时可以运行它)。
我想把这段视频画面放进这个相框里(相框的中心是透明的),并把整体显示出来。我该怎么做?
import numpy as np
import argparse
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Command-line interface: one optional --mode flag; its value selects which
# branch of the script (train or display) runs further down.
ap = argparse.ArgumentParser()
ap.add_argument("--mode", help="train/display")
cli_args = ap.parse_args()
mode = cli_args.mode
# plots accuracy and loss curves
def plot_model_history(model_history):
    """
    Plot accuracy and loss curves for a training run.

    Draws two side-by-side panels (accuracy on the left, loss on the right),
    each showing the training and the validation series, saves the figure to
    'plot.png' and then displays it with plt.show().

    Args:
        model_history: object whose ``history`` attribute is a mapping with
            the keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss', each
            holding one value per epoch (presumably the History object
            returned by Keras training -- confirm against the caller).
    """
    history = model_history.history
    fig, axs = plt.subplots(1, 2, figsize=(15, 5))
    panels = (
        (axs[0], 'accuracy', 'Model Accuracy', 'Accuracy'),
        (axs[1], 'loss', 'Model Loss', 'Loss'),
    )
    for ax, metric, title, ylabel in panels:
        train_values = history[metric]
        val_values = history['val_' + metric]
        ax.plot(range(1, len(train_values) + 1), train_values)
        ax.plot(range(1, len(val_values) + 1), val_values)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel('Epoch')
        # The tick spacing must be the *step* of np.arange; set_xticks does
        # not take a step as its second positional argument. An integer step
        # of at least 1 gives roughly ten whole-epoch ticks and still works
        # for runs shorter than ten epochs.
        n_epochs = len(train_values)
        ax.set_xticks(np.arange(1, n_epochs + 1, max(1, n_epochs // 10)))
        ax.legend(['train', 'val'], loc='best')
    fig.savefig('plot.png')
    plt.show()
# Dataset locations, sample counts and training hyper-parameters
train_dir, val_dir = 'data/train', 'data/test'
num_train, num_val = 28709, 7178
batch_size, num_epoch = 64, 50

# Both splits are loaded with the same settings; only the directory differs.
flow_options = dict(
    target_size=(48, 48),
    batch_size=batch_size,
    color_mode="grayscale",
    class_mode='categorical',
)

# Each generator rescales pixel values by 1/255 while loading.
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = val_datagen.flow_from_directory(val_dir, **flow_options)
# Build the network: a stack of 3x3 convolutions with max-pooling and dropout,
# followed by a dense head that ends in a 7-way softmax. Input is 48x48x1.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax'),
])
# If you want to train the same model or try other models, go for this
if mode == "train":
    # `learning_rate` is the current name of the deprecated `lr` argument.
    # NOTE(review): newer Keras optimizers may reject `decay` -- confirm
    # against the installed TensorFlow version.
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.0001, decay=1e-6),
                  metrics=['accuracy'])
    # Model.fit accepts generators directly; fit_generator is deprecated.
    model_info = model.fit(
        train_generator,
        steps_per_epoch=num_train // batch_size,
        epochs=num_epoch,
        validation_data=validation_generator,
        validation_steps=num_val // batch_size)
    plot_model_history(model_info)
    model.save_weights('model.h5')
# emotions will be displayed on your face from the webcam feed
elif mode == "display":
    model.load_weights('model.h5')
    # prevents openCL usage and unnecessary logging messages
    cv2.ocl.setUseOpenCL(False)
    # dictionary which assigns each label an emotion (alphabetical order)
    emotion_dict = {0: "Angry", 1: "Disgusted", 2: "Fearful", 3: "Happy", 4: "Neutral", 5: "Sad", 6: "Surprised"}
    # Load the Haar cascade once; it does not change between frames, so
    # re-reading the XML file for every frame is wasted work.
    facecasc = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    # start the webcam feed
    cap = cv2.VideoCapture(1)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = facecasc.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
            for (x, y, w, h) in faces:
                # draw a bounding box around the detected face
                cv2.rectangle(frame, (x, y-50), (x+w, y+h+10), (255, 0, 0), 2)
                roi_gray = gray[y:y + h, x:x + w]
                # Resize the face to 48x48 and add channel and batch axes.
                # NOTE(review): training data is rescaled by 1/255 but this
                # input is not -- confirm whether that mismatch is intended.
                cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
                prediction = model.predict(cropped_img)
                maxindex = int(np.argmax(prediction))
                text = emotion_dict[maxindex]
                # only the "Happy" and "Sad" labels are drawn on the frame
                if ("Happy" in text) or ("Sad" in text):
                    cv2.putText(frame, text, (x+20, y-60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('Video', cv2.resize(frame, (1600, 960), interpolation=cv2.INTER_CUBIC))
            # pressing 'q' ends the loop
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the windows even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()
【问题讨论】:
-
您好,您必须将某个 GUI 库(如 tkinter)或某个 Web 框架(如 Flask)与 OpenCV 配合使用
-
最小代码示例怎么样?为什么不简单地调整视频帧的大小以适合中心帧的大小并将其放置在那里?类似于
img[y0:y1, x0:x1, :] = resized_frame和cv2.imshow('img', img)。对每个视频帧重复... -
不必完全适合矩形!我可以找到确切的坐标。我只需要一个解决方案。
-
(0) 获取黑色区域的左、右、上、下坐标,我将它们称为
x0, x1, y0, y1;(1) 使用 shrunk_down = cv.resize(...) 将视频帧缩小到正确的尺寸,即 (x1-x0, y1-y0);(2) 使用 numpy 切片复制数据:canvas = picture_frame.copy(); canvas[y0:y1, x0:x1] = shrunk_down -- 请忽略 Lahiru 的回复,其中“您必须”的说法完全不着边际(这个问题并不需要那样做)
标签: python python-3.x opencv opencv-python