使用 LBP、深度学习和 OpenCV 进行实时人脸识别答案

【问题标题】：Real Time Face Recognition with LBP, Deep Learning and OpenCV使用 LBP、深度学习和 OpenCV 进行实时人脸识别
【发布时间】：2021-08-05 09:08:10
【问题描述】：

我是计算机视觉方面的新手。我正在尝试实现基于局部二值模式（LBP）的实时人脸识别，其中人脸检测部分使用 OpenCV 基于深度学习的 dnn 模块。我正在使用 caltech_faces 数据集，并添加了一个包含我自己 20 张照片的文件夹。

所以，这是我的代码。我基本上通过一些更改和添加将示例图像的人脸识别代码转换为实时人脸识别。

执行以下代码时出现以下错误：

predName = le.inverse_transform([predictions[i]])[0]
                                                       ^
TabError: inconsistent use of tabs and spaces in indentation

我检查了所有制表符和缩进，但找不到需要修复的内容和位置。恳请您给我一些提示，告诉我该怎么做。非常感谢！

# import the necessary packages

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils.video import VideoStream
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import imutils
import time
import cv2
import os


#Creating our face detector

def detect_faces(net, frame, minConfidence=0.5):
    """Run the face detector on ``frame`` and return the confident boxes.

    Args:
        net: Network object exposing ``setInput`` and ``forward`` (the
            script passes the result of ``cv2.dnn.readNet``).
        frame: Image array; only its first two dimensions (height, width)
            are read here before it is handed to ``blobFromImage``.
        minConfidence: A detection is kept only when its score is strictly
            greater than this value.

    Returns:
        A list of ``(startX, startY, endX, endY)`` integer tuples, one per
        retained detection, scaled by the width/height of ``frame``.
    """
    (height, width) = frame.shape[:2]

    # multiplying the four box values of a detection by this vector maps
    # them onto the frame's pixel grid
    toPixels = np.array([width, height, width, height])

    # build the 300x300 input blob and run a forward pass
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
        (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()

    # index 2 of each detection row is its confidence, indices 3..6 are
    # the box corners; keep only the rows that clear the threshold
    return [
        tuple((detections[0, 0, idx, 3:7] * toPixels).astype("int"))
        for idx in range(0, detections.shape[2])
        if detections[0, 0, idx, 2] > minConfidence
    ]


#Loading the CALTECH Faces dataset

def load_face_dataset(inputPath, net, minConfidence=0.5,
    minSamples=15):
    """Build a face dataset from a directory tree of labelled images.

    The class label of every image is the name of the directory that
    directly contains it. Classes with fewer than ``minSamples`` image
    files are ignored entirely.

    Args:
        inputPath: Root directory that is searched for images.
        net: Face detector network, forwarded to ``detect_faces``.
        minConfidence: Detection threshold, forwarded to ``detect_faces``.
        minSamples: Minimum number of image files a class must have for
            its images to be processed.

    Returns:
        A 2-tuple ``(faces, labels)`` of NumPy arrays: the 47x62 grayscale
        face crops and the class name of each crop, in matching order.
    """
    # grab the paths to all images in our input directory and derive
    # the class label of each one from its parent directory
    imagePaths = list(paths.list_images(inputPath))
    names = [p.split(os.path.sep)[-2] for p in imagePaths]

    # count the example images per class once, so the per-image check
    # below is a dictionary lookup instead of a list search
    (uniqueNames, counts) = np.unique(names, return_counts=True)
    samplesPerName = dict(zip(uniqueNames.tolist(), counts.tolist()))

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []

    # loop over the image paths together with their labels
    for (imagePath, name) in zip(imagePaths, names):
        # skip under-represented classes *before* touching the disk, so
        # images that would be discarded anyway are never decoded
        if samplesPerName[name] < minSamples:
            continue

        # cv2.imread signals an unreadable/corrupt file by returning
        # None rather than raising, so check for it explicitly
        frame = cv2.imread(imagePath)
        if frame is None:
            print("[WARN] could not read image {}".format(imagePath))
            continue
        (h, w) = frame.shape[:2]

        # perform face detection
        boxes = detect_faces(net, frame, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # clamp the box to the image: a negative start index would
            # otherwise wrap around when slicing and yield a wrong or
            # empty crop
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w, endX), min(h, endY))

            # an empty crop cannot be resized, so drop degenerate boxes
            if endX <= startX or endY <= startY:
                continue

            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = frame[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists
            faces.append(faceROI)
            labels.append(name)

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)

#Implementing Local Binary Patterns for face recognition    

# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--input", type=str, required=True,
#   help="path to input directory of images")
# ap.add_argument("-f", "--face", type=str,
#   default="face_detector",
#   help="path to face detector model directory")
# ap.add_argument("-c", "--confidence", type=float, default=0.5,
#   help="minimum probability to filter weak detections")
# args = vars(ap.parse_args())

# since we are using Jupyter Notebooks we can replace our argument
# parsing code with *hard coded* arguments and values
args = {
    "input": "caltech_faces",
    "face": "face_detector",
    "confidence": 0.5,
}

# load our serialized face detector model from disk; both model files
# are looked up inside the directory named by args["face"]
print("[INFO] loading face detector model...")
prototxtPath = os.path.join(args["face"], "deploy.prototxt")
weightsPath = os.path.join(args["face"],
    "res10_300x300_ssd_iter_140000.caffemodel")
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset, taking the detection threshold from
# the configuration above instead of repeating the literal value
print("[INFO] loading dataset...")
(faces, labels) = load_face_dataset(args["input"], net,
    minConfidence=args["confidence"], minSamples=20)
print("[INFO] {} images in dataset".format(len(faces)))

# map each class name to an integer id; `le` is kept around so ids can
# be translated back to names afterwards
le = LabelEncoder()
labels = le.fit_transform(labels)

# hold out 25% of the samples for testing, stratified on the labels and
# with a fixed seed so the split is repeatable
splits = train_test_split(
    faces, labels, test_size=0.25, stratify=labels, random_state=42)
(trainX, testX, trainY, testY) = splits

# create the LBPH recognizer, then fit it on the training split while
# measuring how long the fit takes
print("[INFO] training face recognizer...")
recognizer = cv2.face.LBPHFaceRecognizer_create(
    radius=2,
    neighbors=16,
    grid_x=8,
    grid_y=8,
)
start = time.time()
recognizer.train(trainX, trainY)
end = time.time()
print("[INFO] training took {:.4f} seconds".format(end - start))


# collect, for every held-out sample, the label the recognizer predicts
# and the confidence value it reports alongside it
print("[INFO] gathering predictions...")
predictions = []
confidence = []
start = time.time()

# walk the test samples; the index stays bound to the module-level name
# `i` because later code in this script still refers to it
for (i, sample) in enumerate(testX):
    (prediction, conf) = recognizer.predict(sample)
    predictions.append(prediction)
    confidence.append(conf)

# report how long inference over the whole test split took
end = time.time()
print("[INFO] inference took {:.4f} seconds".format(end - start))

# summarize per-class results against the true test labels
print(classification_report(
    testY, predictions, target_names=le.classes_))


# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# loop over the frames from the video stream
while True:

    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    face = vs.read()
    face = imutils.resize(face, width=400)

    # loop over the detections
    # NOTE(review): this `for` has no statement in its body -- only a
    # comment follows before the indentation drops back -- so Python
    # rejects the file at this point. `detections` is also only assigned
    # inside detect_faces(); no module-level variable of that name exists.
    for i in range(0, detections.shape[2]):

        # grab the predicted name and actual name
    # NOTE(review): `predictions`, `testY` and `confidence` hold the
    # results computed earlier for the held-out test images; nothing in
    # this loop runs the detector or the recognizer on the webcam frame.
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]


    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: {}, actual: {}, confidence: {:.2f}".format(predName, actualName, confidence[i]))

# NOTE(review): everything below starts at column 0, i.e. outside the
# `while True:` loop above, so the `break` is not inside any loop.
# show the output frame
cv2.imshow("Face", face)
key = cv2.waitKey(1) & 0xFF
 
# if the `q` key was pressed, break from the loop
if key == ord("q"):
    break

【问题讨论】：

如果你认为你使用了错误的值，那么可以阅读文档以查看它需要什么值。也许会有一些例子。您还可以使用print() 查看变量中的内容，然后查看是否使用了正确的值。
您是否收到了错误消息？请始终把完整的错误消息（从“Traceback”一词开始）以文本形式（不要用屏幕截图，也不要链接到外部网站）放在问题中（而不是评论里），并附上其他有用的信息。
为什么您认为自己使用了错误的值？您得到了错误的结果吗？请在问题中（而不是评论里）展示它，并给出预期结果。我们无法运行您的代码，看不到您的屏幕，也无法读懂您的想法——您必须在问题中描述所有细节。
@furas，感谢您的反馈！我已按照您的指示更新了我的帖子。这是我作为 CENG 学生的第一个学期，也是我第一次使用 Stackoverflow。我现在明白如何在这里正确操作了！再次感谢您的指导！
如果编辑器/IDE 中有 convert all tabs to 4 spaces（将所有制表符转换为 4 个空格）选项，请使用它，这应该可以解决这个问题。一些编辑器/IDE 甚至可以在您保存文件或按下 Tab 键时自动将制表符转换为空格——这也能解决这个问题。如果编辑器没有这个选项，您仍然可以使用 search & replace（查找和替换）功能找出所有制表符并替换为 4 个空格。制表符和 4 个空格看起来一样，但 Python 不允许在缩进中混用两者——您必须只使用其中一种。在某些编辑器/IDE 中，您按下 Tab 键时，它会被自动转换为 4 个空格。

标签： python opencv deep-learning computer-vision face-recognition

【解决方案1】：

您有一个 for 循环，没有任何代码行，但在导致问题的行之前有一个注释：

# loop over the detections
for i in range(0, detections.shape[2]):

    # grab the predicted name and actual name
predName = le.inverse_transform([predictions[i]])[0]
actualName = le.classes_[testY[i]]

问题来自这个空循环；如果你有一个循环，你必须在里面至少有一行代码。所以删除它或在里面添加 pass 关键字。

【讨论】：

【解决方案2】：

我为此使用 Google Colab。首先，请确保您已安装 OpenCV。您可以使用 pip 安装它：

pip install opencv-python

在检测人脸之前，我们必须先在 Google Colab 中打开网络摄像头。

from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
def take_photo(filename='photo.jpg', quality=0.8):
    """Capture one webcam frame in the notebook's browser and save it.

    The function displays a JavaScript snippet that adds a "Capture"
    button and a live video element to the page, waits for the button to
    be clicked, draws the current video frame to a canvas and returns it
    as a JPEG data URL. The base64 payload of that URL is then decoded
    and written to ``filename``.

    Args:
        filename: Path of the file the captured image is written to.
        quality: Value passed to the script's ``canvas.toDataURL`` call
            for the ``image/jpeg`` encoding.

    Returns:
        ``filename``, unchanged.
    """
    # The JavaScript is kept at column 0 inside the string exactly as
    # pasted; only the setIframeHeight call has been moved onto its own
    # line, because sharing a line with the `//` comment disabled it.
    js = Javascript('''
async function takePhoto(quality) {
const div = document.createElement('div');
const capture = document.createElement('button');
capture.textContent = 'Capture';
div.appendChild(capture);
const video = document.createElement('video');
video.style.display = 'block';
const stream = await navigator.mediaDevices.getUserMedia({video: true});
document.body.appendChild(div);
div.appendChild(video);
video.srcObject = stream;
await video.play();
// Resize the output to fit the video element.
google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
// Wait for Capture to be clicked.
await new Promise((resolve) => capture.onclick = resolve);
const canvas = document.createElement('canvas');
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
canvas.getContext('2d').drawImage(video, 0, 0);
stream.getVideoTracks()[0].stop();
div.remove();
return canvas.toDataURL('image/jpeg', quality);
}
''')
    display(js)

    # run the function defined above and receive its data URL result
    data = eval_js('takePhoto({})'.format(quality))

    # everything after the first comma of the data URL is decoded as
    # base64 to get the image bytes
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

您必须运行以下代码作为第二步。

from IPython.display import Image
try:
    filename = take_photo()
    print('Saved to {}'.format(filename))
    # Show the image which was just taken.
    display(Image(filename))
except Exception as err:
    # Errors will be thrown if the user does not have a webcam or if they
    # do not grant the page permission to access it.
    print(str(err))

运行这两个代码后，网络摄像头打开，您可以拍摄照片。照片保存为 photo.jpg。

使用 Haar 级联的人脸检测是一种基于机器学习的方法，其中使用一组输入数据训练级联函数。 OpenCV 已经包含许多针对面部、眼睛、微笑等的预训练分类器。今天我们将使用面部分类器。您也可以尝试使用其他分类器。

【讨论】：