【问题标题】:OpenCV / Python : multi-threading for live facial recognition(OpenCV / Python:用于实时面部识别的多线程)
【发布时间】:2017-07-06 03:57:10
【问题描述】:

我正在使用 OpenCv 和 Dlib 来执行带有地标的面部识别,通过网络摄像头流进行直播。语言是 Python。它在我的 macbook 笔记本电脑上运行良好,但我需要它从台式计算机 24/7 运行。该计算机是运行 Debian Jessie 的 PC Intel® Core™2 Quad CPU Q6600 @ 2.40GHz 32bit。 性能急剧下降:由于处理,有 10 秒的延迟!

因此,我研究了 多线程 以获得性能:

  1. 我首先尝试了OpenCv的示例代码,结果很棒!四个核心都达到了 100%,性能要好得多。
  2. 然后我用我的代码替换了帧处理代码,它根本没有提高性能!只有一个核心达到 100%,其他核心保持非常低。 我什至认为启用多线程会更糟。

我从 dlib 示例代码中获得了面部标志性代码。我知道它可能可以进行优化,但我想了解为什么我无法通过多线程使用我的(旧)计算机的全部功能?

我会把我的代码放在下面,非常感谢阅读:)

from __future__ import print_function

import numpy as np
import cv2
import dlib

from multiprocessing.pool import ThreadPool
from collections import deque

from common import clock, draw_str, StatValue
import video

class DummyTask:
    """Already-completed stand-in for an asynchronous pool result.

    Wraps a value that was computed synchronously so it can be stored in
    the same pending queue as the objects returned by ``apply_async``:
    it exposes the same ``ready()`` / ``get()`` pair.
    """

    def __init__(self, data):
        # The finished result that get() hands back.
        self.data = data

    def get(self):
        """Return the wrapped value."""
        return self.data

    def ready(self):
        """Always report completion; the value exists from construction."""
        return True

if __name__ == '__main__':
    import sys

    print(__doc__)

    # The optional first command-line argument selects the video source;
    # without it, fall back to source 0. An explicit length check replaces
    # the former bare ``except:``, which would also have hidden unrelated
    # errors (including KeyboardInterrupt).
    fn = sys.argv[1] if len(sys.argv) > 1 else 0
    cap = video.create_capture(fn)

    # Face detector
    detector = dlib.get_frontal_face_detector()

    # Landmarks shape predictor (model file path is relative to the
    # current working directory)
    predictor = dlib.shape_predictor("landmarks/shape_predictor_68_face_landmarks.dat")

    # This is where the facial detection takes place
    def process_frame(frame, t0, detector, predictor):
        """Detect faces in ``frame`` and draw their landmarks onto it.

        Args:
            frame: BGR image; it is drawn on in place.
            t0: timestamp passed through untouched so the caller can
                measure latency.
            detector: callable invoked as ``detector(image, 1)`` that
                yields face rectangles.
            predictor: callable invoked as ``predictor(image, rect)``
                that returns a shape exposing ``num_parts`` and
                ``part(i)``.

        Returns:
            The tuple ``(frame, t0)``.
        """
        # Convert to grayscale and equalise contrast (CLAHE) before
        # running detection on the result.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
        clahe_image = clahe.apply(gray)
        detections = detector(clahe_image, 1)
        for d in detections:
            shape = predictor(clahe_image, d)
            # Landmark indices are 0-based. The previous range(1, 68)
            # skipped point 0, so only 67 of the 68 points were drawn.
            for i in range(shape.num_parts):
                point = shape.part(i)
                cv2.circle(frame, (point.x, point.y), 1, (0,0,255), thickness=2)
        return frame, t0

    # One pool worker per CPU reported by OpenCV; ``pending`` holds the
    # submitted tasks in submission order.
    threadn = cv2.getNumberOfCPUs()
    pool = ThreadPool(processes = threadn)
    pending = deque()

    threaded_mode = True

    latency = StatValue()
    frame_interval = StatValue()
    last_frame_time = clock()
    while True:
        # Show every finished task at the head of the queue. Only the head
        # is inspected, so frames are displayed in capture order.
        while pending and pending[0].ready():
            res, t0 = pending.popleft().get()
            latency.update(clock() - t0)
            draw_str(res, (20, 20), "threaded      :  " + str(threaded_mode))
            draw_str(res, (20, 40), "latency        :  %.1f ms" % (latency.value*1000))
            draw_str(res, (20, 60), "frame interval :  %.1f ms" % (frame_interval.value*1000))
            cv2.imshow('threaded video', res)
        # Never keep more tasks in flight than there are workers.
        if len(pending) < threadn:
            ret, frame = cap.read()
            if not ret:
                # The capture returned no frame (stream ended or the device
                # failed). Stop cleanly instead of crashing on the unusable
                # frame further down.
                break
            t = clock()
            frame_interval.update(t - last_frame_time)
            last_frame_time = t
            if threaded_mode:
                # Hand the worker its own copy of the frame.
                task = pool.apply_async(process_frame, (frame.copy(), t, detector, predictor))
            else:
                # Process synchronously but wrap the result so it goes
                # through the same pending queue.
                task = DummyTask(process_frame(frame, t, detector, predictor))
            pending.append(task)
        ch = cv2.waitKey(1)
        if ch == ord(' '):
            # Space toggles between pooled and synchronous processing.
            threaded_mode = not threaded_mode
        if ch == 27:
            # Esc quits.
            break
cv2.destroyAllWindows()

【问题讨论】:

    标签: python multithreading opencv face-detection dlib


    【解决方案1】:

    你可以使用这个,多线程的:

    from imutils.video import VideoStream
    
    # Open the camera stream through imutils' VideoStream and start it.
    # The answer relies on .start() for the "multithreaded" capture --
    # presumably it reads frames on a background thread; confirm against
    # the imutils documentation.
    # The URL below is a placeholder: substitute the real camera address.
    videostream = "rtsp://192.168.x.y/user=admin=xxxxxxx_channel=vvvv=1.sdp?params"
    # NOTE(review): frameSize is not defined anywhere in this snippet; it
    # has to be defined before this line runs.
    vs = VideoStream(src=videostream, resolution=frameSize,
                     framerate=32).start()
    
    # Fetch a frame from the started stream.
    frame = vs.read()
    

    【讨论】:

      【解决方案2】:

      我尝试了一种简化的方法,例如 P.Ro 在他的回答中提到的进程写入输出队列,但不知何故,队列大部分时间都被锁定,因为所有进程同时写入它。 (只是我的猜测)我可能做错了什么。

      最后我最终使用了管道。

      代码很糟糕。但如果我是几个小时前的我。我仍然很高兴找到一个实际运行不费力的例子。

      from multiprocessing import Process, Queue, Manager,Pipe
      import multiprocessing
      import face_recognition as fik
      import cv2
      import time
      
      
      # Source handed to cv2.VideoCapture by the main script below.
      video_input = 0
      
      # Reference face: load the image and keep the first encoding found in
      # it. Indexing with [0] raises IndexError if no encoding is returned.
      obama_image = fik.load_image_file("obama.png")
      obama_face_encoding = fik.face_encodings(obama_image)[0]
      
      
      
      # NOTE(review): not referenced anywhere else in the visible snippet.
      quality = 0.7
      
      
      def f(id,fi,fl):
          """Worker loop: locate faces in queued frames and send the crops back.

          Args:
              id: label for this worker, used only in the log line.
              fi: queue the frames are taken from (blocking ``get``).
              fl: connection each cropped face region is sent through.

          Runs forever; it never returns.
          """
          # Imported inside the function, as in the original snippet, so the
          # import happens when the worker function runs.
          import face_recognition as fok

          banner = "running thread"+str(id)
          while True:
              image = fi.get()
              print(banner)
              boxes = fok.face_locations(image)

              # Nothing detected in this frame: wait for the next one.
              if len(boxes) == 0:
                  continue

              print(boxes)
              # Each box unpacks as (top, right, bottom, left); slice that
              # region out of the frame and ship it to the parent.
              for top_edge, right_edge, bottom_edge, left_edge in boxes:
                  fl.send(image[top_edge:bottom_edge, left_edge:right_edge])
      
      # Timestamp of the previously displayed frame, used for the FPS print.
      fps_var = 0
      if __name__ == '__main__':
          multiprocessing.set_start_method('spawn')

          video_capture = cv2.VideoCapture(video_input)

          # Bounded queue of frames waiting for a worker.
          fi = Queue(maxsize=14)

          threads = 8
          proc = []

          # One pipe per worker: parent_p[i] is read here, thread_p[i] is
          # the end handed to worker i.
          parent_p = []
          thread_p = []
          for t in range(threads):
              p_t, c_t = Pipe()
              parent_p.append(p_t)
              thread_p.append(c_t)
              print(t)
              worker = Process(target=f, args=(t, fi, c_t))
              # The worker function loops forever. Without the daemon flag
              # the interpreter would wait on the workers at exit and the
              # program would never terminate after 'q'.
              worker.daemon = True
              proc.append(worker)
              worker.start()

          # Frames are resized to a fixed 930 px width at a 16:10 aspect
          # ratio before being queued.
          xxx = 930
          yyy = 10/16 #0.4234375

          frame_id = 0
          try:
              while True:
                  # Grab a single frame of video
                  ret, frame = video_capture.read()
                  if not ret:
                      # No frame delivered (stream ended / device failed);
                      # reading frame.shape would raise on the empty frame.
                      break
                  effheight, effwidth = frame.shape[:2]
                  if effwidth < 20:
                      break

                  small_frame = cv2.resize(frame, (xxx, int(xxx*yyy)))
                  # Only every second frame is queued, and only while the
                  # queue has room; this process is the sole producer, so
                  # the put cannot block after the full() check.
                  if frame_id%2 == 0 and not fi.full():
                      fi.put(small_frame)

                      print(frame_id)

                      cv2.imshow('Video', small_frame)

                      now = time.time()
                      elapsed = now - fps_var
                      # Guard against a zero interval on coarse clocks.
                      if elapsed > 0:
                          print("FPS: ", int(1.0 / elapsed))
                      fps_var = time.time()

                  # Handle at most one detection per captured frame: stop at
                  # the first pipe that has a face crop waiting.
                  for conn in parent_p:
                      if not conn.poll():
                          continue
                      small_frame_c = conn.recv()
                      cv2.imshow('recc', small_frame_c)
                      height34, width34 = small_frame_c.shape[:2]
                      if(width34<20):
                          print("face 2 small")
                          print(width34)
                          break
                      # The crop already is the face, so the whole crop is
                      # passed as the known face location.
                      face_encodings_cam = fik.face_encodings(small_frame_c,[(0, width34, height34, 0)])

                      match = fik.compare_faces([obama_face_encoding], face_encodings_cam[0])
                      name = "Unknown"

                      if match[0]:
                          name = "Barack"

                      print(name)
                      break

                  frame_id += 1

                  # Hit 'q' on the keyboard to quit!
                  if cv2.waitKey(1) & 0xFF == ord('q'):
                      break
          finally:
              # Release the camera and windows and stop the workers even
              # when the loop exits through an exception.
              video_capture.release()
              cv2.destroyAllWindows()
              for worker in proc:
                  worker.terminate()
              for worker in proc:
                  worker.join()
      

      【讨论】:

        【解决方案3】:

        性能问题是由于 dlib 编译不当造成的。不要使用 pip install dlib:与正确编译的版本相比,它由于某种原因运行非常缓慢。改用正确编译的 dlib 之后,我把将近 10 秒的延迟缩短到了 2 秒左右。所以最后我并不需要多线程/多进程,不过我仍在努力进一步提高速度。感谢您的帮助:)

        【讨论】:

          【解决方案4】:

          没有太多使用线程池的经验,但我总是只使用如下所示的进程。您应该能够轻松地编辑此代码以满足您的需求。我写这篇文章时考虑到了你的实现。

          此代码将获取内核数量,并启动相应数量的工作进程,并行执行所需的处理函数。所有工作进程共享同一个输入帧队列,并把结果放入同一个输出队列,由主进程取出并显示。每个队列都有最大长度,在本例中为 5。这样即使处理需要占用 CPU 时间,显示的画面也始终相对接近实时。

          import numpy as np
          import cv2
          
          from multiprocessing import Process, Queue
          import time
          
          #from common import clock, draw_str, StatValue
          #import video
          
          class Canny_Process(Process):
              """Worker process: take frames from a queue, process, publish results.

              Frames are read from ``frame_queue``; each result is placed on
              ``output_queue``, discarding the oldest queued result when that
              queue is full.
              """

              def __init__(self,frame_queue,output_queue):
                  from multiprocessing import Event

                  Process.__init__(self)
                  self.frame_queue = frame_queue
                  self.output_queue = output_queue
                  # A plain attribute set in the parent after start() is
                  # never seen by the child, because each process has its
                  # own copy of the object. An Event is shared between them.
                  self._stop_event = Event()
                  #Initialize your face detectors here

              @property
              def stop(self):
                  """True once a stop has been requested."""
                  return self._stop_event.is_set()

              @stop.setter
              def stop(self, value):
                  # Keeps ``obj.stop = True/False`` working for existing code.
                  if value:
                      self._stop_event.set()
                  else:
                      self._stop_event.clear()

              def get_frame(self, timeout=0.05):
                  """Return ``(True, frame)``, or ``(False, None)`` if none arrives.

                  Waits up to ``timeout`` seconds for a frame. Waiting on the
                  queue keeps ``run`` from spinning at full CPU while idle,
                  and catching ``Empty`` avoids the race where another worker
                  takes the frame between an ``empty()`` check and ``get()``.
                  """
                  from queue import Empty

                  try:
                      return True, self.frame_queue.get(timeout=timeout)
                  except Empty:
                      return False, None

              def stopProcess(self):
                  """Ask the worker loop to finish after its current frame."""
                  self._stop_event.set()

              def canny_frame(self,frame):
                  """Run edge detection on ``frame`` and publish the edge image."""
                  from queue import Empty, Full

                  # some intensive computation...
                  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                  edges = cv2.Canny(gray, 50, 100)

                  #To simulate CPU Time
                  #############################
                  for i in range(1000000):
                      x = 546*546
                      res = x/(i+1)
                  #############################
                  # REPLACE WITH FACE DETECT CODE HERE

                  # Publish without blocking. Several processes touch this
                  # queue, so the "is it full?" answer can be stale by the
                  # time we act on it; handle Full/Empty directly instead of
                  # checking first.
                  try:
                      self.output_queue.put_nowait(edges)
                      return
                  except Full:
                      pass
                  try:
                      # Drop the oldest result to make room.
                      self.output_queue.get_nowait()
                  except Empty:
                      pass
                  try:
                      self.output_queue.put_nowait(edges)
                  except Full:
                      # Another worker refilled the slot first; this result
                      # is dropped, consistent with the lossy queue policy.
                      pass

              def run(self):
                  """Process frames until a stop is requested."""
                  while not self.stop:
                      ret, frame = self.get_frame()
                      if ret:
                          self.canny_frame(frame)
          
          
          if __name__ == '__main__':
              from queue import Empty, Full

              def put_frame(frame):
                  """Queue ``frame`` for the workers, dropping the oldest when full."""
                  try:
                      Input_Queue.put_nowait(frame)
                      return
                  except Full:
                      pass
                  try:
                      Input_Queue.get_nowait()
                  except Empty:
                      # A worker emptied the queue between the two calls;
                      # there is room now either way.
                      pass
                  # This process is the only producer, so a slot is free.
                  Input_Queue.put(frame)

              def cap_read(cv2_cap):
                  """Read one frame from ``cv2_cap`` and queue it if the read succeeded."""
                  ret, frame = cv2_cap.read()
                  if ret:
                      put_frame(frame)

              cap = cv2.VideoCapture(0)

              threadn = cv2.getNumberOfCPUs()

              threaded_mode = True

              process_list = []
              Input_Queue = Queue(maxsize = 5)
              Output_Queue = Queue(maxsize = 5)

              # Leave one core for this process, but always start at least
              # one worker; a single-core machine would otherwise start none
              # and nothing would ever be displayed.
              for x in range(max(1, threadn - 1)):
                  canny_process = Canny_Process(frame_queue = Input_Queue,output_queue = Output_Queue)
                  # Daemon workers are stopped automatically on exit.
                  canny_process.daemon = True
                  canny_process.start()
                  process_list.append(canny_process)

              cv2.namedWindow('Threaded Video', cv2.WINDOW_NORMAL)
              try:
                  while True:
                      cap_read(cap)

                      try:
                          result = Output_Queue.get_nowait()
                      except Empty:
                          # No finished frame yet; keep feeding the workers.
                          continue

                      cv2.imshow('Threaded Video', result)
                      # Keys are only polled right after a frame is shown, so
                      # react to each key press once rather than re-testing a
                      # stale value on every loop iteration.
                      ch = cv2.waitKey(5)
                      if ch == ord(' '):
                          threaded_mode = not threaded_mode
                      if ch == 27:
                          break
              finally:
                  cap.release()
                  cv2.destroyAllWindows()

          这应该可以解决问题,只需把我的 Canny 函数改成面部检测即可。我用你的代码写了这个版本并比较了两者,这个版本明显更快。我在这里使用的是 multiprocessing.Process。在 Python 中,进程是真正并行的,而线程由于 GIL 的存在并不能真正并行。我使用 2 个队列在主进程和工作进程之间来回传送数据。队列是线程安全和进程安全的。

          【讨论】:

          • 非常感谢您花时间编写所有代码 :) 由于某种原因它不太有效,这周我将深入研究它。我刚换了一台 i7-2600K @ 3.40GHz 的电脑,但我仍然遇到性能问题!我猜瓶颈在别的地方。也许在 OSX 与 Debian 架构差异的某个地方?因为在 OSX 上一切正常!再次感谢,我会在本周通知你我的结果:)
          • 什么不太好用,代码还是性能?代码是否正在运行但仍然不够快,或者代码根本没有运行?如果你给我错误,我可能会告诉你它是什么。谢谢
          • 代码运行但未按预期运行。我已将日志放入此文件中:docs.google.com/document/d/… 再说一次,我还没有时间深入研究它,明天我会这样做。再次感谢:)
          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2013-04-25
          • 1970-01-01
          • 2019-10-15
          • 1970-01-01
          相关资源
          最近更新 更多