Swift iOS Vision 不准确的矩形检测结果答案

【问题标题】：Swift iOS Vision inaccurate rectangle detection resultSwift iOS Vision 不准确的矩形检测结果
【发布时间】：2022-04-02 06:39:18
【问题描述】：

xcode：版本 12.5.1， IOS：14.7.1，设备：iPhone 12 Pro

您好，需要帮助！

iOS Vision 框架返回不准确的矩形结果： incorrect rect result

识别的矩形（红色）不适合真实的矩形（黑色）。它不是方形的，比真实的更窄。我不明白为什么会这样。如果改为绘制观察到的矩形边界框，则会得到同样不准确的结果。

完整代码：

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    
    private let captureSession = AVCaptureSession()
    private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)
    private let videoDataOutput = AVCaptureVideoDataOutput()
    private var maskLayer = CAShapeLayer()

    override func viewDidLoad() {
        super.viewDidLoad()
        self.setCameraInput()
        self.showCameraFeed()
        self.setCameraOutput()
        self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
        self.captureSession.startRunning()
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        self.previewLayer.frame = self.view.frame
    }
    
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            debugPrint("unable to get image from sample buffer")
            return
        }
        self.detectRectangle(in: frame)
    }
    
    private func setCameraInput() {
        guard let device = AVCaptureDevice.DiscoverySession(
            deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera],
            mediaType: .video,
            position: .back).devices.first else {
                fatalError("No back camera device found.")
        }
        let cameraInput = try! AVCaptureDeviceInput(device: device)
        self.captureSession.addInput(cameraInput)
    }
    
    private func showCameraFeed() {
        self.previewLayer.videoGravity = .resizeAspectFill
        self.view.layer.addSublayer(self.previewLayer)
        self.previewLayer.frame = self.view.frame
    }
    
    private func setCameraOutput() {
        self.videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
        self.videoDataOutput.alwaysDiscardsLateVideoFrames = true
        self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
        self.captureSession.addOutput(self.videoDataOutput)
        guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
        connection.isVideoOrientationSupported else { return }
        connection.videoOrientation = .portrait
    }
    
    private func detectRectangle(in image: CVPixelBuffer) {
        let request = VNDetectRectanglesRequest(completionHandler: { (request: VNRequest, error: Error?) in
            DispatchQueue.main.async {
                guard let results = request.results as? [VNRectangleObservation] else { return }
                self.maskLayer.removeFromSuperlayer()
                guard let rect = results.first else{return}
                    self.drawBoundingBox(rect: rect)
            }
        })
        request.minimumAspectRatio = VNAspectRatio(1.3)
        request.maximumAspectRatio = VNAspectRatio(1.6)
        request.minimumSize = Float(0.5)
        request.maximumObservations = 1
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: image, options: [:])
        try? imageRequestHandler.perform([request])
    }
    
    func drawBoundingBox(rect : VNRectangleObservation) {
        let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -self.previewLayer.frame.height)
        let scale = CGAffineTransform.identity.scaledBy(x: self.previewLayer.frame.width, y: self.previewLayer.frame.height)
         
        let path = UIBezierPath()
        path.move(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
        path.addLine(to: CGPoint(x: rect.bottomRight.x, y: rect.bottomRight.y))
        path.addLine(to: CGPoint(x: rect.topRight.x, y: rect.topRight.y))
        path.addLine(to: CGPoint(x: rect.topLeft.x, y: rect.topLeft.y))
        path.addLine(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
        path.apply(scale)
        path.apply(transform)
        path.close()
        
        maskLayer = CAShapeLayer()
        maskLayer.fillColor = UIColor.clear.cgColor
        maskLayer.lineWidth = 5
        maskLayer.strokeColor = UIColor.red.cgColor
        maskLayer.path = path.cgPath
        
        previewLayer.insertSublayer(maskLayer, at: 1)
    }
}

extension CGPoint {
   func scaled(to size: CGSize) -> CGPoint {
       return CGPoint(x: self.x * size.width,
                      y: self.y * size.height)
   }
}

以上代码为教程改版：rectangle detection tutorial

【问题讨论】：

标签： ios swift image-recognition vision

【解决方案1】：

这是我的代码示例。

    ///SET THE VALUE FOR THE DETECTED RECTANGLE
    detectRectanglesRequest.minimumAspectRatio = VNAspectRatio(0.3)
    detectRectanglesRequest.maximumAspectRatio = VNAspectRatio(0.9)
    detectRectanglesRequest.minimumSize = Float(0.4)
    detectRectanglesRequest.maximumObservations = 0
    detectRectanglesRequest.minimumConfidence = 0.2
    detectRectanglesRequest.quadratureTolerance = 2
    detectRectanglesRequest.revision = VNDetectRectanglesRequestRevision1
    detectRectanglesRequest.preferBackgroundProcessing = true

“尝试”这样使用更好：

    ///SEND THE REQUESTS TO THE REQUEST HANDLER
    DispatchQueue.global(qos: .userInteractive).async {
        do {
            try imageRequestHandler.perform([detectRectanglesRequest])
        } catch let error as NSError {
            print("Failed to perform image request: \(error)")
            //                self.presentAlert("Image Request Failed", error: error)
            return
        }
    }

...最后一个：

private func drawBoundingBox(rect: VNRectangleObservation) {
    
    CATransaction.begin()
    
    let transform = CGAffineTransform(scaleX: 1, y: -1)
        .translatedBy(x: 0, y: -scanCam.videoPreviewLayer.bounds.height)
    
    let scale = CGAffineTransform.identity
        .scaledBy(x: scanCam.videoPreviewLayer.bounds.width,
                  y: scanCam.videoPreviewLayer.bounds.height)
    
    let currentBounds = rect.boundingBox
        .applying(scale).applying(transform)
    
    createLayer(in: currentBounds)
    
    CATransaction.commit()
    
    //viewModel.cameraDetectRectFrame = currentBounds
}

private func createLayer(in rect: CGRect) {
    maskLayer = CAShapeLayer()
    maskLayer.frame = rect
    maskLayer.opacity = 1
    maskLayer.borderColor = UIColor.blue.cgColor ///for visual test
    maskLayer.borderWidth = 2
    scanCam.videoPreviewLayer.insertSublayer(maskLayer, at: 1)
}

【讨论】：