【问题标题】:Swift iOS Vision inaccurate rectangle detection resultSwift iOS Vision 不准确的矩形检测结果
【发布时间】:2022-04-02 06:39:18
【问题描述】:

xcode:版本 12.5.1, IOS:14.7.1, 设备:iPhone 12 Pro

您好,需要帮助!

iOS Vision 框架返回不准确的矩形结果: incorrect rect result

识别的矩形(红色)不适合真实的矩形(黑色)。 它不是方形的,比真实的更窄。 我不明白为什么会这样。如果改为绘制观察到的矩形边界框,则会得到同样不准确的结果。

完整代码:

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    
    private let captureSession = AVCaptureSession()
    private lazy var previewLayer = AVCaptureVideoPreviewLayer(session: self.captureSession)
    private let videoDataOutput = AVCaptureVideoDataOutput()
    private var maskLayer = CAShapeLayer()

    override func viewDidLoad() {
        super.viewDidLoad()
        self.setCameraInput()
        self.showCameraFeed()
        self.setCameraOutput()
        self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
        self.captureSession.startRunning()
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        self.previewLayer.frame = self.view.frame
    }
    
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            debugPrint("unable to get image from sample buffer")
            return
        }
        self.detectRectangle(in: frame)
    }
    
    private func setCameraInput() {
        guard let device = AVCaptureDevice.DiscoverySession(
            deviceTypes: [.builtInWideAngleCamera, .builtInDualCamera, .builtInTrueDepthCamera],
            mediaType: .video,
            position: .back).devices.first else {
                fatalError("No back camera device found.")
        }
        let cameraInput = try! AVCaptureDeviceInput(device: device)
        self.captureSession.addInput(cameraInput)
    }
    
    private func showCameraFeed() {
        self.previewLayer.videoGravity = .resizeAspectFill
        self.view.layer.addSublayer(self.previewLayer)
        self.previewLayer.frame = self.view.frame
    }
    
    private func setCameraOutput() {
        self.videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
        self.videoDataOutput.alwaysDiscardsLateVideoFrames = true
        self.videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
        self.captureSession.addOutput(self.videoDataOutput)
        guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
        connection.isVideoOrientationSupported else { return }
        connection.videoOrientation = .portrait
    }
    
    private func detectRectangle(in image: CVPixelBuffer) {
        let request = VNDetectRectanglesRequest(completionHandler: { (request: VNRequest, error: Error?) in
            DispatchQueue.main.async {
                guard let results = request.results as? [VNRectangleObservation] else { return }
                self.maskLayer.removeFromSuperlayer()
                guard let rect = results.first else{return}
                    self.drawBoundingBox(rect: rect)
            }
        })
        request.minimumAspectRatio = VNAspectRatio(1.3)
        request.maximumAspectRatio = VNAspectRatio(1.6)
        request.minimumSize = Float(0.5)
        request.maximumObservations = 1
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: image, options: [:])
        try? imageRequestHandler.perform([request])
    }
    
    func drawBoundingBox(rect : VNRectangleObservation) {
        let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -self.previewLayer.frame.height)
        let scale = CGAffineTransform.identity.scaledBy(x: self.previewLayer.frame.width, y: self.previewLayer.frame.height)
         
        let path = UIBezierPath()
        path.move(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
        path.addLine(to: CGPoint(x: rect.bottomRight.x, y: rect.bottomRight.y))
        path.addLine(to: CGPoint(x: rect.topRight.x, y: rect.topRight.y))
        path.addLine(to: CGPoint(x: rect.topLeft.x, y: rect.topLeft.y))
        path.addLine(to: CGPoint(x: rect.bottomLeft.x, y: rect.bottomLeft.y))
        path.apply(scale)
        path.apply(transform)
        path.close()
        
        maskLayer = CAShapeLayer()
        maskLayer.fillColor = UIColor.clear.cgColor
        maskLayer.lineWidth = 5
        maskLayer.strokeColor = UIColor.red.cgColor
        maskLayer.path = path.cgPath
        
        previewLayer.insertSublayer(maskLayer, at: 1)
    }
}

extension CGPoint {
   func scaled(to size: CGSize) -> CGPoint {
       return CGPoint(x: self.x * size.width,
                      y: self.y * size.height)
   }
}

以上代码为教程改版:rectangle detection tutorial

【问题讨论】:

    标签: ios swift image-recognition vision


    【解决方案1】:

    这是我的代码示例。

        ///SET THE VALUE FOR THE DETECTED RECTANGLE
        detectRectanglesRequest.minimumAspectRatio = VNAspectRatio(0.3)
        detectRectanglesRequest.maximumAspectRatio = VNAspectRatio(0.9)
        detectRectanglesRequest.minimumSize = Float(0.4)
        detectRectanglesRequest.maximumObservations = 0
        detectRectanglesRequest.minimumConfidence = 0.2
        detectRectanglesRequest.quadratureTolerance = 2
        detectRectanglesRequest.revision = VNDetectRectanglesRequestRevision1
        detectRectanglesRequest.preferBackgroundProcessing = true
    

    “尝试”这样使用更好:

        ///SEND THE REQUESTS TO THE REQUEST HANDLER
        DispatchQueue.global(qos: .userInteractive).async {
            do {
                try imageRequestHandler.perform([detectRectanglesRequest])
            } catch let error as NSError {
                print("Failed to perform image request: \(error)")
                //                self.presentAlert("Image Request Failed", error: error)
                return
            }
        }
    

    ...最后一个:

    private func drawBoundingBox(rect: VNRectangleObservation) {
        
        CATransaction.begin()
        
        let transform = CGAffineTransform(scaleX: 1, y: -1)
            .translatedBy(x: 0, y: -scanCam.videoPreviewLayer.bounds.height)
        
        let scale = CGAffineTransform.identity
            .scaledBy(x: scanCam.videoPreviewLayer.bounds.width,
                      y: scanCam.videoPreviewLayer.bounds.height)
        
        let currentBounds = rect.boundingBox
            .applying(scale).applying(transform)
        
        createLayer(in: currentBounds)
        
        CATransaction.commit()
        
        //viewModel.cameraDetectRectFrame = currentBounds
    }
    
    private func createLayer(in rect: CGRect) {
        maskLayer = CAShapeLayer()
        maskLayer.frame = rect
        maskLayer.opacity = 1
        maskLayer.borderColor = UIColor.blue.cgColor ///for visual test
        maskLayer.borderWidth = 2
        scanCam.videoPreviewLayer.insertSublayer(maskLayer, at: 1)
    }
    

    【讨论】:

      猜你喜欢
      • 2017-09-27
      • 1970-01-01
      • 1970-01-01
      • 2019-08-05
      • 2022-08-08
      • 1970-01-01
      • 1970-01-01
      • 2022-01-17
      • 2021-10-21
      相关资源
      最近更新 更多