【问题标题】:how to use the google cloud vision OCR on iOS (swift)如何在 iOS 上使用谷歌云视觉 OCR (swift)
【发布时间】:2017-03-05 10:20:38
【问题描述】:

如何调整 (FACE_DETECTION) 和 (LABEL_DETECTION) 的 swift google 示例代码以使用 OCR API (TEXT_DETECTION)？到目前为止我没有找到任何示例代码，也没有找到如何在 iOS 上使用 google OCR 的说明。我已经将 API 请求的类型更改为 TEXT_DETECTION，但它没有返回任何响应。有什么帮助吗？

 class GoogleCloudVisionOCR {

        let session = URLSession.shared
        var googleAPIKey = ""
        /// Cloud Vision `images:annotate` endpoint, keyed with `googleAPIKey`.
        var googleURL: URL {
            return URL(string: "https://vision.googleapis.com/v1/images:annotate?key=\(googleAPIKey)")!
        }
        var textFromImageArray: [String]?

        /// Downloads the image at `imageURLString`, runs Cloud Vision OCR
        /// (TEXT_DETECTION) on it, and calls `handler` with all recognized
        /// text fragments joined by single spaces ("" on failure/no text).
        /// - Parameters:
        ///   - imageURLString: Absolute URL of the image to read.
        ///   - handler: Completion invoked on the main queue with the result.
        public func GetTextFromImage(imageURLString: String, handler: @escaping (String) -> Void) {
            guard let url = URL(string: imageURLString) else { print("no URL in TextFromImageRequest.GetTextFromImage"); return }
            // NOTE(review): Data(contentsOf:) blocks the calling thread while
            // the image downloads — invoke this method off the main thread.
            guard let data = try? Data(contentsOf: url) else {
                print("unwrapped data nil in TextFromImageRequest.GetTextFromImage")
                return
            }
            guard let image = UIImage(data: data) else { print("no image from URL in TextFromImageRequest.GetTextFromImage"); return }

            let binaryImageData = base64EncodeImage(image)
            createRequest(with: binaryImageData, handler: handler)
        }

        /// Base64-encodes `image` as PNG, downscaling first when the encoded
        /// payload would exceed the Vision API's 2 MB request limit.
        /// Returns "" if the image cannot be PNG-encoded at all.
        private func base64EncodeImage(_ image: UIImage) -> String {
            var imagedata = UIImagePNGRepresentation(image)

            // Resize the image if it exceeds the 2MB API limit.
            if let byteCount = imagedata?.count, byteCount > 2_097_152 {
                let oldSize: CGSize = image.size
                // Fix width at 800 pt and preserve the aspect ratio.
                let newSize = CGSize(width: 800, height: oldSize.height / oldSize.width * 800)
                imagedata = resizeImage(newSize, image: image)
            }

            return imagedata?.base64EncodedString(options: .endLineWithCarriageReturn) ?? ""
        }

        /// Redraws `image` into a bitmap context of `imageSize` and returns
        /// the PNG data, or nil if rendering/encoding fails.
        private func resizeImage(_ imageSize: CGSize, image: UIImage) -> Data? {
            UIGraphicsBeginImageContext(imageSize)
            defer { UIGraphicsEndImageContext() }
            image.draw(in: CGRect(x: 0, y: 0, width: imageSize.width, height: imageSize.height))
            guard let newImage = UIGraphicsGetImageFromCurrentImageContext() else { return nil }
            return UIImagePNGRepresentation(newImage)
        }

        /// Builds and dispatches the annotate POST request for OCR.
        private func createRequest(with imageBase64: String, handler: @escaping (String) -> Void) {
            var request = URLRequest(url: googleURL)
            request.httpMethod = "POST"
            request.addValue("application/json", forHTTPHeaderField: "Content-Type")
            request.addValue(Bundle.main.bundleIdentifier ?? "", forHTTPHeaderField: "X-Ios-Bundle-Identifier")

            // BUG FIX: "requests" must be an ARRAY of annotate-request objects
            // (the original sent a bare dictionary), and OCR requires the
            // TEXT_DETECTION feature (the original asked for LABEL/FACE
            // detection, so no "textAnnotations" ever came back).
            let jsonRequest: [String: Any] = [
                "requests": [
                    [
                        "image": ["content": imageBase64],
                        "features": [
                            ["type": "TEXT_DETECTION", "maxResults": 10]
                        ]
                    ]
                ]
            ]

            // Serialize with Foundation instead of SwiftyJSON; the payload is
            // a plain property-list dictionary.
            guard let data = try? JSONSerialization.data(withJSONObject: jsonRequest) else {
                return
            }
            request.httpBody = data

            // Run the request on a background thread.
            DispatchQueue.global().async {
                self.runRequestOnBackgroundThread(request, handler: handler)
            }
        }

        /// Executes `request` and forwards the response body to the parser.
        private func runRequestOnBackgroundThread(_ request: URLRequest, handler: @escaping (String) -> Void) {
            let task: URLSessionDataTask = session.dataTask(with: request) { (data, response, error) in
                guard let data = data, error == nil else {
                    print(error?.localizedDescription ?? "")
                    return
                }
                self.analyzeResults(data, handler: handler)
            }
            task.resume()
        }

        /// Parses the annotate response, joins every `textAnnotations`
        /// description with spaces, and delivers it on the main queue.
        /// BUG FIX: the original only called `handler` when at least one
        /// annotation was present, leaving callers hanging on errors or
        /// empty results — now `handler("")` fires in those cases too.
        private func analyzeResults(_ dataToParse: Data, handler: @escaping (String) -> Void) {
            // Deliver on the main thread so callers can touch UI directly.
            DispatchQueue.main.async {
                // Use SwiftyJSON to parse results.
                let json = JSON(data: dataToParse)
                let errorObj: JSON = json["error"]

                // Check for API-level errors first.
                guard errorObj.dictionaryValue == [:] else {
                    print("Error code \(errorObj["code"]): \(errorObj["message"])")
                    handler("")
                    return
                }

                let textAnnotations: JSON = json["responses"][0]["textAnnotations"]
                let fragments = (0..<textAnnotations.count).map {
                    textAnnotations[$0]["description"].stringValue
                }
                handler(fragments.joined(separator: " "))
            }
        }

}

【问题讨论】:

  • TEXT_DETECTION 和 FACE_DETECTION 的过程从客户端的角度来看是基本相同的。只有一个 URL https://vision.googleapis.com/v1/images:annotate 应该发送一个 POST 请求。主要区别在于请求和响应的内容。这在images.annotate 文档中有所描述。 POST 请求是否从 iOS 设备成功发送?回应是什么?您可以使用上一个链接中的 API 资源管理器重现此问题吗?

标签: ios swift google-cloud-platform ocr google-cloud-vision


【解决方案1】:

我项目中的这个功能可能会对你有所帮助。

/// Runs Cloud Vision DOCUMENT_TEXT_DETECTION on `image` and returns the
/// full recognized text via `completionHandler` (exactly one of `error`
/// or `data` is non-nil; the handler fires on a URLSession worker queue).
/// NOTE(review): the original scraped listing was syntactically garbled
/// (`- >`, `? ?`, `!as`, `as ?` …) and force-unwrapped both the JPEG data
/// and the URL — `URL(string:)` returns nil for the literal "{Your Key}"
/// placeholder, so `url!` crashed. Both paths now fail gracefully.
func applyOCR(image: UIImage, completionHandler: @escaping (_ error: Error?, _ data: String?) -> Void) {

    // 0.2 JPEG quality keeps the payload small for the API's size limit.
    guard let imageData = UIImageJPEGRepresentation(image, 0.2) else {
        completionHandler(NSError(domain: "", code: 400,
                                  userInfo: [NSLocalizedDescriptionKey: "Could not encode image as JPEG"]), nil)
        return
    }
    let base64 = imageData.base64EncodedString(options: .endLineWithCarriageReturn)

    // Vision's JSON parser accepts single-quoted keys; interpolate the image.
    let body = "{ 'requests': [ { 'image': { 'content': '\(base64)' }, 'features': [ { 'type': 'DOCUMENT_TEXT_DETECTION' } ],  'imageContext': {'languageHints': ['en']} } ] }"

    // Substitute a real API key — with the placeholder this URL is invalid.
    guard let url = URL(string: "https://vision.googleapis.com/v1/images:annotate?key={Your Key}") else {
        completionHandler(NSError(domain: "", code: 400,
                                  userInfo: [NSLocalizedDescriptionKey: "Invalid request URL — set your API key"]), nil)
        return
    }

    var request = URLRequest(url: url,
                             cachePolicy: .reloadIgnoringLocalAndRemoteCacheData,
                             timeoutInterval: 30.0)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = body.data(using: .utf8)

    let task = URLSession.shared.dataTask(with: request) { data, _, error in
        if let error = error {
            print(error.localizedDescription)
            completionHandler(error, nil)
            return   // BUG FIX: original fell through and could complete twice
        }

        guard let data = data else {
            completionHandler(NSError(domain: "", code: 500,
                                      userInfo: [NSLocalizedDescriptionKey: "Empty response"]), nil)
            return
        }

        do {
            // Walk responses[0].fullTextAnnotation.text safely (no `as!`).
            if let json = try JSONSerialization.jsonObject(with: data, options: .allowFragments) as? [String: Any],
               let responses = json["responses"] as? [[String: Any]],
               let annotation = responses.first?["fullTextAnnotation"] as? [String: Any],
               let text = annotation["text"] as? String {
                completionHandler(nil, text)
                return
            }

            // No text in the payload — most often a key/auth problem.
            let error = NSError(domain: "", code: 401,
                                userInfo: [NSLocalizedDescriptionKey: "Invalid access token"])
            completionHandler(error, nil)
        } catch {
            print("error parsing \(error)")
            completionHandler(error, nil)
        }
    }

    task.resume()
}

您还可以使用免费且非常易于使用的谷歌移动视觉库 https://developers.google.com/vision/ios/text-overview

【讨论】:

猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2019-11-24
  • 1970-01-01
  • 1970-01-01
  • 2019-02-18
  • 2018-11-13
相关资源
最近更新 更多