如何在 Unity 中结合 Google Cloud Vision，使用移动设备摄像头识别文本？答案

【问题标题】：How to use google cloud vision along with unity for recognising text using mobile camera?（如何在 Unity 中结合 Google Cloud Vision，使用移动设备摄像头识别文本？）
【发布时间】：2019-12-02 12:35:45
【问题描述】：

我正在测试一个项目，目标是使用 Google Cloud Vision 从物体和图片中读取文本。我想通过移动设备的摄像头（最好是 iPhone、iPad 或 Android 手机）获取所需的文本，三星 Bixby 应用就是一个例子。经过一番查阅，我找到了 OpenCV for Unity 和 Google Cloud Vision 两种方案。OpenCV for Unity 售价约 95 美元，仅为了测试我无法使用它，所以我选择了另一种方案。

我下载了这个项目（Github project），创建了一个 Google Cloud Vision API 密钥并填入检查器（Inspector），并将功能类型（Feature Type）选项设置为文本检测。进行 iOS 构建后，摄像头可以打开，但画面是倒置的，而且没有任何识别结果。我还发现检查器中提示脚本缺失（Missing Script）。如何使用设备摄像头检测文本？

【问题讨论】：

这通常发生在以下情况之一：A) 对应的脚本文件确实丢失了，例如只复制了预制体（prefab），却没有把对应的脚本文件复制到新项目；B) 脚本文件名与代码中的组件类名不一致；C) 存在编译错误。由于链接项目中唯一的脚本是 WebCamTextureToCloudVision，缺失的似乎是你自己的其他脚本之一。
@derHugo 关于如何使用移动设备摄像头实时检测文本，你有什么建议吗？

标签： c# unity3d google-cloud-vision text-recognition

【解决方案1】：

Unity Cloud Vision git repo 包含人脸检测的代码。它不适用于 OCR 或文本检测。

因此，我编写了一段代码，在 Unity3D 中使用 Cloud Vision OCR API 对图像进行文本检测。

您可以尝试使用以下脚本从 Unity3D 中的图像中检测文本。

using UnityEngine;
using System.Collections;
using System.Collections.Generic;
using UnityEngine.UI;
using SimpleJSON;

/// <summary>
/// Shows the first available device camera on this object's material and, every
/// <see cref="captureIntervalSeconds"/> seconds, posts the current frame as a
/// base64 JPEG to the configured annotate endpoint. The "description" of the
/// first text annotation in the response is written to the UI.
/// </summary>
public class WebCamTextureToCloudVision : MonoBehaviour {

    public string url = "https://vision.googleapis.com/v1/images:annotate?key=";
    public string apiKey = ""; //Put your google cloud vision api key here
    public float captureIntervalSeconds = 5.0f;
    public int requestedWidth = 640;
    public int requestedHeight = 480;
    public FeatureType featureType = FeatureType.TEXT_DETECTION;
    public int maxResults = 10;
    public GameObject resPanel;
    public Text responseText, responseArray;

    WebCamTexture webcamTexture;
    Texture2D texture2D;
    Dictionary<string, string> headers;

    // The classes below are serialized with JsonUtility to form the request
    // body, so their field names are part of the wire format.

    /// <summary>Root object of the request body: a list of image requests.</summary>
    [System.Serializable]
    public class AnnotateImageRequests {
        public List<AnnotateImageRequest> requests;
    }

    /// <summary>One image together with the features requested for it.</summary>
    [System.Serializable]
    public class AnnotateImageRequest {
        public Image image;
        public List<Feature> features;
    }

    /// <summary>Image payload; <c>content</c> holds the base64-encoded JPEG bytes.</summary>
    [System.Serializable]
    public class Image {
        public string content;
    }

    /// <summary>A requested feature: its type name and the result limit.</summary>
    [System.Serializable]
    public class Feature {
        public string type;
        public int maxResults;
    }

    /// <summary>
    /// Feature kinds selectable in the inspector. The member name is sent
    /// verbatim as the feature "type" string in the request.
    /// </summary>
    public enum FeatureType {
        TYPE_UNSPECIFIED,
        FACE_DETECTION,
        LANDMARK_DETECTION,
        LOGO_DETECTION,
        LABEL_DETECTION,
        TEXT_DETECTION,
        SAFE_SEARCH_DETECTION,
        IMAGE_PROPERTIES
    }

    /// <summary>
    /// Prepares the request headers, starts the first camera device (if any),
    /// binds it to this object's material and launches the capture loop.
    /// </summary>
    void Start () {
        headers = new Dictionary<string, string>();
        headers.Add("Content-Type", "application/json; charset=UTF-8");

        if (string.IsNullOrEmpty(apiKey)) {
            Debug.LogError("No API key. Please set your API key into the \"Web Cam Texture To Cloud Vision(Script)\" component.");
        }

        WebCamDevice[] devices = WebCamTexture.devices;
        for (var i = 0; i < devices.Length; i++) {
            Debug.Log (devices [i].name);
        }

        if (devices.Length == 0) {
            Debug.LogWarning("No camera device found; text detection is disabled.");
            return;
        }

        webcamTexture = new WebCamTexture(devices[0].name, requestedWidth, requestedHeight);
        Renderer r = GetComponent<Renderer> ();
        if (r != null) {
            Material m = r.material;
            if (m != null) {
                m.mainTexture = webcamTexture;
            }
        }
        webcamTexture.Play();
        // Started by name, as before, so StopCoroutine("Capture") keeps working.
        StartCoroutine("Capture");
    }

    /// <summary>
    /// Stops the camera and destroys the staging texture when the component is
    /// destroyed, so neither outlives this object.
    /// </summary>
    void OnDestroy () {
        if (webcamTexture != null) {
            webcamTexture.Stop();
        }
        if (texture2D != null) {
            Destroy(texture2D);
        }
    }

    /// <summary>
    /// Endless capture loop: waits for the configured interval, encodes the
    /// current camera frame and posts it to the annotate endpoint.
    /// </summary>
    /// <returns>Coroutine enumerator driven by Unity.</returns>
    private IEnumerator Capture() {
        while (true) {
            // Idle (one frame at a time) until a key is configured; a request
            // without a key cannot succeed.
            if (string.IsNullOrEmpty(apiKey)) {
                yield return null;
                continue;
            }

            yield return new WaitForSeconds(captureIntervalSeconds);

            Color[] pixels = webcamTexture.GetPixels();
            if (pixels.Length == 0) {
                // No frame data yet: skip this round instead of posting an empty image.
                yield return null;
                continue;
            }

            if (texture2D == null || webcamTexture.width != texture2D.width || webcamTexture.height != texture2D.height) {
                if (texture2D != null) {
                    // Release the old staging texture before replacing it.
                    Destroy(texture2D);
                }
                texture2D = new Texture2D(webcamTexture.width, webcamTexture.height, TextureFormat.RGBA32, false);
            }

            texture2D.SetPixels(pixels);
            // Apply() is not called: the pixels are only encoded on the CPU and
            // never need to be uploaded to the GPU.
            byte[] jpg = texture2D.EncodeToJPG();
            string base64 = System.Convert.ToBase64String(jpg);

            string jsonData = BuildRequestJson(base64);
            if (string.IsNullOrEmpty(jsonData)) {
                continue;
            }

            // Encode as UTF-8 to match the charset declared in the Content-Type header.
            byte[] postData = System.Text.Encoding.UTF8.GetBytes(jsonData);
            // NOTE(review): WWW is reported as obsolete by newer Unity versions;
            // consider migrating to UnityWebRequest.
            using (WWW www = new WWW(this.url + this.apiKey, postData, headers)) {
                yield return www;
                if (string.IsNullOrEmpty(www.error)) {
                    ShowDetectedText(www.text);
                } else {
                    Debug.Log("Error: " + www.error);
                }
            }
        }
    }

    /// <summary>Builds the JSON request body for a single base64-encoded image.</summary>
    /// <param name="base64">Base64-encoded JPEG bytes of the frame.</param>
    /// <returns>The serialized request as produced by JsonUtility.</returns>
    private string BuildRequestJson(string base64) {
        Feature feature = new Feature();
        feature.type = this.featureType.ToString();
        feature.maxResults = this.maxResults;

        AnnotateImageRequest request = new AnnotateImageRequest();
        request.image = new Image();
        request.image.content = base64;
        request.features = new List<Feature>();
        request.features.Add(feature);

        AnnotateImageRequests requests = new AnnotateImageRequests();
        requests.requests = new List<AnnotateImageRequest>();
        requests.requests.Add(request);

        return JsonUtility.ToJson(requests, false);
    }

    /// <summary>
    /// Reads the description of the first text annotation from the response and
    /// shows it: as one line in <see cref="responseText"/> and as a
    /// comma-separated list of lines in <see cref="responseArray"/>.
    /// </summary>
    /// <param name="responseJson">Raw JSON text returned by the endpoint.</param>
    private void ShowDetectedText(string responseJson) {
        // The response is parsed as-is: stripping spaces from the raw JSON
        // would also remove the spaces between detected words.
        JSONNode res = JSON.Parse(responseJson);
        if (res == null) {
            return;
        }

        // Value yields the unescaped string (real newlines and quotes), unlike
        // ToString(), which returns the quoted, escaped JSON form.
        string fullText = res["responses"][0]["textAnnotations"][0]["description"].Value;
        if (string.IsNullOrEmpty(fullText)) {
            return;
        }

        Debug.Log("OCR Response: " + fullText);

        if (resPanel != null) {
            resPanel.SetActive(true);
        }
        if (responseText != null) {
            responseText.text = fullText.Replace('\n', ' ').Trim();
        }
        if (responseArray != null) {
            // Split on the actual line breaks so semicolons inside the detected
            // text survive, and drop the empty entry left by a trailing newline.
            string[] texts = fullText.Split(new[] { '\n' }, System.StringSplitOptions.RemoveEmptyEntries);
            responseArray.text = string.Join(", ", texts);
        }
    }

#if UNITY_WEBGL
    /// <summary>Logs the payload passed to the WebGL success callback.</summary>
    void OnSuccessFromBrowser(string jsonString) {
        Debug.Log(jsonString);
    }

    /// <summary>Logs the payload passed to the WebGL error callback.</summary>
    void OnErrorFromBrowser(string jsonString) {
        Debug.Log(jsonString);
    }
#endif

}

演示项目可在 GitHub 上获取：codemaker2015/google-cloud-vision-api-ocr-unity3d-demo

【讨论】：