【问题标题】:Google Cloud Vision OCR Returns "Bad Image Data" on Google Cloud Shell Localhost(Google Cloud Vision OCR 在 Google Cloud Shell 本地主机上返回“错误图像数据”)
【发布时间】:2020-11-05 15:16:19
【问题描述】:

tl;dr:如何让 Google Cloud Vision OCR 在 Cloud Shell 编辑器的本地主机上工作?

我正在使用Google Cloud Shell editor,它包含一个web preview 功能,可以在https://8080-dot-10727374-dot-devshell.appspot.com/index.html 等URL 上为“本地”网络服务器提供服务。

我正在关注 Cloud Vision OCR 的 this tutorial。我将该示例代码放入使用 Blobstore 作为图像主机的 servlet:

package com.google.servlets;

import com.google.appengine.api.blobstore.BlobInfo;
import com.google.appengine.api.blobstore.BlobInfoFactory;
import com.google.appengine.api.blobstore.BlobKey;
import com.google.appengine.api.blobstore.BlobstoreService;
import com.google.appengine.api.blobstore.BlobstoreServiceFactory;
import com.google.appengine.api.images.ImagesService;
import com.google.appengine.api.images.ImagesServiceFactory;
import com.google.appengine.api.images.ServingUrlOptions;
import com.google.cloud.vision.v1.AnnotateImageRequest;
import com.google.cloud.vision.v1.AnnotateImageResponse;
import com.google.cloud.vision.v1.BatchAnnotateImagesResponse;
import com.google.cloud.vision.v1.Feature;
import com.google.cloud.vision.v1.Image;
import com.google.cloud.vision.v1.ImageAnnotatorClient;
import com.google.cloud.vision.v1.ImageSource;
import com.google.cloud.vision.v1.TextAnnotation;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

/**
 * When the user submits the form, Blobstore processes the file upload and then forwards the request
 * to this servlet. This servlet can then analyze the image using the Vision API.
 *
 * <p>This variant hands Cloud Vision the image's serving URL rather than its bytes.
 */
@WebServlet("/image-analysis")
public class ImageAnalysisServlet extends HttpServlet {

  /** Name of the file input element in the upload form. */
  private static final String IMAGE_INPUT_NAME = "image";

  /** Prefix of serving URLs handed out by the local dev server. */
  private static final String LOCALHOST_PREFIX = "http://localhost:8080/";

  /** Externally visible prefix of the Cloud Shell web preview for port 8080. */
  private static final String WEB_PREVIEW_PREFIX =
      "https://8080-dot-10727374-dot-devshell.appspot.com/";

  /**
   * Handles the request Blobstore forwards after an upload: runs OCR on the uploaded image and
   * writes a small HTML page showing the image and the detected text.
   *
   * @param request the forwarded upload request
   * @param response the response the HTML page is written to
   * @throws IOException if the response writer cannot be obtained
   */
  @Override
  public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException {
    // Content type and charset must be set before getWriter() for the charset to take effect;
    // OCR output may contain non-Latin characters.
    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    PrintWriter out = response.getWriter();

    // Get the BlobKey that points to the image uploaded by the user.
    BlobKey blobKey = getBlobKey(request, IMAGE_INPUT_NAME);
    if (blobKey == null) {
      // The form was submitted without a file; there is nothing to analyze.
      response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
      out.println("<p>No image was uploaded.</p>");
      return;
    }

    // Get the URL of the image that the user uploaded.
    String imageUrl = getUploadedFileUrl(blobKey);

    // Extract text from the image.
    String text = detectDocumentText(imageUrl);

    // Output some HTML. Everything dynamic is escaped: the text comes from the uploaded image.
    String safeUrl = escapeHtml(imageUrl);
    out.println("<p>Here's the image you uploaded:</p>");
    out.println("<a href=\"" + safeUrl + "\">");
    out.println("<img src=\"" + safeUrl + "\" />");
    out.println("</a>");
    out.println("<h1>text: " + escapeHtml(text) + "</h1>");
  }

  /**
   * Returns the BlobKey that points to the file uploaded by the user, or null if the user didn't
   * upload a file.
   *
   * @param request the upload request forwarded by Blobstore
   * @param formInputElementName name of the form's file input element to look up
   * @return the key of the first uploaded blob for that input, or null if there is none
   */
  private BlobKey getBlobKey(HttpServletRequest request, String formInputElementName) {
    BlobstoreService blobstoreService = BlobstoreServiceFactory.getBlobstoreService();
    Map<String, List<BlobKey>> blobs = blobstoreService.getUploads(request);
    List<BlobKey> blobKeys = blobs.get(formInputElementName);

    // User submitted form without selecting a file, so we can't get a BlobKey. (dev server)
    if (blobKeys == null || blobKeys.isEmpty()) {
      return null;
    }

    // Our form only contains a single file input, so get the first index.
    BlobKey blobKey = blobKeys.get(0);

    // User submitted form without selecting a file, so the BlobKey is empty. (live server)
    BlobInfo blobInfo = new BlobInfoFactory().loadBlobInfo(blobKey);
    if (blobInfo == null) {
      // No metadata for this key; treat it like a missing upload.
      return null;
    }
    if (blobInfo.getSize() == 0) {
      blobstoreService.delete(blobKey);
      return null;
    }

    return blobKey;
  }

  /**
   * Returns a URL that points to the uploaded file.
   *
   * @param blobKey key of the uploaded image; must not be null
   * @return the serving URL, with a dev-server localhost prefix rewritten to the web preview host
   */
  private String getUploadedFileUrl(BlobKey blobKey) {
    ImagesService imagesService = ImagesServiceFactory.getImagesService();
    ServingUrlOptions options = ServingUrlOptions.Builder.withBlobKey(blobKey);
    String url = imagesService.getServingUrl(options);

    // Google Cloud Shell's "localhost" preview is not actually on localhost,
    // so swap the localhost prefix for the web preview domain.
    if (url.startsWith(LOCALHOST_PREFIX)) {
      url = WEB_PREVIEW_PREFIX + url.substring(LOCALHOST_PREFIX.length());
    }

    return url;
  }

  /**
   * Runs document text detection on the image at the given URI.
   *
   * <p>NOTE(review): the URI is handed to Cloud Vision as-is, so presumably the service must be
   * able to fetch it without credentials; a login-protected URL such as the Cloud Shell web
   * preview is reported to fail with "Bad image data".
   *
   * @param path URI of the image to analyze
   * @return the detected text, or a message starting with "Error" / "ERROR" describing the failure
   * @throws IOException declared for signature compatibility; failures are reported in the return
   *     value instead
   */
  private String detectDocumentText(String path) throws IOException {
    ImageSource imgSource = ImageSource.newBuilder().setImageUri(path).build();
    Image img = Image.newBuilder().setSource(imgSource).build();
    Feature feat = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
    AnnotateImageRequest request =
        AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();
    List<AnnotateImageRequest> requests = new ArrayList<>();
    requests.add(request);

    // try-with-resources closes the client; no explicit close() call is needed.
    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
      BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
      List<AnnotateImageResponse> responses = response.getResponsesList();

      // Case where the ImageAnnotatorClient works, but there are no responses from it.
      if (responses.isEmpty()) {
        return "Error: No responses";
      }

      // Only one request was sent, so only the first response is relevant.
      AnnotateImageResponse res = responses.get(0);
      if (res.hasError()) {
        System.out.format("Error: %s%n", res.getError().getMessage());
        return "Error: " + res.getError().getMessage();
      }

      // For full list of available annotations, see http://g.co/cloud/vision/docs
      TextAnnotation annotation = res.getFullTextAnnotation();
      return annotation.getText();
    } catch (Exception e) {
      // Boundary catch: any client failure is reported to the page rather than rethrown.
      return "ERROR: ImageAnnotatorClient Failed, " + e;
    }
  }

  /**
   * Escapes the characters that are significant in HTML text and attribute values.
   *
   * @param raw text to escape; null is treated as empty
   * @return the escaped text
   */
  private static String escapeHtml(String raw) {
    if (raw == null) {
      return "";
    }
    StringBuilder escaped = new StringBuilder(raw.length());
    for (int i = 0; i < raw.length(); i++) {
      char c = raw.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        case '\'':
          escaped.append("&#39;");
          break;
        default:
          escaped.append(c);
      }
    }
    return escaped.toString();
  }
}

当我使用mvn package appengine:deploy 命令部署到真实服务器时,这非常有效:

(嗯,正如从这张测试图片中可以预期的那样完美。)

但是,如果我使用 mvn package appengine:run 命令部署到“本地”开发服务器,Google Cloud Vision 就会返回笼统的“Bad image data”(错误图像数据)错误:

我猜这是因为图像 URL (https://8080-dot-10727374-dot-devshell.appspot.com/_cloudshellProxy/_ah/img/TjxgeYiHlCkix-XRj94jnw) 无法公开访问,因为它运行在“假”本地主机上,需要我登录到我的 Google 帐户才能查看。

如何让 Google Cloud Vision OCR 在 Cloud Shell 编辑器的“假”本地主机上工作?

【问题讨论】:

    标签: java google-app-engine blobstore google-cloud-vision google-cloud-shell


    【解决方案1】:

    Cloud Vision 还支持直接读取图像字节,而不必通过 URL 获取图像。改用这种方式后,就不再需要可公开访问的 URL 了。

    重要的是这一行:

    Image img = Image.newBuilder().setContent(ByteString.copyFrom(bytes)).build();
    

    ...bytes 来自 Blobstore 中存储的内容。

    完整代码供参考:

    package com.google.servlets;
    
    import com.google.appengine.api.blobstore.BlobInfo;
    import com.google.appengine.api.blobstore.BlobInfoFactory;
    import com.google.appengine.api.blobstore.BlobKey;
    import com.google.appengine.api.blobstore.BlobstoreService;
    import com.google.appengine.api.blobstore.BlobstoreServiceFactory;
    import com.google.appengine.api.images.ImagesService;
    import com.google.appengine.api.images.ImagesServiceFactory;
    import com.google.appengine.api.images.ServingUrlOptions;
    import com.google.cloud.vision.v1.AnnotateImageRequest;
    import com.google.cloud.vision.v1.AnnotateImageResponse;
    import com.google.cloud.vision.v1.BatchAnnotateImagesResponse;
    import com.google.cloud.vision.v1.Feature;
    import com.google.cloud.vision.v1.Image;
    import com.google.cloud.vision.v1.ImageAnnotatorClient;
    import com.google.cloud.vision.v1.ImageSource;
    import com.google.cloud.vision.v1.TextAnnotation;
    import com.google.protobuf.ByteString;
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.PrintWriter;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    /**
     * When the user submits the form, Blobstore processes the file upload and then forwards the request
     * to this servlet. This servlet can then analyze the image using the Vision API.
     *
     * <p>This variant reads the image bytes out of Blobstore and sends them to Cloud Vision
     * directly, so the image does not need a publicly reachable URL.
     */
    @WebServlet("/image-analysis")
    public class ImageAnalysisServlet extends HttpServlet {

      /** Name of the file input element in the upload form. */
      private static final String IMAGE_INPUT_NAME = "image";

      /** Prefix of serving URLs handed out by the local dev server. */
      private static final String LOCALHOST_PREFIX = "http://localhost:8080/";

      /** Externally visible prefix of the Cloud Shell web preview for port 8080. */
      private static final String WEB_PREVIEW_PREFIX =
          "https://8080-dot-10727374-dot-devshell.appspot.com/";

      /**
       * Handles the request Blobstore forwards after an upload: runs OCR on the uploaded image and
       * writes a small HTML page showing the image and the detected text.
       *
       * @param request the forwarded upload request
       * @param response the response the HTML page is written to
       * @throws IOException if the response writer cannot be obtained or the blob cannot be read
       */
      @Override
      public void doPost(HttpServletRequest request, HttpServletResponse response) throws IOException {
        // Content type and charset must be set before getWriter() for the charset to take effect;
        // OCR output may contain non-Latin characters.
        response.setContentType("text/html");
        response.setCharacterEncoding("UTF-8");
        PrintWriter out = response.getWriter();

        // Get the BlobKey that points to the image uploaded by the user.
        BlobKey blobKey = getBlobKey(request, IMAGE_INPUT_NAME);
        if (blobKey == null) {
          // The form was submitted without a file; there is nothing to analyze.
          response.setStatus(HttpServletResponse.SC_BAD_REQUEST);
          out.println("<p>No image was uploaded.</p>");
          return;
        }

        // Get the URL of the image that the user uploaded (only used to display it).
        String imageUrl = getUploadedFileUrl(blobKey);

        // Extract text from the image's raw bytes.
        byte[] blobBytes = getBlobBytes(blobKey);
        String text = detectDocumentText(blobBytes);

        // Output some HTML. Everything dynamic is escaped: the text comes from the uploaded image.
        String safeUrl = escapeHtml(imageUrl);
        out.println("<p>Here's the image you uploaded:</p>");
        out.println("<a href=\"" + safeUrl + "\">");
        out.println("<img src=\"" + safeUrl + "\" />");
        out.println("</a>");
        out.println("<h1>text: " + escapeHtml(text) + "</h1>");
      }

      /**
       * Reads the full content of a blob into memory by fetching it in chunks of
       * {@code BlobstoreService.MAX_BLOB_FETCH_SIZE} bytes.
       *
       * @param blobKey key of the blob to read
       * @return all bytes fetched for the blob
       * @throws IOException if writing a chunk to the in-memory buffer fails
       */
      private byte[] getBlobBytes(BlobKey blobKey) throws IOException {
        BlobstoreService blobstoreService = BlobstoreServiceFactory.getBlobstoreService();
        ByteArrayOutputStream outputBytes = new ByteArrayOutputStream();

        final int fetchSize = BlobstoreService.MAX_BLOB_FETCH_SIZE;
        long currentByteIndex = 0;
        while (true) {
          // end index is inclusive, so we have to subtract 1 to get fetchSize bytes
          byte[] chunk =
              blobstoreService.fetchData(blobKey, currentByteIndex, currentByteIndex + fetchSize - 1);
          outputBytes.write(chunk);

          // if we read fewer bytes than we requested, then we reached the end
          if (chunk.length < fetchSize) {
            break;
          }

          currentByteIndex += fetchSize;
        }

        return outputBytes.toByteArray();
      }

      /**
       * Returns the BlobKey that points to the file uploaded by the user, or null if the user didn't
       * upload a file.
       *
       * @param request the upload request forwarded by Blobstore
       * @param formInputElementName name of the form's file input element to look up
       * @return the key of the first uploaded blob for that input, or null if there is none
       */
      private BlobKey getBlobKey(HttpServletRequest request, String formInputElementName) {
        BlobstoreService blobstoreService = BlobstoreServiceFactory.getBlobstoreService();
        Map<String, List<BlobKey>> blobs = blobstoreService.getUploads(request);
        List<BlobKey> blobKeys = blobs.get(formInputElementName);

        // User submitted form without selecting a file, so we can't get a BlobKey. (dev server)
        if (blobKeys == null || blobKeys.isEmpty()) {
          return null;
        }

        // Our form only contains a single file input, so get the first index.
        BlobKey blobKey = blobKeys.get(0);

        // User submitted form without selecting a file, so the BlobKey is empty. (live server)
        BlobInfo blobInfo = new BlobInfoFactory().loadBlobInfo(blobKey);
        if (blobInfo == null) {
          // No metadata for this key; treat it like a missing upload.
          return null;
        }
        if (blobInfo.getSize() == 0) {
          blobstoreService.delete(blobKey);
          return null;
        }

        return blobKey;
      }

      /**
       * Returns a URL that points to the uploaded file.
       *
       * @param blobKey key of the uploaded image; must not be null
       * @return the serving URL, with a dev-server localhost prefix rewritten to the web preview host
       */
      private String getUploadedFileUrl(BlobKey blobKey) {
        ImagesService imagesService = ImagesServiceFactory.getImagesService();
        ServingUrlOptions options = ServingUrlOptions.Builder.withBlobKey(blobKey);
        String url = imagesService.getServingUrl(options);

        // Google Cloud Shell's "localhost" preview is not actually on localhost,
        // so swap the localhost prefix for the web preview domain.
        if (url.startsWith(LOCALHOST_PREFIX)) {
          url = WEB_PREVIEW_PREFIX + url.substring(LOCALHOST_PREFIX.length());
        }

        return url;
      }

      /**
       * Runs document text detection on raw image bytes.
       *
       * @param bytes the encoded image content to analyze
       * @return the detected text, or a message starting with "Error" / "ERROR" describing the failure
       * @throws IOException declared for signature compatibility; failures are reported in the return
       *     value instead
       */
      private String detectDocumentText(byte[] bytes) throws IOException {
        Image img = Image.newBuilder().setContent(ByteString.copyFrom(bytes)).build();
        Feature feat = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
        AnnotateImageRequest request =
            AnnotateImageRequest.newBuilder().addFeatures(feat).setImage(img).build();
        List<AnnotateImageRequest> requests = new ArrayList<>();
        requests.add(request);

        // try-with-resources closes the client; no explicit close() call is needed.
        try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
          BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
          List<AnnotateImageResponse> responses = response.getResponsesList();

          // Case where the ImageAnnotatorClient works, but there are no responses from it.
          if (responses.isEmpty()) {
            return "Error: No responses";
          }

          // Only one request was sent, so only the first response is relevant.
          AnnotateImageResponse res = responses.get(0);
          if (res.hasError()) {
            System.out.format("Error: %s%n", res.getError().getMessage());
            return "Error: " + res.getError().getMessage();
          }

          // For full list of available annotations, see http://g.co/cloud/vision/docs
          TextAnnotation annotation = res.getFullTextAnnotation();
          return annotation.getText();
        } catch (Exception e) {
          // Boundary catch: any client failure is reported to the page rather than rethrown.
          return "ERROR: ImageAnnotatorClient Failed, " + e;
        }
      }

      /**
       * Escapes the characters that are significant in HTML text and attribute values.
       *
       * @param raw text to escape; null is treated as empty
       * @return the escaped text
       */
      private static String escapeHtml(String raw) {
        if (raw == null) {
          return "";
        }
        StringBuilder escaped = new StringBuilder(raw.length());
        for (int i = 0; i < raw.length(); i++) {
          char c = raw.charAt(i);
          switch (c) {
            case '&':
              escaped.append("&amp;");
              break;
            case '<':
              escaped.append("&lt;");
              break;
            case '>':
              escaped.append("&gt;");
              break;
            case '"':
              escaped.append("&quot;");
              break;
            case '\'':
              escaped.append("&#39;");
              break;
            default:
              escaped.append(c);
          }
        }
        return escaped.toString();
      }
    }
    

    【讨论】:

      猜你喜欢
      • 2020-04-03
      • 1970-01-01
      • 2017-01-25
      • 1970-01-01
      • 2023-03-02
      • 1970-01-01
      • 2016-07-24
      • 1970-01-01
      • 2020-06-26
      相关资源
      最近更新 更多