【发布时间】:2021-04-22 01:41:43
【问题描述】:
我在笔记本电脑 Windows 10 上使用 C#.net
我已经有对图像(png、jpg)进行 OCR 的代码,并且运行正常。
现在我需要让它也能处理 pdf 文件。
但有朋友告诉我,pdf可以直接发送到google APIs并获得OCRed,而不需要将pdf转换为图像然后发送图像。
这可能吗?如果有,怎么做?
/// <summary>
/// Runs text detection on the given image and returns the description of the
/// first annotation in the response.
/// </summary>
/// <param name="filePath">
/// The image to analyze. Despite its name, this parameter is an already loaded
/// <c>Image</c> instance, not a file path.
/// </param>
/// <returns>
/// The <c>Description</c> of the first annotation returned by
/// <c>Client.DetectText</c>, or an empty string when the response contains no
/// annotations.
/// </returns>
private string GetTextFromImage(Google.Cloud.Vision.V1.Image filePath)
{
    var response = Client.DetectText(filePath);

    // The response may contain no annotations; calling First() on the empty
    // result threw InvalidOperationException. Only the first annotation's
    // description is ever returned, so no intermediate list is built.
    var firstAnnotation = response.FirstOrDefault();
    if (firstAnnotation == null)
    {
        return string.Empty;
    }

    return firstAnnotation.Description;
}
/// <summary>
/// Creates a Vision API image from the file at the given path.
/// </summary>
/// <param name="filePath">Path passed to <c>Image.FromFile</c>.</param>
/// <returns>The image produced by <c>Image.FromFile</c>.</returns>
private Google.Cloud.Vision.V1.Image GetImageFromPath(string filePath)
    => Google.Cloud.Vision.V1.Image.FromFile(filePath);
编辑
谢谢里科
但是代码返回的是下面这段 JSON,而不是 pdf 中的文本:
{ "responses": [ { "responses": [ { "fullTextAnnotation": { "pages": [ { "property": { "detectedLanguages": [ { "languageCode": "en", "confidence": 0.92 }, { "languageCode": "fil", "confidence": 0.02 }, { "languageCode": "af", "confidence": 0.01 } ] }, "width": 841, "height": 595, "blocks": [ { "property": { "detectedLanguages": [ { "languageCode": "en", "confidence": 0.33 }, { "languageCode": "fil", "confidence": 0.29 } ] }, "boundingBox": { "normalizedVertices": [ { "x": 0.587395966, "y": 0.9210084 }, { "x": 0.369797856, "y": 0.640336156 }, { "x": 0.4530321, "y": 0.5126051 }, { "x": 0.6706302, "y": 0.7932773 } ] }, "paragraphs": [ { "property": { "detectedLanguages": [ { "languageCode": "fil", "confidence": 0.47 }, { "languageCode": "en", "confidence": 0.39 } ] }, "boundingBox": { "normalizedVertices": [ { "x": 0.587395966, "y": 0.9210084 }, { "x": 0.372176, "y": 0.6386555 }, { "x": 0.416171223, "y": 0.5714286 }, { "x": 0.6313912, "y": 0.8554622 } ] }, "words": [ { "boundingBox": { "normalizedVertices": [ { "x": 0.529132, "y": 0.8436975 }, { "x": 0.4649227, "y": 0.761344552 }, { "x": 0.4803805, "y": 0.73613447 }, { "x": 0.544589758, "y": 0.8201681 } ] }, "symbols": [ { "text": "M", "confidence": 0.99 }, { "text": "e", "confidence": 0.99 }, { "text": "n", "confidence": 1 }, { "text": "g", "confidence": 0.99 }, { "text": "m", "confidence": 0.99 }, { "text": "e", "confidence": 0.99 }, { "text": "n", "confidence": 1 }, { "property": { "detectedBreak": { "type": "SPACE" } }, "text": "g", "confidence": 0.99 } ], "confidence": 0.99 }, { "boundingBox": { "normalizedVertices": [ { "x": 0.460166454, "y": 0.754621863 }, { "x": 0.445897728, "y": 0.73613447 }, { "x": 0.461355537, "y": 0.712605059 }, { "x": 0.475624263, "y": 0.731092453 } ] }, "symbols": [ { "text": "L", "confidence": 0.99 }, { "property": { "detectedBreak": { "type": "EOL_SURE_SPACE" } }, "text": "u", "confidence": 0.99 } ], "confidence": 0.99 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } 
] }, "boundingBox": { "normalizedVertices": [ { "x": 0.58501786, "y": 0.877310932 }, { "x": 0.5731272, "y": 0.8605042 }, { "x": 0.5862069, "y": 0.840336144 }, { "x": 0.5980975, "y": 0.857142866 } ] }, "symbols": [ { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "a", "confidence": 0.62 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "t", "confidence": 0.98 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ], "detectedBreak": { "type": "SPACE" } }, "text": "e", "confidence": 0.94 } ], "confidence": 0.84 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "boundingBox": { "normalizedVertices": [ { "x": 0.568371, "y": 0.8537815 }, { "x": 0.549346, "y": 0.8302521 }, { "x": 0.5636147, "y": 0.8084034 }, { "x": 0.581450641, "y": 0.833613455 } ] }, "symbols": [ { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "K", "confidence": 0.92 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "e", "confidence": 0.98 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ], "detectedBreak": { "type": "SPACE" } }, "text": "y", "confidence": 0.98 } ], "confidence": 0.96 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "boundingBox": { "normalizedVertices": [ { "x": 0.5457788, "y": 0.8235294 }, { "x": 0.5279429, "y": 0.8016807 }, { "x": 0.542211652, "y": 0.779831946 }, { "x": 0.560047567, "y": 0.803361356 } ] }, "symbols": [ { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "L", "confidence": 0.96 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ] }, "text": "a", "confidence": 1 }, { "property": { "detectedLanguages": [ { "languageCode": "en" } ], "detectedBreak": { "type": "SPACE" } }, "text": "b", "co
如何获取pdf文本?
【问题讨论】:
标签: c# pdf google-cloud-vision