【问题标题】:swift CGPDFDocument parsing(在 Swift 中解析 CGPDFDocument)
【发布时间】:2017-09-13 06:13:45
【问题描述】:

我正在尝试使用 Swift 来解析 PDF 文档的内容,遵循 Apple 的编程指南(其中所有示例都是 ObjC...)

// Build a CFURL that points at the PDF on disk.
let filepath = "/Users/ben/Desktop/Test.pdf"
let localUrl = filepath as CFString

// Applier shared by both dictionary walks below. It prints its three arguments
// (key pointer, object pointer, context pointer) using their default descriptions.
let printEntry: CGPDFDictionaryApplierFunction = { key, object, info in
    print("\(key), \(object), \(info)")
}

if let pdfURL = CFURLCreateWithFileSystemPath(nil, localUrl, CFURLPathStyle.cfurlposixPathStyle, false),
   let pdf = CGPDFDocument(pdfURL) {
    // Info dictionary first, then the catalog; each walk is skipped when the dictionary is absent.
    if let inf = pdf.info {
        CGPDFDictionaryApplyFunction(inf, printEntry, nil)
    }
    if let cat = pdf.catalog {
        CGPDFDictionaryApplyFunction(cat, printEntry, nil)
    }
}

虽然这似乎产生了一些结果,但它只是一串十六进制数字。

0x00007ff29f43ce00, 0x00007ff29f492bd0, nil
0x00007ff29f443b60, 0x00007ff29f492cd0, nil
0x00007ff29f482590, 0x00007ff29f492dd0, nil
0x00007ff29f482a40, 0x00007ff29f492ed0, nil
0x00007ff29f482e30, 0x00007ff29f492fe0, nil
0x00007ff29f47da20, 0x00007ff29f4930e0, nil
0x00007ff29f474ac0, 0x00007ff29f842b50, nil
0x00007ff29f43f5d0, 0x00007ff29f842bf0, nil
0x00007ff29f485eb0, 0x00007ff29f842a60, nil
0x00007ff29f482f70, 0x00007ff29f842ab0, nil
0x00007ff29f48b1c0, 0x00007ff29f48f6d0, nil

那么如何获取实际数据呢?理想情况下,我试图获取文档元数据和包含的字体等内容。

【问题讨论】:

    标签: swift cocoa cgpdfdocument


    【解决方案1】:

    制作了一个解析器(基于以前的答案),它可以抓取 PDF 层次结构并为您提供 JSON。

    // Parse the PDF at `pdfFileURL` and write the result as JSON to `jsonFileURL`.
    PDFParser.parse(pdfUrl: pdfFileURL, into: jsonFileURL)
    
    // Parse the PDF at `pdfFileURL` into an in-memory dictionary instead of a file.
    let pdf: [String:Any?] = PDFParser.parse(pdfUrl: pdfFileURL)
    

    给你:

    {
      "Catalog" : {
        "Pages<Dictionary>" : {
          "MediaBox<Array>" : [
            0,
            0,
            612,
            792
          ],
          "Type<Name>" : "Pages",
          "Kids<Array>" : [
            {
              "Rotate<Integer>" : 0,
              "MediaBox<Array>" : [
                0,
                0,
                595.27499999999998,
                841.88999999999999
              ],
              "Parent<Dictionary>" : "<PARENT_NOT_SERIALIZED>",
              "Resources<Dictionary>" : {
                "ColorSpace<Dictionary>" : {
                  "Cs1<Array>" : [
                    "ICCBased",
                    {
                      "N<Integer>" : 3,
                      "Filter<Name>" : "FlateDecode",
                      "Alternate<Name>" : "DeviceRGB",
                      "Length<Integer>" : 2612
                    }
                  ]
                }
    ...
    

    从 CGPDFDocument 获取(与原始问题中的做法相同):

    // Open the document, then fetch its catalog; stop if either step fails.
    guard let document = CGPDFDocument(pdfFileURL as CFURL) else { return }
    guard let catalog = document.catalog else { return }

    // Convert the catalog into Swift values.
    let catalogDictionary = PDFParser.value(from: catalog)
    

    给你一个非常普通的 Swift 字典。控制台输出:

    Optional(["Pages<Dictionary>": Optional({
        "Count<Integer>" = 1;
        "Kids<Array>" =     (
                    {
                "ArtBox<Array>" =             (
                    "28.3465",
                    "325.193",
                    "393.389",
                    "813.543"
                );
                "Contents<Stream>" =             {
                    Data = "q Q q 0 0 595.276 841.89 re W n 1 0 1 0 k /Gs1 gs 201.8862 420.9449 m 201.8862\n473.8269 244.7562 516.6959 297.6372 516.6959 c 350.5192 516.6959 393.3892\n473.8269 393.3892 420.9449 c 393.3892 368.0629 350.5192 325.1939 297.6372\n325.1939 c 244.7562 325.1939 201.8862 368.0629 201.8862 420.9449 c f Q q 28.346 530.078 283.464 283.465\nre W n 0 0 0 1 k /Gs1 gs BT 12 0 0 12 28.3467 803.499 Tm /Tc1 1 Tf [ (h) 4\n(ttp://epp) 7 (z.eu) ] TJ ET Q";
                    "Filter<Name>" = FlateDecode;
                    "Length<Integer>" = 237;
                };
                "MediaBox<Array>" =             (
                    0,
                    0,
                    "595.2760000000001",
                    "841.89"
                );
                "Parent<Dictionary>" = "<PARENT_NOT_SERIALIZED>";
                "Resources<Dictionary>" =             {
                    "ExtGState<Dictionary>" =                 {
                        "Gs1<Dictionary>" =                     {
                            "OPM<Integer>" = 1;
                            "Type<Name>" = ExtGState;
                        };
                    };
    ...
    

    PDFParser.swift

    //
    //  PDFParser.swift
    //  PDFParser
    //
    //  Copyright (c) 2020 Geri Borbás http://www.twitter.com/_eppz
    //
    //  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
    //  The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
    //  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    //
    
    import Foundation
    import PDFKit
    
    
    /// Converts the object graph of a PDF, read through the Core Graphics `CGPDF*` functions,
    /// into plain Swift / Foundation values (dictionaries, arrays, strings, numbers).
    ///
    /// Dictionary keys are suffixed with the type of their value in angle brackets,
    /// for example `Pages<Dictionary>` or `Count<Integer>`.
    class PDFParser
    {


        /// Shorthand for type strings, used as the `<Type>` suffix of every collected key.
        static let namesForTypes: [CGPDFObjectType:String] =
        [
            .null : "Null",
            .boolean : "Boolean",
            .integer : "Integer",
            .real : "Real",
            .name : "Name",
            .string : "String",
            .array : "Array",
            .dictionary : "Dictionary",
            .stream : "Stream",
        ]

        /// Type suffix used for any object type that has no entry in `namesForTypes`.
        static let nameForUnknownType = "Object"

        /// Placeholder strings stored in the output where a real value is not available.
        struct Message
        {
            static let parentNotSerialized = "<PARENT_NOT_SERIALIZED>"
            static let couldNotParseValue = "<COULD_NOT_PARSE_VALUE>"
            static let couldNotGetStreamData = "<COULD_NOT_GET_STREAM_DATA>"
            static let unknownStreamDataFormat = "<UNKNOWN_STREAM_DATA_FORMAT>"
        }

        /// Parse a PDF file into a JSON file.
        ///
        /// Serialization and write errors are printed, not thrown.
        ///
        /// - Parameters:
        ///   - pdfUrl: Location of the PDF to read.
        ///   - jsonURL: Location the pretty-printed JSON is written to.
        static func parse(pdfUrl: URL, into jsonURL: URL)
        {
            do
            {
                let pdf = PDFParser.parse(pdfUrl: pdfUrl)
                let data = try JSONSerialization.data(withJSONObject: pdf, options: .prettyPrinted)
                try data.write(to: jsonURL, options: [])
            }
            catch
            { print(error) }
        }

        /// Parse a PDF file into a dictionary.
        ///
        /// - Parameter pdfUrl: Location of the PDF to read.
        /// - Returns: A dictionary with a `"Catalog"` entry and, when the document has an info
        ///   dictionary, an `"Info"` entry. Empty when the document or its catalog cannot be read.
        static func parse(pdfUrl: URL) -> [String:Any?]
        {
            // Document. Only the catalog is required; the info dictionary is optional.
            guard
                let document = CGPDFDocument(pdfUrl as CFURL),
                let catalog = document.catalog
                else
            {
                print("Cannot open PDF.")
                return [:]
            }

            // Parse. Both values are dictionary references, so they go through `dictionary(from:)`
            // rather than through the object-based `value(from:)`.
            var result: [String:Any?] = ["Catalog" : PDFParser.dictionary(from: catalog)]
            if let info = document.info
            { result["Info"] = PDFParser.dictionary(from: info) }

            // NOTE(review): the dictionaries are presumably owned by the document, so keep it
            // alive until the traversal above has finished.
            withExtendedLifetime(document) {}

            return result
        }

        /// Collects every entry of a PDF dictionary into a Swift dictionary.
        ///
        /// - Parameter dictionaryRef: The PDF dictionary to read.
        /// - Returns: The collected entries, keyed as described on `collectObjects(from:into:)`.
        static func dictionary(from dictionaryRef: CGPDFDictionaryRef) -> [String:Any?]
        {
            var collected = NSMutableDictionary()
            PDFParser.collectObjects(from: dictionaryRef, into: &collected)

            // Every key was stored as an NSString, so this conditional cast is expected to succeed.
            return (collected as? [String:Any?]) ?? [:]
        }

        /// Converts a single PDF object into a Swift value.
        ///
        /// - Parameter object: The PDF object to convert.
        /// - Returns: `Bool`, `Int`, `Double`, `String`, `[Any]` or `[String:Any?]` depending on the
        ///   object type; `nil` for PDF null objects and for values that could not be read.
        static func value(from object: CGPDFObjectRef) -> Any?
        {
            switch (CGPDFObjectGetType(object))
            {
                case .null:

                    return nil

                case .boolean:

                    var boolean: CGPDFBoolean = 0
                    if CGPDFObjectGetValue(object, .boolean, &boolean)
                    { return Bool(boolean == 0x01) }

                case .integer:

                    var integer: CGPDFInteger = 0
                    if CGPDFObjectGetValue(object, .integer, &integer)
                    { return integer as Int }

                case .real:

                    var real: CGPDFReal = 0.0
                    if CGPDFObjectGetValue(object, .real, &real)
                    { return Double(real) }

                case .name:

                    var namePointerOrNil: UnsafePointer<Int8>? = nil
                    if
                        CGPDFObjectGetValue(object, .name, &namePointerOrNil),
                        let namePointer = namePointerOrNil,
                        let name = String(cString: namePointer, encoding: String.Encoding.isoLatin1)
                    { return name }

                case .string:

                    var stringRefOrNil: CGPDFStringRef? = nil
                    if
                        CGPDFObjectGetValue(object, .string, &stringRefOrNil),
                        let stringRef = stringRefOrNil,
                        let text = CGPDFStringCopyTextString(stringRef)
                    { return text as String }

                case .array:

                    var arrayRefOrNil: CGPDFArrayRef? = nil
                    if
                        CGPDFObjectGetValue(object, .array, &arrayRefOrNil),
                        let arrayRef = arrayRefOrNil
                    {
                        // Elements that convert to `nil` (null objects, unreadable values) are left out.
                        var array: [Any] = []
                        for index in 0 ..< CGPDFArrayGetCount(arrayRef)
                        {
                            var eachObjectRef: CGPDFObjectRef? = nil
                            if
                                CGPDFArrayGetObject(arrayRef, index, &eachObjectRef),
                                let eachObject = eachObjectRef,
                                let eachValue = PDFParser.value(from: eachObject)
                            { array.append(eachValue) }
                        }
                        return array
                    }

                case .stream:

                    var streamRefOrNil: CGPDFStreamRef? = nil
                    if
                        CGPDFObjectGetValue(object, .stream, &streamRefOrNil),
                        let streamRef = streamRefOrNil,
                        let streamDictionaryRef = CGPDFStreamGetDictionary(streamRef)
                    {
                        // Get stream dictionary.
                        var streamDictionary = PDFParser.dictionary(from: streamDictionaryRef)

                        // Get data. Raw data becomes text (`nil` when the bytes are not valid UTF-8),
                        // JPEG data becomes a Base64 string.
                        var dataString: String? = Message.couldNotGetStreamData
                        var streamDataFormat: CGPDFDataFormat = .raw
                        if let streamData: CFData = CGPDFStreamCopyData(streamRef, &streamDataFormat)
                        {
                            switch streamDataFormat
                            {
                                case .raw: dataString = String(data: streamData as Data, encoding: String.Encoding.utf8)
                                case .jpegEncoded, .JPEG2000: dataString = (streamData as Data).base64EncodedString()
                                @unknown default: dataString = Message.unknownStreamDataFormat
                            }
                        }

                        // Add to dictionary.
                        streamDictionary["Data"] = dataString

                        return streamDictionary
                    }

                case .dictionary:

                    var dictionaryRefOrNil: CGPDFDictionaryRef? = nil
                    if
                        CGPDFObjectGetValue(object, .dictionary, &dictionaryRefOrNil),
                        let dictionaryRef = dictionaryRefOrNil
                    { return PDFParser.dictionary(from: dictionaryRef) }

                @unknown default:

                    // Unrecognized type: the reference is walked as if it were a dictionary.
                    // NOTE(review): presumably this branch exists for callers that hand a
                    // `CGPDFDictionaryRef` (e.g. a document catalog) to this function - confirm.
                    // Prefer `dictionary(from:)` for dictionary references.
                    return PDFParser.dictionary(from: object)

            }

            // The value could not be read.
            return nil
        }

        /// Walks a PDF dictionary and stores each entry in an `NSMutableDictionary`.
        ///
        /// Keys are stored as `"<key><<type name>>"`. The `Parent` entry is replaced with
        /// `Message.parentNotSerialized` instead of being followed, and values that convert to `nil`
        /// are stored as `Message.couldNotParseValue`.
        ///
        /// - Parameters:
        ///   - dictionaryRef: The PDF dictionary to walk.
        ///   - dictionaryPointer: Address of a variable that holds the `NSMutableDictionary` to fill
        ///     (pass `&variable`). It is handed to the applier as its context pointer.
        static func collectObjects(from dictionaryRef: CGPDFDictionaryRef, into dictionaryPointer: UnsafeMutableRawPointer?)
        {

            CGPDFDictionaryApplyFunction(
                dictionaryRef,
                {
                    (eachKeyPointer, eachObject, eachContextOrNil: UnsafeMutableRawPointer?) -> Void in

                    // Unwrap dictionary.
                    guard let dictionary = eachContextOrNil?.assumingMemoryBound(to: NSMutableDictionary.self).pointee
                    else { return print("Could not unwrap dictionary.") }

                    // Unwrap key.
                    guard let eachKey = String(cString: UnsafePointer<CChar>(eachKeyPointer), encoding: .isoLatin1)
                    else { return print("Could not unwrap key.") }

                    // Type. Types without a registered name share one generic suffix.
                    let eachTypeName = PDFParser.namesForTypes[CGPDFObjectGetType(eachObject)] ?? PDFParser.nameForUnknownType

                    // Assemble.
                    let eachDictionaryKey = "\(eachKey)<\(eachTypeName)>" as NSString

                    // Skip parent.
                    guard eachKey != "Parent"
                    else
                    {
                        dictionary.setObject(Message.parentNotSerialized, forKey: eachDictionaryKey)
                        return
                    }

                    // Parse value. A value that cannot be converted (PDF null included) is
                    // recorded with a placeholder and the walk continues with the next entry.
                    guard let eachValue = PDFParser.value(from: eachObject)
                    else
                    {
                        dictionary.setObject(Message.couldNotParseValue, forKey: eachDictionaryKey)
                        return
                    }

                    // Set.
                    dictionary.setObject(eachValue, forKey: eachDictionaryKey)
                },
                dictionaryPointer
            )
        }
    }
    

    【讨论】:

    • 虽然这无疑是获取 PDF 数据最有用的工具,但对我最初遇到的问题解释得最清楚的,还是我已标记为采纳的那个答案。不过,别人白送的东西,我也不该挑三拣四!
    • 看起来不错,感谢解析器。我只想更改第一行,因为它是一个 hack:static let CGPDFObjectTypeObject: CGPDFObjectType = CGPDFObjectType(rawValue: 77696)!,它对我不起作用。
    【解决方案2】:

    Swift 4 - 这是 Daniel 的优秀示例的更新版本,它在 Swift 4 中编译。

    import Foundation
    import Quartz
    
    // Startup banner; it plays no part in the PDF parsing below.
    print("Hello, World!")
    
    /// Prints one entry of a PDF dictionary: its key, its type and, where possible, its value.
    ///
    /// Intended to be called from a `CGPDFDictionaryApplyFunction` applier closure. Nested
    /// dictionaries are printed recursively, except those stored under the keys `Parent` and `P`.
    ///
    /// - Parameters:
    ///   - key: NUL-terminated key of the entry, decoded as ISO Latin 1.
    ///   - object: The PDF object stored under `key`.
    func printPDFKeys( key: UnsafePointer<Int8>, object: CGPDFObjectRef) {
        // Nothing can be reported for a key that does not decode.
        guard let keyString = String(cString: key, encoding: .isoLatin1) else {
            return
        }
        let objectType = CGPDFObjectGetType(object)
        print("key \(keyString) is present in dictionary, type \(objectType.rawValue)")

        switch objectType {
        case .boolean:
            var objectBoolean: CGPDFBoolean = 0
            if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
                let testbool = NSNumber(value: objectBoolean)
                print("Boolean value \(testbool)")
            }
        case .integer:
            // The out-parameter must be a plain CGPDFInteger: the value is written straight
            // into this storage, so an Optional here would not receive it correctly.
            var objectInteger: CGPDFInteger = 0
            if CGPDFObjectGetValue(object, objectType, &objectInteger) {
                print("Integer value \(objectInteger)")
            }
        case .real:
            // Same as above: a plain CGPDFReal, not an Optional.
            var objectReal: CGPDFReal = 0.0
            if CGPDFObjectGetValue(object, objectType, &objectReal) {
                print("Real value \(objectReal)")
            }
        case .name:
            var namePointer: UnsafePointer<Int8>? = nil
            if CGPDFObjectGetValue(object, objectType, &namePointer),
               let namePointer = namePointer,
               let stringName = String(cString: namePointer, encoding: .isoLatin1) {
                print("Name value: \(stringName)")
            }
        case .string:
            var stringRef: CGPDFStringRef? = nil
            if CGPDFObjectGetValue(object, objectType, &stringRef),
               let stringRef = stringRef,
               let stringValue = CGPDFStringCopyTextString(stringRef) {
                print("String value: \(stringValue)")
            }
        case .array:
            print("Array")
            var objectArray: CGPDFArrayRef? = nil
            if CGPDFObjectGetValue(object, objectType, &objectArray),
               let objectArray = objectArray {
                print("array: \(arrayFromPDFArray(pdfArray: objectArray))")
            }
        case .dictionary:
            var objectDictionary: CGPDFDictionaryRef? = nil
            if CGPDFObjectGetValue(object, objectType, &objectDictionary),
               let objectDictionary = objectDictionary {
                let count = CGPDFDictionaryGetCount(objectDictionary)
                print("Found dictionary with \(count) entries")
                // "Parent" and "P" are not followed - presumably they point back up the
                // tree, and following them would never terminate.
                if keyString != "Parent" && keyString != "P" {
                    CGPDFDictionaryApplyFunction(objectDictionary, { (key, object, info) -> Void in
                        printPDFKeys(key: key, object: object)
                    }, nil)
                }
            }
        case .stream:
            print("Stream")
            var objectStream: CGPDFStreamRef? = nil
            var fmt: CGPDFDataFormat = .raw
            if CGPDFObjectGetValue(object, objectType, &objectStream),
               let objectStream = objectStream,
               let streamData = CGPDFStreamCopyData(objectStream, &fmt) {
                let dataLength: Int = CFDataGetLength(streamData)
                print("data stream (length=\(dataLength)):")
                // Only short streams are echoed, and only when they are valid UTF-8 text.
                if dataLength < 400 {
                    let dataString = String(data: streamData as Data, encoding: .utf8)
                    print(dataString ?? "<stream data is not UTF-8 text>")
                }
            }
        default:
            // PDF null objects and any type not handled above.
            print("Null")
        }
    }
    
    // convert a PDF array into an objC one
    /// Converts a PDF array into an `NSMutableArray`.
    ///
    /// Elements that cannot be read, or that `objectForPDFObject(object:)` converts to `nil`,
    /// are left out, so the result can be shorter than the PDF array.
    ///
    /// - Parameter pdfArray: The PDF array to convert.
    /// - Returns: The converted elements, in their original order.
    func arrayFromPDFArray(pdfArray: CGPDFArrayRef ) -> NSMutableArray {
        let tmpArray = NSMutableArray()

        for i in 0..<CGPDFArrayGetCount(pdfArray) {
            var value: CGPDFObjectRef? = nil
            if CGPDFArrayGetObject(pdfArray, i, &value),
               let value = value,
               let object = objectForPDFObject(object: value) {
                tmpArray.add(object)
            }
        }

        return tmpArray
    }
    
    /// Converts one PDF object into a Foundation object.
    ///
    /// - Parameter object: The PDF object to convert.
    /// - Returns: An `NSNumber` for booleans, integers and reals, a string for names, strings and
    ///   UTF-8 decodable streams, an `NSMutableArray` for arrays. Dictionaries are printed through
    ///   `printPDFKeys(key:object:)` and yield `nil`, as do null objects and unreadable values.
    func objectForPDFObject( object: CGPDFObjectRef) -> AnyObject? {
        let objectType: CGPDFObjectType = CGPDFObjectGetType(object)
        switch (objectType) {
        case .boolean:
            var objectBoolean = CGPDFBoolean()
            if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
                return NSNumber(value: objectBoolean)
            }
        case .integer:
            var objectInteger = CGPDFInteger()
            if CGPDFObjectGetValue(object, objectType, &objectInteger) {
                return objectInteger as AnyObject
            }
        case .real:
            var objectReal = CGPDFReal()
            if CGPDFObjectGetValue(object, objectType, &objectReal) {
                return objectReal as AnyObject
            }
        case .name:
            // Names occur inside arrays too (e.g. a colour space array starting with a name).
            var namePointer: UnsafePointer<Int8>? = nil
            if CGPDFObjectGetValue(object, objectType, &namePointer),
               let namePointer = namePointer,
               let stringName = String(cString: namePointer, encoding: .isoLatin1) {
                return stringName as AnyObject
            }
        case .string:
            var stringRef: CGPDFStringRef? = nil
            if CGPDFObjectGetValue(object, objectType, &stringRef),
               let stringRef = stringRef {
                return CGPDFStringCopyTextString(stringRef)
            }
        case .array:
            // Nested arrays are converted recursively.
            var objectArray: CGPDFArrayRef? = nil
            if CGPDFObjectGetValue(object, objectType, &objectArray),
               let objectArray = objectArray {
                return arrayFromPDFArray(pdfArray: objectArray)
            }
        case .dictionary:
            // Dictionaries are only printed; no object is returned for them.
            var objectDictionary: CGPDFDictionaryRef? = nil
            if CGPDFObjectGetValue(object, objectType, &objectDictionary),
               let objectDictionary = objectDictionary {
                let count = CGPDFDictionaryGetCount(objectDictionary)
                print("In array, found dictionary with \(count) entries")
                CGPDFDictionaryApplyFunction(objectDictionary, { (key, object, info) -> Void in
                    printPDFKeys(key: key, object: object)
                }, nil)
            }
        case .stream:
            var objectStream: CGPDFStreamRef? = nil
            var fmt: CGPDFDataFormat = .raw
            if CGPDFObjectGetValue(object, objectType, &objectStream),
               let objectStream = objectStream,
               let streamData = CGPDFStreamCopyData(objectStream, &fmt) {
                // `nil` when the stream bytes are not valid UTF-8.
                let dataString = NSString(data: streamData as Data, encoding: String.Encoding.utf8.rawValue)
                print("data stream (length=\(CFDataGetLength(streamData))):")
                return dataString
            }
        default:
            return nil
        }
        return nil
    }
    
    /// Opens a PDF and prints its page count followed by the contents of its catalog and
    /// info dictionaries.
    ///
    /// - Parameter path: Path of the PDF; a leading `~` is expanded. Defaults to the sample
    ///   document on the desktop.
    func parse(path: String = "~/Desktop/doc.pdf") {
        let filepath = (path as NSString).expandingTildeInPath
        let urlDocument = NSURL(fileURLWithPath: filepath)
        guard let myDocument = CGPDFDocument(urlDocument) else {
            print("Cannot open PDF document")
            return
        }

        print("Number of pages: \(myDocument.numberOfPages)")

        // Get complete catalog
        if let myCatalog = myDocument.catalog {
            CGPDFDictionaryApplyFunction(myCatalog, { (key, object, info) -> Void in
                printPDFKeys(key: key, object: object)
            }, nil)
        }

        // The info dictionary is optional, so its absence is not an error.
        if let myInfo = myDocument.info {
            CGPDFDictionaryApplyFunction(myInfo, { (key, object, info) -> Void in
                printPDFKeys(key: key, object: object)
            }, nil)
        }

        // NOTE(review): the dictionaries are presumably owned by the document, so keep it
        // alive until both walks have finished.
        withExtendedLifetime(myDocument) {}
    }

    parse()
    

    【讨论】:

    • 你的回答主要是代码。一点解释文字会很好:)
    • '不能从捕获上下文的闭包中形成 C 函数指针'?
    【解决方案3】:

    您的解析检索高级字典和信息数据是正确的,但您需要在 CGPDFDictionaryApplyFunction 中扩展解码以根据其类型(整数、字符串、数组、字典等)显示 PDF 数据的值。您调用的 CGPDFDictionaryApplierFunction 的语法是:

    typealias CGPDFDictionaryApplierFunction = (UnsafePointer<Int8>, COpaquePointer, UnsafeMutablePointer<()>) -> Void

    您的程序正在显示指向数据的指针,您可以按照以下类型访问数据值(Swift 2):

        // Swift 2 syntax. Opens the PDF and prints every entry of its catalog and info
        // dictionaries through printPDFKeys.
        let filepath = "/Users/ben/Desktop/Test.pdf"
        let urlDocument = NSURL(fileURLWithPath: filepath)
        let myDocument = CGPDFDocumentCreateWithURL(urlDocument)
        if myDocument != nil {
            let numPages = CGPDFDocumentGetNumberOfPages(myDocument)
            print("Number of pages: \(numPages)")
            // Get complete catalog
            let myCatalog = CGPDFDocumentGetCatalog(myDocument)
            // printPDFKeys is passed directly as the applier; the context argument is nil.
            CGPDFDictionaryApplyFunction(myCatalog, printPDFKeys, nil)
            // Same walk for the document info dictionary.
            let myInfo = CGPDFDocumentGetInfo(myDocument)
            CGPDFDictionaryApplyFunction(myInfo, printPDFKeys, nil)
        } else {
            print("Cannot open PDF document")
        }
    

    为了能被 CGPDFDictionaryApplyFunction 调用,printPDFKeys 必须声明为全局函数(位于您的主类之外);或者您也可以把这段代码直接写进传给 CGPDFDictionaryApplyFunction 的闭包中,如上面的示例所示。以下代码已经过精简,没有对错误和空值做完整的防护。

    // Swift 2 syntax. Applier for CGPDFDictionaryApplyFunction: prints the key and type of one
    // dictionary entry, then its value according to the type. Nested dictionaries are printed
    // recursively unless they are stored under "Parent" or "P".
    //   key:    C string holding the entry's key, decoded as ISO Latin 1
    //   object: the PDF object stored under that key
    //   info:   context pointer supplied to CGPDFDictionaryApplyFunction (nil at every call here)
    func printPDFKeys( key: UnsafePointer<Int8>, object: COpaquePointer, info: UnsafeMutablePointer<()>) {
        // contentDict is not used below.
        let contentDict: CGPDFDictionaryRef = CGPDFDictionaryRef(info)
        let keyString = String(CString: UnsafePointer<CChar>(key), encoding: NSISOLatin1StringEncoding)
        let objectType = CGPDFObjectGetType(object)
        // A key that cannot be decoded is skipped.
        if keyString == nil {
            return
        }
        print("key \(keyString!) is present in dictionary, type \(objectType.rawValue)")
        // Shared out-parameter for the Name and String cases.
        var ptrObjectValue = UnsafePointer<Int8>()
        switch objectType {
        // ObjectType is enum of:
        //   Null
        //   Boolean
        //   Integer
        //   Real
        //   Name
        //   String
        //   Array
        //   Dictionary
        //   Stream
        case .Boolean:
            // Boolean
            var objectBoolean = CGPDFBoolean()
            if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
                let testbool = NSNumber(unsignedChar: objectBoolean)
                print("Boolean value \(testbool)")
            }
        case .Integer:
            // Integer
            var objectInteger = CGPDFInteger()
            if CGPDFObjectGetValue(object, objectType, &objectInteger) {
                print("Integer value \(objectInteger)")
            }
        case .Real:
            // Real
            var objectReal = CGPDFReal()
            if CGPDFObjectGetValue(object, objectType, &objectReal) {
                print("Real value \(objectReal)")
            }
        case .Name:
            // Name
            if (CGPDFObjectGetValue(object, objectType, &ptrObjectValue)) {
                let stringName = String(CString: UnsafePointer<CChar>(ptrObjectValue), encoding: NSISOLatin1StringEncoding)
                print("Name value: \(stringName!)")
            }
        case .String:
            // String
            // NOTE(review): valueFound is never checked before the pointer is used.
            let valueFound = CGPDFObjectGetValue(object, objectType, &ptrObjectValue)
            let stringValue = CGPDFStringCopyTextString(COpaquePointer(ptrObjectValue))
            print("String value: \(stringValue!)")
        case .Array:
            // Array
            print("Array")
            var objectArray = CGPDFArrayRef()
            if (CGPDFObjectGetValue(object, objectType, &objectArray))
            {
                print("array: \(arrayFromPDFArray(objectArray))")
            }
        case .Dictionary:
            // Dictionary
            var objectDictionary = CGPDFDictionaryRef()
            if (CGPDFObjectGetValue(object, objectType, &objectDictionary)) {
                let count = CGPDFDictionaryGetCount(objectDictionary)
                print("Found dictionary with \(count) entries")
                // Dictionaries stored under "Parent" or "P" are counted but not descended into.
                if !(keyString == "Parent") && !(keyString == "P") {
                    //catalogLevel = catalogLevel + 1
                    CGPDFDictionaryApplyFunction(objectDictionary, printPDFKeys, nil)
                    //catalogLevel = catalogLevel - 1
                }
            }
    case .Stream:
        // Stream
        print("Stream")
        var objectStream = CGPDFStreamRef()
        if (CGPDFObjectGetValue(object, objectType, &objectStream)) {
            // dict is not used below.
            let dict: CGPDFDictionaryRef = CGPDFStreamGetDictionary( objectStream )
            var fmt: CGPDFDataFormat = .Raw
            let streamData: CFDataRef = CGPDFStreamCopyData(objectStream, &fmt)!;
            let data = NSData(data: streamData)
            let dataString = NSString(data: data, encoding: NSUTF8StringEncoding)
            let dataLength: Int = CFDataGetLength(streamData)
            print("data stream (length=\(dataLength)):")
            // Only streams shorter than 400 bytes are echoed.
            if dataLength < 400 {
                print(dataString)
            }
        }
    default:
        // Reached for Null and for any type not listed above.
        print("Null")
    }
    }
    
    // convert a PDF array into an objC one
    // Swift 2 syntax. Converts a PDF array into an NSMutableArray; elements for which
    // objectForPDFObject returns nil, or which cannot be fetched, are left out.
    func arrayFromPDFArray(pdfArray: CGPDFArrayRef ) -> NSMutableArray {
    // This `i` is unused: the loop below declares its own `i`.
    var i:Int = 0
    var tmpArray: NSMutableArray = NSMutableArray()
    
    let count = CGPDFArrayGetCount(pdfArray)
    for i in 0..<count {
        var value = CGPDFObjectRef()
        if (CGPDFArrayGetObject(pdfArray, i, &value)) {
            if let object = objectForPDFObject(value) {
                tmpArray.addObject(object)
            }
        }
    }
    
    return tmpArray
    }
    
    // Swift 2 syntax. Converts one PDF object into an object that can be stored in an array.
    // Booleans, integers, reals, strings and streams return a value; dictionaries are printed
    // through printPDFKeys and return nil; every other type (Name and Array included) returns nil.
    func objectForPDFObject( object: CGPDFObjectRef) -> AnyObject? {
    let objectType: CGPDFObjectType = CGPDFObjectGetType(object)
    // Out-parameter for the String case.
    var ptrObjectValue = UnsafePointer<Int8>()
    switch (objectType) {
    case .Boolean:
        // Boolean
        var objectBoolean = CGPDFBoolean()
        if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
            let testbool = NSNumber(unsignedChar: objectBoolean)
            return testbool
        }
    case .Integer:
        // Integer
        var objectInteger = CGPDFInteger()
        if CGPDFObjectGetValue(object, objectType, &objectInteger) {
            return objectInteger
        }
    case .Real:
        // Real
        var objectReal = CGPDFReal()
        if CGPDFObjectGetValue(object, objectType, &objectReal) {
            return objectReal
        }
    case .String:
        // NOTE(review): valueFound is never checked before the pointer is used.
        let valueFound = CGPDFObjectGetValue(object, objectType, &ptrObjectValue)
        let stringValue = CGPDFStringCopyTextString(COpaquePointer(ptrObjectValue))
        return stringValue
    case .Dictionary:
        // Dictionary
        // The entries are printed only; execution then reaches the final `return nil`.
        var objectDictionary = CGPDFDictionaryRef()
        if (CGPDFObjectGetValue(object, objectType, &objectDictionary)) {
            let count = CGPDFDictionaryGetCount(objectDictionary)
            print("In array, found dictionary with \(count) entries")
            CGPDFDictionaryApplyFunction(objectDictionary, printPDFKeys, nil)
        }
    case .Stream:
        // Stream
        var objectStream = CGPDFStreamRef()
        if (CGPDFObjectGetValue(object, objectType, &objectStream)) {
            // dict is not used below.
            let dict: CGPDFDictionaryRef = CGPDFStreamGetDictionary( objectStream )
            var fmt: CGPDFDataFormat = .Raw
            let streamData: CFDataRef = CGPDFStreamCopyData(objectStream, &fmt)!;
            let data = NSData(data: streamData)
            // The stream bytes are interpreted as UTF-8 text.
            let dataString = NSString(data: data, encoding: NSUTF8StringEncoding)
            print("data stream (length=\(CFDataGetLength(streamData))):")
            return dataString
        }
    default:
        return nil
    }
    // Reached when a value could not be read, and always for dictionaries.
    return nil
    }
    

    【讨论】:

    • 这是很可爱的东西,虽然我使用的是 Swift 3,所以它会标记大量错误。特别是像var objectStream = CGPDFStreamRef() 这样的东西需要参数。
    • 感谢您的宝贵时间。你会认为 Apple 会为以下明显的任务创建更高阶的 API:“CGPDFJustGetAllTheData”。或者那个函数库会更普遍。
    • 不客气!很高兴这能有所帮助。确实,那些 Apple API 对用户并不友好,学习 PDF 解码本身是相当耗时的。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2010-09-12
    • 2014-10-14
    • 2018-07-08
    • 2011-10-14
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多