【问题标题】:PHP Library to Parse Mobi解析 Mobi 的 PHP 库
【发布时间】:2012-08-05 14:01:54
【问题描述】:

是否有任何免费的 PHP 库可以解析 .mobi 文件以获取:

  • 作者
  • 标题
  • 出版商
  • 封面

编辑

对于所有认为这与“Does a PHP Library Exist to Work with PRC/MOBI Files”完全重复的人,您显然懒得阅读这两个问题。

提问者想知道如何使用 PHP 库生成 .mobi 文件。我想知道如何分解或解析已经创建的 .mobi 文件以获取某些信息。因此,该问题的解决方案 phpMobi 将不起作用,因为它是从 HTML 生成 .mobi 文件的脚本,而不是解析 .mobi 文件。

【问题讨论】:

标签: php parsing mobipocket


【解决方案1】:

一个非常非常非常蹩脚的例子,但如果你感到绝望,你可以尝试这样的事情:

// Crude heuristic (not a real parser): grab the bytes that follow the "EXTH"
// marker, split them on NUL bytes and keep the pieces that look like text.
$data = file_get_contents("A Young Girl's Diary - Freud, Sigmund.mobi");

// A .mobi file is binary, so the marker is located and sliced byte-wise
// (strpos/substr) instead of with the multibyte text functions.
$start = ($data === false) ? false : strpos($data, 'EXTH');

$chunks = array();
if ($start !== false) {
    $chunk = substr($data, $start, 512);
    $chunks = explode("\x00", $chunk);
    // Drop the first piece, which starts with the "EXTH" marker itself.
    array_shift($chunks);

    // Keep pieces containing at least one capital letter and longer than 2 characters.
    $chunks = array_filter($chunks, function($str){return preg_match('#([A-Z])#', $str) && mb_strlen($str) > 2;});

    // array_combine() requires exactly as many values as keys, so trim both
    // sides to the same length instead of failing when the heuristic finds
    // more or fewer than three strings.
    $keys = array('author', 'publisher', 'title');
    $chunks = array_slice(array_values($chunks), 0, count($keys));
    $chunks = array_combine(array_slice($keys, 0, count($chunks)), $chunks);
}

print_r($chunks);

输出:

Array
(
    [author] => Freud, Sigmund
    [publisher] => Webarto
    [title] => A Young Girl's Diary
)

使用的文件:http://freekindlebooks.org/Freud/752-h.mobi(已使用 Calibre 编辑出版商元数据)

文件解析甚至不是一件容易或有趣的事情。看看这个:http://code.google.com/p/xee/source/browse/XeePhotoshopLoader.m?r=a70d7396356997114b548f4ab2cbd49badd7d285#107

你应该做的是逐字节读取,但是因为没有详细的文档,恐怕这不是一件容易的事。

附:我没有尝试获取封面照片。

【讨论】:

    【解决方案2】:

    如果有人仍然感兴趣,这里是一个读取 mobi 元数据的示例:

    /**
     * Holder for the values the mobi class reads from the PalmDOC header,
     * i.e. the bytes found at the offset of the first Palm database record.
     * All values are decoded as big-endian unsigned integers.
     */
    class palmDOCHeader
    {
        // First 2 bytes of the header.
        public $Compression = 0;
        // 4-byte value read after skipping the 2 bytes that follow Compression.
        public $TextLength = 0;
        // 2-byte value read right after TextLength.
        public $Records = 0;
        // 2-byte value read right after Records.
        public $RecordSize = 0;
    }
    
    /**
     * Holder for the Palm database record list collected by the mobi class.
     */
    class palmHeader
    {
        // palmRecord objects, appended in the order they appear in the file.
        public $Records = array();
    }
    
    /**
     * One entry of the Palm database record list (8 bytes per entry in the
     * file: 4 + 1 + 3), decoded as big-endian unsigned integers.
     */
    class palmRecord
    {
        // 4-byte value; the mobi class uses it as an absolute file position.
        public $Offset = 0;
        // 1-byte value following the offset.
        public $Attributes = 0;
        // 3-byte value following the attributes byte.
        public $Id = 0;
    }
    
    /**
     * Holder for the leading fields of the MOBI header, i.e. the 4-byte
     * big-endian values that follow the "MOBI" marker.
     */
    class mobiHeader
    {
        // Header length; added to the position of the "MOBI" marker to find
        // where the "EXTH" marker is expected.
        public $Length = 0;
        // Second 4-byte value after the marker.
        public $Type = 0;
        // Third 4-byte value after the marker.
        public $Encoding = 0;
        // Fourth 4-byte value after the marker.
        public $Id = 0;
        // Presumably the MOBI file version field — TODO confirm that the
        // parser actually populates it.
        public $FileVersion = 0;
    
    }
    
    /**
     * Holder for the EXTH header: its length field plus the parsed records.
     */
    class exthHeader
    {
        // 4-byte big-endian value read right after the "EXTH" marker.
        public $Length = 0;
        // exthRecord objects, appended in the order they appear in the file.
        public $Records = array();  
    }
    
    /**
     * One EXTH record: a numeric type, a length and the raw payload.
     */
    class exthRecord
    {
        // 4-byte big-endian record type; the mobi accessors look records up by it.
        public $Type = 0;
        // 4-byte big-endian length of the whole record, including the 8 bytes
        // used by the type and length fields themselves.
        public $Length = 0;
        // Payload bytes exactly as read from the file (Length - 8 bytes).
        public $Data = "";
    }
    
    /**
     * Minimal reader for the metadata of a Mobipocket (.mobi) file.
     *
     * The constructor walks the Palm database record list, the PalmDOC header,
     * the MOBI header and, when present, the EXTH header. The MOBI and EXTH
     * headers are kept so the accessors (Title(), Author(), ...) can look up
     * EXTH records by type.
     *
     * When the file cannot be opened, is not a BOOKMOBI file, or has no EXTH
     * header, every accessor returns an empty string.
     */
    class mobi {
        /** @var mobiHeader|null MOBI header, null when none was found. */
        protected $mobiHeader;
        /** @var exthHeader|null EXTH header, null when none was found. */
        protected $exthHeader;
        /** @var bool Whether the diagnostic dump is echoed while parsing. */
        private $verbose = true;

        /**
         * Parses the given file.
         *
         * @param string $file    Path of the .mobi file to read.
         * @param bool   $verbose Echo the diagnostic dump while parsing. Defaults
         *                        to true, which is the historical behaviour; pass
         *                        false to parse silently (a failed parse is then
         *                        only visible through empty accessor results).
         */
        public function __construct($file, $verbose = true){
            $this->verbose = (bool) $verbose;

            // "rb": the file is binary, so no newline translation may happen on Windows.
            $handle = fopen($file, "rb");
            if (!$handle){
                return;
            }

            try {
                $this->Parse($handle);
            } catch (\RuntimeException $e) {
                // Truncated or malformed file: keep whatever was parsed so far.
                $this->Say("\n".$e->getMessage()."\n");
            }

            fclose($handle);
        }

        /**
         * Reads the Palm database, PalmDOC, MOBI and EXTH structures from an open handle.
         *
         * @param resource $handle Readable, seekable stream positioned anywhere.
         * @throws \RuntimeException When the file ends in the middle of a structure.
         */
        private function Parse($handle)
        {
            // Bytes 60..67 of the Palm database header identify the file type.
            fseek($handle, 60, SEEK_SET);
            if (fread($handle, 8) !== "BOOKMOBI"){
                $this->Say("Invalid file format");
                return;
            }

            // Palm Database
            $this->Say("\nPalm database:\n");
            $palmHeader = new palmHeader();

            fseek($handle, 0, SEEK_SET);
            $name = fread($handle, 32);
            $this->Say("Name: ".$name."\n");

            fseek($handle, 76, SEEK_SET);
            $records = $this->ReadUInt($handle, 2);
            $this->Say("Records: ".$records."\n");

            fseek($handle, 78, SEEK_SET);
            for ($i=0; $i<$records; $i++){
                $record = new palmRecord();
                $record->Offset = $this->ReadUInt($handle, 4);
                $record->Attributes = $this->ReadUInt($handle, 1);
                $record->Id = $this->ReadUInt($handle, 3);

                $palmHeader->Records[] = $record;
                $this->Say("Record ".$i." offset: ".$record->Offset." attributes: ".$record->Attributes."  id : ".$record->Id."\n");
            }

            // Without a first record there is no PalmDOC/MOBI header to look at.
            if (count($palmHeader->Records) === 0){
                return;
            }

            // PalmDOC Header (16 bytes at the start of record 0)
            $palmDOCHeader = new palmDOCHeader();
            fseek($handle, $palmHeader->Records[0]->Offset, SEEK_SET);
            $palmDOCHeader->Compression = $this->ReadUInt($handle, 2);
            fseek($handle, 2, SEEK_CUR); // 2 bytes that are not used here
            $palmDOCHeader->TextLength = $this->ReadUInt($handle, 4);
            $palmDOCHeader->Records = $this->ReadUInt($handle, 2);
            $palmDOCHeader->RecordSize = $this->ReadUInt($handle, 2);
            fseek($handle, 4, SEEK_CUR); // last 4 bytes of the PalmDOC header, not used here

            $this->Say("\nPalmDOC Header:\n");
            $this->Say("Compression:".$palmDOCHeader->Compression."\n");
            $this->Say("TextLength:".$palmDOCHeader->TextLength."\n");
            $this->Say("Records:".$palmDOCHeader->Records."\n");
            $this->Say("RecordSize:".$palmDOCHeader->RecordSize."\n");

            // MOBI Header
            $mobiStart = ftell($handle);
            if (fread($handle, 4) !== "MOBI"){
                return;
            }

            $this->mobiHeader = new mobiHeader();
            $this->Say("\nMOBI header:\n");
            $this->mobiHeader->Length = $this->ReadUInt($handle, 4);
            $this->mobiHeader->Type = $this->ReadUInt($handle, 4);
            $this->mobiHeader->Encoding = $this->ReadUInt($handle, 4);
            $this->mobiHeader->Id = $this->ReadUInt($handle, 4);

            $this->Say("Header length: ".$this->mobiHeader->Length."\n");
            $this->Say("Type: ".$this->mobiHeader->Type."\n");
            $this->Say("Encoding: ".$this->mobiHeader->Encoding."\n");
            $this->Say("Id: ".$this->mobiHeader->Id."\n");

            // The file version is the next 4-byte field; only read it when the
            // declared header length says the field is actually there.
            if ($this->mobiHeader->Length >= 24){
                $this->mobiHeader->FileVersion = $this->ReadUInt($handle, 4);
            }

            // The header length is counted from the "MOBI" marker; the EXTH
            // header, when present, starts right after the MOBI header.
            fseek($handle, $mobiStart+$this->mobiHeader->Length, SEEK_SET);
            if (fread($handle, 4) === "EXTH"){
                $this->ParseExth($handle);
            }
        }

        /**
         * Reads the EXTH header that follows an already consumed "EXTH" marker.
         *
         * @param resource $handle Stream positioned right after the marker.
         * @throws \RuntimeException On a truncated file or an impossible record length.
         */
        private function ParseExth($handle)
        {
            $this->exthHeader = new exthHeader();
            $this->Say("\nEXTH header:\n");

            $this->exthHeader->Length = $this->ReadUInt($handle, 4);
            $records = $this->ReadUInt($handle, 4);
            $this->Say("Records: ".$records."\n");

            for ($i=0; $i<$records; $i++){
                $record = new exthRecord();
                $record->Type = $this->ReadUInt($handle, 4);
                $record->Length = $this->ReadUInt($handle, 4);

                // The record length includes the 8 bytes of the type and length fields.
                $dataLength = $record->Length - 8;
                if ($dataLength < 0){
                    throw new \RuntimeException("Malformed EXTH record");
                }

                // fread() rejects a zero length, so an empty payload is handled here.
                $record->Data = "";
                if ($dataLength > 0){
                    $data = fread($handle, $dataLength);
                    if ($data === false || strlen($data) !== $dataLength){
                        throw new \RuntimeException("Unexpected end of file");
                    }
                    $record->Data = $data;
                }

                $this->exthHeader->Records[] = $record;
                $this->Say("Record ".$i." type: ".$record->Type." length: ".$record->Length."\n");
                $this->Say("  data: ".$record->Data."\n");
            }
        }

        /**
         * Reads a big-endian unsigned integer of the given width.
         *
         * @param resource $handle Stream to read from.
         * @param int      $length Number of bytes to read (1 to 4 in this class).
         * @return int|float Decoded value (hexdec() yields a float beyond PHP_INT_MAX).
         * @throws \RuntimeException When fewer than $length bytes are available.
         */
        private function ReadUInt($handle, $length)
        {
            $bytes = fread($handle, $length);
            if ($bytes === false || strlen($bytes) !== $length){
                throw new \RuntimeException("Unexpected end of file");
            }
            return hexdec(bin2hex($bytes));
        }

        /**
         * Echoes a line of the diagnostic dump unless the reader is silent.
         *
         * @param string $message Text to print verbatim.
         */
        private function Say($message)
        {
            if ($this->verbose){
                echo $message;
            }
        }

        /**
         * Returns the first EXTH record of the given type.
         *
         * @param int $type EXTH record type.
         * @return exthRecord|null The record, or NULL when the file has no EXTH
         *                         header or no record of that type.
         */
        protected function GetRecord($type)
        {
            // No EXTH header was parsed (invalid file, or the file has none).
            if ($this->exthHeader === null){
                return NULL;
            }

            foreach ($this->exthHeader->Records as $record){
                if ($record->Type == $type)
                    return $record;
            }
            return NULL;
        }

        /**
         * Returns the payload of the first EXTH record of the given type.
         *
         * @param int $type EXTH record type.
         * @return string Raw record data, or "" when there is no such record.
         */
        protected function GetRecordData($type)
        {
            $record = $this->GetRecord($type);
            if ($record)
                return $record->Data;
            return "";
        }

        /** @return string Data of EXTH record 503, or "" when absent. */
        public function Title()
        {
            return $this->GetRecordData(503);
        }

        /** @return string Data of EXTH record 100, or "" when absent. */
        public function Author()
        {
            return $this->GetRecordData(100);
        }

        /** @return string Data of EXTH record 104, or "" when absent. */
        public function Isbn()
        {
            return $this->GetRecordData(104);
        }

        /** @return string Data of EXTH record 105, or "" when absent. */
        public function Subject()
        {
            return $this->GetRecordData(105);
        }

        /** @return string Data of EXTH record 101, or "" when absent. */
        public function Publisher()
        {
            return $this->GetRecordData(101);
        }
    }
    
    // Parse the sample file, then print each metadata field on its own line.
    $mobi = new mobi("test.mobi");
    printf("\nTitle: %s", $mobi->Title());
    printf("\nAuthor: %s", $mobi->Author());
    printf("\nIsbn: %s", $mobi->Isbn());
    printf("\nSubject: %s", $mobi->Subject());
    printf("\nPublisher: %s", $mobi->Publisher());
    

    【讨论】:

      【解决方案3】:

      遇到了同样的问题,没有找到任何 PHP 解析器,只好自己编写(不幸的是我不能透露我的代码)。这是一个关于.mobi结构的好资源http://wiki.mobileread.com/wiki/MOBI

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 2012-04-19
        • 2018-07-07
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多