使用 iText pdfHTML 的 PDF 的页面大小和格式答案

【问题标题】：Page size and formatting of PDF using iText pdfHTML（使用 iText pdfHTML 生成的 PDF 的页面大小和格式）
【发布时间】：2017-12-18 13:11:30
【问题描述】：

我正在尝试使用 iText 7.1.0 和 pdfHTML 2.0.0，参照 this example，将 3 个 HTML 页面（内容完全相同）导出到一个 PDF 中。由于某种原因，生成的页面在页脚处存在格式问题。PDF 渲染器所使用的 HTML 代码见 jsFiddle link。

以下是用于生成 PDF 的 Java 代码（Test.html 的内容与 jsFiddle 中的 HTML 代码相同）：

package com.itextpdf.htmlsamples.chapter01;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.utils.PdfMerger;
import com.itextpdf.licensekey.LicenseKey;

/**
 * Can we parse different HTML files and combine them into one PDF?
 * Yes, this can be done in different ways. This example shows how
 * to create a PDF in memory for each HTML, then use PdfMerger to
 * merge the different PDFs into one, on a page per page basis.
 */
public class C07E01_CombineHtml {

    /** The Base URI of the HTML page. */
    public static final String BASEURI = "src/main/resources/html/";
    /** An array containing the paths to different HTML files. */
    public static final String[] SRC = {
            String.format("%sTest.html", BASEURI),
            String.format("%sTest.html", BASEURI),
            String.format("%sTest.html", BASEURI)
    };
    /** The target folder for the result. */
    public static final String TARGET = "target/results/ch07/";
    /** The path to the resulting PDF file. */
    public static final String DEST = String.format("%sbundle.pdf", TARGET);
    /** Not read or written anywhere in this class; kept so the class's protected surface is unchanged. */
    protected PageSize A4;

    /**
     * The main method of this example: loads the license key, makes sure the
     * target folder exists, and writes the combined PDF to {@link #DEST}.
     *
     * @param args no arguments are needed to run this example.
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public static void main(String[] args) throws IOException {
        LicenseKey.loadLicenseFile("C://Users//Sparks//Desktop//itextkey-0.xml");
        File file = new File(TARGET);
        file.mkdirs();
        new C07E01_CombineHtml().createPdf(BASEURI, SRC, DEST);
    }

    /**
     * Creates the PDF file. Each HTML source is first converted to its own
     * in-memory PDF with an A4 default page size; the pages of that PDF are
     * then appended to the result document.
     *
     * @param baseUri the base URI
     * @param src an array with the paths to different source HTML files
     * @param dest the path to the resulting PDF
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void createPdf(String baseUri, String[] src, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        properties.setBaseUri(baseUri);
        // try-with-resources: the output document is closed even when a conversion fails.
        try (PdfDocument pdf = new PdfDocument(new PdfWriter(dest))) {
            PdfMerger merger = new PdfMerger(pdf);
            for (String html : src) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                PdfDocument temp = new PdfDocument(new PdfWriter(baos));
                temp.setDefaultPageSize(PageSize.A4);
                // The HTML input stream was previously never closed (file-handle leak).
                try (FileInputStream htmlStream = new FileInputStream(html)) {
                    // The bytes in baos are re-read as a complete PDF right below,
                    // i.e. the conversion is expected to finish and close temp.
                    HtmlConverter.convertToPdf(htmlStream, temp, properties);
                }
                try (PdfDocument converted =
                             new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())))) {
                    merger.merge(converted, 1, converted.getNumberOfPages());
                }
            }
        }
    }
}

输出的 PDF 文件有 6 页，而且没有页脚。它应该只有 3 页，每页都是 'A4' 大小。

任何建议都会有所帮助。

【问题讨论】：

标签： java itext itext7

【解决方案1】：

将 PageSize 更改为更大的尺寸应该可以解决这个特定问题。之后，您可以再把页面缩小，从而得到页面为 A4 的 PDF。请查看下面的代码示例，了解如何执行此操作。

/**
 * Entry point: renders the HTML into an in-memory PDF via createPdf, then
 * writes a scaled-down copy of that PDF to DEST.
 *
 * @param args not used
 * @throws IOException if rendering or writing fails
 */
public static void main(String[] args) throws IOException {
    // Linear factor for going from A3 to A4 (about 71%).
    final float a3ToA4 = 0.7071f;
    final String htmlPath = "src/main/resources/SO47869248/html.html";

    ByteArrayOutputStream rendered = createPdf(htmlPath);
    SO47869248 scaler = new SO47869248();
    ByteArrayInputStream renderedBytes = new ByteArrayInputStream(rendered.toByteArray());
    scaler.scalePdf(DEST, renderedBytes, a3ToA4);
}

/**
 * Converts the given HTML file to PDF three times, each time on A3 pages,
 * and merges the three results into a single in-memory PDF.
 *
 * @param htmlSrc path to the HTML file; its parent directory is used as the base URI
 * @return the stream holding the bytes of the merged PDF
 * @throws IOException if the HTML file cannot be read or a PDF cannot be written
 */
public static ByteArrayOutputStream createPdf(String htmlSrc) throws IOException {
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    ConverterProperties converterProperties = new ConverterProperties();
    converterProperties.setBaseUri(new File(htmlSrc).getParent());
    // try-with-resources: the merged document is closed even when a conversion fails.
    try (PdfDocument pdfDocument = new PdfDocument(new PdfWriter(output))) {
        PdfMerger merger = new PdfMerger(pdfDocument);
        for (int x = 0; x < 3; x++) {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            PdfDocument temp = new PdfDocument(new PdfWriter(baos));
            temp.setDefaultPageSize(PageSize.A3);
            // The HTML input stream was previously never closed (file-handle leak).
            try (FileInputStream htmlStream = new FileInputStream(htmlSrc)) {
                // The bytes in baos are re-read as a complete PDF right below,
                // i.e. the conversion is expected to finish and close temp.
                HtmlConverter.convertToPdf(htmlStream, temp, converterProperties);
            }
            try (PdfDocument converted =
                         new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())))) {
                merger.merge(converted, 1, converted.getNumberOfPages());
            }
        }
    }

    return output;
}

/**
 * Copies every page of the input PDF into a new PDF at {@code dest}, drawing
 * each page's content scaled by {@code scale}. A ScaleDownEventHandler
 * registered for START_PAGE resizes the boxes of each new page to match.
 *
 * @param dest  path of the PDF file to write
 * @param input bytes of the source PDF
 * @param scale uniform scale factor applied to page content and page boxes
 * @throws IOException if the source cannot be read or the destination cannot be written
 */
public void scalePdf(String dest, ByteArrayInputStream input, float scale) throws IOException {
    // Resources close in reverse order: destination first, then source —
    // the same order as before, but now also when an exception is thrown.
    try (PdfDocument srcDoc = new PdfDocument(new PdfReader(input));
         PdfDocument pdfDoc = new PdfDocument(new PdfWriter(dest))) {
        ScaleDownEventHandler eventHandler = new ScaleDownEventHandler(scale);
        int n = srcDoc.getNumberOfPages();
        pdfDoc.addEventHandler(PdfDocumentEvent.START_PAGE, eventHandler);

        for (int p = 1; p <= n; p++) {
            PdfPage srcPage = srcDoc.getPage(p);
            // Must be set before addNewPage(): the handler reads it when the page is created.
            eventHandler.setPageDict(srcPage.getPdfObject());
            PdfCanvas canvas = new PdfCanvas(pdfDoc.addNewPage());
            PdfFormXObject page = srcPage.copyAsFormXObject(pdfDoc);
            // Matrix [scale 0 0 scale 0 0]: uniform scaling, no translation.
            canvas.addXObject(page, scale, 0f, 0f, scale, 0f, 0f);
        }
    }
}

/**
 * Event handler that, for each page it is notified about, copies the rotation
 * of the current source page and sets the page's MediaBox and CropBox to
 * scaled versions of the source page's boxes.
 */
protected class ScaleDownEventHandler implements IEventHandler {
    /** Factor applied to the width and height of the source page boxes. */
    protected float scale = 1;
    /** Dictionary of the source page currently being copied; set via {@link #setPageDict}. */
    protected PdfDictionary pageDict;

    /**
     * @param scale factor applied to the page box dimensions
     */
    public ScaleDownEventHandler(float scale) {
        this.scale = scale;
    }

    /**
     * Sets the source page dictionary that the next handled event will read from.
     *
     * @param pageDict dictionary of the source page
     */
    public void setPageDict(PdfDictionary pageDict) {
        this.pageDict = pageDict;
    }

    /**
     * Applies the source page's rotation and scaled boxes to the page carried by the event.
     *
     * @param event expected to be a PdfDocumentEvent
     */
    @Override
    public void handleEvent(Event event) {
        PdfDocumentEvent docEvent = (PdfDocumentEvent) event;
        PdfPage page = docEvent.getPage();
        // Only copy /Rotate when the source page defines it; otherwise a null
        // value would be stored in the destination page dictionary.
        PdfNumber rotation = pageDict.getAsNumber(PdfName.Rotate);
        if (rotation != null) {
            page.put(PdfName.Rotate, rotation);
        }

        scaleDown(page, pageDict, PdfName.MediaBox, scale);
        scaleDown(page, pageDict, PdfName.CropBox, scale);
    }

    /**
     * Writes a scaled copy of one page box to the destination page. The new
     * box starts at (0, 0) and has the source box's width and height times
     * {@code scale}. Does nothing when the source dictionary has no such box.
     *
     * @param destPage    page that receives the scaled box
     * @param pageDictSrc dictionary of the source page
     * @param box         name of the box entry, e.g. MediaBox or CropBox
     * @param scale       factor applied to width and height
     */
    protected void scaleDown(PdfPage destPage, PdfDictionary pageDictSrc, PdfName box, float scale) {
        PdfArray original = pageDictSrc.getAsArray(box);
        if (original != null) {
            // Box arrays are [llx, lly, urx, ury].
            float width = original.getAsNumber(2).floatValue() - original.getAsNumber(0).floatValue();
            float height = original.getAsNumber(3).floatValue() - original.getAsNumber(1).floatValue();
            PdfArray result = new PdfArray();
            result.add(new PdfNumber(0));
            result.add(new PdfNumber(0));
            result.add(new PdfNumber(width * scale));
            result.add(new PdfNumber(height * scale));
            destPage.put(box, result);
        }
    }
}

在本例中，我选择了 A3 页面大小常量。您也可以用指定的宽度和高度创建 PageSize 对象，如下所示：

构造函数：

public PageSize(float width, float height)

例子：

PageSize pageSize = new PageSize(750, 1000);
PdfDocument temp = new PdfDocument(pageSize);

【讨论】：

如果我没有理解错这个问题，提问者希望得到 3 页、每页均为 'A4' 尺寸的 PDF，因此把页面尺寸改得更大可能对他没有帮助。
我一定忽略了它。我编辑了帖子以包含转换后将 PDF 缩小为 A4 页面的代码。

【解决方案2】：

试试这个。

style="page-break-after: always; width: 320pt;"在

【讨论】：