正如 OP 已经提到的,如果您可以完全控制最初创建水印的过程,您可以按照 @ChrisHaas 在他对 OP 所引用问题的回答中给出的说明进行操作。
另一方面,如果您用来创建水印的工具以自己的方式执行此操作,您将需要为这些水印定制的方法。
此方法通常需要您编辑一些内容流。顺便说一下,@ChrisHaas 的解决方案也是如此。
为了使这更容易,应该首先创建一个通用的内容流编辑功能,然后只使用此功能来编辑这些水印。
因此,这里首先是一个示例通用内容流编辑器类,然后是一个基于此的解决方案来编辑 OP 的示例水印。
一个通用的内容流编辑器类
这个 PdfContentStreamEditor 类逐条指令地解析原始内容流,同时跟踪部分图形状态;每条指令都会被转发到它的 Write 方法,该方法默认按指令到达的顺序将其原样写回,从而有效地创建原始流的相同或至少等效的副本。
要真正编辑流,只需覆盖此Write 方法,并仅将结果流中所需的指令转发到基本Write 方法。
/// <summary>
/// A content stream processor that forwards every parsed instruction to
/// <see cref="Write"/>, which by default copies it to a target canvas.
/// Subclasses edit a content stream by overriding <see cref="Write"/> and
/// passing on only the instructions they want in the result.
/// </summary>
public class PdfContentStreamEditor : PdfContentStreamProcessor
{
    /// <summary>
    /// Edits the immediate contents of a page, i.e. its content stream.
    /// It explicitly does not descend into form XObjects, patterns, or annotations.
    /// </summary>
    /// <param name="pdfStamper">The stamper whose reader supplies the page and whose under-content receives the output.</param>
    /// <param name="pageNum">The page number handed to the reader and the stamper.</param>
    public void EditPage(PdfStamper pdfStamper, int pageNum)
    {
        PdfReader pdfReader = pdfStamper.Reader;
        PdfDictionary page = pdfReader.GetPageN(pageNum);

        // Fetch the existing content bytes BEFORE detaching /Contents from the page;
        // the edited instructions are then written to the page's under-content.
        byte[] pageContentInput = ContentByteUtils.GetContentBytesForPage(pdfReader, pageNum);
        page.Remove(PdfName.CONTENTS);

        EditContent(pageContentInput, page.GetAsDict(PdfName.RESOURCES), pdfStamper.GetUnderContent(pageNum));
    }

    /// <summary>
    /// Processes the content bytes and outputs to the given canvas.
    /// It explicitly does not descend into form XObjects, patterns, or annotations.
    /// </summary>
    /// <param name="contentBytes">The raw content stream to parse.</param>
    /// <param name="resources">The resource dictionary belonging to that content.</param>
    /// <param name="canvas">The canvas that receives the (possibly edited) instructions.</param>
    public void EditContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
    {
        this.canvas = canvas;
        try
        {
            ProcessContent(contentBytes, resources);
        }
        finally
        {
            // Always drop the canvas reference, even when parsing fails, so a
            // later call never writes to a stale target.
            this.canvas = null;
        }
    }

    /// <summary>
    /// Writes a content stream operation to the target canvas. The default
    /// implementation writes the operands as they come, separated by spaces and
    /// terminated by a newline, so it essentially generates identical copies of
    /// the original instructions the operator wrappers forward to it.
    /// Override this method to achieve some fancy editing effect.
    /// </summary>
    /// <param name="processor">The processor that parsed the instruction.</param>
    /// <param name="operatorLit">The operator of the instruction.</param>
    /// <param name="operands">
    /// The objects to write. Only these are emitted; <paramref name="operatorLit"/> is not
    /// written separately, so the operator is expected to be the final entry of this list.
    /// </param>
    protected virtual void Write(PdfContentStreamProcessor processor, PdfLiteral operatorLit, List<PdfObject> operands)
    {
        int index = 0;
        foreach (PdfObject pdfObject in operands)
        {
            pdfObject.ToPdf(canvas.PdfWriter, canvas.InternalBuffer);
            // A space between entries, a newline after the last one.
            canvas.InternalBuffer.Append(operands.Count > ++index ? (byte) ' ' : (byte) '\n');
        }
    }

    //
    // constructor giving the parent a dummy listener to talk to
    //
    public PdfContentStreamEditor() : base(new DummyRenderListener())
    {
    }

    //
    // Overrides of PdfContentStreamProcessor methods
    //

    /// <summary>
    /// Registers <paramref name="newOperator"/> wrapped in a forwarding wrapper, so every
    /// invocation also reaches <see cref="Write"/>.
    /// </summary>
    /// <returns>
    /// The operator previously registered for <paramref name="operatorString"/>, unwrapped
    /// if it had been wrapped by this class.
    /// </returns>
    public override IContentOperator RegisterContentOperator(String operatorString, IContentOperator newOperator)
    {
        ContentOperatorWrapper wrapper = new ContentOperatorWrapper(newOperator);
        IContentOperator formerOperator = base.RegisterContentOperator(operatorString, wrapper);

        // Callers should see the operator they originally registered, not our wrapper.
        ContentOperatorWrapper formerWrapper = formerOperator as ContentOperatorWrapper;
        return formerWrapper != null ? formerWrapper.OriginalOperator : formerOperator;
    }

    /// <summary>
    /// Remembers the resources for the duration of the parsing run so subclasses can
    /// resolve resource names while <see cref="Write"/> is being called.
    /// </summary>
    public override void ProcessContent(byte[] contentBytes, PdfDictionary resources)
    {
        this.resources = resources;
        try
        {
            base.ProcessContent(contentBytes, resources);
        }
        finally
        {
            // Reset even on failure so no stale resources survive the run.
            this.resources = null;
        }
    }

    //
    // members holding the output canvas and the resources
    //
    protected PdfContentByte canvas = null;
    protected PdfDictionary resources = null;

    //
    // A content operator class to wrap all content operators to forward the invocation to the editor
    //
    class ContentOperatorWrapper : IContentOperator
    {
        private readonly IContentOperator originalOperator;

        public ContentOperatorWrapper(IContentOperator originalOperator)
        {
            this.originalOperator = originalOperator;
        }

        /// <summary>The wrapped operator; may be null.</summary>
        public IContentOperator OriginalOperator
        {
            get { return originalOperator; }
        }

        public void Invoke(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
        {
            // "Do" is deliberately not delegated: the editor does not descend into
            // XObjects, the instruction is merely copied to the output.
            if (originalOperator != null && !"Do".Equals(oper.ToString()))
            {
                originalOperator.Invoke(processor, oper, operands);
            }
            ((PdfContentStreamEditor)processor).Write(processor, oper, operands);
        }
    }

    //
    // A dummy render listener to give to the underlying content stream processor to feed events to
    //
    class DummyRenderListener : IRenderListener
    {
        public void BeginTextBlock() { }
        public void RenderText(TextRenderInfo renderInfo) { }
        public void EndTextBlock() { }
        public void RenderImage(ImageRenderInfo renderInfo) { }
    }
}
一些背景:
此类从 iTextSharp 解析器命名空间扩展了 PdfContentStreamProcessor。此类最初旨在仅解析内容流以返回用于文本、图像或图形提取的信息。我们利用它来跟踪图形状态的一部分,更准确地说是那些与文本提取相关的图形状态参数。
如果对于特定的编辑任务,还需要预处理信息,例如当前指令绘制的文本,可以使用自定义的IRenderListener 实现来检索该信息,而不是这里使用的DummyRenderListener,它只是忽略它。
此类架构的灵感来自 iTextSharp.xtra 额外库中的 PdfCleanUpProcessor。
隐藏 OP 水印的编辑器
正如 OP 已经发现的那样,他的水印可以被识别为唯一使用 ExtGState 对象中定义的透明度作为 ca 值的文档部分。因此,为了隐藏水印,我们必须
- 识别与该值相关的图形状态变化并
- 当识别的当前 ca 值小于 1 时,不绘制任何内容。
实际上水印是使用矢量图形操作构建的。因此,我们可以将编辑限制为这些操作。我们甚至可以限制它改变最终的绘图指令(“stroke” / “fill” / “fill-and-stroke” 加上某些变体)以不执行生成透明内容的部分(填充或描边)。
/// <summary>
/// A content stream editor that suppresses the transparent part (fill and/or stroke)
/// of vector graphics painting operations. Transparency is recognised solely from the
/// <c>ca</c> and <c>CA</c> entries of the ExtGState dictionaries selected via <c>gs</c>.
/// </summary>
public class TransparentGraphicsRemover : PdfContentStreamEditor
{
    /// <summary>
    /// Starts every content stream with fully opaque alpha values before parsing it.
    /// Without this reset, transparency left active at the end of one page would
    /// wrongly suppress drawing at the start of the next page when the same editor
    /// instance is reused for several pages.
    /// </summary>
    public override void ProcessContent(byte[] contentBytes, PdfDictionary resources)
    {
        strokingAlpha = 1;
        nonStrokingAlpha = 1;
        base.ProcessContent(contentBytes, resources);
    }

    /// <summary>
    /// Tracks alpha changes made by <c>gs</c> and downgrades path painting operators
    /// so that the transparent part is not painted; everything else is copied as is.
    /// </summary>
    protected override void Write(PdfContentStreamProcessor processor, PdfLiteral oper, List<PdfObject> operands)
    {
        String operatorString = oper.ToString();

        if ("gs".Equals(operatorString) && operands.Count > 0)
        {
            // Malformed content may not supply a name operand; skip it instead of failing the cast.
            PdfName gsName = operands[0] as PdfName;
            if (gsName != null)
            {
                updateTransparencyFrom(gsName);
            }
        }

        PdfLiteral[] mapping;
        if (operands.Count > 0 && operatorMapping.TryGetValue(operatorString, out mapping))
        {
            // Downgrade the drawing operator if transparency is involved
            // For details cf. the comment before the operatorMapping declaration
            int index = 0;
            if (strokingAlpha < 1)
            {
                index |= 1;
            }
            if (nonStrokingAlpha < 1)
            {
                index |= 2;
            }

            oper = mapping[index];
            // The operator is the last entry of operands and is what actually gets written.
            operands[operands.Count - 1] = oper;
        }

        base.Write(processor, oper, operands);
    }

    // The current transparency values; beware: save and restore state operations are ignored!
    float strokingAlpha = 1;
    float nonStrokingAlpha = 1;

    // Reads ca (non-stroking) and CA (stroking) from the named ExtGState dictionary;
    // values the dictionary does not define are left unchanged.
    void updateTransparencyFrom(PdfName gsName)
    {
        PdfDictionary extGState = getGraphicsStateDictionary(gsName);
        if (extGState == null)
        {
            return;
        }

        PdfNumber number = extGState.GetAsNumber(PdfName.ca);
        if (number != null)
        {
            nonStrokingAlpha = number.FloatValue;
        }

        number = extGState.GetAsNumber(PdfName.CA);
        if (number != null)
        {
            strokingAlpha = number.FloatValue;
        }
    }

    // Looks up the named graphics state dictionary in the current resources.
    // Returns null when there are no resources, no /ExtGState dictionary, or no such entry.
    PdfDictionary getGraphicsStateDictionary(PdfName gsName)
    {
        if (resources == null)
        {
            return null;
        }

        PdfDictionary extGStates = resources.GetAsDict(PdfName.EXTGSTATE);
        return extGStates == null ? null : extGStates.GetAsDict(gsName);
    }

    //
    // Map from an operator name to an array of operations it becomes depending
    // on the current graphics state:
    //
    // * [0] the operation in case of no transparency
    // * [1] the operation in case of stroking transparency
    // * [2] the operation in case of non-stroking transparency
    // * [3] the operation in case of stroking and non-stroking transparency
    //
    readonly Dictionary<String, PdfLiteral[]> operatorMapping = new Dictionary<String, PdfLiteral[]>();

    public TransparentGraphicsRemover()
    {
        PdfLiteral _S = new PdfLiteral("S");
        PdfLiteral _s = new PdfLiteral("s");
        PdfLiteral _f = new PdfLiteral("f");
        PdfLiteral _fStar = new PdfLiteral("f*");
        PdfLiteral _B = new PdfLiteral("B");
        PdfLiteral _BStar = new PdfLiteral("B*");
        PdfLiteral _b = new PdfLiteral("b");
        PdfLiteral _bStar = new PdfLiteral("b*");
        PdfLiteral _n = new PdfLiteral("n");

        operatorMapping["S"] = new PdfLiteral[]{ _S, _n, _S, _n };
        operatorMapping["s"] = new PdfLiteral[]{ _s, _n, _s, _n };
        operatorMapping["f"] = new PdfLiteral[]{ _f, _f, _n, _n };
        operatorMapping["F"] = new PdfLiteral[]{ _f, _f, _n, _n };
        operatorMapping["f*"] = new PdfLiteral[]{ _fStar, _fStar, _n, _n };
        operatorMapping["B"] = new PdfLiteral[]{ _B, _f, _S, _n };
        operatorMapping["B*"] = new PdfLiteral[]{ _BStar, _fStar, _S, _n };
        operatorMapping["b"] = new PdfLiteral[] { _b, _f, _s, _n };
        operatorMapping["b*"] = new PdfLiteral[]{ _bStar, _fStar, _s, _n };
    }
}
注意:这个示例编辑器非常简单:
- 它只考虑由 ExtGState 参数 ca 和 CA 创建的透明度,特别是忽略掩码。
- 它不会寻找保存或恢复图形状态的操作。
这些限制可以轻松解除,但所需的代码量超出了一个 Stack Overflow 答案适合容纳的篇幅。
像这样将此编辑器应用于 OP 的示例文件
// Removes transparent vector graphics from every page of the source PDF
// and writes the result to a new file.
string source = @"test3.pdf";
string dest = @"test3-noTransparency.pdf";
using (PdfReader pdfReader = new PdfReader(source))
// Own the output stream explicitly so it is released even if constructing the
// stamper throws; disposing an already closed FileStream again is harmless.
using (FileStream output = new FileStream(dest, FileMode.Create, FileAccess.Write))
using (PdfStamper pdfStamper = new PdfStamper(pdfReader, output))
{
    PdfContentStreamEditor editor = new TransparentGraphicsRemover();
    // Pages are addressed 1-based.
    for (int i = 1; i <= pdfReader.NumberOfPages; i++)
    {
        editor.EditPage(pdfStamper, i);
    }
}
生成没有水印的 PDF 文件。
我没有 OP 用来将内容导出到 Word 的工具(NitroPDF 和 Foxit),所以无法执行最终测试。不过,Adobe Acrobat(9.5 版)在导出到 Word 时至少不会包含水印。
如果 OP 的工具在导出的 Word 文件中仍然有水印痕迹,则可以轻松改进此类以在透明度处于活动状态时实际删除路径创建和绘图操作。
Java 中也是这样
我开始在 Java 中为 iText 实现此功能,后来才意识到 OP 在 .Net 中使用 iTextSharp。以下是等效的 Java 类:
public class PdfContentStreamEditor extends PdfContentStreamProcessor
{
/**
* This method edits the immediate contents of a page, i.e. its content stream.
* It explicitly does not descent into form xobjects, patterns, or annotations.
*/
public void editPage(PdfStamper pdfStamper, int pageNum) throws IOException
{
PdfReader pdfReader = pdfStamper.getReader();
PdfDictionary page = pdfReader.getPageN(pageNum);
byte[] pageContentInput = ContentByteUtils.getContentBytesForPage(pdfReader, pageNum);
page.remove(PdfName.CONTENTS);
editContent(pageContentInput, page.getAsDict(PdfName.RESOURCES), pdfStamper.getUnderContent(pageNum));
}
/**
* This method processes the content bytes and outputs to the given canvas.
* It explicitly does not descent into form xobjects, patterns, or annotations.
*/
public void editContent(byte[] contentBytes, PdfDictionary resources, PdfContentByte canvas)
{
this.canvas = canvas;
processContent(contentBytes, resources);
this.canvas = null;
}
/**
* <p>
* This method writes content stream operations to the target canvas. The default
* implementation writes them as they come, so it essentially generates identical
* copies of the original instructions the {@link ContentOperatorWrapper} instances
* forward to it.
* </p>
* <p>
* Override this method to achieve some fancy editing effect.
* </p>
*/
protected void write(PdfContentStreamProcessor processor, PdfLiteral operator, List<PdfObject> operands) throws IOException
{
int index = 0;
for (PdfObject object : operands)
{
object.toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(operands.size() > ++index ? (byte) ' ' : (byte) '\n');
}
}
//
// constructor giving the parent a dummy listener to talk to
//
public PdfContentStreamEditor()
{
super(new DummyRenderListener());
}
//
// Overrides of PdfContentStreamProcessor methods
//
@Override
public ContentOperator registerContentOperator(String operatorString, ContentOperator operator)
{
ContentOperatorWrapper wrapper = new ContentOperatorWrapper();
wrapper.setOriginalOperator(operator);
ContentOperator formerOperator = super.registerContentOperator(operatorString, wrapper);
return formerOperator instanceof ContentOperatorWrapper ? ((ContentOperatorWrapper)formerOperator).getOriginalOperator() : formerOperator;
}
@Override
public void processContent(byte[] contentBytes, PdfDictionary resources)
{
this.resources = resources;
super.processContent(contentBytes, resources);
this.resources = null;
}
//
// members holding the output canvas and the resources
//
protected PdfContentByte canvas = null;
protected PdfDictionary resources = null;
//
// A content operator class to wrap all content operators to forward the invocation to the editor
//
class ContentOperatorWrapper implements ContentOperator
{
public ContentOperator getOriginalOperator()
{
return originalOperator;
}
public void setOriginalOperator(ContentOperator originalOperator)
{
this.originalOperator = originalOperator;
}
@Override
public void invoke(PdfContentStreamProcessor processor, PdfLiteral operator, ArrayList<PdfObject> operands) throws Exception
{
if (originalOperator != null && !"Do".equals(operator.toString()))
{
originalOperator.invoke(processor, operator, operands);
}
write(processor, operator, operands);
}
private ContentOperator originalOperator = null;
}
//
// A dummy render listener to give to the underlying content stream processor to feed events to
//
static class DummyRenderListener implements RenderListener
{
@Override
public void beginTextBlock() { }
@Override
public void renderText(TextRenderInfo renderInfo) { }
@Override
public void endTextBlock() { }
@Override
public void renderImage(ImageRenderInfo renderInfo) { }
}
}
(PdfContentStreamEditor.java)
/**
 * A content stream editor that suppresses the transparent part (fill and/or stroke)
 * of vector graphics painting operations. Transparency is recognised solely from the
 * <code>ca</code> and <code>CA</code> entries of the ExtGState dictionaries selected
 * via <code>gs</code>.
 */
public class TransparentGraphicsRemover extends PdfContentStreamEditor
{
    /**
     * Starts every content stream with fully opaque alpha values before parsing it.
     * Without this reset, transparency left active at the end of one page would
     * wrongly suppress drawing at the start of the next page when the same editor
     * instance is reused for several pages.
     */
    @Override
    public void processContent(byte[] contentBytes, PdfDictionary resources)
    {
        strokingAlpha = 1;
        nonStrokingAlpha = 1;
        super.processContent(contentBytes, resources);
    }

    /**
     * Tracks alpha changes made by <code>gs</code> and downgrades path painting
     * operators so that the transparent part is not painted; everything else is
     * copied as is.
     */
    @Override
    protected void write(PdfContentStreamProcessor processor, PdfLiteral operator, List<PdfObject> operands) throws IOException
    {
        String operatorString = operator.toString();

        // Malformed content may not supply a name operand; skip it instead of failing the cast.
        if ("gs".equals(operatorString) && !operands.isEmpty() && operands.get(0) instanceof PdfName)
        {
            updateTransparencyFrom((PdfName) operands.get(0));
        }

        PdfLiteral[] mapping = operatorMapping.get(operatorString);
        if (mapping != null && !operands.isEmpty())
        {
            // Downgrade the drawing operator if transparency is involved
            // For details cf. the comment before the operatorMapping declaration
            int index = 0;
            if (strokingAlpha < 1)
            {
                index |= 1;
            }
            if (nonStrokingAlpha < 1)
            {
                index |= 2;
            }

            operator = mapping[index];
            // The operator is the last entry of operands and is what actually gets written.
            operands.set(operands.size() - 1, operator);
        }

        super.write(processor, operator, operands);
    }

    // The current transparency values; beware: save and restore state operations are ignored!
    float strokingAlpha = 1;
    float nonStrokingAlpha = 1;

    // Reads ca (non-stroking) and CA (stroking) from the named ExtGState dictionary;
    // values the dictionary does not define are left unchanged.
    void updateTransparencyFrom(PdfName gsName)
    {
        PdfDictionary extGState = getGraphicsStateDictionary(gsName);
        if (extGState == null)
        {
            return;
        }

        PdfNumber number = extGState.getAsNumber(PdfName.ca);
        if (number != null)
        {
            nonStrokingAlpha = number.floatValue();
        }

        number = extGState.getAsNumber(PdfName.CA);
        if (number != null)
        {
            strokingAlpha = number.floatValue();
        }
    }

    // Looks up the named graphics state dictionary in the current resources.
    // Returns null when there are no resources, no /ExtGState dictionary, or no such entry.
    PdfDictionary getGraphicsStateDictionary(PdfName gsName)
    {
        if (resources == null)
        {
            return null;
        }

        PdfDictionary extGStates = resources.getAsDict(PdfName.EXTGSTATE);
        return extGStates == null ? null : extGStates.getAsDict(gsName);
    }

    //
    // Map from an operator name to an array of operations it becomes depending
    // on the current graphics state:
    //
    // * [0] the operation in case of no transparency
    // * [1] the operation in case of stroking transparency
    // * [2] the operation in case of non-stroking transparency
    // * [3] the operation in case of stroking and non-stroking transparency
    //
    static final Map<String, PdfLiteral[]> operatorMapping = new HashMap<String, PdfLiteral[]>();

    static
    {
        PdfLiteral _S = new PdfLiteral("S");
        PdfLiteral _s = new PdfLiteral("s");
        PdfLiteral _f = new PdfLiteral("f");
        PdfLiteral _fStar = new PdfLiteral("f*");
        PdfLiteral _B = new PdfLiteral("B");
        PdfLiteral _BStar = new PdfLiteral("B*");
        PdfLiteral _b = new PdfLiteral("b");
        PdfLiteral _bStar = new PdfLiteral("b*");
        PdfLiteral _n = new PdfLiteral("n");

        operatorMapping.put("S", new PdfLiteral[]{ _S, _n, _S, _n });
        operatorMapping.put("s", new PdfLiteral[]{ _s, _n, _s, _n });
        operatorMapping.put("f", new PdfLiteral[]{ _f, _f, _n, _n });
        operatorMapping.put("F", new PdfLiteral[]{ _f, _f, _n, _n });
        operatorMapping.put("f*", new PdfLiteral[]{ _fStar, _fStar, _n, _n });
        operatorMapping.put("B", new PdfLiteral[]{ _B, _f, _S, _n });
        operatorMapping.put("B*", new PdfLiteral[]{ _BStar, _fStar, _S, _n });
        operatorMapping.put("b", new PdfLiteral[]{ _b, _f, _s, _n });
        operatorMapping.put("b*", new PdfLiteral[]{ _bStar, _fStar, _s, _n });
    }
}
(TransparentGraphicsRemover.java)
/**
 * Runs the {@link TransparentGraphicsRemover} over every page of the
 * <code>test3.pdf</code> resource and writes the result to
 * <code>test3-noTransparency.pdf</code> in the result folder.
 */
@Test
public void testRemoveTransparentGraphicsTest3() throws IOException, DocumentException
{
    try (   InputStream resource = getClass().getResourceAsStream("test3.pdf");
            OutputStream result = new FileOutputStream(new File(RESULT_FOLDER, "test3-noTransparency.pdf")))
    {
        PdfReader pdfReader = new PdfReader(resource);
        try
        {
            PdfStamper pdfStamper = new PdfStamper(pdfReader, result);
            PdfContentStreamEditor editor = new TransparentGraphicsRemover();

            // Pages are addressed 1-based.
            for (int i = 1; i <= pdfReader.getNumberOfPages(); i++)
            {
                editor.editPage(pdfStamper, i);
            }

            pdfStamper.close();
        }
        finally
        {
            // Release the reader even when editing or stamping fails.
            pdfReader.close();
        }
    }
}
(摘自EditPageContent.java)