文档树
如果您仔细查看 Google Doc 的 structure,您会注意到它是一个树形数据结构,而不是更广泛意义上的文档(尽管有人可能会争辩说,章/节/段落也是树状结构)。
我怀疑上述是 API 缺少页面相关方法的原因 - 尽管将来可能会添加它们
由于文档是一棵树,所以确定何时发生分页的问题可以简化为计算子高度之和溢出页面高度的点。
问题细分
为了正确获取分裂发生的位置(并跟踪这些元素),我们需要解决几个子问题:
- 获取页面的高度、宽度和边距
- 遍历元素,跟踪总高度。在每一步:
- 计算元素的full高度。
- 将高度添加到总高度,检查是否发生溢出。
- 如果总溢出页面高度,则保证最后一个最外层(最接近根)元素会被拆分。将元素添加到列表中,缓存溢出并重置总数(新页面)。
观察
- 当遇到
PageBreak 时,可以重置总计数器,因为下一个元素将位于顶部(溢出偏移)。请注意,由于PageBreak 不是独立的(它被包裹在Paragraph 或ListItem 中),因此可以随时遇到。
- 只有
TableRow 中最高的TableCell 计入总高度。
- 一些元素继承自
ContainerElement,这意味着它们的高度等于它们的子元素高度之和 + 上下边距。
辅助函数
首先,我们可以定义几个辅助函数(详见 JSDoc 注释):
/**
 * @summary tells whether an element type belongs to the known container types
 * @description a truthy `type` is used as is; otherwise the type is read from `elem`
 * @param {GoogleAppsScript.Document.Element} elem
 * @param {GoogleAppsScript.Document.ElementType} [type]
 * @returns {boolean}
 */
const isContainer = (elem, type) => {
  const {
    BODY_SECTION,
    EQUATION,
    EQUATION_FUNCTION,
    FOOTER_SECTION,
    HEADER_SECTION,
    LIST_ITEM,
    PARAGRAPH,
    TABLE,
    TABLE_CELL,
    TABLE_ROW,
    TABLE_OF_CONTENTS
  } = DocumentApp.ElementType;

  const knownContainers = new Set([
    BODY_SECTION,
    EQUATION,
    EQUATION_FUNCTION,
    FOOTER_SECTION,
    HEADER_SECTION,
    LIST_ITEM,
    PARAGRAPH,
    TABLE,
    TABLE_CELL,
    TABLE_ROW,
    TABLE_OF_CONTENTS
  ]);

  // the element is only queried when no explicit type was supplied
  const resolvedType = type || elem.getType();
  return knownContainers.has(resolvedType);
};
/**
 * @summary gets aspect ratio of a font
 * @description only the families listed below are recognised; any other value
 *  (unknown family, null, or a name that collides with an inherited object member
 *  such as "constructor") yields the fallback ratio
 * @param {string} fontFamily
 * @returns {number}
 * @default .618
 */
const getAspectRatio = (fontFamily) => {
  const fallbackRatio = .618;

  // a Map has no inherited keys, so lookups can never return a non-numeric prototype member
  const aspects = new Map([
    ["Arial", .52],
    ["Calibri", .47],
    ["Courier", .43],
    ["Garamond", .38],
    ["Georgia", .48],
    ["Helvetica", .52],
    ["Times", .45],
    ["Verdana", .58]
  ]);

  return aspects.has(fontFamily) ? aspects.get(fontFamily) : fallbackRatio;
};
/**
 * @summary checks if Element is direct child of Body
 * @description an element without a parent is never reported as top-level
 * @param {GoogleAppsScript.Document.Element} elem
 * @returns {boolean}
 */
const isTopLevel = (elem) => {
  const parent = elem.getParent();

  // NOTE(review): getParent() presumably yields null for elements that are not
  // attached to a parent — guard so such elements do not raise a TypeError
  if (parent == null) {
    return false;
  }

  const { ElementType } = DocumentApp;
  return parent.getType() === ElementType.BODY_SECTION;
};
/**
 * @summary makes a one-level copy of an array
 * @description elements are carried over as is, so object elements stay shared with the source
 * @param {any[]} arr
 * @returns {any[]}
 */
const shallowCopy = (arr) => arr.slice();
状态跟踪
由于我们必须跟踪溢出、处理的最后一个元素等,我选择添加一个负责状态管理的Tracker 对象。跟踪器的几个功能需要说明:
processResults方法:
- 确保在计算嵌套元素的高度后恢复元素边界(页面大小)(
setDimensions、setMargins、resetDimensions 和 resetMargins 方法与私有 inits 允许我们操纵边界)。
- 修改特定元素类型的处理高度:
-
Body 的高度设置为 0(否则它将与子高度重复)。
-
TableRow 的高度设置为最高 TableCell。
- 其他类型的高度与子元素高度相加。
handleOverflow方法:
- 防止嵌套元素被添加到拆分列表中(可以安全删除)。
- 将总高度重置为最新的溢出偏移量(部分元素拆分的高度)。
totalHeight setter(设置器):
在每次重新计算时查找高度溢出并在需要时调用溢出处理程序。
/**
 * @typedef {object} Tracker
 * @property {Map.<GoogleAppsScript.Document.ElementType, function>} callbacks map of height processors
 * @property {?GoogleAppsScript.Document.Element} currElement current element processed
 * @property {number[]} dimensions exposes dimensions of a page
 * @property {function(): void} handleOverflow handles page height overflow
 * @property {function(): boolean} isOverflow checks if height overflew page height
 * @property {number[]} margins exposes margins of a page
 * @property {number} overflow getter for overflow status
 * @property {function(...number): number} processResults process callback results
 * @property {function(): Tracker} resetDimensions restores old dimensions
 * @property {function(): Tracker} resetMargins restores old margins
 * @property {function(): void} resetOverflow resets most recent overflow
 * @property {function(): void} resetTotalHeight resets accumulated height
 * @property {function(...number): void} setDimensions reinits containing dimensions
 * @property {function(...number): void} setMargins reinits containing margins
 * @property {function(string, ...any): void} setStore abstract property store setter
 * @property {number} significantWidth exposes significant page width
 * @property {number} significantHeight exposes significant page height
 * @property {GoogleAppsScript.Document.Element[]} splits list of elements split over page
 * @property {number} totalHeight total height
 *
 * @summary factory for element trackers
 * @description the tracker accumulates height, detects when it exceeds the usable
 *  page height and records the element being processed at that moment. The `bounds`
 *  arrays are mutated in place by the set/reset methods, so the object passed in
 *  always reflects the current (possibly overridden) bounds.
 * @param {Tracker#callbacks} callbacks
 * @param {Bounds} bounds
 * @param {Tracker#splits} [splits]
 * @returns {Tracker}
 */
function makeTracker(callbacks, bounds, splits = []) {
  // snapshot of the bounds at creation time; the reset methods restore from it
  const inits = {
    dimensions: shallowCopy(bounds.dimensions),
    margins: shallowCopy(bounds.margins)
  };

  const privates = {
    bounds,
    current: null,
    currentType: null,
    currOverflow: 0,
    totalHeight: 0
  };

  const { ElementType } = DocumentApp;

  // how the own + child heights of an element are combined, keyed by element type
  const ResultProcessors = new Map()
    // Body contributes nothing itself: its children are added to the total individually
    .set(ElementType.BODY_SECTION, () => 0)
    // a row is as tall as its tallest entry (never below 0)
    .set(ElementType.TABLE_ROW, (results) => {
      return results.reduce((tallest, height) => tallest > height ? tallest : height, 0);
    })
    // everything else: plain sum
    .set("default", (results) => {
      return results.reduce((sum, height) => sum + height, 0);
    });

  return ({
    callbacks,
    splits,

    get currElement() {
      return privates.current;
    },

    /**
     * @summary current element setter
     * @description also caches the element's type for later lookups
     * @param {GoogleAppsScript.Document.Element} element
     */
    set currElement(element) {
      privates.current = element;
      privates.currentType = element.getType();
    },

    get dimensions() {
      const { bounds } = privates;
      return bounds.dimensions;
    },

    get margins() {
      const { bounds } = privates;
      return bounds.margins;
    },

    // positive when the accumulated height exceeds the usable page height
    get overflow() {
      const { bounds, totalHeight } = privates;
      return totalHeight - bounds.significantHeight;
    },

    get significantHeight() {
      const { bounds } = privates;
      return bounds.significantHeight;
    },

    get significantWidth() {
      const { bounds } = privates;
      return bounds.significantWidth;
    },

    get totalHeight() {
      return privates.totalHeight;
    },

    /**
     * @summary total height setter
     * @description intercepts & recalcs overflow; on overflow the excess is cached
     *  and the overflow handler is invoked
     * @param {number} height
     */
    set totalHeight(height) {
      privates.totalHeight = height;

      if (this.isOverflow()) {
        privates.currOverflow = this.overflow;
        this.handleOverflow();
      }
    },

    isOverflow() {
      return this.overflow > 0;
    },

    /**
     * @summary records the current element as split and starts a new page
     * @description Text and TableRow elements are not recorded. The total is then
     *  reset to the cached overflow, which re-enters the totalHeight setter — an
     *  element taller than several pages is therefore handled once per page.
     */
    handleOverflow() {
      const { currElement, splits } = this;
      const type = privates.currentType;

      const ignore = [
        ElementType.TEXT,
        ElementType.TABLE_ROW
      ];

      if (!ignore.includes(type)) {
        splits.push(currElement);
      }

      // with no usable page height the carried-over excess can never shrink, so
      // carrying it would re-enter the setter forever; start from 0 instead
      if (this.significantHeight <= 0) {
        privates.totalHeight = 0;
        this.resetOverflow();
        return;
      }

      this.resetTotalHeight();
    },

    /**
     * @summary combines own and child heights of the current element
     * @description restores the initial margins and dimensions first, then applies
     *  the processor registered for the current element type (or the default one).
     *  NOTE(review): bounds are restored after every processed element, so an
     *  override made for a container is dropped as soon as its first descendant
     *  has been processed — confirm this is intended.
     * @param {...number} results
     * @returns {number}
     */
    processResults(...results) {
      this.resetMargins().resetDimensions();

      const { currentType } = privates;

      const processor =
        ResultProcessors.get(currentType) ||
        ResultProcessors.get("default");

      return processor(results);
    },

    resetDimensions() {
      const { bounds } = privates;
      const { dimensions } = bounds;
      // refill in place so that every holder of the array sees the restored values
      dimensions.length = 0;
      dimensions.push(...inits.dimensions);
      return this;
    },

    resetMargins() {
      const { bounds } = privates;
      const { margins } = bounds;
      margins.length = 0;
      margins.push(...inits.margins);
      return this;
    },

    resetOverflow() {
      privates.currOverflow = 0;
    },

    // restarts the total from the cached overflow (0 if there is none), then clears the cache
    resetTotalHeight() {
      const { currOverflow } = privates;
      this.totalHeight = currOverflow;
      this.resetOverflow();
    },

    setDimensions(...newDimensions) {
      return this.setStore("dimensions", ...newDimensions);
    },

    setMargins(...newMargins) {
      return this.setStore("margins", ...newMargins);
    },

    /**
     * @summary replaces the content of a bounds store in place
     * @description a null or undefined value keeps the initial value of that slot
     * @param {string} property "dimensions" or "margins"
     * @param {...?number} values
     */
    setStore(property, ...values) {
      const { bounds } = privates;

      const initStore = inits[property];

      const temp = values.map((val, idx) => {
        return val == null ? initStore[idx] : val;
      });

      const store = bounds[property];
      store.length = 0;
      store.push(...temp);
    }
  });
}
一。获取页面边界
第一个子问题很容易解决(样本可能很复杂,但对于传递状态很方便)。这里值得注意的是 significantWidth 和 significantHeight getter,它们返回可以被元素占据的宽度和高度(即没有边距)。
如果您想知道为什么要给顶部和底部边距各加上 54:这是一个“幻数”,等于默认垂直页边距(36 点)的 1.5 倍,用来确保页面溢出计算正确(我花了几个小时试图弄清楚,为什么在 HeaderSection 和 FooterSection 默认为 null 的情况下,顶部和底部页边距上仍会多出大约这么大的空间,但似乎并没有找到原因)。
/**
 * @typedef {object} Bounds
 * @property {number} bottom bottom page margin
 * @property {number[]} dimensions page constraints as [height, width]
 * @property {number} height page height
 * @property {number} left left page margin
 * @property {number[]} margins page margins as [top, right, bottom, left]
 * @property {number} right right page margin
 * @property {number} significantHeight page height without vertical margins
 * @property {number} significantWidth page width without horizontal margins
 * @property {number} top top page margin
 * @property {number} width page width
 * @property {number} xMargins horizontal page margins
 * @property {number} yMargins vertical page margins
 *
 * @summary reads page size and margins of a body
 * @description every derived value is a getter over the `margins` / `dimensions`
 *  arrays, so changing those arrays in place changes the derived values as well
 * @param {Body} body
 * @returns {Bounds}
 */
function getDimensions(body) {
  // extra allowance added to the top and to the bottom margin
  const VERTICAL_ALLOWANCE = 54;

  const marginTop = body.getMarginTop() + VERTICAL_ALLOWANCE;
  const marginRight = body.getMarginRight();
  const marginBottom = body.getMarginBottom() + VERTICAL_ALLOWANCE;
  const marginLeft = body.getMarginLeft();

  const pageHeight = body.getPageHeight();
  const pageWidth = body.getPageWidth();

  const pageBounds = {
    margins: [marginTop, marginRight, marginBottom, marginLeft],
    dimensions: [pageHeight, pageWidth],

    get top() {
      const [top] = this.margins;
      return top;
    },
    get right() {
      const [, right] = this.margins;
      return right;
    },
    get bottom() {
      const [, , bottom] = this.margins;
      return bottom;
    },
    get left() {
      const [, , , left] = this.margins;
      return left;
    },

    get xMargins() {
      const { left, right } = this;
      return left + right;
    },
    get yMargins() {
      const { top, bottom } = this;
      return top + bottom;
    },

    get height() {
      const [height] = this.dimensions;
      return height;
    },
    get width() {
      const [, width] = this.dimensions;
      return width;
    },

    get significantWidth() {
      const { width, xMargins } = this;
      return width - xMargins;
    },
    get significantHeight() {
      const { height, yMargins } = this;
      return height - yMargins;
    }
  };

  return pageBounds;
}
二。遍历元素
我们需要从根 (Body) 开始递归遍历所有子元素,直到到达叶子(没有子元素的元素),获取它们的外部高度和子元素的高度(如果有),同时跟踪 PageBreak 和累积高度。作为 Body 的直接子级的每个 Element 都保证被拆分。
注意PageBreak 会重置总高度计数器:
/**
 * @summary executes a callback for element and its children
 * @description walks the element tree depth-first. For each element the height
 *  callback registered for its type is invoked first, then its children are walked,
 *  and finally the tracker combines the own and child results. Only direct children
 *  of Body add their combined height to the tracker's running total. A PageBreak
 *  resets the running total and contributes no height. Element types without a
 *  registered callback contribute no height of their own; their children are still walked.
 * @param {GoogleAppsScript.Document.Element} root
 * @param {Tracker} tracker
 * @param {boolean} [inCell] whether the element is nested inside a TableCell
 * @returns {number} combined height of the element as reported by the tracker
 */
function walkElements(root, tracker, inCell = false) {
  const { ElementType } = DocumentApp;

  const type = root.getType();

  if (type === ElementType.PAGE_BREAK) {
    tracker.resetTotalHeight();
    return 0;
  }

  const { callbacks } = tracker;

  // not every element type has a height callback (e.g. none may be registered for
  // list items or images); treat a missing one as "no own height" instead of throwing
  const callback = callbacks.get(type);
  const elemResult = typeof callback === "function" ? callback(root, tracker) : 0;

  const isCell = type === ElementType.TABLE_CELL;
  const cellBound = inCell || isCell;

  const childResults = [];
  if (isCell || isContainer(root, type)) {
    const numChildren = root.getNumChildren();

    for (let i = 0; i < numChildren; i++) {
      const child = root.getChild(i);
      childResults.push(walkElements(child, tracker, cellBound));
    }
  }

  // set after the children were walked, so the tracker sees this element as current again
  tracker.currElement = root;

  const processed = tracker.processResults(elemResult, ...childResults);

  if (isTopLevel(root)) {
    tracker.totalHeight += processed;
  }

  return processed;
}
三。计算元素高度
一般来说,元素的 full 高度是顶部、底部边距(或填充或边框)+ base 高度。此外,由于某些元素是容器,它们的基本高度等于其子元素的全高之和。因此,我们可以将第三个子问题细分为:
- 原始类型的高度(无子)
- 容器类型的高度
原始类型
文字高度
UPD:getLineSpacing() 有可能返回null,所以你要提防它(默认:1.15)
Text 元素由字符组成,因此要计算基本高度,必须:
- 获取父级的缩进
- 获取字符高度和宽度(为简单起见,假设它取决于字体纵横比)
- 从有用的页面宽度(= 线宽)中减去缩进
- 对于每个字符,将其宽度累加到当前行宽,直到溢出,然后将行数加一¹
- 文本高度等于行数乘以字符高度,再乘以行间距系数
¹ 这里对字符的逐个遍历并非必需,但如果你想要更高的精度,可以为不同字符映射宽度修正值、引入字距调整等。
/**
 * @summary estimates the height of a Text element
 * @description every character is assumed to be equally wide (font size times the
 *  aspect ratio of the font family). Characters are laid out on a line whose width is
 *  the tracker's significant width minus the parent's start and end indents; each time
 *  the line overflows, a new line is started with the excess width. The result is
 *  line count * font size * line spacing. Font size falls back to 11 and line spacing
 *  to 1.15 when the element reports a falsy value.
 * @param {GoogleAppsScript.Document.Text} elem
 * @param {Tracker} tracker
 * @returns {number}
 */
function getTextHeight(elem, tracker) {
  const usableWidth = tracker.significantWidth;

  const family = elem.getFontFamily();
  const glyphHeight = elem.getFontSize() || 11;
  const glyphWidth = glyphHeight * getAspectRatio(family);

  /** @type {GoogleAppsScript.Document.ListItem|GoogleAppsScript.Document.Paragraph} */
  const container = elem.getParent();
  const spacingFactor = container.getLineSpacing() || 1.15;
  const indentStart = container.getIndentStart();
  const indentEnd = container.getIndentEnd();
  const maxLineWidth = usableWidth - (indentStart + indentEnd);

  // spreading a string yields one entry per code point
  let remaining = [...elem.getText()].length;

  let filledWidth = 0;
  let lineCount = 1;

  while (remaining > 0) {
    remaining -= 1;
    filledWidth += glyphWidth;

    const excess = filledWidth - maxLineWidth;
    if (excess > 0) {
      // the part that did not fit opens the next line
      filledWidth = excess;
      lineCount += 1;
    }
  }

  return lineCount * glyphHeight * spacingFactor;
}
容器类型
幸运的是,我们的 walker 递归处理子元素,所以我们只需要处理每个容器类型的细节(然后跟踪器的processResults 方法将连接子高度)。
段落
Paragraph 有两个属性集添加到其全高:margins(我们只需要顶部和底部 - 可通过 getAttributes() 访问)和 spacing:
/**
 * @summary calculates the height a paragraph adds on top of its children
 * @description sums the top/bottom margin attributes (missing ones count as 0) and
 *  the spacing before/after. A paragraph without children additionally gets the
 *  height of one line: font size (fallback 11) times line spacing (fallback 1.15).
 * @param {GoogleAppsScript.Document.Paragraph} par
 * @returns {number}
 */
function getParagraphHeight(par) {
  const { Attribute } = DocumentApp;

  const attrs = par.getAttributes();

  const spacingBefore = par.getSpacingBefore();
  const spacingAfter = par.getSpacingAfter();
  const totalSpacing = spacingBefore + spacingAfter;

  const topMargin = attrs[Attribute.MARGIN_TOP] || 0;
  const bottomMargin = attrs[Attribute.MARGIN_BOTTOM] || 0;

  const isEmpty = par.getNumChildren() === 0;
  if (!isEmpty) {
    return topMargin + bottomMargin + totalSpacing + 0;
  }

  // an empty paragraph still occupies one line
  const fontSize = par.asText().getFontSize() || 11;
  const emptyLineHeight = fontSize * (par.getLineSpacing() || 1.15);

  return topMargin + bottomMargin + totalSpacing + emptyLineHeight;
}
注意 placeholderHeight 部分 - 这是必要的,因为当您附加 Table 时,会插入一个空的 Paragraph(没有 Text),相当于 1 行默认文本。
表格单元格
TableCell 元素是一个容器,它充当其子元素的主体,因此可以计算单元格内的高度,例如 Text,包括尺寸和边距(在此上下文中填充 与边距相同)的边界暂时设置为单元格的边界(高度可以保持不变):
/**
 * @summary calculates the height a TableCell adds on top of its children
 * @description overrides the tracker's bounds with the cell's: the width becomes the
 *  cell width (the height slot is passed as null) and the margins become the cell's
 *  paddings. Returns the vertical padding only.
 * @param {GoogleAppsScript.Document.TableCell} elem
 * @param {Tracker} tracker
 * @returns {number}
 */
function getTableCellHeight(elem, tracker) {
  const padding = {
    top: elem.getPaddingTop(),
    bottom: elem.getPaddingBottom(),
    left: elem.getPaddingLeft(),
    right: elem.getPaddingRight()
  };

  const cellWidth = elem.getWidth();

  tracker.setDimensions(null, cellWidth);
  // margins are ordered top, right, bottom, left
  tracker.setMargins(padding.top, padding.right, padding.bottom, padding.left);

  return padding.top + padding.bottom;
}
表格行
TableRow 没有任何特定属性可计入全高(我们的跟踪器处理 TableCell 高度):
/**
 * @summary calculates the height a TableRow adds on top of its cells
 * @description a row adds nothing of its own, so the result is always 0
 * @param {GoogleAppsScript.Document.TableRow} row
 * @returns {number}
 */
function getTableRowHeight(row) {
  const ownHeight = 0;
  return ownHeight;
}
表格
Table 仅包含行,并且只是将水平边框宽度添加到总数中(只有顶部 [或底部] 行有 2 个边框而不会发生冲突,因此只有 行数 + 1 个边框计数) :
/**
 * @summary calculates the height a Table adds on top of its rows
 * @description counts one horizontal border per row plus one extra, each as thick
 *  as the table's border width
 * @param {GoogleAppsScript.Document.Table} elem
 * @returns {number}
 */
function getTableHeight(elem) {
  const borderWidth = elem.getBorderWidth();
  const borderCount = elem.getNumRows() + 1;
  return borderWidth * borderCount;
}
四。确定溢出
第四个子问题只是连接前面的部分:
/**
 * @summary finds elements split by pages
 * @description reads the page bounds of the document body, registers one height
 *  callback per supported element type, walks the body with a fresh tracker and
 *  returns the tracker's list of split elements
 * @param {GoogleAppsScript.Document.Document} doc
 * @returns {GoogleAppsScript.Document.Element[]}
 */
function findSplitElements(doc) {
  const body = doc.getBody();
  const pageBounds = getDimensions(body);

  const { ElementType } = DocumentApp;

  const heightCallbacks = new Map([
    [ElementType.BODY_SECTION, () => 0],
    [ElementType.PARAGRAPH, getParagraphHeight],
    [ElementType.TABLE, getTableHeight],
    [ElementType.TABLE_ROW, getTableRowHeight],
    [ElementType.TABLE_CELL, getTableCellHeight],
    [ElementType.TEXT, getTextHeight]
  ]);

  const tracker = makeTracker(heightCallbacks, pageBounds);
  walkElements(body, tracker);

  const { splits } = tracker;
  return splits;
}
驱动功能
为了测试整个解决方案是否有效,我使用了这个驱动程序:
/**
 * @summary invokes a callback a given number of times
 * @description the callback receives the extra arguments on every call; its return
 *  value is ignored
 * @param {number} n number of invocations
 * @param {function} callback
 * @param {...any} args arguments forwarded to the callback
 * @returns {void}
 */
function doNTimes(n, callback, ...args) {
  let completed = 0;
  while (completed < n) {
    callback(...args);
    completed += 1;
  }
}
/**
 * @summary fills the test document and highlights the elements split across pages
 * @description appends 30 paragraphs, 9 tables, a page break and 5 more tables to the
 *  body, then sets an orange background on every element reported as split
 * @returns {string} URL of the document
 */
function prepareDoc() {
  const doc = getTestDoc(); //gets Document somehow
  const body = doc.getBody();

  const addParagraph = () => body.appendParagraph("Redrum Redrum Redrum Redrum".repeat(8));
  const addTable = (rows) => body.appendTable(rows);

  doNTimes(30, addParagraph);

  // six rows: two consecutive numbers, a zero, a letter and a fixed label
  const cells = ["A", "B", "C", "D", "E", "F"].map((letter, rowIndex) => {
    const firstNumber = rowIndex * 2 + 1;
    return [firstNumber, firstNumber + 1, 0, letter, "test"];
  });

  body.appendTable(cells);
  doNTimes(8, addTable, cells);

  body.appendPageBreak();

  doNTimes(5, addTable, cells);

  for (const splitElement of findSplitElements(doc)) {
    const highlight = {};
    highlight[DocumentApp.Attribute.BACKGROUND_COLOR] = "#fd9014";
    splitElement.setAttributes(highlight);
  }

  return doc.getUrl();
}
驱动函数将用背景颜色标记每个拆分元素(您可能希望在每个元素之前附加PageBreak):
注意事项
- 答案可能会忽略某些情况(例如,如果
Table 的一整行恰好能放进上一页,它就不会被算作溢出),并且还有改进空间(之后还会扩展到其他元素类),所以如果有人知道问题任何部分的更好解决方案,欢迎讨论(或者直接动手贡献)。
- 请留意文中的 UPD 部分,其中记录了测试期间所做的改进。
参考文献
-
ContainerElement 类文档 (docs)
-
ElementType 枚举规范 (spec)
-
Paragraph 类文档 (docs)
-
TableCell 类文档 (docs)
-
文档 (document) 的结构