z123zero

基础操作

from docx import Document
from docx.shared import Inches

# Create an empty document.
document = Document()

# Add a heading. Level 0 is the document Title; level 1 (also used when the
# argument is omitted) is Heading 1. Valid range: 0 <= level <= 9.
document.add_heading('Document Title', 0)
# Add a paragraph; add_paragraph takes text='' and style=None.
p = document.add_paragraph('A plain paragraph having some ')
# Append runs to the paragraph; add_run takes text=None and style=None.
# A run exposes the bold and italic attributes used below.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)
# Insert a picture with its width set to 1.25 inches.
document.add_picture('monty-truth.png', width=Inches(1.25))

# Build a table: one header row first, then one appended row per record.
records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam'),
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# item_id rather than id, so the builtin id() is not shadowed at module level.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()

对象关系

1556184806969

调用document.add_paragraph()时传入的文本,默认会被放入该段落(paragraph)的第一个run中。

添加样式

中文字体微软雅黑,西文字体Times New Roman

import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

document = Document()
# Fetch the document's 'Normal' style so its font can be customised.
style = document.styles['Normal']
# Set the Western (Latin) font.
style.font.name = 'Times New Roman'
# Set the Chinese font through the w:eastAsia attribute of the rFonts element.
style.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

首行缩进

# Get the paragraph formatting attached to the style.
paragraph_format = style.paragraph_format
# Indent the first line by 0.74 cm, i.e. two characters.
paragraph_format.first_line_indent = Cm(0.74)

单独设置标题样式

# Create an empty level-0 heading (the document title).
title_ = document.add_heading(level=0)
# Centre the heading.
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Add the heading text as a run so it can be formatted on its own.
# NOTE(review): `title` is not defined in this snippet; it must already hold
# the heading text when this code runs.
title_run = title_.add_run(title)
# Set the heading font size.
title_run.font.size = Pt(14)
# Set the heading's Western (Latin) font.
title_run.font.name = 'Times New Roman'
# Set the heading's Chinese font through the w:eastAsia attribute of rFonts.
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), '微软雅黑')

设置超链接

def add_hyperlink(paragraph, url, text, color=None, underline=True):
    """
    Append an external hyperlink to the end of a paragraph.

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url.
    :param text: The text displayed for the url.
    :param color: Value written to the run's ``w:color`` element (the demo
        passes a hex string such as ``'FF8822'``); ``None`` adds no
        ``w:color`` element.
    :param underline: When falsy, a ``w:u`` element with value ``'none'`` is
        added to the run; when truthy, no ``w:u`` element is added.
    :return: The ``w:hyperlink`` element that was appended to the paragraph.
    """
    make_element = docx.oxml.shared.OxmlElement
    qualified = docx.oxml.shared.qn

    # Register the url as an external relationship of the paragraph's part
    # and obtain the relationship id the hyperlink element refers to.
    part = paragraph.part
    r_id = part.relate_to(
        url,
        docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK,
        is_external=True,
    )

    # Create the w:hyperlink tag and point it at the relationship.
    hyperlink = make_element('w:hyperlink')
    hyperlink.set(qualified('r:id'), r_id)

    # Create the run (w:r) and its run-properties (w:rPr) element.
    new_run = make_element('w:r')
    run_properties = make_element('w:rPr')

    # Add color if it is given.
    if color is not None:
        color_element = make_element('w:color')
        color_element.set(qualified('w:val'), color)
        run_properties.append(color_element)

    # Remove underlining if it is requested.
    if not underline:
        underline_element = make_element('w:u')
        underline_element.set(qualified('w:val'), 'none')
        run_properties.append(underline_element)

    # Join the xml elements together and add the display text to the run.
    new_run.append(run_properties)
    new_run.text = text
    hyperlink.append(new_run)

    # The hyperlink is appended directly to the paragraph's XML element.
    paragraph._p.append(hyperlink)

    return hyperlink

document = docx.Document()
p = document.add_paragraph()

# Add a hyperlink with no colour and no underline override
# (the original note describes this as the normal blue-underline look).
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', None, True)

# Add a hyperlink with a custom color and no underline.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', 'FF8822', False)

document.save('demo.docx')

上面的函数会把链接文本整体作为一个超链接追加到段落末尾。日常使用时,超链接往往只是正文中的某个关键词,或以<a>标签的形式出现;这时可以利用paragraph和run这两个对象的关系来解决:普通文本通过run加入段落,链接部分通过add_hyperlink插入。

比如有文本内容如下,将其中的<a>标签换为超链接:

"""I am trying to add an hyperlink in a MS Word document using docx module for <a href="python.org">Python</a>. Just do it."""

def is_text_link(text):
    """Return True when *text* contains any URL-like marker, else False.

    Every marker is checked. Returning False as soon as the first marker
    ('http') is absent would miss values such as 'python.org'.

    :param text: The string to inspect.
    :return: True if at least one marker occurs in *text*.
    """
    markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
    return any(marker in text for marker in markers)

def add_text_link(document, text):
    """Add *text* as a new paragraph, converting <a> tags into hyperlinks.

    Each ``<a href="URL">KEYWORD</a>`` occurrence becomes a hyperlink whose
    display text is KEYWORD; all other text is added to the paragraph as
    plain runs, in its original order.

    The tags are matched directly rather than splitting the text and guessing
    which pieces are URLs, so plain text that merely contains something
    URL-like (e.g. '.com') or that repeats a keyword is neither dropped nor
    mistaken for a link.

    :param document: The document that receives the new paragraph.
    :param text: Text that may contain ``<a href="...">...</a>`` tags.
    """
    # Local import: the surrounding snippets never import re at module level.
    import re

    paragraph = document.add_paragraph()
    anchor_pattern = re.compile(r'<a href="([^"]*)">(.*?)</a>', re.DOTALL)

    cursor = 0  # index just past the last piece of text already emitted
    for match in anchor_pattern.finditer(text):
        # Plain text between the previous anchor and this one.
        plain = text[cursor:match.start()]
        if plain:
            paragraph.add_run(plain)
        url, keyword = match.groups()
        add_hyperlink(paragraph, url, keyword, None, True)
        cursor = match.end()

    # Plain text after the last anchor (or the whole text when no anchor).
    tail = text[cursor:]
    if tail:
        paragraph.add_run(tail)

参考文档

  1. https://python-docx.readthedocs.io/en/latest/index.html
  2. https://github.com/python-openxml/python-docx/issues/74
  3. http://www.warmeng.com/2018/12/02/auto_report/

分类:

技术点:

相关文章: