通过 python-docx 转录 .docx 文件以修改字体和字体大小。需要重建目标文件中的段落
Transcribe .docx files via python-docx to modify font and font size. Need to reconstruct paragraphs in target files
目的是转录 .docx 文件以修改字体和字号,同时保留各个 run 的属性,例如粗体、下划线、斜体等。之后我会向新创建的 target.docx 文件中添加一些页眉和图形。
如何把这些 run 重新组合成段落?目前,每个 run 都单独占了一行!
from docx import Document
from docx.shared import Pt
def main(filename):
    """Transcribe the runs of a .docx file into a new document.

    The target document's 'Normal' style is set to Times, 11 pt, the text
    of every source run is copied over, and the result is saved to
    '../Output/the_target.docx'.

    Args:
        filename: Path of the source .docx file.

    An IOError raised while reading or saving is caught and reported with
    a printed message; nothing is returned.
    """
    try:
        src_doc = Document(filename)
        trg_doc = Document()

        # Base font for the whole target document.
        style = trg_doc.styles['Normal']
        font = style.font
        font.name = 'Times'
        font.size = Pt(11)

        for p_cnt in range(len(src_doc.paragraphs)):
            for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
                curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]
                print('Run: ', curr_run.text)

                # NOTE: a new target paragraph is created for every run,
                # so each run ends up on its own line in the output.
                paragraph = trg_doc.add_paragraph()

                # Only the first matching property is applied: a run that
                # is both bold and italic is written as bold only.
                if curr_run.bold:
                    paragraph.add_run(curr_run.text).bold = True
                elif curr_run.italic:
                    paragraph.add_run(curr_run.text).italic = True
                elif curr_run.underline:
                    paragraph.add_run(curr_run.text).underline = True
                else:
                    paragraph.add_run(curr_run.text)

        trg_doc.save('../Output/the_target.docx')

    except IOError:
        print('There was an error opening the file')
# Script entry point: transcribe the sample input file.
if __name__ == '__main__':
    main("../Input/Current_File.docx")
输入:
1.0 PURPOSE The purpose of this procedure is to ensure all feedback is logged, documented and any resulting complaints are received, evaluated, and reviewed in accordance with 21 CFR Part 820 and ISO 13485
输出:
PURPOSE The purpose of this procedure is to ensure
all feedback is logged,
documented and any resulting complaints are received,
evaluated, and reviewed
in accordance with 21 CFR P art 820
and ISO 13485 .
您正在为每个 run 都添加一个新段落。您的核心循环应该更像下面这样:
# Create one target paragraph per source paragraph and append that
# paragraph's runs to it, so the runs stay together on one line.
# `src_doc` is the source Document and `trg_doc` the target Document.
for src_paragraph in src_doc.paragraphs:
    tgt_paragraph = trg_doc.add_paragraph()
    for src_run in src_paragraph.runs:
        print('Run: ', src_run.text)
        tgt_run = tgt_paragraph.add_run(src_run.text)
        # bold/italic/underline are tri-state (True, False, or None for
        # "inherit"); copying the value also preserves an explicit "off",
        # and several properties can apply to the same run.
        tgt_run.bold = src_run.bold
        tgt_run.italic = src_run.italic
        tgt_run.underline = src_run.underline
我把下面这段循环替换掉了:
# Index-based iteration over paragraphs and runs (the form that was replaced).
for p_cnt in range(len(src_doc.paragraphs)):
    for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
        curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]
在遍历 run 的部分,我采用了类似 Scanny 建议的结构。这样每个 run 就不会各自成为一个段落。
# Expects `docx`, `Pt` and `WD_COLOR_INDEX` to be imported already, and
# `path` to name the source .docx file.
src_doc = docx.Document(path)
trgt_doc = docx.Document()

# Get Target section(s) for Headers/Footers.  None of this depends on the
# paragraph being copied, so it is looked up once instead of once per
# paragraph.  These names are not used by the loop below.
sections = trgt_doc.sections
section = sections[0]
sectPr = section._sectPr
footer = section.footer
paragraph = footer.paragraphs[0]

# Generate new Target file from Source File
for src_paragraph in src_doc.paragraphs:
    src_paragraph_format = src_paragraph.paragraph_format

    trgt_paragraph = trgt_doc.add_paragraph()
    trgt_paragraph_format = trgt_paragraph.paragraph_format

    # Apply the same-named style to the new paragraph.  Assigning to
    # `style.name` instead would rename the target document's style.
    try:
        trgt_paragraph.style = src_paragraph.style.name
    except (KeyError, ValueError):
        # The target document has no matching paragraph style; keep the
        # default style for this paragraph.
        pass

    trgt_paragraph_format.left_indent = src_paragraph_format.left_indent
    trgt_paragraph_format.right_indent = src_paragraph_format.right_indent
    trgt_paragraph_format.space_before = Pt(2)
    trgt_paragraph_format.space_after = Pt(2)

    # Force the font of whichever style the paragraph ended up with.
    font = trgt_paragraph.style.font
    font.name = 'Times'
    font.size = Pt(11)

    # Transcribe source file runs
    for src_run in src_paragraph.runs:
        trgt_run = trgt_paragraph.add_run(src_run.text)
        # Only bright-green highlighting is carried over.
        if src_run.font.highlight_color == WD_COLOR_INDEX.BRIGHT_GREEN:
            trgt_run.font.highlight_color = WD_COLOR_INDEX.BRIGHT_GREEN
        # bold/italic/underline are tri-state (True, False, or None for
        # "inherit"); copying the value also preserves an explicit "off".
        trgt_run.bold = src_run.bold
        trgt_run.italic = src_run.italic
        trgt_run.underline = src_run.underline
目的是转录 .docx 文件以修改字体和字号,同时保留各个 run 的属性,例如粗体、下划线、斜体等。之后我会向新创建的 target.docx 文件中添加一些页眉和图形。
如何把这些 run 重新组合成段落?目前,每个 run 都单独占了一行!
from docx import Document
from docx.shared import Pt
def main(filename):
    """Transcribe the runs of a .docx file into a new document.

    The target document's 'Normal' style is set to Times, 11 pt, the text
    of every source run is copied over, and the result is saved to
    '../Output/the_target.docx'.

    Args:
        filename: Path of the source .docx file.

    An IOError raised while reading or saving is caught and reported with
    a printed message; nothing is returned.
    """
    try:
        src_doc = Document(filename)
        trg_doc = Document()

        # Base font for the whole target document.
        style = trg_doc.styles['Normal']
        font = style.font
        font.name = 'Times'
        font.size = Pt(11)

        for p_cnt in range(len(src_doc.paragraphs)):
            for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
                curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]
                print('Run: ', curr_run.text)

                # NOTE: a new target paragraph is created for every run,
                # so each run ends up on its own line in the output.
                paragraph = trg_doc.add_paragraph()

                # Only the first matching property is applied: a run that
                # is both bold and italic is written as bold only.
                if curr_run.bold:
                    paragraph.add_run(curr_run.text).bold = True
                elif curr_run.italic:
                    paragraph.add_run(curr_run.text).italic = True
                elif curr_run.underline:
                    paragraph.add_run(curr_run.text).underline = True
                else:
                    paragraph.add_run(curr_run.text)

        trg_doc.save('../Output/the_target.docx')

    except IOError:
        print('There was an error opening the file')
# Script entry point: transcribe the sample input file.
if __name__ == '__main__':
    main("../Input/Current_File.docx")
输入:
1.0 PURPOSE The purpose of this procedure is to ensure all feedback is logged, documented and any resulting complaints are received, evaluated, and reviewed in accordance with 21 CFR Part 820 and ISO 13485
输出:
PURPOSE The purpose of this procedure is to ensure
all feedback is logged,
documented and any resulting complaints are received,
evaluated, and reviewed
in accordance with 21 CFR P art 820
and ISO 13485 .
您正在为每个 run 都添加一个新段落。您的核心循环应该更像下面这样:
# Create one target paragraph per source paragraph and append that
# paragraph's runs to it, so the runs stay together on one line.
# `src_doc` is the source Document and `trg_doc` the target Document.
for src_paragraph in src_doc.paragraphs:
    tgt_paragraph = trg_doc.add_paragraph()
    for src_run in src_paragraph.runs:
        print('Run: ', src_run.text)
        tgt_run = tgt_paragraph.add_run(src_run.text)
        # bold/italic/underline are tri-state (True, False, or None for
        # "inherit"); copying the value also preserves an explicit "off",
        # and several properties can apply to the same run.
        tgt_run.bold = src_run.bold
        tgt_run.italic = src_run.italic
        tgt_run.underline = src_run.underline
我把下面这段循环替换掉了:
# Index-based iteration over paragraphs and runs (the form that was replaced).
for p_cnt in range(len(src_doc.paragraphs)):
    for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
        curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]
在遍历 run 的部分,我采用了类似 Scanny 建议的结构。这样每个 run 就不会各自成为一个段落。
# Expects `docx`, `Pt` and `WD_COLOR_INDEX` to be imported already, and
# `path` to name the source .docx file.
src_doc = docx.Document(path)
trgt_doc = docx.Document()

# Get Target section(s) for Headers/Footers.  None of this depends on the
# paragraph being copied, so it is looked up once instead of once per
# paragraph.  These names are not used by the loop below.
sections = trgt_doc.sections
section = sections[0]
sectPr = section._sectPr
footer = section.footer
paragraph = footer.paragraphs[0]

# Generate new Target file from Source File
for src_paragraph in src_doc.paragraphs:
    src_paragraph_format = src_paragraph.paragraph_format

    trgt_paragraph = trgt_doc.add_paragraph()
    trgt_paragraph_format = trgt_paragraph.paragraph_format

    # Apply the same-named style to the new paragraph.  Assigning to
    # `style.name` instead would rename the target document's style.
    try:
        trgt_paragraph.style = src_paragraph.style.name
    except (KeyError, ValueError):
        # The target document has no matching paragraph style; keep the
        # default style for this paragraph.
        pass

    trgt_paragraph_format.left_indent = src_paragraph_format.left_indent
    trgt_paragraph_format.right_indent = src_paragraph_format.right_indent
    trgt_paragraph_format.space_before = Pt(2)
    trgt_paragraph_format.space_after = Pt(2)

    # Force the font of whichever style the paragraph ended up with.
    font = trgt_paragraph.style.font
    font.name = 'Times'
    font.size = Pt(11)

    # Transcribe source file runs
    for src_run in src_paragraph.runs:
        trgt_run = trgt_paragraph.add_run(src_run.text)
        # Only bright-green highlighting is carried over.
        if src_run.font.highlight_color == WD_COLOR_INDEX.BRIGHT_GREEN:
            trgt_run.font.highlight_color = WD_COLOR_INDEX.BRIGHT_GREEN
        # bold/italic/underline are tri-state (True, False, or None for
        # "inherit"); copying the value also preserves an explicit "off".
        trgt_run.bold = src_run.bold
        trgt_run.italic = src_run.italic
        trgt_run.underline = src_run.underline