Python3;Docx;替换 Word 文档中的文本会改变段后间距——如何避免或恢复?
Python3; Docx; replacing text from word doc modifies spacing after paragraph--how to avoid or change back?
我有一些函数,可以成功地将 .docx 文件中的特定字符串(段落和表格中的都可以)替换为新字符串。
替换功能本身一切正常。但是,当我替换段落中的字符串时,"spacing after paragraph"(段后间距)的值会发生变化。我希望间距保持不变,或者能够在替换完成后将间距恢复为原来的格式。
我不熟悉 docx,所以它可能只是我错过的一个简单的解决方案?
可以在此处下载 word .docx 文件示例:https://filebin.net/9t5v96tb5y7z0e60
可正常工作的段落和表格字符串替换代码:
"""
Script to replace specific texts within word doc templates
"""
import re

import docx
from docx import Document
from docx.shared import Pt
def clear_paragraph(self, paragraph):
    """Remove the content of ``paragraph`` while keeping its formatting.

    Every child of the paragraph's underlying XML element is removed except
    ``<w:pPr>``, the paragraph-properties element. Removing that element too
    discards paragraph-level settings such as style and "spacing after",
    so the paragraph falls back to defaults once new text is added.

    Args:
        self: Unused; kept so callers that pass the document as the first
            argument keep working.
        paragraph: Object exposing the paragraph XML element as ``_p``.
    """
    p_element = paragraph._p
    # Snapshot the children first: removing while iterating skips siblings.
    for child_element in list(p_element):
        tag = child_element.tag
        # Comments and processing instructions carry a non-string tag.
        if isinstance(tag, str) and tag.rpartition("}")[2] == "pPr":
            continue
        p_element.remove(child_element)
def paragraph_replace(self, search, replace):
    """Replace regex matches of ``search`` in every matching paragraph.

    A paragraph whose text matches is emptied with ``clear_paragraph`` and
    rebuilt as a single run holding the substituted text. Paragraphs with
    empty text or without a match are left untouched.

    Args:
        self: Document-like object exposing ``paragraphs``.
        search: Regular-expression pattern to look for.
        replace: Replacement passed to the regex substitution.

    Returns:
        The last paragraph visited (whether or not it matched), or ``None``
        when the document has no paragraphs.
    """
    pattern = re.compile(search)
    paragraph = None  # stays None for a document without paragraphs
    for paragraph in self.paragraphs:
        paragraph_text = paragraph.text
        if not paragraph_text or not pattern.search(paragraph_text):
            continue
        clear_paragraph(self, paragraph)
        paragraph.add_run(pattern.sub(replace, paragraph_text))
    return paragraph
def table_replace(self, text_value, replace):
    """Replace regex matches of ``text_value`` inside table cells.

    Only the matched part of a cell's text is substituted, so text around
    the placeholder is kept. ``replace`` is inserted literally (no
    backslash or group-reference processing).

    Args:
        self: Document-like object exposing ``tables``.
        text_value: Regular-expression pattern to look for.
        replace: Literal text that takes the place of each match.

    Returns:
        ``True`` if at least one cell was changed, otherwise ``False``.
    """
    pattern = re.compile(text_value)
    result = False
    for table in self.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_text = cell.text
                if cell_text and pattern.search(cell_text):
                    # A callable replacement keeps ``replace`` literal.
                    cell.text = pattern.sub(lambda _match: replace, cell_text)
                    result = True
    return result
# Placeholders to look for in the template; each one is used as a regex.
regex1 = [
    "<<authors>>",
    "<<author>>",
    "<<id>>",
    "<<title>>",
    "<<date>>",
    "<<discipline>>",
    "<<countries>>",
]
author = "Robert"
authors = "Robert, John; Bob, Billy; Duck, Donald"
ms_id = "2020-34-2321"
title = "blah blah blah and one more blah"
date = "31-03-2020"
discipline = "BE"
countries = "United States, Japan, China, South Africa"
# Parallel to regex1: entry i here replaces placeholder i above.
replace1 = [authors, author, ms_id, title, date, discipline, countries]
filename = "Sample Template.docx"
doc = Document(filename)
for placeholder, value in zip(regex1, replace1):
    paragraph_replace(doc, placeholder, value)
    table_replace(doc, placeholder, value)
doc.save(author + '_updated.docx')
阅读更多 Docx 文档并进行一些测试后,这个问题的解决方案很简单。
def clear_paragraph(self, paragraph):
    """Remove the content of ``paragraph`` while keeping its formatting.

    Every child of the paragraph's underlying XML element is removed except
    ``<w:pPr>``, the paragraph-properties element. Removing that element too
    discards paragraph-level settings such as style and "spacing after",
    so the paragraph falls back to defaults once new text is added.

    Args:
        self: Unused; kept so callers that pass the document as the first
            argument keep working.
        paragraph: Object exposing the paragraph XML element as ``_p``.
    """
    p_element = paragraph._p
    # Snapshot the children first: removing while iterating skips siblings.
    for child_element in list(p_element):
        tag = child_element.tag
        # Comments and processing instructions carry a non-string tag.
        if isinstance(tag, str) and tag.rpartition("}")[2] == "pPr":
            continue
        p_element.remove(child_element)
def paragraph_replace(self, search, replace, x):
    """Replace regex matches of ``search`` and reapply fixed formatting.

    A paragraph whose text matches is emptied with ``clear_paragraph`` and
    rebuilt as a single run holding the substituted text. The new run is
    set to 10 pt, the paragraph gets zero space after and single line
    spacing, and the run is bold only when ``x`` equals 2.

    ``Pt`` is expected at module level (``from docx.shared import Pt``).

    Args:
        self: Document-like object exposing ``paragraphs``.
        search: Regular-expression pattern to look for.
        replace: Replacement passed to the regex substitution.
        x: Caller-supplied index; the value 2 selects bold text.

    Returns:
        The last paragraph visited (whether or not it matched), or ``None``
        when the document has no paragraphs.
    """
    pattern = re.compile(search)
    paragraph = None  # stays None for a document without paragraphs
    for paragraph in self.paragraphs:
        paragraph_text = paragraph.text
        if not paragraph_text or not pattern.search(paragraph_text):
            continue
        clear_paragraph(self, paragraph)
        para = paragraph.add_run(pattern.sub(replace, paragraph_text))
        para.font.size = Pt(10)
        paragraph.paragraph_format.space_after = Pt(0)
        # Compare by value: ``is`` on an int literal tests identity.
        para.bold = x == 2
        paragraph.paragraph_format.line_spacing = 1.0
    return paragraph
def table_replace(self, text_value, replace):
    """Set table runs to 10 pt and replace regex matches in cell text.

    For every cell, each existing run is sized to 10 pt first; then, if the
    cell text matches ``text_value``, only the matched part is substituted
    so surrounding text is kept. ``replace`` is inserted literally (no
    backslash or group-reference processing).

    ``Pt`` is expected at module level (``from docx.shared import Pt``).

    Args:
        self: Document-like object exposing ``tables``.
        text_value: Regular-expression pattern to look for.
        replace: Literal text that takes the place of each match.

    Returns:
        ``True`` if at least one cell was changed, otherwise ``False``.
    """
    pattern = re.compile(text_value)
    result = False
    for table in self.tables:
        for row in table.rows:
            for cell in row.cells:
                # NOTE(review): sizing runs happens before the text is
                # replaced; confirm the replacement text is meant to keep
                # whatever size the cell assigns by default.
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.font.size = Pt(10)
                cell_text = cell.text
                if cell_text and pattern.search(cell_text):
                    # A callable replacement keeps ``replace`` literal.
                    cell.text = pattern.sub(lambda _match: replace, cell_text)
                    result = True
    return result
我在 paragraph_replace 函数中添加了 x 参数,因为我希望文档的第一行是粗体。通过对代码的这些简单添加,我所有的问题现在都得到了解决。
我有一些函数,可以成功地将 .docx 文件中的特定字符串(段落和表格中的都可以)替换为新字符串。
替换功能本身一切正常。但是,当我替换段落中的字符串时,"spacing after paragraph"(段后间距)的值会发生变化。我希望间距保持不变,或者能够在替换完成后将间距恢复为原来的格式。
我不熟悉 docx,所以它可能只是我错过的一个简单的解决方案?
可以在此处下载 word .docx 文件示例:https://filebin.net/9t5v96tb5y7z0e60
可正常工作的段落和表格字符串替换代码:
"""
Script to replace specific texts within word doc templates
"""
import re, docx
from docx import Document
def clear_paragraph(self, paragraph):
    """Remove the content of ``paragraph`` while keeping its formatting.

    Every child of the paragraph's underlying XML element is removed except
    ``<w:pPr>``, the paragraph-properties element. Removing that element too
    discards paragraph-level settings such as style and "spacing after",
    so the paragraph falls back to defaults once new text is added.

    Args:
        self: Unused; kept so callers that pass the document as the first
            argument keep working.
        paragraph: Object exposing the paragraph XML element as ``_p``.
    """
    p_element = paragraph._p
    # Snapshot the children first: removing while iterating skips siblings.
    for child_element in list(p_element):
        tag = child_element.tag
        # Comments and processing instructions carry a non-string tag.
        if isinstance(tag, str) and tag.rpartition("}")[2] == "pPr":
            continue
        p_element.remove(child_element)
def paragraph_replace(self, search, replace):
    """Replace regex matches of ``search`` in every matching paragraph.

    A paragraph whose text matches is emptied with ``clear_paragraph`` and
    rebuilt as a single run holding the substituted text. Paragraphs with
    empty text or without a match are left untouched.

    Args:
        self: Document-like object exposing ``paragraphs``.
        search: Regular-expression pattern to look for.
        replace: Replacement passed to the regex substitution.

    Returns:
        The last paragraph visited (whether or not it matched), or ``None``
        when the document has no paragraphs.
    """
    pattern = re.compile(search)
    paragraph = None  # stays None for a document without paragraphs
    for paragraph in self.paragraphs:
        paragraph_text = paragraph.text
        if not paragraph_text or not pattern.search(paragraph_text):
            continue
        clear_paragraph(self, paragraph)
        paragraph.add_run(pattern.sub(replace, paragraph_text))
    return paragraph
def table_replace(self, text_value, replace):
    """Replace regex matches of ``text_value`` inside table cells.

    Only the matched part of a cell's text is substituted, so text around
    the placeholder is kept. ``replace`` is inserted literally (no
    backslash or group-reference processing).

    Args:
        self: Document-like object exposing ``tables``.
        text_value: Regular-expression pattern to look for.
        replace: Literal text that takes the place of each match.

    Returns:
        ``True`` if at least one cell was changed, otherwise ``False``.
    """
    pattern = re.compile(text_value)
    result = False
    for table in self.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_text = cell.text
                if cell_text and pattern.search(cell_text):
                    # A callable replacement keeps ``replace`` literal.
                    cell.text = pattern.sub(lambda _match: replace, cell_text)
                    result = True
    return result
# Placeholders to look for in the template; each one is used as a regex.
regex1 = [
    "<<authors>>",
    "<<author>>",
    "<<id>>",
    "<<title>>",
    "<<date>>",
    "<<discipline>>",
    "<<countries>>",
]
author = "Robert"
authors = "Robert, John; Bob, Billy; Duck, Donald"
ms_id = "2020-34-2321"
title = "blah blah blah and one more blah"
date = "31-03-2020"
discipline = "BE"
countries = "United States, Japan, China, South Africa"
# Parallel to regex1: entry i here replaces placeholder i above.
replace1 = [authors, author, ms_id, title, date, discipline, countries]
filename = "Sample Template.docx"
doc = Document(filename)
for placeholder, value in zip(regex1, replace1):
    paragraph_replace(doc, placeholder, value)
    table_replace(doc, placeholder, value)
doc.save(author + '_updated.docx')
阅读更多 Docx 文档并进行一些测试后,这个问题的解决方案很简单。
def clear_paragraph(self, paragraph):
    """Remove the content of ``paragraph`` while keeping its formatting.

    Every child of the paragraph's underlying XML element is removed except
    ``<w:pPr>``, the paragraph-properties element. Removing that element too
    discards paragraph-level settings such as style and "spacing after",
    so the paragraph falls back to defaults once new text is added.

    Args:
        self: Unused; kept so callers that pass the document as the first
            argument keep working.
        paragraph: Object exposing the paragraph XML element as ``_p``.
    """
    p_element = paragraph._p
    # Snapshot the children first: removing while iterating skips siblings.
    for child_element in list(p_element):
        tag = child_element.tag
        # Comments and processing instructions carry a non-string tag.
        if isinstance(tag, str) and tag.rpartition("}")[2] == "pPr":
            continue
        p_element.remove(child_element)
def paragraph_replace(self, search, replace, x):
    """Replace regex matches of ``search`` and reapply fixed formatting.

    A paragraph whose text matches is emptied with ``clear_paragraph`` and
    rebuilt as a single run holding the substituted text. The new run is
    set to 10 pt, the paragraph gets zero space after and single line
    spacing, and the run is bold only when ``x`` equals 2.

    ``Pt`` is expected at module level (``from docx.shared import Pt``).

    Args:
        self: Document-like object exposing ``paragraphs``.
        search: Regular-expression pattern to look for.
        replace: Replacement passed to the regex substitution.
        x: Caller-supplied index; the value 2 selects bold text.

    Returns:
        The last paragraph visited (whether or not it matched), or ``None``
        when the document has no paragraphs.
    """
    pattern = re.compile(search)
    paragraph = None  # stays None for a document without paragraphs
    for paragraph in self.paragraphs:
        paragraph_text = paragraph.text
        if not paragraph_text or not pattern.search(paragraph_text):
            continue
        clear_paragraph(self, paragraph)
        para = paragraph.add_run(pattern.sub(replace, paragraph_text))
        para.font.size = Pt(10)
        paragraph.paragraph_format.space_after = Pt(0)
        # Compare by value: ``is`` on an int literal tests identity.
        para.bold = x == 2
        paragraph.paragraph_format.line_spacing = 1.0
    return paragraph
def table_replace(self, text_value, replace):
    """Set table runs to 10 pt and replace regex matches in cell text.

    For every cell, each existing run is sized to 10 pt first; then, if the
    cell text matches ``text_value``, only the matched part is substituted
    so surrounding text is kept. ``replace`` is inserted literally (no
    backslash or group-reference processing).

    ``Pt`` is expected at module level (``from docx.shared import Pt``).

    Args:
        self: Document-like object exposing ``tables``.
        text_value: Regular-expression pattern to look for.
        replace: Literal text that takes the place of each match.

    Returns:
        ``True`` if at least one cell was changed, otherwise ``False``.
    """
    pattern = re.compile(text_value)
    result = False
    for table in self.tables:
        for row in table.rows:
            for cell in row.cells:
                # NOTE(review): sizing runs happens before the text is
                # replaced; confirm the replacement text is meant to keep
                # whatever size the cell assigns by default.
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.font.size = Pt(10)
                cell_text = cell.text
                if cell_text and pattern.search(cell_text):
                    # A callable replacement keeps ``replace`` literal.
                    cell.text = pattern.sub(lambda _match: replace, cell_text)
                    result = True
    return result
我在 paragraph_replace 函数中添加了 x 参数,因为我希望文档的第一行是粗体。通过对代码的这些简单添加,我所有的问题现在都得到了解决。