使用 nbconvert 执行包含内联 Markdown 的 Jupyter 笔记本
Execute a Jupyter notebook including inline markdown with nbconvert
我有一个 Jupyter 笔记本,其 Markdown 单元格中引用了 Python 变量,如下所示:
代码单元格:
x = 10
Markdown 单元格:
The value of x is {{x}}.
如果我在笔记本中使用 shift-enter 执行 Markdown 单元格,IPython-notebook-extension 的 Python Markdown 扩展会动态显示这些变量的值。
Markdown 单元格:
The value of x is 10.
我想以编程方式执行笔记本中的所有单元格,并使用如下方法将它们保存到新笔记本中:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
# Load the notebook. Notebook files are UTF-8 JSON, so the encoding is pinned
# instead of relying on the platform default.
with open('report.ipynb', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# Execute the notebook in place using the python3 kernel.
ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb, {})

# Save the executed notebook under a new name.
with open('report_executed.ipynb', 'wt', encoding='utf-8') as f:
    nbformat.write(nb, f)
这只会执行代码单元格,而不会处理 Markdown 单元格。它们仍然是这样的:
The value of x is {{x}}.
我认为问题在于笔记本不受信任。有没有办法告诉 ExecutePreprocessor 信任笔记本?是否有另一种方法以编程方式执行笔记本,包括 Markdown 单元格中的 Python 变量?
ExecutePreprocessor 只处理代码单元格(only looks at code cells),因此您的 Markdown 单元格完全不受影响。如您所述,要处理 Markdown,您需要 Python Markdown 预处理器。
不幸的是,Python Markdown 扩展只会在实时笔记本中执行代码,它是通过修改渲染单元格所涉及的 JavaScript(modifying the javascript involved with rendering cells)来实现的。该修改会把执行代码片段的结果存储在单元格元数据中。
PyMarkdownPreprocessor 类(在 pre_pymarkdown.py 中)设计用于配合 nbconvert 处理那些已经在实时笔记本环境中渲染过的笔记本。它处理 Markdown 单元格,用存储在元数据中的值替换 {{}} 模式。
但是,在您的情况下,您没有实时笔记本元数据。我有一个类似的问题,我通过编写自己的执行预处理器解决了这个问题,其中还包含处理降价单元格的逻辑:
from nbconvert.preprocessors import ExecutePreprocessor, Preprocessor
import nbformat
import nbconvert
import re  # needed by the {{...}} scanner and the text/plain clean-up below
from textwrap import dedent


class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run and their outputs are stored on the cell.  For a
    markdown cell, every ``{{expr}}`` snippet is executed as a throw-away code
    cell and its rendered result is stored in ``cell.metadata['variables']``
    keyed by the snippet source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index):
        """Run one code cell and store its outputs on the cell.

        Raises:
            nbconvert.preprocessors.execute.CellExecutionError: if an output
                of type ``error`` was produced and ``self.allow_errors`` is
                false.
        """
        # NOTE(review): ``run_cell`` and ``allow_errors`` are presumably
        # provided by the ExecutePreprocessor base of the nbconvert release
        # this snippet targets -- confirm against the installed version.
        outputs = self.run_cell(cell)
        cell.outputs = outputs

        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    pattern = u"""\
                        An error occurred while executing the following cell:
                        ------------------
                        {cell.source}
                        ------------------
                        {out.ename}: {out.evalue}
                        """
                    msg = dedent(pattern).format(out=out, cell=cell)
                    raise nbconvert.preprocessors.execute.CellExecutionError(msg)
        return cell, resources

    def preprocess_markdown_cell(self, cell, resources, cell_index):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is executed through :meth:`preprocess_code_cell` as a
        temporary code cell.  The first output that can be rendered is stored
        in ``cell['metadata']['variables']`` under the snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # Execute the snippet as a stand-alone code cell.
            fakecell = nbformat.v4.nbbase.new_code_cell(m.group(1))
            fakecell, resources = self.preprocess_code_cell(fakecell, resources, cell_index)
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
然后您可以使用类似于您发布的代码的逻辑来处理您的笔记本:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
import ExecuteCodeMarkdownPreprocessor # from wherever you put it
import PyMarkdownPreprocessor # from pre_pymarkdown.py
# Load the notebook. Notebook files are UTF-8 JSON, so the encoding is pinned
# instead of relying on the platform default.
with open('report.ipynb', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# First pass: run code cells and evaluate the {{...}} snippets, storing the
# results in each markdown cell's metadata.
ep = ExecuteCodeMarkdownPreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb, {})

# Second pass: PyMarkdownPreprocessor is then run over the same notebook.
pymk = PyMarkdownPreprocessor()
pymk.preprocess(nb, {})

# Save the executed notebook under a new name.
with open('report_executed.ipynb', 'wt', encoding='utf-8') as f:
    nbformat.write(nb, f)
请注意,加入 Python Markdown 预处理之后,生成的笔记本文件的 Markdown 单元格中将不再包含 {{}} 语法——Markdown 将变为静态内容。如果结果笔记本的接收者修改代码并重新执行,Markdown 不会随之更新。但是,如果您要导出为其他格式(例如 HTML),那么您确实希望将 {{}} 语法替换为静态内容。
更新2020-07-08
@gordon-bean 提供的答案对我来说是救命恩人。在我放弃之前的最后一轮谷歌搜索中,我找到了这个答案,所以在我继续之前,我只想说声谢谢!
然而,距最初的答案已过去 4 年多,jupyter / nbconvert 经历了一些变化,所提供的代码需要更新。更新后的代码如下:
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors import execute
import nbformat
import re
# Taken from:
#
class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run and their outputs are stored on the cell.  For a
    markdown cell, every ``{{expr}}`` snippet is executed as a throw-away code
    cell and its rendered result is stored in ``cell.metadata['variables']``
    keyed by the snippet source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index, store_history=True):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index, store_history)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index, store_history)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index, store_history):
        """Run one code cell and store its outputs on the cell.

        Raises:
            execute.CellExecutionError: if the cell produced an ``error``
                output, or the reply status is ``error``, and errors are not
                allowed for this cell (or ``self.force_raise_errors`` is set).
        """
        # NOTE(review): ``run_cell(cell, cell_index, store_history)`` returning
        # ``(reply, outputs)`` is presumably the API of the nbconvert release
        # this snippet targets -- confirm against the installed version.
        reply, outputs = self.run_cell(cell, cell_index, store_history)
        cell.outputs = outputs

        # A cell tagged "raises-exception" is allowed to fail.
        cell_allows_errors = (self.allow_errors or "raises-exception"
                              in cell.metadata.get("tags", []))

        if self.force_raise_errors or not cell_allows_errors:
            for out in cell.outputs:
                if out.output_type == 'error':
                    raise execute.CellExecutionError.from_cell_and_msg(cell, out)
            if (reply is not None) and reply['content']['status'] == 'error':
                raise execute.CellExecutionError.from_cell_and_msg(cell, reply['content'])
        return cell, resources

    def preprocess_markdown_cell(self, cell, resources, cell_index, store_history):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is executed through :meth:`preprocess_code_cell` as a
        temporary code cell.  The first output that can be rendered is stored
        in ``cell['metadata']['variables']`` under the snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # Execute the snippet as a stand-alone code cell.
            fakecell = nbformat.v4.nbbase.new_code_cell(m.group(1))
            fakecell, resources = self.preprocess_code_cell(fakecell, resources, cell_index, store_history)
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
此代码的用法与 Gordon Bean 所报告的完全相同。
更新2021.06.29
由于 nbconvert 更改为使用 nbclient 调用 (https://github.com/jupyter/nbconvert/commit/e7bf8350435a66cc50faf29ff12df492be5d7f57#diff-bee04d71b1dfc0202a0239b1513fd81d983edc339a9734ca4f4813276feed032),这再次需要更新。由于run_cell不再可用,因此需要修改代码和markdown单元格处理。这有效:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors import execute
import re
# Taken from:
# modified to avoid using superseded run_cell calls.
class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run through ``execute_cell``.  For a markdown cell, every
    ``{{expr}}`` snippet is executed as a temporary code cell and its rendered
    result is stored in ``cell.metadata['variables']`` keyed by the snippet
    source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index, store_history=True):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index, store_history)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index, store_history)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index, store_history):
        """Process a code cell, following ExecutePreprocessor.preprocess_cell.

        The caller's ``store_history`` value is forwarded to ``execute_cell``
        rather than being replaced by a hard-coded ``True``.

        Returns:
            The executed cell together with ``self.resources``.
        """
        self._check_assign_resources(resources)
        cell = self.execute_cell(cell, cell_index, store_history=store_history)
        return cell, self.resources

    def preprocess_markdown_cell(self, cell, resources, cell_index, store_history):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is appended to ``self.nb.cells`` as a temporary code
        cell, executed, and removed again.  The first output that can be
        rendered is stored in ``cell['metadata']['variables']`` under the
        snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # NOTE(review): the snippet is temporarily added to the notebook,
            # presumably because execute_cell addresses cells by their index
            # in self.nb.cells -- confirm against nbclient.
            self.nb.cells.append(nbformat.v4.nbbase.new_code_cell(m.group(1)))
            try:
                fakecell, resources = self.preprocess_code_cell(
                    self.nb.cells[-1], resources, len(self.nb.cells) - 1, store_history)
            finally:
                # Always remove the temporary cell, even when execution
                # raises, so it never leaks into the saved notebook.
                self.nb.cells.pop()
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
用法保持不变。
我有一个 Jupyter 笔记本,其 Markdown 单元格中引用了 Python 变量,如下所示:
代码单元格:
x = 10
Markdown 单元格:
The value of x is {{x}}.
如果我在笔记本中使用 shift-enter 执行 Markdown 单元格,IPython-notebook-extension 的 Python Markdown 扩展会动态显示这些变量的值。
Markdown 单元格:
The value of x is 10.
我想以编程方式执行笔记本中的所有单元格,并使用如下方法将它们保存到新笔记本中:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
# Load the notebook. Notebook files are UTF-8 JSON, so the encoding is pinned
# instead of relying on the platform default.
with open('report.ipynb', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# Execute the notebook in place using the python3 kernel.
ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb, {})

# Save the executed notebook under a new name.
with open('report_executed.ipynb', 'wt', encoding='utf-8') as f:
    nbformat.write(nb, f)
这只会执行代码单元格,而不会处理 Markdown 单元格。它们仍然是这样的:
The value of x is {{x}}.
我认为问题在于笔记本不受信任。有没有办法告诉 ExecutePreprocessor 信任笔记本?是否有另一种方法以编程方式执行笔记本,包括 Markdown 单元格中的 Python 变量?
ExecutePreprocessor 只处理代码单元格(only looks at code cells),因此您的 Markdown 单元格完全不受影响。如您所述,要处理 Markdown,您需要 Python Markdown 预处理器。
不幸的是,Python Markdown 扩展只会在实时笔记本中执行代码,它是通过修改渲染单元格所涉及的 JavaScript(modifying the javascript involved with rendering cells)来实现的。该修改会把执行代码片段的结果存储在单元格元数据中。
PyMarkdownPreprocessor 类(在 pre_pymarkdown.py 中)设计用于配合 nbconvert 处理那些已经在实时笔记本环境中渲染过的笔记本。它处理 Markdown 单元格,用存储在元数据中的值替换 {{}} 模式。
但是,在您的情况下,您没有实时笔记本元数据。我有一个类似的问题,我通过编写自己的执行预处理器解决了这个问题,其中还包含处理降价单元格的逻辑:
from nbconvert.preprocessors import ExecutePreprocessor, Preprocessor
import nbformat
import nbconvert
import re  # needed by the {{...}} scanner and the text/plain clean-up below
from textwrap import dedent


class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run and their outputs are stored on the cell.  For a
    markdown cell, every ``{{expr}}`` snippet is executed as a throw-away code
    cell and its rendered result is stored in ``cell.metadata['variables']``
    keyed by the snippet source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index):
        """Run one code cell and store its outputs on the cell.

        Raises:
            nbconvert.preprocessors.execute.CellExecutionError: if an output
                of type ``error`` was produced and ``self.allow_errors`` is
                false.
        """
        # NOTE(review): ``run_cell`` and ``allow_errors`` are presumably
        # provided by the ExecutePreprocessor base of the nbconvert release
        # this snippet targets -- confirm against the installed version.
        outputs = self.run_cell(cell)
        cell.outputs = outputs

        if not self.allow_errors:
            for out in outputs:
                if out.output_type == 'error':
                    pattern = u"""\
                        An error occurred while executing the following cell:
                        ------------------
                        {cell.source}
                        ------------------
                        {out.ename}: {out.evalue}
                        """
                    msg = dedent(pattern).format(out=out, cell=cell)
                    raise nbconvert.preprocessors.execute.CellExecutionError(msg)
        return cell, resources

    def preprocess_markdown_cell(self, cell, resources, cell_index):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is executed through :meth:`preprocess_code_cell` as a
        temporary code cell.  The first output that can be rendered is stored
        in ``cell['metadata']['variables']`` under the snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # Execute the snippet as a stand-alone code cell.
            fakecell = nbformat.v4.nbbase.new_code_cell(m.group(1))
            fakecell, resources = self.preprocess_code_cell(fakecell, resources, cell_index)
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
然后您可以使用类似于您发布的代码的逻辑来处理您的笔记本:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
import ExecuteCodeMarkdownPreprocessor # from wherever you put it
import PyMarkdownPreprocessor # from pre_pymarkdown.py
# Load the notebook. Notebook files are UTF-8 JSON, so the encoding is pinned
# instead of relying on the platform default.
with open('report.ipynb', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# First pass: run code cells and evaluate the {{...}} snippets, storing the
# results in each markdown cell's metadata.
ep = ExecuteCodeMarkdownPreprocessor(timeout=600, kernel_name='python3')
ep.preprocess(nb, {})

# Second pass: PyMarkdownPreprocessor is then run over the same notebook.
pymk = PyMarkdownPreprocessor()
pymk.preprocess(nb, {})

# Save the executed notebook under a new name.
with open('report_executed.ipynb', 'wt', encoding='utf-8') as f:
    nbformat.write(nb, f)
请注意,加入 Python Markdown 预处理之后,生成的笔记本文件的 Markdown 单元格中将不再包含 {{}} 语法——Markdown 将变为静态内容。如果结果笔记本的接收者修改代码并重新执行,Markdown 不会随之更新。但是,如果您要导出为其他格式(例如 HTML),那么您确实希望将 {{}} 语法替换为静态内容。
更新2020-07-08
@gordon-bean 提供的答案对我来说是救命恩人。在我放弃之前的最后一轮谷歌搜索中,我找到了这个答案,所以在我继续之前,我只想说声谢谢!
然而,距最初的答案已过去 4 年多,jupyter / nbconvert 经历了一些变化,所提供的代码需要更新。更新后的代码如下:
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors import execute
import nbformat
import re
# Taken from:
#
class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run and their outputs are stored on the cell.  For a
    markdown cell, every ``{{expr}}`` snippet is executed as a throw-away code
    cell and its rendered result is stored in ``cell.metadata['variables']``
    keyed by the snippet source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index, store_history=True):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index, store_history)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index, store_history)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index, store_history):
        """Run one code cell and store its outputs on the cell.

        Raises:
            execute.CellExecutionError: if the cell produced an ``error``
                output, or the reply status is ``error``, and errors are not
                allowed for this cell (or ``self.force_raise_errors`` is set).
        """
        # NOTE(review): ``run_cell(cell, cell_index, store_history)`` returning
        # ``(reply, outputs)`` is presumably the API of the nbconvert release
        # this snippet targets -- confirm against the installed version.
        reply, outputs = self.run_cell(cell, cell_index, store_history)
        cell.outputs = outputs

        # A cell tagged "raises-exception" is allowed to fail.
        cell_allows_errors = (self.allow_errors or "raises-exception"
                              in cell.metadata.get("tags", []))

        if self.force_raise_errors or not cell_allows_errors:
            for out in cell.outputs:
                if out.output_type == 'error':
                    raise execute.CellExecutionError.from_cell_and_msg(cell, out)
            if (reply is not None) and reply['content']['status'] == 'error':
                raise execute.CellExecutionError.from_cell_and_msg(cell, reply['content'])
        return cell, resources

    def preprocess_markdown_cell(self, cell, resources, cell_index, store_history):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is executed through :meth:`preprocess_code_cell` as a
        temporary code cell.  The first output that can be rendered is stored
        in ``cell['metadata']['variables']`` under the snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # Execute the snippet as a stand-alone code cell.
            fakecell = nbformat.v4.nbbase.new_code_cell(m.group(1))
            fakecell, resources = self.preprocess_code_cell(fakecell, resources, cell_index, store_history)
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
此代码的用法与 Gordon Bean 所报告的完全相同。
更新2021.06.29
由于 nbconvert 更改为使用 nbclient 调用 (https://github.com/jupyter/nbconvert/commit/e7bf8350435a66cc50faf29ff12df492be5d7f57#diff-bee04d71b1dfc0202a0239b1513fd81d983edc339a9734ca4f4813276feed032),这再次需要更新。由于run_cell不再可用,因此需要修改代码和markdown单元格处理。这有效:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from nbconvert.preprocessors import execute
import re
# Taken from:
# modified to avoid using superseded run_cell calls.
class ExecuteCodeMarkdownPreprocessor(ExecutePreprocessor):
    """Execute code cells and the ``{{expr}}`` snippets found in markdown cells.

    Code cells are run through ``execute_cell``.  For a markdown cell, every
    ``{{expr}}`` snippet is executed as a temporary code cell and its rendered
    result is stored in ``cell.metadata['variables']`` keyed by the snippet
    source.  The markdown source itself is left unchanged.
    """

    def __init__(self, **kw):
        """Set up helper attributes, then forward ``kw`` to the base class."""
        # Neither attribute is read by the methods visible in this class.
        self.sections = {'default': True}  # maps section ID to true or false
        self.EmptyCell = nbformat.v4.nbbase.new_raw_cell("")
        super().__init__(**kw)

    def preprocess_cell(self, cell, resources, cell_index, store_history=True):
        """Dispatch a single cell according to its type.

        Code and markdown cells are handed to their dedicated handlers; any
        other cell type is returned untouched.

        Returns:
            The ``(cell, resources)`` tuple.
        """
        if cell.cell_type == 'code':
            return self.preprocess_code_cell(cell, resources, cell_index, store_history)
        if cell.cell_type == 'markdown':
            return self.preprocess_markdown_cell(cell, resources, cell_index, store_history)
        return cell, resources

    def preprocess_code_cell(self, cell, resources, cell_index, store_history):
        """Process a code cell, following ExecutePreprocessor.preprocess_cell.

        The caller's ``store_history`` value is forwarded to ``execute_cell``
        rather than being replaced by a hard-coded ``True``.

        Returns:
            The executed cell together with ``self.resources``.
        """
        self._check_assign_resources(resources)
        cell = self.execute_cell(cell, cell_index, store_history=store_history)
        return cell, self.resources

    def preprocess_markdown_cell(self, cell, resources, cell_index, store_history):
        """Evaluate every ``{{expr}}`` snippet of a markdown cell.

        Each snippet is appended to ``self.nb.cells`` as a temporary code
        cell, executed, and removed again.  The first output that can be
        rendered is stored in ``cell['metadata']['variables']`` under the
        snippet source.
        """
        cell['metadata']['variables'] = {}
        for m in re.finditer(r"{{(.*?)}}", cell.source):
            # NOTE(review): the snippet is temporarily added to the notebook,
            # presumably because execute_cell addresses cells by their index
            # in self.nb.cells -- confirm against nbclient.
            self.nb.cells.append(nbformat.v4.nbbase.new_code_cell(m.group(1)))
            try:
                fakecell, resources = self.preprocess_code_cell(
                    self.nb.cells[-1], resources, len(self.nb.cells) - 1, store_history)
            finally:
                # Always remove the temporary cell, even when execution
                # raises, so it never leaks into the saved notebook.
                self.nb.cells.pop()
            # Keep only the first output that has a usable representation.
            for output in fakecell.outputs:
                html = self.convert_output_to_html(output)
                if html is not None:
                    cell['metadata']['variables'][fakecell.source] = html
                    break
        return cell, resources

    def convert_output_to_html(self, output):
        """Convert an IOPub output to an HTML/markdown string.

        See https://github.com/ipython-contrib/IPython-notebook-extensions/blob/master/nbextensions/usability/python-markdown/main.js

        Returns:
            The rendered text, or ``None`` when the output type or all of its
            MIME types are unsupported.
        """
        if output.output_type == 'error':
            return '**' + output.ename + '**: ' + output.evalue
        if output.output_type not in ('execute_result', 'display_data'):
            return None

        data = output.data
        if 'text/latex' in data:
            return data['text/latex']
        if 'image/svg+xml' in data:
            # Not supported: the upstream JS notes that an embedded SVG still
            # gets eaten by the sanitizer.
            return None
        for mime in ('image/jpeg', 'image/png'):
            if mime in data:
                return '<img src="data:' + mime + ';base64,' + data[mime] + '"/>'
        for mime in ('text/markdown', 'text/html'):
            if mime in data:
                return data[mime]
        if 'text/plain' in data:
            text = data['text/plain']
            # Strip <p>...</p> wrappers and quotes but keep what they enclose;
            # the back-reference is what preserves the captured content.
            text = re.sub(r'<p>([\s\S]*?)<\/p>', r'\1', text)
            text = re.sub(r"'([\s\S]*?)'", r'\1', text)
            return text
        # Some MIME type we don't support.
        return None
用法保持不变。