使用 wkhtmltopdf 或 Markdown 将 html 从 data.frames 保存为 pdf
save html from data.frames to pdf using wkhtmltopdf or Markdown
我有一个数据框 df,其中 htmltext 列包含 HTML 文本。我想把每一行的内容分别打印成单独的 PDF(如果可能的话批量处理),并以 doc_id 作为文件名。
我可以直接在 R 中这样做吗?
我想到了
> system("wkhtmltopdf --javascript-delay 1 in.html out.pdf")
我如何在 R 中实现它?
或者,是否有其他简单的方法,例如使用 markdown?
# Example data: one row per document, holding the HTML source to render.
doc_id <- c("doc1", "doc2", "doc3")
htmltext <- c(
  "<b>good morning</b>",
  "<b>This text is bold</b>",
  "<b>good evening</b>"
)
df <- data.frame(doc_id, htmltext, stringsAsFactors = FALSE)
# Goal: save each htmltext entry as its own PDF, using doc_id as the filename.
filenames <- df$doc_id
...?
看看以下两种方法中是否有适合你的:
library(rmarkdown)
library(decapitated) # devtools::install_github("hrbrmstr/decapitated") # requires Chrome
# Sample input: document ids paired with the HTML snippet to render for each.
xdf <- data.frame(
  doc_id = c("doc1", "doc2", "doc3"),
  htmltext = c(
    "<b>good morning</b>",
    "<b>This text is bold</b>",
    "<b>good evening</b>"
  ),
  stringsAsFactors = FALSE
)
# hackish pandoc way: write each HTML snippet to a temporary file and have
# pandoc convert it to "<doc_id>.pdf", running with wd set to getwd().
# seq_len() skips the loop entirely when xdf has zero rows, whereas
# 1:nrow(xdf) would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  writeLines(xdf$htmltext[i], tf)
  tryCatch(
    pandoc_convert(
      input = tf,
      to = "latex",
      output = sprintf("%s.pdf", xdf$doc_id[i]),
      wd = getwd()
    ),
    # Remove the temporary HTML file even if the conversion fails.
    finally = unlink(tf)
  )
}
# using headless chrome: write each HTML snippet to a temporary file and
# print it to "<doc_id>.pdf" via chrome_dump_pdf().
# seq_len() skips the loop entirely when xdf has zero rows, whereas
# 1:nrow(xdf) would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  writeLines(xdf$htmltext[i], tf)
  tryCatch(
    chrome_dump_pdf(
      sprintf("file://%s", tf),
      # Use the full column name: xdf$doc only resolved to doc_id through
      # `$` partial matching, which breaks on tibbles or if another column
      # starting with "doc" is added.
      path = sprintf("%s.pdf", xdf$doc_id[i])
    ),
    # Remove the temporary HTML file even if rendering fails.
    finally = unlink(tf)
  )
}
我有一个数据框 df,其中 htmltext 列包含 HTML 文本。我想把每一行的内容分别打印成单独的 PDF(如果可能的话批量处理),并以 doc_id 作为文件名。
我可以直接在 R 中这样做吗?
我想到了
> system("wkhtmltopdf --javascript-delay 1 in.html out.pdf")
我如何在 R 中实现它?或者,是否有其他简单的方法,例如使用 markdown?
# Example data: one row per document, holding the HTML source to render.
doc_id <- c("doc1", "doc2", "doc3")
htmltext <- c(
  "<b>good morning</b>",
  "<b>This text is bold</b>",
  "<b>good evening</b>"
)
df <- data.frame(doc_id, htmltext, stringsAsFactors = FALSE)
# Goal: save each htmltext entry as its own PDF, using doc_id as the filename.
filenames <- df$doc_id
...?
看看以下两种方法中是否有适合你的:
library(rmarkdown)
library(decapitated) # devtools::install_github("hrbrmstr/decapitated") # requires Chrome
# Sample input: document ids paired with the HTML snippet to render for each.
xdf <- data.frame(
  doc_id = c("doc1", "doc2", "doc3"),
  htmltext = c(
    "<b>good morning</b>",
    "<b>This text is bold</b>",
    "<b>good evening</b>"
  ),
  stringsAsFactors = FALSE
)
# hackish pandoc way: write each HTML snippet to a temporary file and have
# pandoc convert it to "<doc_id>.pdf", running with wd set to getwd().
# seq_len() skips the loop entirely when xdf has zero rows, whereas
# 1:nrow(xdf) would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  writeLines(xdf$htmltext[i], tf)
  tryCatch(
    pandoc_convert(
      input = tf,
      to = "latex",
      output = sprintf("%s.pdf", xdf$doc_id[i]),
      wd = getwd()
    ),
    # Remove the temporary HTML file even if the conversion fails.
    finally = unlink(tf)
  )
}
# using headless chrome: write each HTML snippet to a temporary file and
# print it to "<doc_id>.pdf" via chrome_dump_pdf().
# seq_len() skips the loop entirely when xdf has zero rows, whereas
# 1:nrow(xdf) would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  writeLines(xdf$htmltext[i], tf)
  tryCatch(
    chrome_dump_pdf(
      sprintf("file://%s", tf),
      # Use the full column name: xdf$doc only resolved to doc_id through
      # `$` partial matching, which breaks on tibbles or if another column
      # starting with "doc" is added.
      path = sprintf("%s.pdf", xdf$doc_id[i])
    ),
    # Remove the temporary HTML file even if rendering fails.
    finally = unlink(tf)
  )
}