使用 wkhtmltopdf 或 Markdown 将 data.frame 中的 html 保存为 pdf

save html from data.frames to pdf using wkhtmltopdf or Markdown

我有一个 df,其中 htmltext 列包含 html 文本。我想把每条文本分别保存为单独的 PDF(如果可能的话批量处理),并以 doc_id 作为文件名。

我可以直接在 R 中这样做吗?

我想到了

> system("wkhtmltopdf --javascript-delay 1 in.html out.pdf") 

我如何在 R 中实现它?或者是否有其他简单的方法,例如使用 markdown?

# Example data: one row per document. doc_id is the intended output file
# name and htmltext is the HTML fragment to render.
doc_id <- c("doc1", "doc2", "doc3")
htmltext <- c(
  "<b>good morning</b>",
  "<b>This text is bold</b>",
  "<b>good evening</b>"
)
df <- data.frame(doc_id, htmltext, stringsAsFactors = FALSE)

# Goal: save each htmltext entry as its own PDF, using doc_id as the filename.
filenames <- df$doc_id
...?

看看下面两种方法中是否有适合你的:

library(rmarkdown)
library(decapitated) # devtools::install_github("hrbrmstr/decapitated") # requires Chrome

# Sample input: doc_id supplies the output file name and htmltext the HTML
# fragment to render. Both columns are kept as character, not factor.
xdf <- data.frame(
  doc_id = c("doc1", "doc2", "doc3"),
  htmltext = c(
    "<b>good morning</b>",
    "<b>This text is bold</b>",
    "<b>good evening</b>"
  ),
  stringsAsFactors = FALSE
)

# hackish pandoc way
# For each row, write the HTML fragment to a temporary .html file and hand it
# to pandoc_convert(), targeting "<doc_id>.pdf" in the current working
# directory.
# NOTE(review): presumably pandoc needs a LaTeX engine installed to produce
# the PDF -- confirm on the target machine.
#
# seq_len() makes the loop a no-op for a zero-row data frame; 1:nrow(xdf)
# would iterate over c(1, 0) instead.
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  tryCatch(
    {
      writeLines(xdf$htmltext[i], tf)
      pandoc_convert(
        input = tf,
        to = "latex",
        output = sprintf("%s.pdf", xdf$doc_id[i]),
        wd = getwd()
      )
    },
    # No error handler: failures still propagate, but the temporary file is
    # removed either way.
    finally = unlink(tf)
  )
}

# using headless chrome
# For each row, write the HTML fragment to a temporary .html file and pass its
# file:// URL to chrome_dump_pdf(), with "<doc_id>.pdf" as the output path.
#
# The output name uses the full column name doc_id: `xdf$doc` only resolved
# through `$` partial matching, which breaks on tibbles or as soon as another
# column starting with "doc" is added.
# seq_len() makes the loop a no-op for a zero-row data frame.
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  tryCatch(
    {
      writeLines(xdf$htmltext[i], tf)
      chrome_dump_pdf(
        sprintf("file://%s", tf),
        path = sprintf("%s.pdf", xdf$doc_id[i])
      )
    },
    # No error handler: failures still propagate, but the temporary file is
    # removed either way.
    finally = unlink(tf)
  )
}