I have a data frame df with a column htmltext
containing HTML text that I would like to print (as a batch if possible) to individual PDFs, using doc_id
as the filename.
Can I do that directly within R?
I thought about something like
> system("wkhtmltopdf --javascript-delay 1 in.html out.pdf")
How can I implement that in R? Or is there another easy way to do so, using markdown for example?
# Example data: one row per document, holding its id and its HTML content.
doc_id <- c("doc1", "doc2", "doc3")
htmltext <- c(
  "<b>good morning</b>",
  "<b>This text is bold</b>",
  "<b>good evening</b>"
)
df <- data.frame(doc_id, htmltext, stringsAsFactors = FALSE)
# save htmltext single pdfs with doc_id as filename
# The document ids double as the base names of the PDFs to be written.
filenames <- df$doc_id
...?
See if one of these is acceptable:
library(rmarkdown)
library(decapitated) # devtools::install_github("hrbrmstr/decapitated") # requires Chrome
# Sample input: each row pairs a document id with the HTML snippet to render.
xdf <- data.frame(
  doc_id = c("doc1", "doc2", "doc3"),
  htmltext = c(
    "<b>good morning</b>",
    "<b>This text is bold</b>",
    "<b>good evening</b>"
  ),
  stringsAsFactors = FALSE
)
# hackish pandoc way
# For every row of xdf: write the HTML snippet to a temporary .html file and
# hand it to pandoc_convert(), asking for "<doc_id>.pdf" as the output file.
# wd = getwd() is passed so the conversion runs in the current working
# directory rather than next to the temp file.
# NOTE(review): producing a PDF through to = "latex" presumably requires a
# LaTeX installation on the machine -- confirm.
# seq_len() yields an empty sequence for a zero-row frame, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  tryCatch(
    {
      writeLines(xdf$htmltext[i], tf)
      pandoc_convert(
        input = tf,
        to = "latex",
        output = sprintf("%s.pdf", xdf$doc_id[i]),
        wd = getwd()
      )
    },
    # Remove the temp file even when writing or converting fails; the error
    # itself still propagates.
    finally = unlink(tf)
  )
}
# using headless chrome
# For every row of xdf: write the HTML snippet to a temporary .html file and
# pass it to chrome_dump_pdf() as a file:// URL, saving to "<doc_id>.pdf".
# seq_len() yields an empty sequence for a zero-row frame, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(xdf))) {
  message(sprintf("Processing %s", xdf$doc_id[i]))
  tf <- tempfile(fileext = ".html")
  tryCatch(
    {
      writeLines(xdf$htmltext[i], tf)
      chrome_dump_pdf(
        sprintf("file://%s", tf),
        # Use the full column name: `xdf$doc` only resolves through `$`
        # partial matching and would be NULL on a tibble.
        path = sprintf("%s.pdf", xdf$doc_id[i])
      )
    },
    # Remove the temp file even when writing or rendering fails; the error
    # itself still propagates.
    finally = unlink(tf)
  )
}