I am using PuppeteerSharp to generate a PDF from a locally-hosted HTML template, and I want the PDF to be non-searchable, meaning that it contains nothing but one big image. I have chosen this approach because I want to make it harder for the document to be tampered with.
I have already come up with a technique for generating an image-only PDF. I access and take a screenshot of the HTML page, store it to disk, access said screenshot from the browser, and finally generate a PDF from that image. The PDF turns out fine, but I want to learn if it is possible to eliminate the need to store the image to disk.
Code below:
Function code
/// <summary>
/// HTTP-triggered function that turns the local HTML template into an image-only PDF.
/// </summary>
/// <remarks>
/// The template is opened in a headless browser and screenshotted to a PNG on disk; the PNG is
/// then opened in the same page and printed to PDF. The PDF is rendered once, written to disk,
/// and returned in the HTTP response.
/// </remarks>
/// <param name="req">The incoming HTTP GET request; it is not read by this function.</param>
/// <param name="log">Logger used to report the browser executable path.</param>
/// <returns>A <see cref="FileStreamResult"/> carrying the PDF as <c>application/pdf</c>.</returns>
[FunctionName("pdftest")]
public async Task<IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Anonymous, "get")] HttpRequest req,
    ILogger log)
{
    // Message template instead of string interpolation so the path is logged as a structured value.
    log.LogInformation("Browser path: {BrowserPath}", appInfo.BrowserExecutablePath);

    ViewPortOptions vprtOpts = new ViewPortOptions()
    {
        DeviceScaleFactor = 2
    };
    var browser = await Puppeteer.LaunchAsync(new LaunchOptions
    {
        Headless = true,
        ExecutablePath = appInfo.BrowserExecutablePath,
        DefaultViewport = vprtOpts
    });

    try
    {
        var page = await browser.NewPageAsync();

        // access the HTML template
        await page.GoToAsync(@"C:\experiment\pdf\index.html");

        // where to save the screenshot
        var file = @"C:\experiment\pdf\index.png";
        // where to save the final PDF
        var file2 = @"C:\experiment\pdf\index2.pdf";

        PdfOptions opts = new PdfOptions()
        {
            PrintBackground = true,
            Width = "794px",
            Height = "1122px",
            Scale = 1
        };
        ScreenshotOptions scrOpts = new ScreenshotOptions()
        {
            FullPage = true,
        };

        // Screenshot the rendered template, then load that image so the PDF contains only the picture.
        await page.ScreenshotAsync(file, scrOpts);
        await page.GoToAsync(file);

        // Render the PDF once and reuse the bytes for both the on-disk copy and the response,
        // instead of asking the browser to print the page twice.
        byte[] pdfBytes = await page.PdfDataAsync(opts);
        await System.IO.File.WriteAllBytesAsync(file2, pdfBytes);

        return new FileStreamResult(new System.IO.MemoryStream(pdfBytes), "application/pdf");
    }
    finally
    {
        // Always shut the browser down, even when navigation or rendering throws,
        // so a failed request does not leave a browser process behind.
        await browser.CloseAsync();
    }
}
Startup.cs
/// <summary>
/// Functions host startup: downloads the browser, registers an <see cref="AppInfo"/> singleton,
/// and starts an in-process web host for the Razor Pages app on a free port.
/// </summary>
public class Startup : FunctionsStartup
{
    /// <summary>
    /// Downloads the default browser revision, records its executable path and the chosen
    /// port in an <see cref="AppInfo"/> singleton, and starts the Razor Pages web host.
    /// </summary>
    /// <param name="builder">The Functions host builder whose service collection is extended.</param>
    public override void Configure(IFunctionsHostBuilder builder)
    {
        var bfOptions = new BrowserFetcherOptions();
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
        {
            // NOTE(review): presumably the default download location is not writable on Linux hosts - confirm.
            bfOptions.Path = Path.GetTempPath();
        }

        var bf = new BrowserFetcher(bfOptions);

        // Configure is synchronous, so the download has to be blocked on here.
        // GetAwaiter().GetResult() surfaces the original exception instead of an AggregateException.
        bf.DownloadAsync(BrowserFetcher.DefaultRevision).GetAwaiter().GetResult();

        var info = new AppInfo
        {
            BrowserExecutablePath = bf.GetExecutablePath(BrowserFetcher.DefaultRevision)
        };
        var port = GetAvailablePort();
        info.RazorPagesServerPort = port;
        builder.Services.AddSingleton(info);

        var webHost = Host.CreateDefaultBuilder()
            .ConfigureWebHostDefaults(webBuilder =>
            {
                var scriptRoot = Environment.GetEnvironmentVariable("AzureWebJobsScriptRoot");
                System.Console.WriteLine($"Starting web server on port {port}");
                if (!string.IsNullOrEmpty(scriptRoot))
                {
                    webBuilder.UseContentRoot(scriptRoot);
                }

                webBuilder.UseUrls($"http://0.0.0.0:{port}")
                    .UseStartup<RazorPagesApp.Startup>();
            })
            .Build();

        webHost.Start();
    }

    /// <summary>
    /// Asks the OS for a free TCP port by binding a loopback listener to port 0.
    /// </summary>
    /// <remarks>
    /// The listener is stopped before the port is returned, so the port is released again
    /// before the web host binds to it; another process could claim it in between.
    /// </remarks>
    /// <returns>The port number the OS assigned to the temporary listener.</returns>
    private static int GetAvailablePort()
    {
        // https://stackoverflow.com/a/150974/9035640
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        try
        {
            return ((IPEndPoint)listener.LocalEndpoint).Port;
        }
        finally
        {
            // Release the socket even if reading the endpoint throws.
            listener.Stop();
        }
    }
}
/// <summary>
/// Values computed once at startup and shared with the functions through a singleton registration.
/// </summary>
public class AppInfo
{
    /// <summary>
    /// Path of the browser executable that is passed to the browser launch options.
    /// </summary>
    public string BrowserExecutablePath { get; set; }

    /// <summary>
    /// TCP port on which the Razor Pages web host is started.
    /// </summary>
    public int RazorPagesServerPort { get; set; }
}
According to the docs, you have ScreenshotDataAsync, which returns a byte[]:
// Takes the screenshot with the given options and returns it as a byte array; no file path is passed.
byte[] data = await page.ScreenshotDataAsync(scrOpts);
There's also ScreenshotStreamAsync, which returns a Stream:
// Takes the screenshot with the given options and returns it as a Stream; no file path is passed.
Stream stream = await page.ScreenshotStreamAsync(scrOpts);