Search code examples
python · python-3.x · pdfkit

Create PDF of a https webpage which requires login using pdfkit


I am trying to generate a PDF of a webpage on an HTTPS website (Salesforce) that requires a login. So far I have tried using simple_salesforce, which returns a session ID (cookie), and passing that to pdfkit, but to no avail.

from simple_salesforce import Salesforce
import pdfkit

# Authenticate via simple_salesforce; the session id is read from this object
# further down.
sf = Salesforce(
    username='my username',
    password='my password',
    security_token='my API security token',
)

# Tell pdfkit where the wkhtmltopdf executable is (path passed as UTF-8 bytes).
path_wkthmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_wkthmltopdf.encode('utf8'))

# Every option except 'cookie' carries None as its value; keys are inserted
# in the order listed here, with 'cookie' and 'no-outline' last.
options1 = dict.fromkeys(
    [
        'page-size',
        'margin-top',
        'margin-right',
        'margin-bottom',
        'margin-left',
        'encoding',
        'custom-header',
    ],
)
# NOTE(review): pdfkit's README shows 'cookie' as a list of (name, value)
# tuples; here only the bare session id is supplied -- confirm this is accepted.
options1['cookie'] = sf.session_id
options1['no-outline'] = None

pdfkit.from_url(
    'https://thiess.my.salesforce.com/0069000000IZH71',
    'out.pdf',
    configuration=config,
    options=options1,
)

Does anyone know the best way to pass a cookie argument into pdfkit?


Solution

  • Check this :) You will probably need to install requests if you don't already have it. I don't know much about the Salesforce library, so this logs in with a plain requests session instead.

    import requests
    import pdfkit
    
    # One shared session object, passed to download() below for all requests.
    session = requests.Session()
    
    
    def download(session, username, password):
        """Log in via the ADFS form, save the Salesforce page, and convert it to PDF.

        The page is written to ``myfile.html`` and the PDF to ``out.pdf`` in the
        current working directory.

        Args:
            session: ``requests`` session used for every request, so cookies set
                during login are sent along with the page request.
            username: Value posted as the ``UserName`` form field.
            password: Value posted as the ``Password`` form field.

        Raises:
            requests.HTTPError: If the login POST or the page GET ends with an
                HTTP error status.
        """
        login_url = 'https://bneadf.thiess.com.au/adfs/ls/'
        # Visit the login page first so the session holds any cookies it sets.
        session.get(login_url)

        ua = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
        # Update instead of replacing, so the session keeps its default headers.
        # (The original read `self.ua`, which does not exist in a plain function.)
        session.headers.update({'User-Agent': ua})
        payload = {
            'UserName': username,
            'Password': password,
            'AuthMethod': 'FormsAuthentication',
        }

        # Session headers are sent automatically; no need to pass them again.
        login_response = session.post(login_url, data=payload)
        login_response.raise_for_status()

        my_html = session.get('https://thiess.my.salesforce.com/0069000000IZH71')
        # Fail loudly rather than converting an HTTP error page to PDF.
        my_html.raise_for_status()

        # Context manager closes the file even if the write fails.
        with open('myfile.html', 'wb') as html_file:
            html_file.write(my_html.content)

        # Raw string: in a normal literal the "\b" of "\bin" is a backspace.
        path_wkthmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
        config = pdfkit.configuration(wkhtmltopdf=bytes(path_wkthmltopdf, 'utf8'))

        # Pass the configuration so the explicit wkhtmltopdf path is actually used.
        pdfkit.from_file('myfile.html', 'out.pdf', configuration=config)
    
    # Run the login-and-convert flow with placeholder credentials.
    download(session, username="yourusername", password="yourpass")