Search code examples
python-3.x, selenium-webdriver, selenium-firefoxdriver, browsermob-proxy

How to find _transferSize in a HAR file exported using browsermob-proxy in Python


I am trying to export a .har file using Firefox, Selenium, and browsermob-proxy from Python, using the code below.

# Start the BrowserMob Proxy server from its local install path and create a
# proxy instance, passing trustAllServers='true' as a creation parameter.
bmp_loc = "/Users/project/browsermob-proxy-2.1.4/bin/browsermob-proxy"
server = Server(bmp_loc)
server.start()
proxy = server.create_proxy(params={'trustAllServers': 'true'})

# Build the Firefox capabilities so HTTP and SSL traffic go through the proxy.
selenium_proxy = proxy.selenium_proxy()
# DesiredCapabilities.FIREFOX is a dict shared at class level; work on a copy
# so the assignments below do not leak into every later use of it.
caps = webdriver.DesiredCapabilities.FIREFOX.copy()
caps['marionette'] = False
proxy_settings = {
    "proxyType": "MANUAL",
    "httpProxy": selenium_proxy.httpProxy,
    "sslProxy": selenium_proxy.sslProxy,
}
caps['proxy'] = proxy_settings

# Begin HAR recording (with headers captured) before loading the page, then
# read the recorded HAR back from the proxy.
driver = webdriver.Firefox(desired_capabilities=caps)
proxy.new_har("generated_har",options={'captureHeaders': True})
driver.get("someurl")
browser_logs = proxy.har

I want to get _transferSize from the .har file to perform some analysis, but I am unable to get it; instead, I am getting a 'comment' field in its place:

"redirectURL": "", "headersSize": 1023, "bodySize": 38, "comment": ""

whereas when I manually download the .har file using Firefox, I do get _transferSize.

Version used:

browsermob_proxy==2.1.4
selenium==4.0.0

Can anybody please help me to resolve this?


Solution

  • You can get _transferSize by adding headersSize and bodySize from the har file itself.

     # For each URL: load it in Chrome with HAR capture enabled, then walk the
     # HAR entries to tally slow requests, status codes, large responses and
     # repeated URLs, using headersSize + bodySize as the per-response size.
     # NOTE(review): the indentation of the next two lines is inconsistent as
     # pasted; this looks like an excerpt from a larger function -- confirm.
     urls = ["https://google.com"]
        for ur in urls:
            # NOTE(review): `proxy` is not defined in this snippet; presumably a
            # project helper exposing start_server/start_client/interceptor.
            server = proxy.start_server()
            client = proxy.start_client()
            client.new_har("demo.com")
            # print(client.proxy)
            
            options = webdriver.ChromeOptions()
          
            options.add_argument("--disk-cache-size=0")
            
            # NOTE(review): `options` is rebound to a plain dict here, so the
            # ChromeOptions object built above (and its --disk-cache-size flag)
            # is never passed to the driver.
            options = {
            'enable_har': True 
            }
            # NOTE(review): `seleniumwire_options` suggests `webdriver` is
            # selenium-wire's webdriver rather than plain selenium -- confirm.
            driver = webdriver.Chrome(seleniumwire_options=options)
            driver.request_interceptor = proxy.interceptor
    
            driver.get(ur)
            # Fixed 40-second wait before the HAR is read.
            time.sleep(40)
            row_list = []
            # driver.har is parsed as a JSON string into a dict.
            json_dictionary = json.loads(driver.har)
            repeat_url_list = []      # URLs seen so far, in first-seen order
            repeat_urls = defaultdict(lambda:[])  # URLs seen more than once
            resp_size = 0             # count of responses over the size threshold
            count_url = 0
            url_time = 0 
            status_list = []          # distinct status codes seen so far
            status_url = defaultdict(lambda:[])   # status code -> occurrence count
            a_list = []               # every request URL, in entry order
            
            with open("network_log2.har", "w", encoding="utf-8") as f:
                # f.write(json.dumps(driver.har))
                for i in json_dictionary['log']['entries']:
                    # Writes str() of the entry dict, one per line, so the file
                    # holds Python dict reprs rather than JSON.
                    f.write(str(i))
                    f.write("\n")
                    url = i['request']['url']
                    a_list.append(url)
                    # Count entries whose 'time' exceeds 2000 (presumably
                    # milliseconds, per the HAR format -- confirm).
                    timing = i['time']
                    if timing>2000:
                        # NOTE(review): the rescaled value is not used again in
                        # the visible code.
                        timing = round(timing/2000,1)
                        url_time += 1
                    # Tally how many entries returned each status code; the
                    # first sighting stores 1, so the defaultdict's list default
                    # is never used on this path.
                    status =  i['response']['status']
                    if status in status_list:
                        status_url[status] = status_url[status] + 1
                    else:
                        status_url[status] = 1
                        status_list.append(status)
                    
                    # headersSize + bodySize is used as the response's size.
                    # NOTE(review): HAR producers may report -1 for an unknown
                    # size, which would skew this sum -- confirm.
                    size = i['response']['headersSize'] + i['response']['bodySize']
                    # Count responses whose size, floor-divided by 1000, is
                    # above 500.
                    if size//1000 > 500:
                        resp_size += 1
                    # Flag a URL as repeated on its second and later sightings;
                    # the stored value is always 1, not a running count.
                    if url in repeat_url_list:
                        repeat_urls[url] =  1
                    else:
                        repeat_url_list.append(url)
    
                # Number of distinct URLs that appeared more than once.
                rurl_count = len(repeat_urls)