Search code examples
java select jsoup

Why does this ID-selection not work in Jsoup?


I am trying to get data from a webpage (http://steamcommunity.com/id/Winning117/games/?tab=all) using a specific tag but I keep getting null. My desired result is to get the "hours played" for a specific game - Cluckles' Adventure in this case.

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * Downloads a Steam "all games" page with Jsoup and prints the element whose
 * id is {@code game_605250}.
 *
 * <p>Only the HTML returned by the HTTP request is searched; when no element
 * matches the selector, {@code null} is printed.
 */
public class TestScrape {
    /**
     * Fetches the page, looks up the first {@code div#game_605250} element and
     * writes it to standard output.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        Document document = Jsoup
                .connect("http://steamcommunity.com/id/Winning117/games/?tab=all")
                .get();

        // first() yields the first match of the CSS query; nothing is matched
        // when the element is absent from the fetched markup.
        Element playTime = document.select("div#game_605250").first();

        System.out.println(playTime);
    }
}

Edit: How can I tell if a webpage is using JavaScript and is therefore unable to be parsed by Jsoup?


Solution

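  • Jsoup only parses the HTML returned by the server and does not run JavaScript, so content that the page builds with scripts is missing from the parsed document. To execute JavaScript from Java code, you can use Selenium: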

    Selenium-WebDriver makes direct calls to the browser using each browser’s native support for automation.

    To include it with Maven, use this dependency:

    <!-- Selenium artifact used by the JUnit test below (pinned to 3.4.0). -->
    <dependency>
        <groupId>org.seleniumhq.selenium</groupId>
        <artifactId>selenium-server</artifactId>
        <version>3.4.0</version>
    </dependency>
    

    Below is a simple JUnit test that creates a WebDriver instance, navigates to the given URL, and executes a small script to read the rgGames variable. You have to download the chromedriver executable from https://sites.google.com/a/chromium.org/chromedriver/downloads.

    package SeleniumProject.selenium;
    
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    
    import org.junit.After;
    import org.junit.AfterClass;
    import org.junit.Before;
    import org.junit.BeforeClass;
    import org.junit.Test;
    import org.junit.runner.RunWith;
    import org.junit.runners.JUnit4;
    import org.openqa.selenium.By;
    import org.openqa.selenium.JavascriptExecutor;
    import org.openqa.selenium.WebDriver;
    import org.openqa.selenium.WebElement;
    import org.openqa.selenium.chrome.ChromeDriverService;
    import org.openqa.selenium.chrome.ChromeOptions;
    import org.openqa.selenium.remote.DesiredCapabilities;
    import org.openqa.selenium.remote.RemoteWebDriver;
    import org.openqa.selenium.support.ui.ExpectedCondition;
    import org.openqa.selenium.support.ui.WebDriverWait;
    
    import junit.framework.TestCase;
    
    /**
     * JUnit 4 test that opens a Steam games page in Chrome through a
     * {@link ChromeDriverService} and reads the page's {@code rgGames}
     * JavaScript variable.
     *
     * <p>One driver service is shared by the whole class; each test gets its
     * own {@link WebDriver} session, which is closed afterwards.
     */
    @RunWith(JUnit4.class)
    public class ChromeTest extends TestCase {

        /** System property that may point at the chromedriver executable. */
        private static final String DRIVER_PATH_PROPERTY = "webdriver.chrome.driver";

        /** Executable used when {@link #DRIVER_PATH_PROPERTY} is not set. */
        private static final String DEFAULT_DRIVER_PATH =
                "D:\\Downloads\\chromedriver_win32\\chromedriver.exe";

        /** Page whose scripts define the {@code rgGames} variable. */
        private static final String GAMES_URL = "http://steamcommunity.com/id/Winning117/games/?tab=all";

        /** Value of the {@code name} property of the game to print. */
        private static final String TARGET_GAME = "Cluckles' Adventure";

        private static ChromeDriverService service;
        private WebDriver driver;

        /**
         * Builds the ChromeDriver service on any free port and starts it.
         *
         * @throws IllegalStateException if the service cannot be started; the
         *         original {@link IOException} is kept as the cause
         */
        @BeforeClass
        public static void createAndStartService() {
            String driverPath = System.getProperty(DRIVER_PATH_PROPERTY, DEFAULT_DRIVER_PATH);
            service = new ChromeDriverService.Builder()
                    .usingDriverExecutable(new File(driverPath))
                    .withVerbose(false).usingAnyFreePort().build();
            try {
                service.start();
            } catch (IOException e) {
                // Fail the class here: continuing would only produce confusing
                // connection errors in every test.
                throw new IllegalStateException("service didn't start (driver: " + driverPath + ")", e);
            }
        }

        /** Stops the shared ChromeDriver service if it was created. */
        @AfterClass
        public static void createAndStopService() {
            // service is still null when building it failed in @BeforeClass.
            if (service != null) {
                service.stop();
            }
        }

        /** Opens a new Chrome session against the running driver service. */
        @Before
        public void createDriver() {
            ChromeOptions chromeOptions = new ChromeOptions();
            DesiredCapabilities capabilities = DesiredCapabilities.chrome();
            capabilities.setCapability(ChromeOptions.CAPABILITY, chromeOptions);
            driver = new RemoteWebDriver(service.getUrl(), capabilities);
        }

        /** Closes the browser session opened for the test, if any. */
        @After
        public void quitDriver() {
            // driver is null when createDriver() failed before assigning it.
            if (driver != null) {
                driver.quit();
            }
        }

        /**
         * Loads the games page, evaluates {@code return rgGames;} in the
         * browser and prints every property of the game named
         * "Cluckles' Adventure" as {@code key : value} lines.
         */
        @Test
        public void testJS() {
            JavascriptExecutor js = (JavascriptExecutor) driver;

            // Load a new web page in the current browser window.
            driver.get(GAMES_URL);

            // Executes JavaScript in the context of the currently selected
            // frame or window.
            Object result = js.executeScript("return rgGames;");
            assertTrue("expected rgGames to be an array but got: " + result, result instanceof List);

            // Each list entry is expected to be a Map holding the properties
            // of one game; anything else is skipped.
            for (Object entry : (List<?>) result) {
                if (!(entry instanceof Map)) {
                    continue;
                }
                Map<?, ?> game = (Map<?, ?>) entry;
                // Constant-first equals stays safe when "name" is missing or null.
                if (TARGET_GAME.equals(game.get("name"))) {
                    // print all properties for game Cluckles' Adventure
                    game.forEach((key, value) -> System.out.println(key + " : " + value));
                }
            }
        }
    }
    

    As you can see, Selenium loads the page with:

    driver.get("http://steamcommunity.com/id/Winning117/games/?tab=all");
    

    To get the data for all of Winning117's games, the script returns the page's rgGames variable:

    ArrayList<Map> list = (ArrayList<Map>) js.executeScript("return rgGames;");