Whenever I click a button on a page loaded with HtmlUnit, the text of the resulting page is printed with the wrong encoding. How can I fix it?
package com.company;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import org.apache.commons.logging.LogFactory;
import java.util.logging.Level;
public class Parsing {
/**
 * Loads the auto.ru listing page, clicks the element with id {@code confirm-button},
 * waits for background JavaScript and prints the text of the page that is then
 * displayed in the current window.
 *
 * <p>The {@link WebClient} is opened in a try-with-resources block so that it is
 * closed even when loading or clicking fails.
 *
 * @throws IllegalStateException if the page contains no element with id {@code confirm-button}
 * @throws Exception if loading the page or performing the click fails
 */
public void connect() throws Exception {
    // Silence the logging output of HtmlUnit and the underlying HTTP client.
    LogFactory.getFactory().setAttribute("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
    java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(Level.OFF);
    java.util.logging.Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.OFF);

    String url = "https://auto.ru/cars/new/group/mercedes/e_klasse/20743577/20886691/?grouping_id=tech_param_id%3D20743577%2Ccomplectation_id%3D21225392&sort=fresh_relevance_1-desc&geo_radius=200";

    try (WebClient client = new WebClient(BrowserVersion.CHROME)) {
        client.setAjaxController(new NicelyResynchronizingAjaxController());
        client.getOptions().setThrowExceptionOnScriptError(false);
        client.getOptions().setThrowExceptionOnFailingStatusCode(false);

        HtmlPage page = client.getPage(url);

        HtmlElement clickContact = (HtmlElement) page.getElementById("confirm-button");
        if (clickContact == null) {
            // Fail with a clear message instead of a bare NullPointerException.
            throw new IllegalStateException("No element with id 'confirm-button' found on " + url);
        }
        clickContact.click();

        // Give scripts triggered by the click time to finish before reading the result.
        client.waitForBackgroundJavaScript(10000);

        // The click and the scripts may have replaced the page in the window, so
        // read the page that is currently displayed rather than an earlier snapshot.
        page = (HtmlPage) client.getCurrentWindow().getEnclosedPage();
        System.out.println(page.asText());
    }
}
Output:
Привет! �вто.ру Легковые Мото Коммерче�кие ...
Try this:
/**
 * Opens the auto.ru listing page in a Firefox-emulating client, clicks the element
 * with id {@code confirm-button}, waits up to 60 seconds for background JavaScript
 * and prints the text of the page currently shown in the window.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if loading the page or performing the click fails
 */
public static void main(String[] args) throws IOException {
    final String targetUrl = "https://auto.ru/cars/new/group/mercedes/e_klasse/20743577/20886691/?grouping_id=tech_param_id%3D20743577%2Ccomplectation_id%3D21225392&sort=fresh_relevance_1-desc&geo_radius=200";

    try (final WebClient browser = new WebClient(BrowserVersion.FIREFOX_60)) {
        browser.getOptions().setThrowExceptionOnScriptError(false);

        HtmlPage current = browser.getPage(targetUrl);
        final HtmlElement confirmButton = (HtmlElement) current.getElementById("confirm-button");
        current = confirmButton.click();

        // waitForBackgroundJavaScript reports how many jobs are still pending after the timeout.
        final int pendingJobs = browser.waitForBackgroundJavaScript(60_000);
        System.out.println("still running " + pendingJobs);
        System.out.println(" ---- ");

        // JavaScript changed the page content, so fetch the page the window
        // shows now instead of reusing the reference returned by the click.
        current = (HtmlPage) browser.getCurrentWindow().getEnclosedPage();
        System.out.println(current.asText());
    }
}
With this I do not get the whole content correctly, but the major parts come out right.