If I go to the following web page in Chrome, it loads fine: https://www.cruisemapper.com/?poi=39
However, when I run the following PhantomJS script, which simply goes to the same URL and outputs the entire DOM string to the console, I get a 403 Forbidden message:
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.open(url, function (status) {
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;
}));
phantom.exit();
}
});
Here's the exact output to the console:
<html><head>
<title>403 Forbidden</title>
</head><body>
<h1>Forbidden</h1>
<p>You don't have permission to access /
on this server.<br>
</p>
</body></html>
I thought that if I added some sort of user agent string, it might work. As such, I added the following above the console.log line:
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
But that didn't work. So then I tried the following instead:
page.customHeaders = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
};
But that didn't work either. Does anyone have any advice on how I can possibly hit up the URL above and not get a 403 Forbidden message? Thank you.
Your code works for me fine (I's suggest viewport size emulation though, see code). If you still get a 403, try changing your IP, it's possible that the site is on to you now (you probably visited that page lots of times).
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
page.viewportSize = { width: 1440, height: 900 }; // <-- otherwise it's 400x300 by default
// It's good to watch for errors on the page
page.onError = function (msg, trace)
{
console.log(msg);
trace.forEach(function(item) {
console.log(' ', item.file, ':', item.line);
})
}
page.open(url, function (status) {
console.log(status);
page.render("page.png"); // Also useful to check if you get what you expect
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;
}));
phantom.exit();
}
});