I have been trying this for a long time now. I want to scrape content from a subreddit that has adult content. The problem is that you have to answer a simple question before you are given access to that page, i.e. whether you are 18+ or not. I did some research on the page source and found that the solution is a simple POST request in which you send the parameter "over18=yes". But my problem is that I am not able to access the response body after the POST.
Here's the code using Node's built-in http module. I have also tried the "request" module for Node, but that did not help either.
Hoping to find someone who can help me out here.
// POST to reddit's age-confirmation endpoint with the core http module and
// print the status, headers and every body chunk of the response.
var http = require("http");

var options = {
    method: 'POST',
    host: 'www.reddit.com',
    port: 80,
    path: '/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw&over18=yes'
};

// Logs one decoded chunk of the response body.
function printChunk(chunk) {
    console.log('BODY: ' + chunk);
}

// Logs the response metadata, then streams the body as UTF-8 text.
function handleResponse(res) {
    console.log('STATUS: ' + res.statusCode);
    console.log('HEADERS: ' + JSON.stringify(res.headers));
    res.setEncoding('utf8');
    res.on('data', printChunk);
}

// Reports a failure of the request itself (for example a connection error).
function handleError(e) {
    console.log('problem with request: ' + e.message);
}

var req = http.request(options, handleResponse);
req.on('error', handleError);

// write data to request body
req.write('data\n');
req.write('data\n');
req.end();
And here is the code using the Node "request" module:
// Same attempt with the "request" module: submit over18=yes as form data
// to the age-confirmation URL and print whatever body comes back.
var request = require("request");

var postOptions = {
    url: 'http://www.reddit.com/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw',
    form: { over18: 'yes' }
};

// Prints the response body; the error and response arguments are not inspected.
function printBody(err, httpResponse, body) {
    console.log(body);
}

request.post(postOptions, printBody);
The URL I am trying to access is http://www.reddit.com/r/nsfw
In short, when you click the YES button, the form sends the over18=yes parameter to the URL http://www.reddit.com/over18?dest=http%3A%2F%2Fwww.reddit.com%2Fr%2Fnsfw using the POST method. The server then responds with a 302 redirect header and a cookie with the value over18=1, and finally redirects to the URL http://www.reddit.com/r/nsfw using a GET request. Then the server just checks whether you have a cookie with the required value.
All you need to do is send a GET request directly to the final URL with that cookie set.
// Fetch the age-gated subreddit directly by presenting the over18=1 cookie
// on a plain GET, instead of replaying the confirmation form POST.
var request = require("request");
var target = "http://www.reddit.com/r/nsfw";

// Build a cookie jar holding over18=1, scoped to all of reddit.com.
var jar = request.jar();
var cookie = request.cookie("over18=1");
cookie.domain = "reddit.com";
cookie.path = "/";

// The request jar API is synchronous: setCookie(cookie, url) takes no
// callback, so the stored cookie is read back afterwards for diagnostics.
jar.setCookie(cookie, target);
console.log(jar.getCookieString(target));

request({
    uri: target,
    method: "GET",
    jar: jar
}, function(error, response, body) {
    // On a transport failure `response` is undefined; check `error` first so
    // the real problem is reported instead of a TypeError on statusCode.
    if (error) {
        console.log('problem with request: ' + error.message);
        return;
    }
    console.log(response.statusCode);
    console.log(body);
});