Search code examples
javascript, node.js, asynchronous, promise, bluebird

Using request.getAsync from bluebird, how to 'pipe' to a file


I'm trying to get the content of some PDF files asynchronously. To do that I'm using Promise.mapSeries with request.getAsync and spread from bluebird.

But in the .then() I need to save the result of that request using pipe and createWriteStream directly. Something like:

// Stream the response for `url` straight into `file` instead of buffering it in memory.
request(url).pipe(fs.createWriteStream(file));

This is the code, I'm using:

// Bluebird shadows the native Promise so that mapSeries/spread are available.
const Promise = require('bluebird');
// promisifyAll adds *Async variants (e.g. getAsync); with multiArgs they fulfil
// with an array of the callback's success values, here [response, body].
const request = Promise.promisifyAll(require('request'), { multiArgs: true });
const fs = Promise.promisifyAll(require("fs"));

const urls = ['http://localhost/test-pdf/one.pdf', 'http://localhost/test-pdf/two.pdf'];

// Fetch the URLs one after another; the resulting promise fulfils with an
// array holding one entry per URL, in order.
Promise.mapSeries(urls, url => {
    // NOTE(review): encoding 'binary' presumably makes request return the body
    // as a binary (latin1) string rather than a Buffer — confirm against request docs.
    return request.getAsync({url: url, encoding:'binary'}).spread((response, body) => {
        if (response.statusCode == 200){
            let r = {};
            r.name = url.match(/\/([^/]*)$/)[1]; // text after the last '/' of the url (file name)
            r.content = body;
            console.log(`Getting ${r.name}`);
            return r;
        }
        else if (response.statusCode == 404){
            // Only logs: nothing is returned, so this url's entry in the results is undefined.
            console.log(`The archive ${url.match(/\/([^/]*)$/)[1]} does not exists`);
        }
        else throw new Error(`Unsuccessful attempt. Code: ${response.statusCode}`);
    });
}).then((result) => {
    // Here I want to 'pipe' to a file the result from 'getAsync'
}).catch((error) =>{
    console.error(error);
})

My question:

How can I pipe the result from getAsync to a file using the pipe function? Is it possible?

PS: I know that I can use fs.promises, but I just want to know if it's possible to do it the way I'm posting.


Solution

  • I think the answer is already in the question in that .then() seems to be the .pipe() you seek.

    What's possibly missing is that (result) should be (results), i.e. an array of all the {name, content} pairs arising from Promise.mapSeries(urls, ...).

    // Download every URL in sequence first, then write the collected bodies to disk.
    // Any failed download rejects the chain, so nothing is written in that case.
    Promise.mapSeries(urls, url => {
        const name = url.match(/\/([^/]*)$/)[1]; // text after the last '/' of the url (file name)
        return request.getAsync({ url, encoding: 'binary' }).spread((response, body) => {
            if (response.statusCode === 200) {
                return { name, content: body };
            }
            if (response.statusCode === 404) {
                throw new Error(`The archive ${name} does not exist`);
            }
            throw new Error(`Unsuccessful attempt. Code: ${response.statusCode}`);
        });
    }).then((results) => {
        // `results` is an array of { name, content } pairs, one per URL.
        // `content` was requested with the 'binary' encoding, so it is written back
        // with the same encoding to reproduce the original bytes.
        return Promise.mapSeries(results, ({ name, content }) =>
            fs.writeFileAsync(name, content, 'binary'));
    }).catch((error) => {
        console.error(error);
    });
    

    In practice you would probably not choose to write it that way because every getAsync() would need to complete before any of the writes commenced.

    A better flow under most circumstances (and probably the one you want) would be for content from each successful getAsync() to be written as soon as possible :

    // Fetch the URLs in sequence, writing each body to disk as soon as it arrives.
    Promise.mapSeries(urls, url => {
        const name = url.match(/\/([^/]*)$/)[1]; // text after the last '/' of the url (file name)
        return request.getAsync({ url, encoding: 'binary' }).spread((response, body) => {
            if (response.statusCode === 200) {
                // `body` itself is the file content (requested with the 'binary' encoding).
                // Returning the write promise makes mapSeries wait for the write to
                // finish before the next URL is requested.
                return fs.writeFileAsync(name, body, 'binary');
            }
            if (response.statusCode === 404) {
                throw new Error(`The archive ${name} does not exist`);
            }
            throw new Error(`Unsuccessful attempt. Code: ${response.statusCode}`);
        });
    }).catch((error) => {
        console.error(error);
    });
    

    Going further, you might choose to handle errors better, for example you may wish to :

    • catch individual url/get/write errors
    • compile success/failure stats.

    Something like this maybe :

    // Fetch and write each URL in sequence, recording a per-URL outcome instead of
    // aborting on the first failure, then log success/failure totals.
    Promise.mapSeries(urls, url => {
        // `match` returns null when the url contains no '/', so guard before indexing.
        const match = url.match(/\/([^/]*)$/);
        const name = match ? match[1] : ''; // text after the last '/' of the url (file name)
        // Promise.try turns the synchronous RangeError into a rejection, so the
        // per-URL catch below records it rather than letting it abort the whole series.
        return Promise.try(() => {
            if (!name) {
                throw new RangeError(`Error in input data for ${url}`);
            }
            return request.getAsync({ url, encoding: 'binary' });
        }).spread((response, body) => {
            if (response.statusCode === 200) {
                // `body` itself is the file content (requested with the 'binary'
                // encoding); the entry is reported only once the write has succeeded.
                return fs.writeFileAsync(name, body, 'binary')
                    .then(() => ({ name, content: body }));
            }
            if (response.statusCode === 404) {
                throw new Error(`The archive ${name} does not exist`);
            }
            throw new Error(`Unsuccessful attempt. Code: ${response.statusCode}`);
        }).catch(error => ({ name, error })); // convert any url/get/write failure into a result entry
    }).then((results) => {
        const total = results.length;
        const failures = results.filter(res => res.error).length;
        const successes = total - failures;
        console.log({ successes, failures, total }); // log success/failure stats
    }).catch((error) => {
        console.error(error); // just in case some otherwise uncaught error slips through
    });