I am using event-stream module to help me in reading and writing to these local files for which I hope to return a resulting file. Long story short, the 2 input files(sent through express API as multipart/form-data
) I am expecting can be upwards of 200MB in size containing a list of entries (1 per line). What I would like to do is to combine those entries in the following format <entry1>:<entry2>
where entry1
is the entry from the first file and entry2
is from the second file. I did this in a way earlier where I was able to store and return inputs/outputs in memory, but seeing as I have very limited memory space on my application server, I was running out of memory on the heap. I read that I could use event-stream and piping to read in each file line by line and output to a file instead of to a large string in memory using read-streams. The issue is that I can't seem to resolve in the right way/time in order for the resulting output file to be ready to send back to the caller.
What I have so far worked in that I get the correct file output I am expecting, however, this seems to be an asynchronicity problem, in that, I am resolving the promise before the file has actually completed writing/saving. Please see my code below...
const fs = require('fs');
const es = require('event-stream');
const uuid = require('uuid');
const buildFile = async (fileOne, fileTwo) =>
await new Promise((resolve, reject) => {
try {
// Output stream
let fileID = uuid.v4();
let outStream = fs
.createWriteStream(`files/outputFile-${fileID}.txt`, {
flags : 'a',
encoding : 'utf-8'
});
let fileOneRS = fs
.createReadStream(fileOne.path, {
flags : 'r',
encoding : 'utf-8'
})
.pipe(es.split())
.pipe(
es.mapSync((lineOne) => {
fileOneRS.pause();
let fileTwoRS = fs
.createReadStream(fileTwo.path, {
flags : 'r',
encoding : 'utf-8'
})
.pipe(es.split())
.pipe(
es.mapSync((lineTwo) => {
fileTwoRS.pause();
// Write combo to file
outStream.write(`${lineOne}:${lineTwo}\n`);
fileTwoRS.resume();
})
);
fileOneRS.resume();
})
); // This is where I have tried doing .on('end', () => resolve), but it also does not work :(
} catch (err) {
reject(err);
}
});
Note: This function is called from another service function as follows:
buildFile(fileOne, fileTwo)
.then((result) => {
resolve(result);
})
.catch((err) => {
console.log(err);
reject(err);
});
As a novice Javascript developer and even newer to NodeJS, I have been stuck trying to figure this out on my own for over 2 weeks now. If anyone is able to help, I would greatly appreciate some wisdom here!
Thanks 🙂
Edit: Updated the code to conform to the OP's expected output.
The promise' resolve()
function should be called once the write stream completes. The comment provided in the OP snippet indicate that the resolve function might have been called upon draining fileOneRS
(at the end of the pipe() chain).
Rather than creating a new read stream for each line in the first file, the code should only instantiate the read streams once.
The following example illustrate how this code flow could be refactored to read each line only once, and concatenate the lines from file A and B line-by-line:
import stream from "stream";
import util from "util";
import readline from "readline";
import fs from "fs";
import os from "os";
/** Returns a readable stream as an async iterable over text lines */
function lineIteratorFromFile( fileStream ){
return readline.createInterface({
input: fileStream,
crlfDelay: Infinity
})
}
// Use stream.pipeline to handle errors and to stream the combined output
// to a Writable stream. The promise will resolve once the data has finished
// writing to the output stream.
await util
.promisify(stream.pipeline)(
async function*(){
for await ( const lineA of lineIteratorFromFile(fs.createReadStream( "./in1.txt" ))){
for await (const lineB of lineIteratorFromFile(fs.createReadStream( "./in2.txt" ))){
yield `${lineA}: ${lineB}${os.EOL}`
}
}
},
fs.createWriteStream( outputFile )
);
A runnable example with NodeJS v13+ is available in the collapsed snippet below:
// in1.txt:
foo1
foo2
// in2.txt:
bar1
bar2
// out.txt (the file created by this script, with expected output):
foo1: bar1
foo1: bar2
foo2: bar1
foo2: bar2
// main.mjs:
import stream from "stream";
import util from "util";
import readline from "readline";
import fs from "fs";
import os from "os";
/** Returns a readable stream as an async iterable over text lines */
function lineIteratorFromFile( fileStream ){
return readline.createInterface({
input: fileStream,
crlfDelay: Infinity
})
}
(async ()=>{
await util
.promisify(stream.pipeline)(
async function*(){
for await ( const lineA of lineIteratorFromFile(fs.createReadStream( "./in1.txt" ))){
for await (const lineB of lineIteratorFromFile(fs.createReadStream( "./in2.txt" ))){
yield `${lineA}: ${lineB}${os.EOL}`
}
}
},
fs.createWriteStream( "./out.txt" )
);
})()
.catch(console.error);