Search code examples
javascript, node.js, readline

Finding byte range correctly from nodejs readline


I am reading a large CSV file using Node.js' readline module. I want to correctly determine how many bytes I have read, or the byte positions of blocks of lines. readline seems to account for the newline character on the 1st line but not on the others, I guess, so all of the byte ranges come out wrong. Could anyone please help here? Also, is this the correct way to do it? And how can I find the byte position after I have read, let's say, 100 lines?

const fs = require("fs");
const readline = require("readline");

// Path of the CSV file to scan. The 'close' handler also uses it to report the file size.
const csvfile = "csv-file.csv";

// Read stream over the whole file; it is the input of the readline interface below.
let stream1 = fs.createReadStream(csvfile);

// Stream 2 to read only the specific bytes
//let stream2 = fs.createReadStream(csvfile, { start: 97, end: 174 });

// Line reader that consumes stream1 and fires the 'line' / 'close' handlers further down.
let rl = readline.createInterface({
    input: stream1,
    terminal: false,
  });

// Number of 'line' events handled so far.
var lines = 0;
// Collected range strings of the form "bytes=<start>-<end>", one per completed block.
var byterange = []
// Start offset used for the next range that gets recorded.
var startingByte = 0;
// Sum of the byte lengths of the lines in the current block; reset to 0 after each recorded block.
var totalBytesInThisBlock = 0;

// Print those specific bytes and see if it's working as expected.
// stream2.on('data', (data) => {
//     console.log(data.toString('utf8'));
// });
// stream2.on('end', ()=>{});

// Runs once per line: counts the line and adds the byte length of its text to the
// running total of the current block.
// NOTE(review): readline presumably delivers `input` without its line terminator, so
// the newline bytes are not included in Buffer.byteLength(input) — confirm against
// the Node.js readline documentation.
rl.on('line', (input) => {
//console.log(input);
lines++;
totalBytesInThisBlock += Buffer.byteLength(input);
console.log("total bytes till line "+lines +" and adding "+Buffer.byteLength(input)+" now : " + totalBytesInThisBlock);

// Blocks of 4 lines are intended: the first range should run from the starting byte of
// line 1 to the ending byte of line 4, the next from line 5 to line 8, and so on.
// Lines left over when the line count is not a multiple of 4 are never recorded,
// because the push below only happens inside this branch.
if(lines%4==0) {
// Records "bytes=<start>-<start + block total>" for the block that just finished.
byterange.push("bytes="+startingByte+"-"+(totalBytesInThisBlock+startingByte));
// Moves the start past the block total plus 1 extra byte. Only one byte is added per
// 4-line block, not one per line.
// NOTE(review): if each of the 4 lines ends with a terminator that was not counted
// above, this under-counts and shifts every later range — verify against the file.
startingByte+=totalBytesInThisBlock+1;
totalBytesInThisBlock=0;
}
});

// Runs once the input is exhausted: prints the number of lines read, the collected
// byte ranges, and the file's total size in bytes so the ranges can be checked
// against it.
rl.on('close', () => {
    console.log(`lines read : ${lines}`);
    console.log(byterange);

    // Total bytes in file. Ask the file system for the size instead of loading the
    // whole (potentially very large) file into memory just to measure it; the printed
    // number is the same.
    console.log(fs.statSync(csvfile).size);
});

Solution

  • The mistake I was making in the range calculation was this:

    "Hello world" spans bytes 0-10; "This is example line." spans bytes 11-50.

    I was not adding one each time the line changed. If the first line covers bytes 0-10, the next one cannot run from 10 to 50; it has to be 11-50.

    // Smallest span, in bytes, that a block must exceed before its range is emitted.
    const MIN_BLOCK_BYTES = 400;
    // Size of one line terminator. readline hands each line over without it, so it is
    // added back once per line: 1 for "\n"; use 2 for a file with "\r\n" line endings.
    const LINE_TERMINATOR_BYTES = 1;

    // Number of lines read so far.
    let lines = 0;
    // Emitted ranges, each formatted as "bytes=<first>-<last>" (both ends inclusive).
    const byterange = [];
    // Offset of the first byte of the block currently being accumulated.
    let startingByte = 0;
    // Offset of the last byte consumed so far, terminator included; -1 before any line.
    let byteCursor = -1;

    // Records the inclusive range startingByte..byteCursor, logs it, and starts the
    // next block on the byte that follows.
    const emitRange = () => {
        const range = `bytes=${startingByte}-${byteCursor}`;
        byterange.push(range);
        console.log(range);
        startingByte = byteCursor + 1;
    };

    rl.on('line', (input) => {
        lines++;
        // Step over the line's text and its terminator, so the cursor always rests on
        // the last byte of a complete line.
        byteCursor += Buffer.byteLength(input) + LINE_TERMINATOR_BYTES;

        // Close the block once it spans more than MIN_BLOCK_BYTES. Ranges always end on
        // a line boundary, so no line is ever split between two blocks.
        if (byteCursor - startingByte > MIN_BLOCK_BYTES) {
            emitRange();
        }
    });

    // The last lines of a file rarely fill a whole block; emit whatever is pending so
    // the ranges cover the entire file. Register this before any other 'close'
    // listener that reads byterange, since listeners run in registration order.
    // If the file does not end with a terminator, the final range's end is one past
    // the last byte; byte-range consumers interpret an end beyond the file size as
    // "through the end of the file".
    rl.on('close', () => {
        if (byteCursor >= startingByte) {
            emitRange();
        }
    });
    

    Here, keeping a running byteCursor that advances past every line and its newline did the job.