Search code examples
amazon-web-servicesamazon-s3npmxlsx

convert raw aws s3 xlsx data to json


I have an xlsx file in aws S3, i want to retrive it and convert to json.

I have tried many xlsx to json converters but they all seem to need the file path and wont work with raw file data. I am able to convert to wb and get ws but then the ws data contains lots of unicode that is unable to be passed to json.

var AWS = require('aws-sdk');
var XLSX = require('xlsx');
util.s3_getObject(bucketName, fileName, true, false).then(data => {
    var wb = XLSX.read(data.data, {type:"buffer"});
    var ws;
    var target_sheet = "Sheet1";
    try {
        ws = wb.Sheets[target_sheet];
        console.log(JSON.parse(JSON.stringify(ws)));
        console.log(wb.SheetNames);
        if(!ws) {
            console.error("Sheet " + target_sheet + " cannot be found");
            process.exit(3);
        }
    } catch(e) {
        console.error("error parsing "+fileName+" "+target_sheet+": " + e);
    }
    console.log("converting sheet to json");

    data = XLSX.utils.sheet_to_json(JSON.parse(JSON.stringify(ws)));
    console.log("xlsx to json complete");
});

Is there away to remove the unicode or is there a better xlsx converter that will help with using the raw data from s3.getObject?

error parsing input/xls/test.xlsx Sheet1: SyntaxError: Unexpected token u in JSON at position 0

Solution

  • This worked for me

    var wb = XLSX.read(data.data, {type:"buffer"});
    var ws;
    var target_sheet = "Sheet1";
    try {
        console.log("converting xlsx");
        ws = wb.Sheets[target_sheet];
        if(!ws) {
            console.error("Sheet " + target_sheet + " cannot be found");
            process.exit(3);
        }
    } catch(e) {
        console.error("error parsing "+fileName+" "+target_sheet+": " + e);
        process.exit(4);
    }
    console.log("converting sheet to json");
    data = JSON.stringify(XLSX.utils.sheet_to_json(ws));