I have an xlsx file in aws S3, i want to retrive it and convert to json.
I have tried many xlsx to json converters but they all seem to need the file path and wont work with raw file data. I am able to convert to wb and get ws but then the ws data contains lots of unicode that is unable to be passed to json.
var AWS = require('aws-sdk');
var XLSX = require('xlsx');
util.s3_getObject(bucketName, fileName, true, false).then(data => {
var wb = XLSX.read(data.data, {type:"buffer"});
var ws;
var target_sheet = "Sheet1";
try {
ws = wb.Sheets[target_sheet];
console.log(JSON.parse(JSON.stringify(ws)));
console.log(wb.SheetNames);
if(!ws) {
console.error("Sheet " + target_sheet + " cannot be found");
process.exit(3);
}
} catch(e) {
console.error("error parsing "+fileName+" "+target_sheet+": " + e);
}
console.log("converting sheet to json");
data = XLSX.utils.sheet_to_json(JSON.parse(JSON.stringify(ws)));
console.log("xlsx to json complete");
});
Is there away to remove the unicode or is there a better xlsx converter that will help with using the raw data from s3.getObject?
error parsing input/xls/test.xlsx Sheet1: SyntaxError: Unexpected token u in JSON at position 0
This worked for me
var wb = XLSX.read(data.data, {type:"buffer"});
var ws;
var target_sheet = "Sheet1";
try {
console.log("converting xlsx");
ws = wb.Sheets[target_sheet];
if(!ws) {
console.error("Sheet " + target_sheet + " cannot be found");
process.exit(3);
}
} catch(e) {
console.error("error parsing "+fileName+" "+target_sheet+": " + e);
process.exit(4);
}
console.log("converting sheet to json");
data = JSON.stringify(XLSX.utils.sheet_to_json(ws));