Search code examples
javascriptcsvvalidation

Validate CSV file against XSD in Javascript


Team, I need a javascript function to validate csv file is conform to a schema definition.

BU_NAME,BU_CONTACT,CONTEXT
US_BU,John,FIN
US_BU,Emily,SCM
,Eric,SCM
UK_BU,Joe,SCM

The above input file should throw error in line 3 as the mandatory BU_NAME is blank. But the below JS code didn't fall in catch block. It always returns 200 but I need 500 when there is csv format issue encountered.

resolve(fileSet) {
        const validator = require('xsd-schema-validator');
        let status = '';
        if (fileSet.length > 0) {
          //Grab the first (and only) file
          let csvFile = fileSet[0];
          //Check it's the correct type
          try {
            if (csvFile.type === 'text/csv') {
              alert('CSV FILE');
              //Create a File reader and its onload callback
              let fileReader = new FileReader();
              fileReader.onload = function (fileReadEvent) {
                let readCSVData = fileReadEvent.target.result;
                alert('result 11===>  ' + readCSVData);
                // resolve(readCSVData);
              };
              fileReader.readAsText(csvFile);
status = 200;
            }
            
          } catch (error) {
alert('status ===>  ' + status+ '  error ==> '+ error);
            return status = 500;
          }
        }
        
        return status;
      }

The following code works and validates the file but the Status variable value is not set neither 200 nor 500 accordingly. It always defaults to initial value 100. Status value is not reachable to the root function return.

readAndProcess(file) {
        let status = 100;
        let flag = 0;
        let reader = new FileReader();
        reader.onload = function (e) {
          let rows = e.target.result.split("\n");
         
          for (let i = 1; i < rows.length; i++) {
            let cells = rows[i].split(",");            
            if (cells.length < 2 || cells.length > 13) {
              flag = 1;
              break;
            }
            
            for (let j = 0; j < cells.length; j++) {
        
                    if (cells[0] === "" || cells[11] === "" || cells[j] === "\r") {
            
                flag = 1;
                break;
              }
            }
            if (flag === 1)
              break;
          }
          if (flag){
            window.alert("ERROR");
           status = 500;
           
          }
          else{
            window.alert("VALIDATED");
           status = 200;
        
          }
        };
        reader.readAsText(file);
        return status;
      }

Solution

  • Although it seems you moved the goal a bit with your second example (where is the XSD that the CSV is validated against?), a possible solution that works according to your validation rules could look like the following suggestion:

    function splitCSV(csvString) {
      // used to split the whole CSV contents into an array of rows,
      // each resembled as an array of strings
      return csvString
        .split(/\\n/g)                // split into rows
        .slice(1)                     // get rid of header row
        .map(row => row.split(/,/g)); // split each row into it's cells
    }
    
    function rowIsValid(row) {
      // resembles line 9 from your second example
      return row.length >= 2 && row.length <= 13; 
    }
    
    function cellIsValid(cell) {
      // resembles line 16 from your second example
      return cell.trim() !== '' && cell !== '\r';
    }
    
    function csvIsValid(csvString) {
      const rows = splitCSV(csvString); // split into rows of cells
      return rows.reduce(
        (isValidSoFar, row) => {
          if (isValidSoFar) {                // if all rows until now are valid...
            if (rowIsValid(row)) {           // ... and this one is valid ...
              return row.every(cellIsValid); // ... see if all cells in the row are valid
            }
          }
          return false;
        },
        true
      );
    }
    
    function readAndProcess(file) {
      // loading the file and validating it is an asynchronous process,
      // therefor we use a promise here
      return new Promise((resolve, reject) => {
        let reader = new FileReader();
        reader.onerror = reject;             // handle errors if load fails
        reader.onload = function (e) {
          if (csvIsValid(e.target.result)) {
            resolve(200);                    // file loaded & valid
          } else {
            resolve(500);                    // file loaded, but invalid
          }
        };
        reader.readAsText(file);
      });
    }
    

    And you'd use it like this:

    readAndProcess('path/to/csv/file.csv')
      .then((statusCode) => {
        console.log('CSV loaded, result is:', statusCode);
      })
      .catch((loadError) => {
        console.log('Failed to load the CSV file because:', loadError);
      });
    

    Note that csvIsValid can be written in a shorter form (which is terser, but - IMHO - a bit harder to understand overall):

    function csvIsValid(csvString) {
      const rows = splitCSV(csvString);
      return rows.reduce(
        (isValidSoFar, row) => 
          isValidSoFar && rowIsValid(row) && row.every(cellIsValid),
        true
      );
    }
    

    Which one you prefer is up to you.