Search code examples
jqueryjsonvalidation

JS check JSON for duplicate keys prior to loading?


Currently I'm loading a text file which contains JSON formatted data. I want to validate this prior to parsing it.

At the moment I'm loading it using the following :

$.getJSON('dataFile', function(data) {
    // `data` is the already-parsed result at this point, so the raw text
    // (and any duplicate keys it contained) is no longer available here.
});

This works, but I can't validate it for duplicate keys once it's loaded as no duplicate keys now exist.

ie: if the raw data file had duplicate keys, once loaded no duplicates exist as the last duplicate overwrites the previous entry.

Is there another way to load it so I can validate the keys prior to parsing ?

Thanks


Solution

  • As you wrote:

    JSON texts should not have duplicate keys, as per RFC 7159

    and therefore duplicate keys will be replaced when parsing JSON with JSON.parse() (which is what jQuery does behind the scenes). That means you need to check for duplicate keys before the JSON-parsing step.

    I do not have a clean solution, but depending on your validation needs it might be enough:

    You could basically use something like this to minify your incoming JSON data (from the request) and store it in a variable, then call JSON.parse() on the response data, stringify and minify that as well, and then compare the two for string equality. If they are not equal, you can deduce that there were duplicate keys, although not which keys exactly... but as I said, if you simply want to throw an error when duplicate keys of any kind are detected, that would be enough. (Note that a mismatch can also be caused by values that JSON.stringify() normalizes, such as a number written as 1.0 or a \u escape inside a string, so a mismatch only means "possible duplicate keys".)

    /*! JSON.minify()
        v0.1 (c) Kyle Simpson
        MIT License
    */
    /**
     * @name minify.json.js
     * @author Kei Funagayama <kei.topaz@gmail.com>
     * @overview JSON.minify
     */
    
    /**
     * @namespace JSON
     */
    (function (global) {
        'use strict';

        /**
         * Remove comments and insignificant whitespace from JSON-like text.
         *
         * Line comments, block comments and all whitespace outside of
         * double-quoted strings are dropped. String contents are kept as
         * written, except for raw CR/LF characters (which are not valid inside
         * a JSON string), which are removed.
         *
         * @function
         * @memberof JSON
         * @param {String} json JSON-like text, optionally containing comments.
         *     Non-string values are converted with String().
         * @return {String} The minified text.
         *
         * @example
         * JSON.minify('{ "foo": "bar", // this is cool\n "bar": [ "baz", "bum" ] }');
         * // => '{"foo":"bar","bar":["baz","bum"]}'
         */
        var minify = function (json) {
            var text = String(json),
                tokenizer = /"|(\/\*)|(\*\/)|(\/\/)|\n|\r|\[|]/g,
                whitespace = /\s+/g,
                in_string = false,
                in_multiline_comment = false,
                in_singleline_comment = false,
                pieces = [],
                from = 0,
                match, token, segment, backslashes, i;

            while ((match = tokenizer.exec(text)) !== null) {
                token = match[0];

                // Emit the text between the previous token and this one,
                // unless it lies inside a comment.
                if (!in_multiline_comment && !in_singleline_comment) {
                    segment = text.substring(from, match.index);
                    if (!in_string) {
                        segment = segment.replace(whitespace, '');
                    }
                    pieces.push(segment);
                }
                from = tokenizer.lastIndex;

                if (in_multiline_comment) {
                    // Only the closing marker matters inside a block comment.
                    if (token === '*/') {
                        in_multiline_comment = false;
                    }
                } else if (in_singleline_comment) {
                    // A line comment runs until the next line break.
                    if (token === '\n' || token === '\r') {
                        in_singleline_comment = false;
                    }
                } else if (token === '"') {
                    // Count the backslashes directly in front of the quote: an
                    // odd count means the quote itself is escaped.
                    backslashes = 0;
                    for (i = match.index - 1; i >= 0 && text.charAt(i) === '\\'; i--) {
                        backslashes++;
                    }
                    if (!in_string || backslashes % 2 === 0) {
                        in_string = !in_string;
                    }
                    from--; // include the " character in the next segment
                } else if (!in_string && token === '/*') {
                    in_multiline_comment = true;
                } else if (!in_string && token === '//') {
                    in_singleline_comment = true;
                } else if (token !== '\n' && token !== '\r') {
                    // Brackets, and comment markers that appear inside a
                    // string, are ordinary content and are copied through.
                    pieces.push(token);
                }
            }

            // Emit whatever follows the last token. It gets the same treatment
            // as any other segment: dropped inside a comment, kept verbatim
            // inside an (unterminated) string, stripped of whitespace otherwise.
            if (!in_multiline_comment && !in_singleline_comment) {
                segment = text.substring(from);
                if (!in_string) {
                    segment = segment.replace(whitespace, '');
                }
                pieces.push(segment);
            }

            return pieces.join('');
        };

        if (typeof module !== 'undefined' && module.exports) {
            // node
            module.exports = minify;
            JSON.minify = minify;
        } else {
            // others, export global
            if (typeof global.JSON === "undefined" || !global.JSON) {
                global.JSON = {};
            }
            global.JSON.minify = minify;
        }
    // `this` is undefined in ES modules and strict-mode callers, so prefer
    // globalThis when the runtime provides it.
    })(typeof globalThis !== 'undefined' ? globalThis : this);
    
    
    // Fetch the file as plain text and compare the minified raw text with the
    // minified parse/stringify round trip of the same text.
    $.ajax({
      url: 'https://jsonplaceholder.typicode.com/users',
      // make sure jQuery doesn't try to be smart and parse the JSON already
      dataType: 'text'
    }).done(function(data) {
      var raw = JSON.minify(data);
      var parsed;

      try {
        // Parsing keeps only the last value of a repeated key, so the
        // re-serialized text no longer matches the raw text in that case.
        parsed = JSON.minify(JSON.stringify(JSON.parse(data)));
      } catch (err) {
        // JSON.parse throws on malformed input; report it instead of letting
        // the exception escape the callback.
        console.error('Response is not valid JSON:', err);
        return;
      }

      if ( raw === parsed ) {
        console.log('No duplicate keys found');
      } else {
        console.log('Possible duplicate keys!');
      }
    }).fail(function(jqXHR, textStatus, errorThrown) {
      console.error('Request failed:', textStatus, errorThrown);
    });
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>

    If you then really need to go further and actually find which key is duplicate, you could run some kind of string-diff method to find the difference between raw and parsed and continue on from there.