Search code examples
javascriptjquerydataformat

what is the format of this data? is it a custom format?


I get this data as an ajax response:

{
    "idArray" = (
        "99516",
        "99518",
        "97344",
        "97345",
        "98425"
    );
    "frame" = {
        "size" = {
            "width" = "8";
            "height" = "8";
        };
        "origin" = {
            "x" = "244";
            "y" = "345";
        };
    };
},

This is just a portion of the data, but the rest continues in the same format. I don't have access to the source of the files that generate this data.

Is this a known format or something custom?


Solution

  • Since people tend to throw regular expressions at everything, even things that cannot be parsed with regular expressions (i.e. non-regular languages), I've written a proof-of-concept parser for this data format:

    // Sample document in the format shown in the question.
    $input = '{
        "idArray" = (
            "99516",
            "99518",
            "97344",
            "97345",
            "98425"
        );
        "frame" = {
            "size" = {
                "width" = "8";
                "height" = "8";
            };
            "origin" = {
                "x" = "244";
                "y" = "345";
            };
        };
    }';
    
    // Parse the sample into nested PHP arrays, then print it as JSON.
    $parsed = parse($input);
    echo json_encode($parsed);
    
    // Entry point: turns the raw text into nested PHP arrays.
    //
    // The text is tokenized completely, then exactly one value is parsed
    // starting at the first token. Anything left over after that value is
    // treated as an error.
    //
    // Returns the payload of the parsed value (without its type tag).
    // Throws Exception when tokens remain after the top-level value.
    function parse($input) {
        $tokens = tokenize($input);
        list($node, $next) = parse_value($tokens, 0);
        if ($next !== count($tokens)) {
            throw new Exception("parsing stopped at token " . $next . " but there is more input");
        }
        // $node is array($type, $payload); callers only get the payload
        return $node[1];
    }
    
    // Splits the whole input into a flat list of tokens.
    //
    // Each token is whatever find_token() returns: either one of the
    // punctuation strings, or array("value", $text) for a quoted string.
    //
    // Trailing whitespace is excluded from the scanned range. Without that,
    // input ending in e.g. a newline would make the loop ask find_token()
    // for a token when only whitespace is left, which it reports as
    // "invalid input". Empty or whitespace-only input yields an empty list.
    function tokenize($input) {
        $tokens = array();
        // same characters that find_token() skips as whitespace
        $length = strlen(rtrim($input, " \t\n\r\v\f"));
        $pos = 0;
        while ($pos < $length) {
            list($token, $pos) = find_token($input, $pos);
            $tokens[] = $token;
        }
        return $tokens;
    }
    
    // Reads one token from $input, starting at byte offset $pos.
    //
    // Leading whitespace is skipped first. The token is then either
    //   - one of the single-character punctuation tokens, returned as a
    //     plain string, or
    //   - a double-quoted string, returned as array("value", $text) where
    //     $text is the content between the quotes.
    // Backslash escapes are not interpreted: the string ends at the first
    // '"' after the opening one.
    //
    // Returns array($token, $next_pos), where $next_pos is the offset just
    // past the token.
    // Throws Exception on any other character, on an unterminated string,
    // or when no token is left; the message carries the offset and a short
    // excerpt of the input around it.
    function find_token($input, $pos) {
        $static_tokens = array("=", "{", "}", "(", ")", ";", ",");

        // eat whitespace
        $pos += strspn($input, " \t\n\r\v\f", $pos);

        $char = substr($input, $pos, 1);
        if (in_array($char, $static_tokens, true)) {
            return array($char, $pos + 1);
        }
        if ($char === '"') {
            $closing = strpos($input, '"', $pos + 1);
            if ($closing !== false) {
                $text = substr($input, $pos + 1, $closing - $pos - 1);
                return array(array("value", $text), $closing + 1);
            }
        }

        // Clamp the excerpt start: a negative start would make substr()
        // count from the END of the string and show unrelated text.
        $excerpt_start = max(0, $pos - 10);
        throw new Exception("invalid input at " . $pos . ": `" . substr($input, $excerpt_start, 20) . "`");
    }
    
    // value is either an object {}, an array (), or a literal ""
    //
    // Parses the value that starts at $tokens[$index].
    //
    // Returns array(array($type, $payload), $next_index) where $type is
    // "object", "array" or "string", $payload is the decoded PHP value
    // (associative array, list, or string) and $next_index is the index of
    // the first token after the value.
    //
    // Throws Exception("Unexpected: ...") on a token that does not fit the
    // grammar, and Exception("Unexpected end of input") when the token list
    // ends in the middle of a value (e.g. a missing "}" or ")").
    function parse_value($tokens, $index) {
        $token = parse_value_token_at($tokens, $index);

        if ($token === "{") {  // object: a list of key-value pairs, each terminated by ";"
            $return_value = array();
            $index += 1;
            while (parse_value_token_at($tokens, $index) !== "}") {
                list($key, $value, $index) = parse_key_value($tokens, $index);
                $return_value[$key] = $value[1];
                $separator = parse_value_token_at($tokens, $index);
                if ($separator !== ";") {
                    throw new Exception("Unexpected: " . print_r($separator, true));
                }
                $index += 1;
            }
            // $index points at "}"; skip it
            return array(array("object", $return_value), $index + 1);
        }

        if ($token === "(") {  // array: a list of values, glued together by ",", the last "," is optional
            $return_value = array();
            $index += 1;
            while (parse_value_token_at($tokens, $index) !== ")") {
                list($value, $index) = parse_value($tokens, $index);
                $return_value[] = $value[1];
                $separator = parse_value_token_at($tokens, $index);
                if ($separator === ",") {  // trailing "," before ")" is allowed
                    $index += 1;
                    continue;
                }
                if ($separator !== ")") {
                    throw new Exception("Unexpected: " . print_r($separator, true));
                }
                break;
            }
            // $index points at ")"; skip it
            return array(array("array", $return_value), $index + 1);
        }

        // punctuation tokens are plain strings; only quoted strings are arrays
        if (is_array($token) && $token[0] === "value") {
            return array(array("string", $token[1]), $index + 1);
        }
        throw new Exception("Unexpected: " . print_r($token, true));
    }

    // Helper for parse_value(): returns $tokens[$index], or throws when the
    // index is past the end of the token list instead of reading an
    // undefined offset.
    function parse_value_token_at($tokens, $index) {
        if (!isset($tokens[$index])) {
            throw new Exception("Unexpected end of input");
        }
        return $tokens[$index];
    }
    
    // find a key (string) followed by '=' followed by a value (any value)
    //
    // Parsing starts at $tokens[$index], which must be the key.
    //
    // Returns array($key_text, $value, $next_index): $key_text is the key's
    // string content, $value is the array($type, $payload) pair produced by
    // parse_value() for the right-hand side, and $next_index is the index of
    // the first token after that value.
    //
    // Throws Exception when the key is not a string literal or when the key
    // is not followed by "=" (including when the input ends after the key).
    function parse_key_value($tokens, $index) {
        list($key, $index) = parse_value($tokens, $index);
        if ($key[0] !== "string") { // key must be a string
            throw new Exception("Unexpected: " . print_r($key, true));
        }
        // isset() guards against reading past the end of a truncated input
        if (!isset($tokens[$index]) || $tokens[$index] !== "=") {
            throw new Exception("'=' expected");
        }
        $index += 1;
        list($value, $index) = parse_value($tokens, $index);
        return array($key[1], $value, $index);
    }
    

    The output is:

    {"idArray":["99516","99518","97344","97345","98425"],"frame":{"size":{"width":"8","height":"8"},"origin":{"x":"244","y":"345"}}}
    

    Notes

    • The original input has a trailing `,`. I've removed that character; if you put it back, the parser throws an error ("there is more input").

    • This parser is naive in the sense that it tokenizes all input before it starts parsing. This is not good for large input.

    • I've not added escape detection for strings in the tokenizer, so a string containing an escaped quote, like "foo\"bar", is not handled.

    This was a fun exercise. If you have any questions let me know.

    Edit: I see this is a JavaScript question. Porting the PHP to JavaScript shouldn't be too hard. The list($foo, $bar) = func() is equivalent to: var res = func(); var foo = res[0]; var bar = res[1];