Search code examples
javascriptarraysdictionarykey

Need to create an array of all keypaths of a JS nested dictionary


Let me use this dictionary as an example; it is information from the World Lexicon of Grammaticalization (WLG) that I have repackaged into a nested dictionary. This particular dictionary encodes everything (?) that "do" can be grammaticalized as, that is attested in the WLG:

let tDict = {
    "Causative": {},
    "Emphasis": {},
    "D-Necessity": {
        "Future": {
            "E-Necessity": {}
        },
        "B-Necessity": {
            "Future": {
                "E-Necessity": {}
            },
            "Purpose": {
                "Cause": {
                    "Adversative": {
                        "Mirative": {}
                    }
                },
                "Infinitive": {}
            }
        },
        "E-Necessity": {},
        "D-Possibility": {
            "D-Necessity": {}
        },
        "Purpose": {
            "Cause": {
                "Adversative": {
                    "Mirative": {}
                }
            },
            "Infinitive": {}
        }
    },
    "Pro-Verb": {},
    "Progressive": {
        "Habitual": {},
        "Imperfective": {},
        "Present": {}
    }
};

What I want is some function that takes in tDict and outputs a list of all maximum-depth key paths, i.e. the expected output is:

[
    ["Causative"],
    ["Emphasis"],
    ["D-Necessity", "Future", "E-Necessity"],
    ["D-Necessity", "B-Necessity", "Future", "E-Necessity"],
    ["D-Necessity", "B-Necessity", "Purpose", "Cause", "Adversative", "Mirative"],
    ["D-Necessity", "B-Necessity", "Purpose", "Infinitive"],
    ["D-Necessity", "E-Necessity"],
    ["D-Necessity", "D-Possibility", "D-Necessity"],
    ["D-Necessity", "Purpose", "Cause", "Adversative", "Mirative"],
    ["D-Necessity", "Purpose", "Infinitive"],
    ["Pro-Verb"],
    ["Progressive", "Habitual"],
    ["Progressive", "Imperfective"],
    ["Progressive", "Present"]
]

I already have a function kicking around that computes the Cartesian product of two arrays. It can be called repeatedly with a one-element array as the first argument, such that the effect is just to concatenate the first argument onto every subarray of the second argument simultaneously:

var Cartesian = (a, b) => a.reduce((r, v) => r.concat(b.map(w => [].concat(v, w))), []);

tCart1 = Cartesian(["w"], ["x","y","z"]);
console.log("1st Cartesian Product:");
console.log(tCart1);

tCart2 = Cartesian(["v"], tCart1);
console.log("2nd Cartesian Product:");
console.log(tCart2);

tCart3 = Cartesian(["u"], tCart2);
console.log("3rd Cartesian Product:");
console.log(tCart3);

So my thought was to create a recursive function that exploits that property of Cartesian, by using it to concatenate each dictionary key onto the front of each subarray of a lower level call:

function GetAllKeypaths(t) {
    return Object.keys(t).map( e => Cartesian([e], GetAllKeypaths(t[e]) ) );
}

This... does not work. It creates all sorts of duplicated dictionary keys all over the place, and adds a lot of additional unnecessary nesting.

It did occur to me that maybe the nesting problem is because Cartesian returns a nested array which just gets smooshed into a single index of the final output array because Array.map does a 1-to-1 mapping. So I tried dumping all the Cartesian outputs separately into a communal table one-by-one instead:

function GetAllKeypaths(t) {
    let tOut = [];
    Object.keys(t).forEach( e => { 
        Cartesian([e], GetAllKeypaths(t[e])).forEach( e2 => { tOut.push(e2) });
    });
    return tOut;
}

But this doesn't work either - it returns an entirely empty array.

What should I do instead to return the desired output?

let tDict = {
    "Causative": {},
    "Emphasis": {},
    "D-Necessity": {
        "Future": {
            "E-Necessity": {}
        },
        "B-Necessity": {
            "Future": {
                "E-Necessity": {}
            },
            "Purpose": {
                "Cause": {
                    "Adversative": {
                        "Mirative": {}
                    }
                },
                "Infinitive": {}
            }
        },
        "E-Necessity": {},
        "D-Possibility": {
            "D-Necessity": {}
        },
        "Purpose": {
            "Cause": {
                "Adversative": {
                    "Mirative": {}
                }
            },
            "Infinitive": {}
        }
    },
    "Pro-Verb": {},
    "Progressive": {
        "Habitual": {},
        "Imperfective": {},
        "Present": {}
    }
};

var Cartesian = (a, b) => a.reduce((r, v) => r.concat(b.map(w => [].concat(v, w))), []);

function GetAllKeypaths(t) {
    return Object.keys(t).map( e => Cartesian([e], GetAllKeypaths(t[e]) ) );
}

console.log(GetAllKeypaths(tDict));

GetAllKeypaths = function (t) {
    let tOut = [];
    Object.keys(t).forEach( e => { 
        Cartesian([e], GetAllKeypaths(t[e])).forEach( e2 => { tOut.push(e2) });
    });
    return tOut;
}

console.log(GetAllKeypaths(tDict));
  


Solution

  • Looking at your approach with the .forEach(), you're trying to take the cartesian product of your key with an array that is potentially empty. If you think about a simple object such as {a: {}}, you're trying to perform cartesian(['a'], []) within your loop over the keys, which will give you an empty array. This issue propagates up through to the recursive calls, resulting in an empty array as your return value. One option would be to check that the length of the recursive call is empty, and if it is, return the current key (this is your base-case), otherwise, perform the recursive calls. In the below code, I've essentially done that, but instead, used Object.keys() on the object we're about to recurse on to determine whether the recursive call is going to return an empty array (we can do this, as the key paths of an empty object will be an empty array).

    let tDict = { "Causative": {}, "Emphasis": {}, "D-Necessity": { "Future": { "E-Necessity": {} }, "B-Necessity": { "Future": { "E-Necessity": {} }, "Purpose": { "Cause": { "Adversative": { "Mirative": {} } }, "Infinitive": {} } }, "E-Necessity": {}, "D-Possibility": { "D-Necessity": {} }, "Purpose": { "Cause": { "Adversative": { "Mirative": {} } }, "Infinitive": {} } }, "Pro-Verb": {}, "Progressive": { "Habitual": {}, "Imperfective": {}, "Present": {} } };
    
    const cartesian = (a, b) => a.reduce((r, v) => r.concat(b.map(w => [].concat(v, w))), []);
    
    function getAllKeyPaths(t) {
      return Object.entries(t).flatMap(([key, val]) => Object.keys(val).length > 0 // if the current object is not "empty"
        ? cartesian([key], getAllKeyPaths(val)) // get the cartesian product of its child object's keys with this key
        : [[key]] // else, return just the current key 
      );
    }
    
    console.log(getAllKeyPaths(tDict));

    Another approach that you could also try is a backtracking one. Below, I begin by grabbing the object's entries [[key, value], ...] pairs. For each key, I push the key into a step array, which I continue to build up with the children's object's keys by recursively calling the helper function. Eventually, when I hit a child object with no keys, I push this step array into the res array which is eventually returned. Each time I finish pushing a key into the step array and the recursive call completes, I'm also using .pop() on the step array to effectively "undo" the last key I pushed, allowing the next iteration to build another combination for the step array:

    const tDict = { "Causative": {}, "Emphasis": {}, "D-Necessity": { "Future": { "E-Necessity": {} }, "B-Necessity": { "Future": { "E-Necessity": {} }, "Purpose": { "Cause": { "Adversative": { "Mirative": {} } }, "Infinitive": {} } }, "E-Necessity": {}, "D-Possibility": { "D-Necessity": {} }, "Purpose": { "Cause": { "Adversative": { "Mirative": {} } }, "Infinitive": {} } }, "Pro-Verb": {}, "Progressive": { "Habitual": {}, "Imperfective": {}, "Present": {} } };
    
    function getAllKeyPaths(obj) {
      const res = [];
      function helper(obj, step = []) {
        const entries = Object.entries(obj);
        if(entries.length === 0 && step.length > 0) // checking step.length handles if `getAllKeyPaths()` is called with an empty object
          res.push([...step]);
    
        for(const [key, val] of entries) {
          step.push(key);
          helper(val, step);
          step.pop();
        }
      }
      helper(obj);
      return res;
    }
    
    console.log(getAllKeyPaths(tDict));