Search code examples
javascript, string, text-parsing

Getting 'begin' and 'end' of blocks in a string using JavaScript


I'm trying to make a function to get the beginning and end of part of a string similar to a code block, returning the corresponding character index. Example:

abc [ //A0 - start
   def [ //B1 - start
      ghi [ //C2 - start
         jkl
      ] //D2 - end
   ] //E1 - end
] //F0 - end

then a getBlock function would receive the string and index of one of the characters ("[" or "]") and return the index of its corresponding partner:

// Desired behaviour: each call returns the index of the bracket paired with the one at the given index.
var str = "abc [def [ghi [jkl]]]";

getBlock(str, 4); // returns 20 ("F0" in the example)
getBlock(str, 9); // returns 19 ("E1")
getBlock(str, 14); // returns 18 ("D2")
getBlock(str, 20); // returns 4 ("A0")
getBlock(str, 19); // returns 9 ("B1")
getBlock(str, 18); // returns 14 ("C2")

This is my functional attempt so far:

/**
 * Logs the positions of every "[" and every "]" found in `str`.
 *
 * The logged object has two arrays: `start` (indexes of "[") and `end`
 * (indexes of "]"). Nothing is returned and no pairing is attempted.
 *
 * @param {string} str - Text to scan for brackets.
 * @returns {void}
 */
function getBlock(str) {
    // Gather the index of each match of `pattern`, in order of appearance.
    const indexesOf = (pattern) => {
        const found = [];
        for (const match of str.matchAll(pattern)) {
            found.push(match.index);
        }
        return found;
    };

    const start = indexesOf(/\[/g);
    const end = indexesOf(/\]/g);
    console.log({ start, end });
}
// Logs { start: [4, 9, 14], end: [18, 19, 20] }
getBlock("abc [def [ghi [jkl]]]")

It returns two arrays containing the indexes of all start and end characters, but I still haven't been able to figure out the logic for finding each bracket's partner individually. Any idea if that's possible?


Solution

  • Ok I ended up creating a somewhat ugly solution, but it seems to work as expected:

    /**
     * Returns the bracket index that mirrors `i` in the ordered list of all
     * bracket positions in `str` (first pairs with last, second with
     * second-to-last, and so on).
     *
     * NOTE: this mirroring only matches real partners when the brackets are
     * strictly nested one inside another; sibling blocks such as "[a] [b]"
     * produce wrong pairs.
     *
     * @param {string} str - Text containing "[" and "]" characters.
     * @param {number} i - Index of a bracket in `str`.
     * @returns {number|undefined} The mirrored bracket index, or `undefined`
     *   when `i` is not the index of a bracket.
     */
    function getBlock(str, i) {
        // Positions of every "[" and "]" in order of appearance.
        const positions = [];
        for (const match of str.matchAll(/\[|]/g)) {
            positions.push(match.index);
        }

        const rank = positions.indexOf(i);
        const mirroredRank = positions.length - rank - 1;
        return positions[mirroredRank];
    }
    
    // must return 20 19 18 4 9 14
    var str = "abc [def [ghi [jkl]]]";
    // Query every bracket once and print all results in a single log line.
    console.log(...[4, 9, 14, 20, 19, 18].map((index) => getBlock(str, index)));
    

    the logic is very simple, instead of returning two arrays it returns only one:

    [4, 9, 14, 18, 19, 20]
    

    from there you just need to get the values from the "middle":

        14 18
      9      19
    4          20
    

    and that's it.

    Edit: It works for the case where each block has only one block inside it, but if there are two blocks side by side it doesn't. Example where it fails:

    // Bracket indexes here are [4, 8, 10, 14]; index 10 is at position 2, which mirrors to position 1 (index 8).
    var str = "abc [def] [ghi]";
    getBlock(str, 10) // returns 8 instead of 14
    

    Edit 2 (Solution):

    Okay, this should be the proper JavaScript solution, inspired by @HereticMonkey's comment pointing to the question "How do I find the position of matching parentheses or braces in a given piece of text?". The only differences are that this version is written in JavaScript and has been slightly changed so that the given index may be either the start or the end of a block.

    /**
     * Finds the bracket that pairs with the bracket at index `start`.
     *
     * Scans forward from a "[" or backward from a "]", tracking nesting depth,
     * and stops at the bracket that brings the depth back to zero.
     *
     * @param {string} str - Text to scan.
     * @param {number} start - Index of a "[" or "]" character in `str`.
     * @returns {{start: number, end: number}} `start` exactly as given, and
     *   `end` as the index of the matching bracket. `end` is -1 when `start`
     *   does not point at a bracket or the bracket has no partner.
     */
    function getBlock(str, start) {
      const origin = str[start];
      if (origin !== "[" && origin !== "]") {
        return { start, end: -1 };
      }

      // An opening bracket looks to the right, a closing bracket to the left.
      const step = origin === "[" ? 1 : -1;
      let depth = 1;

      // Bounding the scan by the string edges prevents an endless loop on
      // unbalanced input.
      for (let end = start + step; end >= 0 && end < str.length; end += step) {
        const char = str[end];
        if (char === origin) {
          // Same kind of bracket as the origin: one level deeper.
          depth += 1;
        } else if (char === "[" || char === "]") {
          depth -= 1;
          if (depth === 0) {
            return { start, end };
          }
        }
      }

      return { start, end: -1 };
    }
    
    // Expected output, in order:
    //   {start: 4, end: 14} {start: 9, end: 13} {start: 16, end: 20}
    var str = "abc [def [ijk]] [ghi]";
    console.log(...[4, 9, 16].map((index) => getBlock(str, index)));