Search code examples
javascript algorithm text text-processing

How to split an array of string nodes at given index?


I probably need to use another data structure, but I'm stuck with this one for now. I would appreciate any advice on this.

For now I have this data structure:

// Sample tree of text nodes. Every node has an `id`, a `type` and a `data`
// field; `data` is either a string (a leaf holding text) or an array of
// child nodes (a container), nested here up to three levels deep.
const data = [
    {
        id: 'node-1',
        type: 'text',
        data: 'Hello,'
    },
    {
        id: 'node-2',
        type: 'text',
        data: [
            {
                id: 'node-3',
                type: 'text',
                data: ' world.'
            }
        ]
    },
    {
        id: 'node-4',
        type: 'text',
        data: [
            {
                id: 'node-5',
                type: 'text',
                data: [
                    {   
                        id: 'node-6',
                        type: 'text',
                        data: 'Foo bar'
                    }
                ]
            }
        ]
    }
]

I need a function that produces the following result:

function split(arr, start_node, start_index, end_node, end_index) { ... }

const { before, range, after } = split(data, 'node-3', 3, 'node-6', 3)

// before
[
    {
        id: 'node-1',
        type: 'text',
        data: 'Hello,'
    },
    {
        id: 'node-2',
        type: 'text',
        data: [
            {
                id: 'node-3',
                type: 'text',
                data: ' wo'
            }
        ]
    },
]

// range
[
    {
        id: 'node-2',
        type: 'text',
        data: [
            {
                id: 'node-3',
                type: 'text',
                data: 'rld.'
            }
        ]
    },
    {
        id: 'node-4',
        type: 'text',
        data: [
            {
                id: 'node-5',
                type: 'text',
                data: [
                    {   
                        id: 'node-6',
                        type: 'text',
                        data: 'Foo'
                    }
                ]
            }
        ]
    }
]

// after
[
    {
        id: 'node-4',
        type: 'text',
        data: [
            {
                id: 'node-5',
                type: 'text',
                data: [
                    {   
                        id: 'node-6',
                        type: 'text',
                        data: ' bar'
                    }
                ]
            }
        ]
    }
]

The problem is to keep the nesting structure and to do that efficiently. The only solution I came up with was to do this in three separate loops, but that is obviously inefficient.


Solution

  • The code below will find all text nodes in the initial data structure, and will convert each node to a path array that will include all ancestors.

    Now, you can more easily split up the list of paths into a list of paths for before, after and within the range.

    Finally, take each of those lists of paths, and turn them back into regular objects.

    // Sample input: a tree of text nodes whose `data` is either a string
    // (leaf) or an array of child nodes.
    const data = [
      { id: 'node-1', type: 'text', data: 'Hello,' },
      {
        id: 'node-2',
        type: 'text',
        data: [{ id: 'node-3', type: 'text', data: ' world.' }],
      },
      {
        id: 'node-4',
        type: 'text',
        data: [
          {
            id: 'node-5',
            type: 'text',
            data: [{ id: 'node-6', type: 'text', data: 'Foo bar' }],
          },
        ],
      },
    ];
    
    /**
     * Splits a tree of text nodes into the parts before, inside and after a
     * character range, keeping the ancestor nesting of every part.
     *
     * A node is `{ id, type, data }`, where `data` is either an array of child
     * nodes (container) or a leaf value. The two boundary leaves must hold
     * strings because they are cut with `substring`. A boundary leaf appears in
     * both parts it touches, each time with its own slice of the text.
     *
     * @param {Array<object>} data - Root-level nodes. The input is not modified.
     * @param {string} start_node - Id of the leaf in which the range starts.
     * @param {number} start_index - Offset in the start leaf's text where the range begins.
     * @param {string} end_node - Id of the leaf in which the range ends.
     * @param {number} end_index - Offset in the end leaf's text where the range
     *   ends (exclusive). Measured against the original text, also when
     *   `start_node` and `end_node` are the same leaf.
     * @returns {{before: Array<object>, range: Array<object>, after: Array<object>}}
     *   Three newly built trees.
     */
    function split(data, start_node, start_index, end_node, end_index) {
      // Every leaf becomes a path [leaf, parent, ..., root]; ancestors are
      // stored as { id, type } only, so rebuilding never drags in old children.
      const paths = [];

      const toPaths = (nodes, ancestors = []) => {
        for (const { id, type, data: content } of nodes) {
          if (Array.isArray(content)) {
            toPaths(content, [{ id, type }, ...ancestors]);
          } else {
            paths.push([{ id, type, data: content }, ...ancestors]);
          }
        }
      };

      toPaths(data);

      // Keeps the paths on one side of the leaf `idMatch`.
      // side === 'before': everything up to the leaf, leaf text cut to [0, index).
      // side === 'after' : everything from the leaf on, leaf text cut to [index, end).
      const range = (pathList, idMatch, index, side) => {
        let inRegion = side === 'before';
        return pathList.flatMap(([leaf, ...ancestors]) => {
          if (leaf.id === idMatch) {
            // Crossing the boundary leaf flips which side we are on.
            inRegion = !inRegion;
            const text = inRegion
              ? leaf.data.substring(index)
              : leaf.data.substring(0, index);
            return [[{ id: leaf.id, type: leaf.type, data: text }, ...ancestors]];
          }
          return inRegion ? [[leaf, ...ancestors]] : [];
        });
      };

      const pathsBefore = range(paths, start_node, start_index, 'before');
      const pathsAfter = range(paths, end_node, end_index, 'after');

      // The inner call has already removed the first `start_index` characters
      // of the start leaf. When the range starts and ends in the same leaf, the
      // end offset must be shifted by that amount, otherwise the range would be
      // too long and overlap with `after`.
      const endIndexInRange =
        start_node === end_node
          ? Math.max(0, end_index - start_index)
          : end_index;
      const pathsDuring = range(
        range(paths, start_node, start_index, 'after'),
        end_node,
        endIndexInRange,
        'before',
      );

      // Turns a list of paths back into a nested tree, merging paths that share
      // an ancestor (matched by id) under a single node.
      const build = (pathList, siblings = []) => {
        for (const path of pathList) {
          if (path.length === 0) {
            continue;
          }
          // Paths are stored leaf-first, so the outermost node is the last entry.
          const outermost = path[path.length - 1];
          const remaining = path.slice(0, -1);

          let node = siblings.find(({ id }) => id === outermost.id);
          if (node === undefined) {
            node = structuredClone(outermost);
            siblings.push(node);
          }
          if (remaining.length > 0) {
            node.data ??= [];
            build([remaining], node.data);
          }
        }
        return siblings;
      };

      return {
        before: build(pathsBefore),
        range: build(pathsDuring),
        after: build(pathsAfter),
      };
    }
    
    // Demo: split the sample tree from offset 3 of node-3 to offset 3 of node-6.
    const { before, range, after } = split(data, 'node-3', 3, 'node-6', 3);

    // Print each part under its heading as pretty-printed JSON.
    const sections = [
      ['------- before:', before],
      ['------- range:', range],
      ['------- after:', after],
    ];
    for (const [heading, part] of sections) {
      console.log(heading);
      console.log(JSON.stringify(part, null, 2));
    }