Trying to convert html_string
(could have more nested ul li elements) into ideal_data_output
let html_string = `<ul><li><p>one</p></li><li><p>two</p><ul><li><p>a</p></li><li><p>b</p></li><li><p>c</p></li></ul></li></ul>`;
let ideal_data_output = [
{ name: 'one' },
{
list: [
{
name: 'a'
},
{
name: 'b'
},
{
name: 'c'
}
],
name: 'two'
}
];
My attempt
// create an array from string
let html_tag_array = html_string.split('<');
// function to turn html_tag_array into ideal_data_output
const html_to_data = (arr) => {
function recursive(data, str_array) {
if (str_array == '') return data;
let str = str_array.shift();
let temp_obj = {
name: null
};
if (str.slice(0, 3) == 'ul>') {
temp_obj['list'] = [];
data.push(temp_obj);
return recursive(data[data.length - 1].list, str_array);
}
if (str.slice(0, 2) == 'p>') {
temp_obj.name = str.slice(2);
data.push(temp_obj);
}
return recursive(data, str_array);
}
return recursive([], arr);
};
This is unsuccessful resulting in output below
0: { name: "a" }
1: { name: "b" }
2: { name: "c" }
What is the best way to approach this type of problem? I am creating an issue with recursive(data[data.length - 1]
, how best to properly format this function call?
This should be done with a DOM parser.
Assuming the HTML structure always has the text in a separate p
element, and its only possible next sibling node is an ul
node, you can use this recursive function:
const dfs = ul =>
Array.from(ul.children, ({children: [{textContent: name}, lu]}) =>
lu ? { list: dfs(lu), name } : { name }
);
const html_string = `<ul><li><p>one</p></li><li><p>two</p><ul><li><p>a</p></li><li><p>b</p></li><li><p>c</p></li></ul></li></ul>`;
const {body} = new DOMParser().parseFromString(html_string, "text/html");
const result = dfs(body.children[0]); // Assumed to be the UL
console.log(result);