I have this basic code to handle all text nodes:
/**
 * Recursively visits `node` and every descendant reachable through
 * `firstChild` / `nextSibling`, passing each node whose `nodeType` loosely
 * equals '3' to `handleText`.
 *
 * No node is filtered out: every child is recursed into unconditionally.
 */
function walk (node) {
// Loose equality against the string '3'; a numeric nodeType of 3 also matches.
if (node.nodeType == '3') {
handleText (node)
}
// The parameter is reused as the cursor over the child list.
node = node.firstChild
while (node) {
walk (node)
node = node.nextSibling
}
}
Unfortunately, this handles all text nodes, including the text inside elements such as <script>
and <style>
, which I do not want. I updated my code to the following to ignore these specific elements:
/**
 * Recursively visits `node` and its descendants, passing a node to
 * `handleText` when its `nodeType` loosely equals '3' and its own `tagName`
 * is neither 'SCRIPT' nor 'STYLE'.
 *
 * NOTE(review): the tagName test is applied to the same node that passed the
 * nodeType test, not to its parent, and every child is still recursed into
 * unconditionally below. Text nodes presumably carry no tagName at all, so
 * this condition likely never excludes anything - confirm against the DOM.
 */
function walk (node) {
// All three conditions are evaluated on `node` itself.
if (node.nodeType == '3' && node.tagName != 'SCRIPT' && node.tagName != 'STYLE') {
handleText (node)
}
// The parameter is reused as the cursor over the child list.
node = node.firstChild
while (node) {
// No tagName check guards the recursion.
walk (node)
node = node.nextSibling
}
}
However, this is not working. What am I doing wrong?
A node
with a nodeType
of 3
will be a text node, and a text node has no tagName of its own, so the check you added is tested against the text node itself and never excludes anything. Since it sounds like you want to exclude the traversal of text nodes which are children of script
or style
tags, you should put the test elsewhere - only call walk(node)
if the node's tagName
is neither SCRIPT
nor STYLE
, so that the parent <script>
/ <style>
tag is not descended into in the first place:
/**
 * Depth-first traversal that passes every text node (nodeType 3) to
 * `handleText`, without descending into child nodes whose `tagName` is
 * 'SCRIPT' or 'STYLE'.
 *
 * Only children are filtered: the node passed in by the caller is always
 * processed, whatever its own tagName is.
 *
 * @param {Node} node - Root of the subtree to traverse.
 */
function walk (node) {
  if (node.nodeType === 3) {
    handleText(node);
  }

  // The parameter doubles as the cursor over the sibling chain.
  node = node.firstChild;
  while (node) {
    const tag = node.tagName;
    const isSkipped = tag === 'SCRIPT' || tag === 'STYLE';
    if (!isSkipped) {
      walk(node);
    }
    // Read the next sibling only after the recursive call has returned.
    node = node.nextSibling;
  }
}
(also note that nodeType
evaluates to an integer, so you can compare it against the number 3 with strict equality ===
instead of comparing it against the string '3' with ==)
To avoid a while
loop and reassignment, you might consider using forEach
instead:
/**
 * Depth-first traversal that passes every text node (nodeType 3) to
 * `handleText`, without descending into child nodes whose `tagName` is
 * 'SCRIPT' or 'STYLE'.
 *
 * Only children are filtered: the node passed in by the caller is always
 * processed, whatever its own tagName is.
 *
 * @param {Node} node - Root of the subtree to traverse.
 */
function walk (node) {
  if (node.nodeType === 3) {
    handleText(node);
    // Nothing more to do for a text node, so stop here.
    return;
  }
  // `childNodes` is array-like rather than a real array, so borrow
  // Array.prototype.forEach instead of calling a method on it directly.
  Array.prototype.forEach.call(
    node.childNodes,
    (childNode) => {
      const { tagName } = childNode;
      if (tagName !== 'SCRIPT' && tagName !== 'STYLE') {
        // Recurse with the callback's own parameter; the previous call
        // referenced an undefined name and threw a ReferenceError.
        walk(childNode);
      }
    }
  );
}