Consider code like the following:
<p> </p><!-- comment -->
<span></span><br />
<div><span class="foo"></span></div>
which, in a browser, would effectively render as nothing but whitespace.
I'm wondering if, given that or similar markup, there is a straightforward, programmatic way to detect that the end result of this code with whitespace stripped is an empty string.
The implementation here is JavaScript but I'm also interested in a more general (language agnostic) solution as well if one exists.
Note that just stripping out the tags and seeing if any text remains is not a real fix as there are plenty of tags which do end up rendering visible content (e.g. img, hr, etc).
This is the answer I came up with. It uses a whitelist of tags that are assumed to render on the page whether they have content or not — all other tags are assumed to render only if they have actual text content. Once that is in place, the solution is fairly easy — it relies on the fact that the `innerText` property returns only the text content, with all tags stripped out.
This solution also ignores elements which render based on CSS (e.g. blocks with a background color, or where content is set for the `:after` or `:before` pseudo-elements), but fortunately this isn't relevant for my use case.
/**
 * Reports whether an HTML fragment would render as nothing but whitespace.
 *
 * A fragment counts as "whitespace" when, once parsed, it has no
 * non-whitespace text and contains none of the tags listed below, which are
 * assumed to render something even when empty. CSS-driven rendering
 * (backgrounds, generated content on pseudo-elements) is not considered.
 *
 * @param {string} input - HTML markup to inspect.
 * @returns {boolean} true if the markup has no visible text and none of the
 *     always-visible tags; false otherwise.
 */
function htmlIsWhitespace(input) {
  // Tags assumed to produce visible output regardless of their content.
  var visibleSelector = [
    'img', 'iframe', 'object', 'hr',
    'audio', 'video',
    'form', 'button', 'input', 'select', 'textarea'
  ].join(',');

  // Parse inside a separate document that has no browsing context rather than
  // in an element owned by the live page: assigning innerHTML on a live-page
  // element starts resource loads (e.g. <img src>) and can fire inline
  // handlers such as onerror even if the element is never attached.
  var body = document.implementation.createHTMLDocument('').body;
  body.innerHTML = input;

  // textContent is the standard property; for an element that is not being
  // rendered, innerText is specified to return the same value.
  if (body.textContent.trim().length > 0) {
    return false;
  }
  return body.querySelector(visibleSelector) === null;
}
// Test fixtures and runner for htmlIsWhitespace.

// Markup that should be reported as whitespace-only.
var testStringsYes = [
  "",
  "<a href='#'></a>",
  "<a href='#'><span></span></a>",
  "<a href='#'><span> <!-- comment --></span></a>",
  "<a href='#'><span> </span></a>",
  "<a href='#'><span> </span></a>",
  "<a href='#'><span> </span></a> ",
  "<p><a href='#'><span> </span></a> </p>",
  " <p><a href='#'><span> </span></a> </p> <p></p>",
  "<p>\n \n</p><ul><li></li></ul>"
];

// Markup that should be reported as having visible content.
var testStringsNo = [
  "<a href='#'><span> hi</span></a>",
  "<img src='#foo'>",
  "<hr />",
  "<div><object /></div>",
  "<div><iframe /></div>",
  "<div><object /></div>",
  "<div><!-- hi -->bye</div>",
  "<div><!-- what --><audio></audio></div>",
  "<div><!-- what --><video></video></div>",
  '<form><!-- empty --></form>',
  '<input type="text">',
  '<select name="foo"><option>1</option></select>',
  '<textarea>',
  '<input type="text">',
  '<form><input type="button"></form>',
  '<button />',
  '<button>Push</button>',
  "yo"
];

// Log each case before asserting so a failed assertion can be matched to the
// markup that was just printed.
testStringsYes.forEach(function (markup) {
  console.debug("Testing", markup);
  console.assert(htmlIsWhitespace(markup));
});

testStringsNo.forEach(function (markup) {
  console.debug("Testing", markup);
  console.assert(!htmlIsWhitespace(markup));
});