There is a web page that I would like to scrape some information from.
I start off with gathering a bunch of HTML Elements.
var theSearch = document.getElementsByClassName('theID');
I then take that HTML Collection and turn it into an array.
var arr = Array.prototype.slice.call( theSearch );
Now comes the tricky part.
I'd like to scroll down the page, and grab new items that have appeared on the page.
window.scrollTo(0, document.body.scrollHeight);
How does one access the newly inserted DOM nodes? Something like ...
var theSearch2 = document.getElementsByClassName('theID');
... and casting it into a new array ...
// Convert the second lookup (`theSearch2`) into a plain array.
var arr2 = Array.prototype.slice.call( theSearch2 );
... and pushing the items from `arr2` to `arr` like ...
arr.push(...arr2);
And how would one achieve an ongoing process which keeps scraping until no new items are appended to the page's DOM?
The OP might have a look into `MutationObserver`. Whenever new items are rendered into the DOM (triggered by the scrolling), the observer's callback receives a list of `MutationRecord` instances which the OP can act upon.
/**
 * MutationObserver callback: collects the nodes added by every
 * `childList` record into the outer `scrapedContentNodes` array and logs
 * the running collection once per such record.
 *
 * Records of any other type are ignored. A `childList` record with no
 * added nodes (e.g. a pure removal) adds nothing but is still logged.
 *
 * @param {Iterable<{type: string, addedNodes: Iterable<*>}>} mutationList
 *   The records handed over by the observer (the observer itself, passed
 *   as second argument, is not used).
 * @returns {void}
 */
function handleChildlistChanges(mutationList) {
  for (const record of mutationList) {
    if (record.type !== 'childList') {
      continue;
    }
    // one or more children have been added to
    // and/or removed from the tree.
    const newlyAdded = record.addedNodes;
    scrapedContentNodes.push(...newlyAdded);
    console.log({ scrapedContentNodes });
  }
}
// Collects every node reported as added by the observer callback.
const scrapedContentNodes = [];

// The element whose child list gets watched.
const target = document.querySelector('#items');

// Only `childList` is enabled; `attributes` and `subtree` are
// deliberately left off.
const options = { childList: true };

// Wire the callback to the target.
const observer = new MutationObserver(handleChildlistChanges);
observer.observe(target, options);
// test case ... creating content.
// Each text becomes a <p> element that is appended to `target` after a
// delay of 600ms times its position, so the observer fires once per item.
for (const [idx, content] of [
  'the quick',
  'brown fox',
  'jumped over',
  'the lazy dog.',
].entries()) {
  const paragraph = document.createElement('p');
  const textNode = document.createTextNode(content);
  paragraph.appendChild(textNode);

  setTimeout(() => target.appendChild(paragraph), 600 * idx);
}
.as-console-wrapper { left: auto!important; width: 70%; min-height: 100%; }
<div id="items">
</div>