Search code examples
javascript html web-scraping beautifulsoup jssoup

Does JSSoup support select() similar to Beautiful Soup or JSoup?


Does JSSoup (which itself states "JavaScript + BeautifulSoup = JSSoup") support a select() operation similar to Beautiful Soup or JSoup to select elements based on a CSS selector?

I could not find it. Does it perhaps exist under a different name?


Solution

  • No. JSSoup does not provide selector-based querying along the lines of querySelector and querySelectorAll; instead, you filter elements with findAll, as shown below.

    Here is the findAll definition in JSSoup:

    // Excerpt of the `findAll` method descriptor (a `key`/`value` pair).
    // The `// ...` lines mark code that is omitted from this quotation.
    // NOTE(review): the `arguments`-based defaults look like transpiler output
    // for default parameters — confirm against the library source.
    {
      key: 'findAll',
      value: function findAll() {
        // Each of the three positional arguments is `undefined` when it is
        // omitted or explicitly passed as `undefined`.
        var name = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : undefined;
        var attrs = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : undefined;
        var string = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : undefined;
        // ...
        // The three values are handed, in the same order, to SoupStrainer.
        var strainer = new SoupStrainer(name, attrs, string);
        // ...
      }
    }
    

    And here is the SoupStrainer constructor:

    /**
     * Stores the `name`, `attrs` and `string` criteria on the new instance,
     * after normalising the shorthand forms of `attrs`.
     *
     * Accepted `attrs` forms:
     *   - a string            -> becomes `{ class: [thatString] }`
     *   - an array            -> becomes `{ class: thatArray }` (same array)
     *   - an object whose `class` is a non-empty string
     *                         -> its `class` is wrapped in a one-element array
     *   - anything else       -> kept as given
     *
     * Every entry of the resulting `class` list is trimmed in place, so an
     * array or object supplied by the caller is modified.
     *
     * @param {*} name   Stored unchanged as `this.name`.
     * @param {*} attrs  Attribute criteria or one of the shorthands above.
     * @param {*} string Stored unchanged as `this.string`.
     */
    function SoupStrainer(name, attrs, string) {
      // `_classCallCheck` is defined outside this excerpt.
      // NOTE(review): presumably rejects calls made without `new` — confirm.
      _classCallCheck(this, SoupStrainer);

      // Work on a local so the shorthand forms can be swapped for an object.
      let criteria = attrs;
      if (typeof criteria === 'string') {
        // A bare string is shorthand for a single class name.
        criteria = { class: [criteria] };
      } else if (Array.isArray(criteria)) {
        // A bare array is shorthand for a list of class names.
        criteria = { class: criteria };
      } else if (criteria && typeof criteria.class === 'string' && criteria.class !== '') {
        // Wrap a non-empty class string so `class` is list-like below.
        // This writes to the object the caller passed in.
        criteria.class = [criteria.class];
      }

      // Strip surrounding whitespace from each class entry, in place.
      const classNames = criteria?.class;
      if (classNames) {
        for (let idx = 0; idx < classNames.length; idx += 1) {
          classNames[idx] = classNames[idx].trim();
        }
      }

      this.name = name;
      this.attrs = criteria;
      this.string = string;
    }
    

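    The first argument is the tag name (the example below passes null in its place to filter by class alone), followed by the attributes. An attributes argument that is a string, or an array of strings, is treated as class name(s).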

    Example usage

    // Load the library; the constructor is taken from the module's `default` export.
    const { default: JSSoup } = require('jssoup');
    
    // Sample document: two <p> elements and one <div> carry class "foo";
    // only the second <p> also carries "bar".
    const html = `
    <html>
      <head>
        <title>Hello World</title>
      </head>
      <body>
        <h1>Hello World</h1>
        <p class="foo">First</p>
        <p class="foo bar">Second</p>
        <div class="foo">Third</div>
      </body>
    </html>
    `;
    
    /** Logs the given tags on one line: each is serialised with toString() and joined by a space. */
    function printTags(tags) {
      const rendered = tags.map((tag) => tag.toString());
      console.log(rendered.join(' '));
    }
    
    const soup = new JSSoup(html);
    
    // NOTE(review): the sample outputs below do not fully agree with the markup
    // above — they show class="foo" for the second <p> (declared "foo bar") and
    // list the <div class="foo"> for a 'bar' query. Re-run and confirm.
    
    // Tag name plus a class given as a plain string.
    const byClassString = soup.findAll('p', 'foo');
    printTags(byClassString);
    // <p class="foo">First</p> <p class="foo">Second</p>
    
    // Tag name plus an attributes object holding one class string.
    const byClassObject = soup.findAll('p', { class: 'foo' });
    printTags(byClassObject);
    // <p class="foo">First</p> <p class="foo">Second</p>
    
    // Same as above with a third argument, the string 'Second'.
    const byClassAndString = soup.findAll('p', { class: 'foo' }, 'Second');
    printTags(byClassAndString);
    // <p class="foo">Second</p>
    
    // Class given as an array of two names.
    const byClassList = soup.findAll('p', { class: ['foo', 'bar'] });
    printTags(byClassList);
    // <p class="foo">Second</p>
    
    // null in place of the tag name, class given as a plain string.
    const byClassOnly = soup.findAll(null, 'bar');
    printTags(byClassOnly);
    // <p class="foo bar">Second</p> <div class="foo">Third</div>