Search code examples
rust lifetime ownership

Rust ownership and lifetime issue


use scraper::html::Select;

/// Downloads the page at `url` with a blocking request and parses the body as an HTML document.
///
/// # Panics
///
/// Panics if the request fails or if the response body cannot be read as text.
fn get_doc(url: String) -> scraper::Html {
    let body = reqwest::blocking::get(url)
        .unwrap()
        .text()
        .unwrap();
    scraper::Html::parse_document(&body)
}

// Parses `css_selector` and runs it against `doc`, returning the `Select` iterator directly.
//
// NOTE: this is the version that fails to compile (see the errors quoted below). The
// returned `Select` borrows from `doc` and from the local `html_product_selector`, so
// neither of its lifetimes can be `'static`.
fn get_item(doc: &scraper::Html, css_selector: String) -> Select<'static, 'static> {
    // Panics if parsing the selector fails.
    let html_product_selector = scraper::Selector::parse(&css_selector).unwrap();
    // Borrows `html_product_selector`, a local that is dropped when this function returns.
    doc.select(&html_product_selector)
}

// Fetches the example page and runs the `li.product` selector against it.
fn main() {
    let doc = get_doc("https://www.zenrows.com/blog/rust-web-scraping#get-target-webpage".to_string());
    // `item` is not used yet, hence the `unused variable` warning in the compiler output.
    let item = get_item(&doc, String::from("li.product"));
}

I'm fairly new to Rust (and therefore not totally comfortable with lifetimes and such). I'm following an article to build a scraper, and I'm splitting each section into its own function for use in a larger project. My issue is that `scraper::Html::select`, as used in `get_item`, seems to allocate memory which is owned by `get_item`.

Here's the error:

 cargo run                                              
   Compiling ws v0.1.0 (/Users/calebcosta/coding/ws)
error: lifetime may not live long enough
  --> src/main.rs:12:5
   |
10 | fn get_item(doc: &scraper::Html, css_selector: String) -> Select<'static, 'static> {
   |                  - let's call the lifetime of this reference `'1`
11 |     let html_product_selector = scraper::Selector::parse(&css_selector).unwrap();
12 |     doc.select(&html_product_selector)
   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ returning this value requires that `'1` must outlive `'static`

error[E0515]: cannot return value referencing local variable `html_product_selector`
  --> src/main.rs:12:5
   |
12 |     doc.select(&html_product_selector)
   |     ^^^^^^^^^^^----------------------^
   |     |          |
   |     |          `html_product_selector` is borrowed here
   |     returns a value referencing data owned by the current function
   |
   = help: use `.collect()` to allocate the iterator

warning: unused variable: `item`
  --> src/main.rs:17:9
   |
17 |     let item = get_item(&doc, String::from("li.product"));
   |         ^^^^ help: if this is intentional, prefix it with an underscore: `_item`
   |
   = note: `#[warn(unused_variables)]` on by default

For more information about this error, try `rustc --explain E0515`.
warning: `ws` (bin "ws") generated 1 warning
error: could not compile `ws` (bin "ws") due to 2 previous errors; 1 warning emitted

I asked ChatGPT how to work around this, but it gave me a ton of different answers, so I was wondering how an actual Rust dev would handle this. I'd also like to take the chance to get more familiar with lifetimes and ownership.


Solution

  • `Html::select()` borrows from both the `Html` and the `Selector`. The `Html` is a parameter, so you can return something that borrows from it (but you didn't tell the compiler that you do - this is the first error). The `Selector`, however, is a local variable, so you cannot return something that borrows from it - this is the second error. Luckily, there is a solution: `Html::select()` returns a `Select`, which is just an iterator over `ElementRef`s, and those borrow from the `Html` but not from the `Selector`. So you can just collect the iterator into a `Vec`:

    fn get_item<'a>(doc: &'a scraper::Html, css_selector: String) -> Vec<ElementRef<'a>> {
        let html_product_selector = scraper::Selector::parse(&css_selector).unwrap();
        doc.select(&html_product_selector).collect()
    }
    

    Alternatively, you can elide the lifetime, because it's the only lifetime:

    fn get_item(doc: &scraper::Html, css_selector: String) -> Vec<ElementRef<'_>> {
        // ...
    }
    

    Or, if you want only the first match:

    fn get_item(doc: &scraper::Html, css_selector: String) -> Option<ElementRef<'_>> {
        let html_product_selector = scraper::Selector::parse(&css_selector).unwrap();
        doc.select(&html_product_selector).next()
    }