Search code examples
python web-scraping beautifulsoup python-requests viewstate

HTTPError 500 when trying to scrape an ASP webpage


I want to scrape http://quotes.toscrape.com/search.aspx to obtain all quotes. I get an error 500 in spite of saving the ___VIEWSTATE parameter and passing it.

import requests
from bs4 import BeautifulSoup

# Walk the .aspx search form on quotes.toscrape.com: for every author, POST the
# author to get that author's tags, then POST each author/tag pair and print
# the quote that comes back.
url = 'http://quotes.toscrape.com/search.aspx'
# Optional browser-like User-Agent, currently disabled.
#headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0"}
# A single Session is reused for the initial GET and every POST below.
s = requests.Session()
#s.headers.update(headers)
page = s.get(url)
page.raise_for_status()

# No parser is named here, so BeautifulSoup chooses one itself.
soup = BeautifulSoup(page.content)
authors = soup.find('select', id="author")
# From here on `url` is the endpoint the POST requests are sent to.
url = 'http://quotes.toscrape.com/filter.aspx'
for author in authors.stripped_strings:
    # '----------' is skipped; presumably the dropdown's placeholder entry.
    if author != '----------':
        # A fresh form payload per author; the tag starts at '----------'.
        parameters = {'tag'    : '----------'}
        # NOTE(review): the key is written with THREE underscores, while the
        # value is read from the element whose id is "__VIEWSTATE" (two).
        # The posted answer attributes the HTTP 500 below to this mismatch.
        parameters.update({"___VIEWSTATE" : soup.select_one("#__VIEWSTATE")["value"]})
        parameters.update({"author" : author}) # author.replace(" ", "+") ? (requests form-encodes `data` itself)
        print(parameters)                                                   
        page = s.post(url, data = parameters)
        page.raise_for_status() # the HTTP 500 error is raised here
        # `soup` now holds the response for this author, including its tag list.
        soup = BeautifulSoup(page.content)
        tags = soup.find('select', id="tag")
        # Added once per author and kept in the payload for every tag POST.
        parameters.update({"submit_button" : "Search"})
        for tag in tags.stripped_strings:
            if tag != '----------':
                # Same three-underscore key as above; the value is taken from
                # whichever response `soup` was most recently rebound to.
                parameters.update({"___VIEWSTATE" : soup.select_one("#__VIEWSTATE")["value"]})
                parameters.update({"tag" : tag})
                page = s.post(url, data = parameters)
                page.raise_for_status()
                # Rebinding `soup` here means the next iteration (and the next
                # author) reads its view state from this latest response.
                soup = BeautifulSoup(page.content)
                print(author + "-" + tag)
                # Prints the text of the first <span class="content"> only.
                print(soup.find("span", class_="content").get_text())

Solution

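  • Just change the key "___VIEWSTATE" (three underscores) to "__VIEWSTATE" (two underscores, matching the id of the hidden field the value is read from) in both places where it is set:

    parameters.update({"___VIEWSTATE" : soup.select_one("#__VIEWSTATE")["value"]})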