My objective is to get data from this site: https://pokemondb.net/pokedex/all
I'm struggling to get the abilities; each one must be captured like this:
URL Name Description of effect
But some of this information is on another page. Any tips on how I can get it?
I need to follow the link for each ability and gather the information from that page.
My code currently looks like this:
import scrapy
class PokeSpider(scrapy.Spider):
    """Spider that scrapes one Pokémon detail page from pokemondb.net.

    ``parse`` reads the Pokédex index table and follows a detail-page link;
    ``parser_pokemon`` extracts the fields of that detail page and yields
    them as a plain dict.
    """

    name = 'pokespider'
    start_urls = ['https://pokemondb.net/pokedex/all']

    def parse(self, response):
        """Follow the detail link found in the first row of the Pokédex table.

        Args:
            response: The downloaded Pokédex index page.

        Yields:
            A request for the linked detail page, handled by
            ``parser_pokemon``. Nothing is yielded when no link is found.
        """
        # NOTE: the selector targets only the first table row, so exactly one
        # detail page is requested per run.
        linha = response.css('table#pokedex > tbody > tr:first-child')
        href = linha.css("td:nth-child(2) > a::attr(href)").get()
        if href is None:
            # The selector matched nothing; there is no URL to follow.
            self.logger.warning('No Pokemon link found on %s', response.url)
            return
        yield response.follow(href, self.parser_pokemon)

    def parser_pokemon(self, response):
        """Extract the fields of a single Pokémon detail page.

        Args:
            response: The downloaded detail page.

        Yields:
            One dict with the keys ``nome``, ``id``, ``tamanho``, ``peso``,
            ``url_pokemon``, ``tipos`` and ``evolucoes``. Any field whose
            selector matches nothing is ``None`` (or an empty list for
            ``tipos`` / ``evolucoes``).
        """
        nome = response.css('h1::text').get()
        # Named pokemon_id locally so the builtin ``id`` is not shadowed; the
        # output key is still "id".
        pokemon_id = response.css('table.vitals-table > tbody > tr:nth-child(1) > td > strong::text').get()
        tamanho = response.css('table.vitals-table > tbody > tr:nth-child(4) > td::text').get()
        peso = response.css('table.vitals-table > tbody > tr:nth-child(5) > td::text').get()
        # Keep at most the first two type links from the second vitals row.
        tipos = response.css('table.vitals-table tbody tr:nth-child(2) td a::text').getall()[:2]

        evolucoes = []
        evolucoes_possiveis = response.css('#main div.infocard-list-evo div span.infocard-lg-data.text-muted')
        for evolucao in evolucoes_possiveis:
            url_evolucao = evolucao.css('a::attr(href)').get()
            evolucoes.append({
                "nome_evolucao": evolucao.css('a::text').get(),
                "id_evolucao": evolucao.css('small:nth-child(1)::text').get(),
                # urljoin resolves the href against the page URL; a missing
                # href stays None instead of becoming the bogus string
                # "https://pokemondb.netNone".
                "url_evolucao": (
                    response.urljoin(url_evolucao)
                    if url_evolucao is not None
                    else None
                ),
            })

        yield {
            "nome": nome,
            "id": pokemon_id,
            "tamanho": tamanho,
            "peso": peso,
            "url_pokemon": response.url,
            "tipos": tipos,
            "evolucoes": evolucoes,
        }
I advise you to read the documentation about cb_kwargs at https://docs.scrapy.org/en/latest/topics/debug.html?highlight=cb_kwargs and about Scrapy items at https://docs.scrapy.org/en/latest/topics/items.html
You can make the next request and pass the data collected so far to the next callback through the meta argument (cb_kwargs is the mechanism Scrapy now prefers for this, but meta works too), like this:
def parser_pokemon(self, response):
    """Extract a Pokémon's fields and hand them to a follow-up request.

    Instead of yielding the item directly, the collected fields are attached
    to the next request under ``meta['pokemon_attribs']`` so that
    ``parse_attributes`` can complete the item with data from another page.

    Args:
        response: The downloaded Pokémon detail page.

    Yields:
        A single ``scrapy.Request`` whose ``meta['pokemon_attribs']`` dict
        holds the keys ``nome``, ``id``, ``tamanho``, ``peso``,
        ``url_pokemon``, ``tipos`` and ``evolucoes``.
    """
    nome = response.css('h1::text').get()
    # Named pokemon_id locally so the builtin ``id`` is not shadowed; the
    # output key is still "id".
    pokemon_id = response.css('table.vitals-table > tbody > tr:nth-child(1) > td > strong::text').get()
    tamanho = response.css('table.vitals-table > tbody > tr:nth-child(4) > td::text').get()
    peso = response.css('table.vitals-table > tbody > tr:nth-child(5) > td::text').get()
    # Keep at most the first two type links from the second vitals row.
    tipos = response.css('table.vitals-table tbody tr:nth-child(2) td a::text').getall()[:2]

    evolucoes = []
    evolucoes_possiveis = response.css('#main div.infocard-list-evo div span.infocard-lg-data.text-muted')
    for evolucao in evolucoes_possiveis:
        url_evolucao = evolucao.css('a::attr(href)').get()
        evolucoes.append({
            "nome_evolucao": evolucao.css('a::text').get(),
            "id_evolucao": evolucao.css('small:nth-child(1)::text').get(),
            # urljoin resolves the href against the page URL; a missing href
            # stays None instead of becoming "https://pokemondb.netNone".
            "url_evolucao": (
                response.urljoin(url_evolucao)
                if url_evolucao is not None
                else None
            ),
        })

    # VVVVVVVVVVVV next code is updated VVVVVVVVVVVV
    # scrapy.Request is used because the file only does ``import scrapy``;
    # a bare ``Request`` name is not defined here.
    yield scrapy.Request(
        # Placeholder URL: replace with the page that holds the extra data.
        url='https://example.com/next_page_path',
        callback=self.parse_attributes,
        meta={
            'pokemon_attribs': {
                "nome": nome,
                "id": pokemon_id,
                "tamanho": tamanho,
                "peso": peso,
                "url_pokemon": response.url,
                "tipos": tipos,
                "evolucoes": evolucoes,
            },
        },
    )
def parse_attributes(self, response):
    """Complete the item carried in ``response.meta`` and yield it.

    Args:
        response: The follow-up page; its ``meta['pokemon_attribs']`` dict
            holds the fields gathered by the previous callback.

    Yields:
        That same dict, updated in place with a ``pokemon_lastname`` key set
        to the text of the first ``a`` element matched on this page
        (``None`` when nothing matches).
    """
    attribs = response.meta['pokemon_attribs']
    attribs['pokemon_lastname'] = response.css('a::text').get()
    yield attribs