I'm making a web scraper using Scrapy that collects data on exchange rates, and want to display the data in a table using mysql, but with my code it only displays the first set of data, for the currency and exchange rate, and I'm not sure how to make it display all the data. Heres my code:
Spider code:
import scrapy
from ..items import EurotocurrencyItem
class CurrencySpider(scrapy.Spider):
name = 'currency'
start_urls = [
'https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/index.en.html'
]
def parse(self, response):
items = EurotocurrencyItem()
currency = response.xpath('//td[@class="currency"]//text()').extract()
rate = response.css('.rate::text').extract()
items['currency'] = currency
items['rate'] = rate
yield items
Piplines.py code:
import mysql.connector
class EurotocurrencyPipeline:
def __init__(self):
self.create_connection()
self.create_table()
def create_connection(self):
self.conn = mysql.connector.connect(
host='localhost',
user='root',
passwd='notactualpassword',
database='currency'
)
self.curr = self.conn.cursor()
def create_table(self):
self.curr.execute("""DROP TABLE IF EXISTS currency_tb""")
self.curr.execute("""create table currency_tb(
currency text,
rate text
)""")
def process_item(self, item, spider):
self.store_db(item)
return item
def store_db(self, item):
self.curr.execute("""insert into currency_tb values(%s, %s )""", (
item['currency'][0],
item['rate'][0],
))
self.conn.commit()
In the logic you're currently using, you only have 1 item, that looks something like this:
item = {'currency': ["USD", "JPY", ...],
'rate': ["1.0876", "115.87", ...]}
In your store_db method you will then only insert the first element of each list into mysql. You should rewrite the logic in parse in order to yield 1 item per exchange rate:
exchange_rates = response.xpath('//*[@class="forextable"]//tr')
for exchange_rate in exchange_rates:
item = EurotocurrencyItem()
currency = exchange_rate.xpath('.//td[@class="currency"]//text()').extract_first()
rate = exchange_rate.css('.rate::text').extract_first()
item['currency'] = currency
item['rate'] = rate
yield item
If you then update your store_db method as follows it should be good to go:
def store_db(self, item):
self.curr.execute("""insert into currency_tb values(%s, %s )""", (
item['currency'],
item['rate'],
))
self.conn.commit()