I'm making a web scraper using Scrapy that scrapes currency conversion rated compared to the Euro, and want to display the rates, currency name and the shortened version of the name in a MySQL table. I've been able to make it so the rates and shortened name are able to be put on the table, but when I try to do the full currency name the only thing that is put in the table is the first result. Here's my code:
The scraper itself:
import scrapy
from ..items import EurotocurrencyItem
class CurrencySpider(scrapy.Spider):
name = 'currency'
start_urls = [
'https://www.ecb.europa.eu/stats/policy_and_exchange_rates/euro_reference_exchange_rates/html/index.en.html'
]
def parse(self, response):
exchange_rates = response.xpath('//*[@class="forextable"]//tr')
for exchange_rate in exchange_rates:
item = EurotocurrencyItem()
currency = exchange_rate.xpath('.//td[@class="currency"]//text()').extract_first()
currencyl = response.xpath('//td[@class="alignLeft"]//text()').extract_first()
rate = exchange_rate.css('.rate::text').extract_first()
item['currency'] = currency
item['currencyl'] = currencyl
item['rate'] = rate
yield item
items.py:
import scrapy
class EurotocurrencyItem(scrapy.Item):
currency = scrapy.Field()
rate = scrapy.Field()
currencyl = scrapy.Field()
pipelines.py:
import mysql.connector
class EurotocurrencyPipeline:
def __init__(self):
self.create_connection()
self.create_table()
def create_connection(self):
self.conn = mysql.connector.connect(
host='localhost',
user='root',
passwd='notrealpassword',
database='currency'
)
self.curr = self.conn.cursor()
def create_table(self):
self.curr.execute("""DROP TABLE IF EXISTS currency_tb""")
self.curr.execute("""create table currency_tb(
currency text,
rate text,
currencyl text
)""")
def process_item(self, item, spider):
self.store_db(item)
return item
def store_db(self, item):
self.curr.execute("""insert into currency_tb values(%s, %s, %s )""", (
item['currency'],
item['currencyl'],
item['rate']
))
self.conn.commit()
There are two small mistakes in your currencyl
-selection:
First you have to iterate over exchange_rate
instead of response
(as you already do correctly for currency and rate).
Furthermore there is a .
missing in your xpath()
.
So this should work as expected:
currencyl = exchange_rate.xpath('.//td[@class="alignLeft"]//text()').extract_first()