I am unable to get Scrapy to write items through my pipeline to my local MySQL database. I have already installed mysql-connector-python 8.0.19 and am able to write data to the database from within the same project, but outside of a Scrapy pipeline. Can someone please help? I can't figure out why it isn't working.
When I try to send data via the Scrapy pipeline, I get the following error:
[twisted] CRITICAL: Unhandled error in Deferred:
File "C:\Users\Viking\PycharmProjects\Indigo_Scrp\IndgoScrp\IndgoScrp\pipelines.py", line 7, in <module>
from mysql.connector import (connection)
ModuleNotFoundError: No module named 'mysql'
Here is my code for the pipeline:
from mysql.connector import (connection)
from mysql.connector import errorcode
class IndgoscrpPipeline(object):
    """Scrapy item pipeline that stores scraped products in a MySQL table.

    Constructing the pipeline opens a connection to the local ``Python``
    database and recreates the ``indigo`` table, so every crawl starts from
    an empty table. Each processed item is written as one row.
    """

    # Item fields, in the column order of the ``indigo`` table.
    _FIELDS = (
        'Product_Name',
        'Product_Author',
        'Product_Price',
        'Product_Image',
    )

    def __init__(self):
        self.create_connection()
        self.create_table()

    def create_connection(self):
        """Open the MySQL connection and a cursor on it."""
        self.conn = connection.MySQLConnection(
            host='127.0.0.1',
            user='root',
            passwd='',
            database='Python'
        )
        self.curr = self.conn.cursor()

    def mysql_connect(self):
        """(Re)open the connection, reporting failures instead of raising.

        Returns:
            The open connection, or ``None`` when connecting failed (the
            reason is printed).
        """
        # Imported here so the method carries its own dependency; the
        # module-level imports only bring in ``connection`` and ``errorcode``.
        from mysql.connector import Error

        try:
            self.create_connection()
        except Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("Something is wrong with your user name or password")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("Database does not exist")
            else:
                print(err)
            return None
        return self.conn

    def create_table(self):
        """Drop and recreate the ``indigo`` table."""
        self.curr.execute(""" DROP TABLE IF EXISTS indigo""")
        self.curr.execute(""" Create table indigo(
            Product_Name text,
            Product_Author text,
            Product_Price text,
            Product_Image text
            )""")

    def open_spider(self, spider):
        """Called by Scrapy when the spider starts."""
        print("spider open")

    def process_item(self, item, spider):
        """Store *item* and hand it on.

        The item is returned so that any later pipeline stage still
        receives it.
        """
        self.store_db(item)
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.curr.close()
        self.conn.close()

    @staticmethod
    def _first(value):
        """Return the first scalar inside nested lists/tuples, or ``None``.

        Selector results arrive as lists (sometimes wrapped in a tuple);
        an empty result maps to ``None`` so the row is stored with a NULL
        instead of raising ``IndexError``.
        """
        while isinstance(value, (list, tuple)):
            if not value:
                return None
            value = value[0]
        return value

    def store_db(self, item):
        """Insert the first value of every field of *item* as one row.

        Returns:
            The same *item*.
        """
        row = tuple(self._first(item[field]) for field in self._FIELDS)
        # Parameterized query: the driver escapes the scraped values.
        self.curr.execute("""Insert Into indigo values (%s,%s,%s,%s)""", row)
        self.conn.commit()
        # The connection stays open here; it is closed once in close_spider.
        return item
*
Here is the code from my spider:
import scrapy
from ..items import IndScrItem
class IndgoSpider(scrapy.Spider):
    """Spider that scrapes the Indigo books listing page."""

    name = 'Indgo'
    start_urls = ['https://www.chapters.indigo.ca/en-ca/books/?link-usage=Header%3A%20books&mc=Book&lu=Main']

    def parse(self, response):
        """Yield one item holding every match found on the page.

        Each field is the plain list returned by ``getall()``. No trailing
        commas follow the assignments: a trailing comma would wrap the list
        in a 1-tuple and make the fields inconsistent with one another.
        """
        items = IndScrItem()
        items['Product_Name'] = response.css('.product-list__product-title-link--grid::text').getall()
        items['Product_Author'] = response.css('.product-list__contributor::text').getall()
        items['Product_Price'] = response.css('.product-list__price--orange::text').getall()
        items['Product_Image'] = response.css('.product-image--lazy::attr(src)').getall()
        yield items
This is the setting in the settings file that I use to enable the pipeline:
# Enable the MySQL pipeline; the integer is the value Scrapy uses to order
# item pipelines relative to one another.
ITEM_PIPELINES = {'IndgoScrp.pipelines.IndgoscrpPipeline': 100}
I actually found that the issue was caused by having previously pip-installed the wrong version of mysql-connector. Even though I had installed the correct one through my IDE (PyCharm), Python was confused about which package to use. After uninstalling both and reinstalling mysql-connector-python, the spider was able to run.