Search code examples
pandasmysql-python

Pandas to_sql with SQLAlchemy cannot used to mysql


I have a problem with pandas to_sql in current version. I already set create engine with SQLAchemy like this

db_url = "mysql+mysqlconnector://{USER}:{PWD}@{HOST}/{DBNAME}"

db_url = db_url.format(
    USER = "root",
    PWD = "root",
    HOST = "127.0.0.1:3306",
    DBNAME = "report"
)
engine = create_engine(db_url, echo=False)

when used the pandas to_sql,

with engine.begin() as connection:
    df.to_sql('dim_date', con=connection, index=False, if_exists='replace')

I got an error like this

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[10], line 2
      1 with engine.begin() as connection:
----> 2     df.to_sql('dim_date', con=connection, index=False, if_exists='replace')

File /usr/local/lib/python3.11/site-packages/pandas/util/_decorators.py:333, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    327 if len(args) > num_allow_args:
    328     warnings.warn(
    329         msg.format(arguments=_format_argument_list(allow_args)),
    330         FutureWarning,
    331         stacklevel=find_stack_level(),
    332     )
--> 333 return func(*args, **kwargs)

File /usr/local/lib/python3.11/site-packages/pandas/core/generic.py:3081, in NDFrame.to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
   2886 """
   2887 Write records stored in a DataFrame to a SQL database.
   2888 
   (...)
   3077 [(1,), (None,), (2,)]
   3078 """  # noqa: E501
   3079 from pandas.io import sql
-> 3081 return sql.to_sql(
   3082     self,
   3083     name,
   3084     con,
   3085     schema=schema,
   3086     if_exists=if_exists,
   3087     index=index,
   3088     index_label=index_label,
   3089     chunksize=chunksize,
   3090     dtype=dtype,
   3091     method=method,
   3092 )

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:842, in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method, engine, **engine_kwargs)
    837     raise NotImplementedError(
    838         "'frame' argument should be either a Series or a DataFrame"
    839     )
    841 with pandasSQL_builder(con, schema=schema, need_transaction=True) as pandas_sql:
--> 842     return pandas_sql.to_sql(
    843         frame,
    844         name,
    845         if_exists=if_exists,
    846         index=index,
    847         index_label=index_label,
    848         schema=schema,
    849         chunksize=chunksize,
    850         dtype=dtype,
    851         method=method,
    852         engine=engine,
    853         **engine_kwargs,
    854     )

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:2851, in SQLiteDatabase.to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method, engine, **engine_kwargs)
   2840             raise ValueError(f"{col} ({my_type}) not a string")
   2842 table = SQLiteTable(
   2843     name,
   2844     self,
   (...)
   2849     dtype=dtype,
   2850 )
-> 2851 table.create()
   2852 return table.insert(chunksize, method)

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:984, in SQLTable.create(self)
    983 def create(self) -> None:
--> 984     if self.exists():
    985         if self.if_exists == "fail":
    986             raise ValueError(f"Table '{self.name}' already exists.")

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:970, in SQLTable.exists(self)
    969 def exists(self):
--> 970     return self.pd_sql.has_table(self.name, self.schema)

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:2866, in SQLiteDatabase.has_table(self, name, schema)
   2855 wld = "?"
   2856 query = f"""
   2857 SELECT
   2858     name
   (...)
   2863     AND name={wld};
   2864 """
-> 2866 return len(self.execute(query, [name]).fetchall()) > 0

File /usr/local/lib/python3.11/site-packages/pandas/io/sql.py:2673, in SQLiteDatabase.execute(self, sql, params)
   2671     raise TypeError("Query must be a string unless using sqlalchemy.")
   2672 args = [] if params is None else [params]
-> 2673 cur = self.con.cursor()
   2674 try:
   2675     cur.execute(sql, *args)

AttributeError: 'Connection' object has no attribute 'cursor'

It's like the pandas library use the default SQLiteDatabase executor. Any of fellow able to fix my issue?

What's wrong with my code to able insert to MySQL Database with pandas to_sql?


Solution

  • I think you have an old version of SQLAlchemy (1.x?) so it can't work with recent versions of Pandas. You have to upgrade your version of SQLAlchemy:

    [...]$ pip install -U sqlalchemy
    

    Note: I tried your code and it works fine with Pandas==2.2.0 and sqlalchemy==2.0.25.

    More information: