Tags: python, sql-server, scrapy, pyodbc

Invalid column name - What is wrong?


My database

USE [master]
GO

/****** Object:  Database [company]    Script Date: 06/07/2021 17:01:26 ******/
CREATE DATABASE [company]
 CONTAINMENT = NONE
 ON  PRIMARY 
( NAME = N'company', FILENAME = N'D:\Program Files\Microsoft SQL Server\MSSQL13.SQLEXPRESS\MSSQL\DATA\company.mdf' , SIZE = 8192KB , MAXSIZE = UNLIMITED, FILEGROWTH = 65536KB )
 LOG ON 
( NAME = N'company_log', FILENAME = N'D:\Program Files\Microsoft SQL Server\MSSQL13.SQLEXPRESS\MSSQL\DATA\company_log.ldf' , SIZE = 8192KB , MAXSIZE = 2048GB , FILEGROWTH = 65536KB )
GO

IF (1 = FULLTEXTSERVICEPROPERTY('IsFullTextInstalled'))
begin
EXEC [company].[dbo].[sp_fulltext_database] @action = 'enable'
end
GO

ALTER DATABASE [company] SET ANSI_NULL_DEFAULT OFF 
GO

ALTER DATABASE [company] SET ANSI_NULLS OFF 
GO

ALTER DATABASE [company] SET ANSI_PADDING OFF 
GO

ALTER DATABASE [company] SET ANSI_WARNINGS OFF 
GO

ALTER DATABASE [company] SET ARITHABORT OFF 
GO

ALTER DATABASE [company] SET AUTO_CLOSE OFF 
GO

ALTER DATABASE [company] SET AUTO_SHRINK OFF 
GO

ALTER DATABASE [company] SET AUTO_UPDATE_STATISTICS ON 
GO

ALTER DATABASE [company] SET CURSOR_CLOSE_ON_COMMIT OFF 
GO

ALTER DATABASE [company] SET CURSOR_DEFAULT  GLOBAL 
GO

ALTER DATABASE [company] SET CONCAT_NULL_YIELDS_NULL OFF 
GO

ALTER DATABASE [company] SET NUMERIC_ROUNDABORT OFF 
GO

ALTER DATABASE [company] SET QUOTED_IDENTIFIER OFF 
GO

ALTER DATABASE [company] SET RECURSIVE_TRIGGERS OFF 
GO

ALTER DATABASE [company] SET  DISABLE_BROKER 
GO

ALTER DATABASE [company] SET AUTO_UPDATE_STATISTICS_ASYNC OFF 
GO

ALTER DATABASE [company] SET DATE_CORRELATION_OPTIMIZATION OFF 
GO

ALTER DATABASE [company] SET TRUSTWORTHY OFF 
GO

ALTER DATABASE [company] SET ALLOW_SNAPSHOT_ISOLATION OFF 
GO

ALTER DATABASE [company] SET PARAMETERIZATION SIMPLE 
GO

ALTER DATABASE [company] SET READ_COMMITTED_SNAPSHOT OFF 
GO

ALTER DATABASE [company] SET HONOR_BROKER_PRIORITY OFF 
GO

ALTER DATABASE [company] SET RECOVERY SIMPLE 
GO

ALTER DATABASE [company] SET  MULTI_USER 
GO

ALTER DATABASE [company] SET PAGE_VERIFY CHECKSUM  
GO

ALTER DATABASE [company] SET DB_CHAINING OFF 
GO

ALTER DATABASE [company] SET FILESTREAM( NON_TRANSACTED_ACCESS = OFF ) 
GO

ALTER DATABASE [company] SET TARGET_RECOVERY_TIME = 60 SECONDS 
GO

ALTER DATABASE [company] SET DELAYED_DURABILITY = DISABLED 
GO

ALTER DATABASE [company] SET QUERY_STORE = OFF
GO

USE [company]
GO

ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO

ALTER DATABASE SCOPED CONFIGURATION SET MAXDOP = 0;
GO

ALTER DATABASE SCOPED CONFIGURATION SET PARAMETER_SNIFFING = ON;
GO

ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO

ALTER DATABASE [company] SET  READ_WRITE 
GO
-------------------------------------------------------------------------------------------
USE [company]
GO

/****** Object:  Table [dbo].[comp]    Script Date: 06/07/2021 17:01:55 ******/
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO

CREATE TABLE [dbo].[comp](
    [mst] [nchar](14) NOT NULL,
    [company_name] [nvarchar](max) NULL,
    [address] [nvarchar](max) NULL,
    [created] [datetime] NULL,
    [modified] [datetime] NULL,
    [legal_representative] [nvarchar](max) NULL,
 CONSTRAINT [PK_comp] PRIMARY KEY CLUSTERED 
(
    [mst] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO

My program

import scrapy
import re
import pyodbc


class BlogSpider(scrapy.Spider):
    name = 'blogspider'
    start_urls = ['https://masothue.com/']
    # cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
    # cursor = cnxn.cursor()
    # cursor.execute("SELECT comp.mst, comp.address, comp.company_name FROM comp")
    # for row in cursor.fetchall():
    #     print
    #     row

    def parse(self, response):
        cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
        cursor = cnxn.cursor()
        cursor.execute("INSERT INTO [dbo].[comp] ([mst] ,[company_name]  ,[address] ,[legal_representative]) VALUES  (\"343\", \"565\", \"343\", \"343\")")
        cnxn.commit()
        for href in response.xpath("//div/h3/a/@href").extract():
            print('https://masothue.com' + href)
            print(re.search(r'(\d{10})', href).group(1))

Error

C:\Users\donhuvy\PycharmProjects\pythonProject>scrapy runspider sc.py
2021-07-06 19:08:33 [scrapy.utils.log] INFO: Scrapy 2.5.0 started (bot: scrapybot)
2021-07-06 19:08:33 [scrapy.utils.log] INFO: Versions: lxml 4.6.3.0, libxml2 2.9.5, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 21.2.0, Python 3.9.6 (tags/v3.9.6:db3ff76, Jun 28 2021, 15:26:21) [MSC v.1929 64 bit (AMD64)], pyOpenSSL 20.0.1 (OpenSSL 1.1.1k  25 Mar 2021), cryptography 3.4.7, Platform Windows-10-10.0.19042-SP0
2021-07-06 19:08:33 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2021-07-06 19:08:33 [scrapy.crawler] INFO: Overridden settings:
{'SPIDER_LOADER_WARN_ONLY': True}
2021-07-06 19:08:33 [scrapy.extensions.telnet] INFO: Telnet Password: 5f64e686c90fdf8a
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.logstats.LogStats']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
 'scrapy.downloadermiddlewares.stats.DownloaderStats']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
 'scrapy.spidermiddlewares.referer.RefererMiddleware',
 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
 'scrapy.spidermiddlewares.depth.DepthMiddleware']
2021-07-06 19:08:33 [scrapy.middleware] INFO: Enabled item pipelines:
[]
2021-07-06 19:08:33 [scrapy.core.engine] INFO: Spider opened
2021-07-06 19:08:33 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2021-07-06 19:08:33 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
2021-07-06 19:08:34 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://masothue.com/> (referer: None)
2021-07-06 19:08:34 [scrapy.core.scraper] ERROR: Spider error processing <GET https://masothue.com/> (referer: None)
Traceback (most recent call last):
  File "C:\Users\donhuvy\AppData\Roaming\Python\Python39\site-packages\twisted\internet\defer.py", line 662, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "C:\Users\donhuvy\AppData\Roaming\Python\Python39\site-packages\scrapy\spiders\__init__.py", line 90, in _parse
    return self.parse(response, **kwargs)
  File "C:\Users\donhuvy\PycharmProjects\pythonProject\sc.py", line 19, in parse
    cursor.execute("INSERT INTO [dbo].[comp] ([mst] ,[company_name]  ,[address] ,[legal_representative]) VALUES  (\"343\", \"565\", \"343\", \"343\")")
pyodbc.ProgrammingError: ('42S22', "[42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207) (SQLExecDirectW); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '565'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207); [42S22] [Microsoft][ODBC SQL Server Driver][SQL Server]Invalid column name '343'. (207)")
2021-07-06 19:08:34 [scrapy.core.engine] INFO: Closing spider (finished)
2021-07-06 19:08:34 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 212,
 'downloader/request_count': 1,
 'downloader/request_method_count/GET': 1,
 'downloader/response_bytes': 17472,
 'downloader/response_count': 1,
 'downloader/response_status_count/200': 1,
 'elapsed_time_seconds': 0.437639,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2021, 7, 6, 12, 8, 34, 415274),
 'httpcompression/response_bytes': 81335,
 'httpcompression/response_count': 1,
 'log_count/DEBUG': 1,
 'log_count/ERROR': 1,
 'log_count/INFO': 10,
 'response_received_count': 1,
 'scheduler/dequeued': 1,
 'scheduler/dequeued/memory': 1,
 'scheduler/enqueued': 1,
 'scheduler/enqueued/memory': 1,
 'spider_exceptions/ProgrammingError': 1,
 'start_time': datetime.datetime(2021, 7, 6, 12, 8, 33, 977635)}
2021-07-06 19:08:34 [scrapy.core.engine] INFO: Spider closed (finished)

C:\Users\donhuvy\PycharmProjects\pythonProject>

What is wrong, and how can I fix it?


Solution

  • The relevant hint is the error message Invalid column name '343'. It points at this part of your statement:

    # WRONG, column names instead of string constants
    " ...  VALUES  (\"343\", \"565\", \"343\", \"343\")"
    

    You are using double quotes to delimit the string values. In SQL Server (as in standard SQL), string literals must be enclosed in single quotes; double quotes delimit identifiers such as column names (the QUOTED_IDENTIFIER setting, which is ON by default for ODBC connections, enforces this). That is why SQL Server reads "343" and "565" as column names and reports them as invalid.

    # CORRECT
    " ...  VALUES  ('343', '565', '343', '343')"
    

    This fixes the problem when the corresponding columns are of a character type, which is the case here (mst is nchar(14), the other three are nvarchar(max)). For columns of a numeric type, omit the quotes; e.g. if mst were declared as an INT, you would write:

    " ...  VALUES  (343, '565', '343', '343')"
    

    Note that what appears in a VALUES clause does not have to be a constant or literal: it can be any expression, and an identifier such as a column name is parsed as one (see: VALUES expression). That is why SQL Server reported Invalid column name rather than a syntax error. A corrected version of the insert is sketched below.

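    Applied to the spider from the question, a minimal sketch of the corrected insert could look like the following. The connection string and the sample values are taken verbatim from the question; the second variant uses pyodbc's ? parameter markers, so the driver handles quoting and data types for you and the statement is safe against SQL injection:

    import pyodbc

    # Connection details copied from the question; adjust to your own server.
    cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER=DESKTOP-23PIH3M;DATABASE=company;UID=sa;PWD=123456a@')
    cursor = cnxn.cursor()

    # Variant 1: string literals in single quotes, as explained above.
    cursor.execute("INSERT INTO [dbo].[comp] ([mst], [company_name], [address], [legal_representative]) "
                   "VALUES ('343', '565', '343', '343')")

    # Variant 2 (preferred): parameter markers; the values are passed
    # separately, so no quoting is needed inside the SQL text at all.
    # A different mst ('344', a made-up value) is used here so that both
    # variants can run without violating the primary key on mst.
    cursor.execute("INSERT INTO [dbo].[comp] ([mst], [company_name], [address], [legal_representative]) "
                   "VALUES (?, ?, ?, ?)",
                   '344', '565', '343', '343')

    cnxn.commit()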