Search code examples
c#sqlsql-updateimdbscreen-scraping

SQL UPDATE isn't updating the database, is there something wrong with my SQL statement?


This is my first attempt at a 'real' C# program. It takes a specified directory, extracts the file name (without extension) and writes them to an SQL database. This data is then read back into an array and passed into the below 'foreach' loop. The loop then uses the data to search IMDB and store the URL for the first result into the DB. It then reads this data back into a variable and uses this to 'scrape' data from the page such as director, cast, plot etc.

I have the program working right down to updating the database at the end with the director, cast, plot, etc data. I have drilled down into the program and all variables contain the correct values, just when the form loads the table in a DataGrid it shows all the data I added earlier in the loop, but not the director etc.

For these reasons I think my SQL statement at the end of the program may be wrong. I know the code is probably inefficient, and messy, but I'm new to all this so go easy!

        foreach (string title in titles)
        {
            //Use each title in titles array to search IMDB and return the page URL
            string searchURL = "http://www.imdb.com/find?s=all&q=" + title;
            string url = searchURL;
            string sourceCode = WorkerClass.getSourceCode(url);
            int startIndex = sourceCode.IndexOf("Media from ");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("<a href=") + 9;
            int endIndex = sourceCode.IndexOf('"' + " onclick", startIndex);
            string link = "http://www.imdb.com" + (sourceCode.Substring(startIndex, endIndex - startIndex));

            //Update DB to add page url based on title
            SqlConnection con = new SqlConnection(DataAccess.GetConnectionString("dbCon"));
            //Create SQL Command
            var command = new SqlCommand("UPDATE movieTable SET imdbPageURL=@pageURL WHERE title=@title", con);
            command.Parameters.AddWithValue("@pageURL", link);
            command.Parameters.AddWithValue("@title", title);
            con.Open();
            //Add to DB
            command.ExecuteNonQuery();
            con.Close();

            //Select IMDB Page URL from movieTable where the title = current title
            var com = new SqlCommand("SELECT imdbPageURL FROM movieTable WHERE title=@title", con);
            con.Open();
            com.Parameters.AddWithValue("@title", title);
            string pageURL = (string)com.ExecuteScalar();
            con.Close();

            //Get Director
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("description");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("content=") +21;
            endIndex = sourceCode.IndexOf('.' , startIndex);
            string director = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Get Cast
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("content=");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf('.') +2;
            endIndex = sourceCode.IndexOf("/>", startIndex) -3;
            string cast = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Get Plot
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("Users:");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("</div>");
            endIndex = sourceCode.IndexOf("<div", startIndex);
            sourceCode = sourceCode.Substring(startIndex, endIndex - startIndex);
            startIndex = sourceCode.IndexOf("<p>") +7;
            endIndex = sourceCode.IndexOf("</p>");
            string plot = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Get Rating
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("infobar");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("alt=") +5;
            endIndex = sourceCode.IndexOf("src=", startIndex) -2;
            string rating = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Get Release Date
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("infobar");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("nobr");
            endIndex = sourceCode.IndexOf("</div>", startIndex);
            sourceCode = sourceCode.Substring(startIndex, endIndex - startIndex);
            startIndex = sourceCode.IndexOf("dates") +11;
            endIndex = sourceCode.IndexOf("</a") -4;
            string releaseDate = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Get link to Cover Image
            sourceCode = WorkerClass.getSourceCode(pageURL);
            startIndex = sourceCode.IndexOf("img_primary");
            sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);
            startIndex = sourceCode.IndexOf("<img src=") + 10;
            endIndex = sourceCode.IndexOf(".jpg", startIndex) +4;
            string coverURL = sourceCode.Substring(startIndex, endIndex - startIndex);

            //Update movieTable with scraped data for the current title
            var comd = new SqlCommand("UPDATE movieTable SET director=@director, cast=@cast, plot=@plot, rating=@rating, releaseDate=@releaseDate, coverURL=@coverURL WHERE title=@title", con);
            comd.Parameters.AddWithValue("@title", title);
            comd.Parameters.AddWithValue("@director", director);
            comd.Parameters.AddWithValue("@cast", cast);
            comd.Parameters.AddWithValue("@plot", plot);
            comd.Parameters.AddWithValue("@rating", rating);
            comd.Parameters.AddWithValue("@releaseDate", releaseDate);
            comd.Parameters.AddWithValue("@coverURL", coverURL);
            con.Open();
            //Add to DB
            command.ExecuteNonQuery();
            con.Close();
        }

        this.movieTableTableAdapter.Fill(this.movieLibraryDBDataSet.movieTable);

Solution

  • Your last execution is using the command object rather than the comd object you define later for the update.