Extension from previous question
Attempting to insert SQL values into database after pulling from XML file, but none seem to be appearing in database after insert statement embedded in Python code. Without the SQL section included, the entries are printed as expected. I am not getting an error in my Python environment (Anaconda Navigator), so totally lost on how the queries were processed, but nothing was entered! I tried a basic select statement to display the table, but get an empty table back.
Select Query
%sql SELECT * FROM publication;
Main Python code
import sqlite3
con = sqlite3.connect("publications.db")
cur = con.cursor()
from xml.dom import minidom
xmldoc = minidom.parse("test.xml")
#loop through <pub> tags to find number of pubs to grab
root = xmldoc.getElementsByTagName("root")[0]
pubs = [a.firstChild.data for a in root.getElementsByTagName("pub")]
num_pubs = len(pubs)
count = 0
while(count < num_pubs):
#get data from each <pub> tag
temp_pub = root.getElementsByTagName("pub")[count]
temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
temp_title = temp_pub.getElementsByTagName("title")[0].firstChild.data
temp_year = temp_pub.getElementsByTagName("year")[0].firstChild.data
temp_booktitle = temp_pub.getElementsByTagName("booktitle")[0].firstChild.data
temp_pages = temp_pub.getElementsByTagName("pages")[0].firstChild.data
temp_authors = temp_pub.getElementsByTagName("authors")[0]
temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
num_authors = len(temp_author_array)
count = count + 1
#process results into sqlite
pub_params = (temp_ID, temp_title)
cur.execute("INSERT INTO publication (id, ptitle) VALUES (?, ?)", pub_params)
journal_params = (temp_booktitle, temp_pages, temp_year)
cur.execute("INSERT INTO journal (jtitle, pages, year) VALUES (?, ?, ?)", journal_params)
x = 0
while(x < num_authors):
cur.execute("INSERT OR IGNORE INTO authors (name) VALUES (?)", (temp_author_array[x],))
x = x + 1
#display results
print("\nEntry processed: ", count)
print("------------------\nPublication ID: ", temp_ID)
print("Publication Title: ", temp_title)
print("Year: ", temp_year)
print("Journal title: ", temp_booktitle)
print("Pages: ", temp_pages)
i = 0
print("Authors: ")
while(i < num_authors):
print("-",temp_author_array[i])
i = i + 1
print("\nNumber of entries processed: ", count)
SQL queries
%%sql
DROP TABLE IF EXISTS publication;
CREATE TABLE publication(
id INT PRIMARY KEY NOT NULL,
ptitle VARCHAR NOT NULL
);
/* Author Entity set and writes_for relationship */
DROP TABLE IF EXISTS authors;
CREATE TABLE authors(
name VARCHAR(200) PRIMARY KEY NOT NULL,
pub_id INT,
pub_title VARCHAR(200),
FOREIGN KEY(pub_id, pub_title) REFERENCES publication(id, ptitle)
);
/* Journal Entity set and apart_of relationship */
DROP TABLE IF EXISTS journal;
CREATE TABLE journal(
jtitle VARCHAR(200) PRIMARY KEY NOT NULL,
pages INT,
year INT(4),
pub_id INT,
pub_title VARCHAR(200),
FOREIGN KEY(pub_id, pub_title) REFERENCES publication(id, ptitle)
);
/* Wrote relationship b/w journal & authors */
DROP TABLE IF EXISTS wrote;
CREATE TABLE wrote(
name VARCHAR(100) NOT NULL,
jtitle VARCHAR(50) NOT NULL,
PRIMARY KEY(name, jtitle),
FOREIGN KEY(name) REFERENCES authors(name),
FOREIGN KEY(jtitle) REFERENCES journal(jtitle)
);
You need to call con.commit()
in order to commit the data to the database. If you use the connection as a context manager (with con:
), the connection will commit any changes you make (or roll them back if there is an error).
Explicitly closing the connection is also a good practice.