I have a folder containing four CSV files. Each CSV has three columns: Title, Author, ISBN.
What I want to do is create a new CSV with an API link for each ISBN from the file. At the end, I should have four CSVs, which will be used later on. This is my code so far:
import glob
import pandas as pd
from urllib.request import urlopen
#import generated csvs from other script
safepath='.' #currently the same path as the origin, should be /extract/isbn, wip
filelist=glob.glob('./extract/Reihe A/Reihe*_extract.csv',recursive=True) #there are currently 4 files in the folder
print(filelist)
for file in filelist:
    # read csv, make a list of all isbns
    data = pd.read_csv(file, sep="\t", encoding='utf8')
    print(file)
    isbnlist = []
    print(isbnlist)
    for row in data['ISBN']:
        isbnlist.append(row)

    # for each isbn in list, get data from api
    apisearch = []
    for isbn in isbnlist:
        url = 'http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D' + isbn + '&maximumRecords=10&recordSchema=marcxml'
        print(url)
        for column in url:
            apisearch.append(url)

    # create new csv with data from api
    urllinks = pd.DataFrame(apisearch)
    urllinks.to_csv(str(safepath) + "/" + file + "_" + "isbn.csv", sep='\t', encoding='utf8')
The problem I'm facing right now is that all rows get pushed into one CSV, which is not what I want.
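For reference, each output CSV should contain one API link per ISBN from the corresponding source file, e.g. (with the hypothetical example ISBN 9783161484100):

http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D9783161484100&maximumRecords=10&recordSchema=marcxml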
What do I have to change to process each file individually and create a new CSV for each source file?
Any help is appreciated.
Edit: link to the files, in case anyone wants to try to reproduce the CSV creation: sync.com
No need to use pandas; this can be done with some string manipulation.
import glob

def add_urls(filename_in: str, filename_out: str):
    with open(filename_in, encoding="utf-8") as infile:
        # OS-agnostic split on newline
        data = infile.read().splitlines()
    with open(filename_out, "w", encoding="utf-8") as outfile:
        # grab header (by removing it from data) and add URL column (and a newline)
        outfile.write(f"{data.pop(0)}\tURL\n")
        for row in data:
            if not row:
                # skip blank lines, e.g. a trailing newline at the end of the file
                continue
            # last element is ISBN
            isbn = row.split("\t")[-1]
            # append tab, URL and newline
            row += f"\thttp://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D{isbn}&maximumRecords=10&recordSchema=marcxml\n"
            outfile.write(row)

# iterate over files
for filename in glob.glob('./Reihe A/Reihe*_extract.csv', recursive=True):
    filename_out = f"{filename[:-4]}_ISBN.csv"
    add_urls(filename, filename_out)
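If you'd rather stay with pandas, the key change to your original code is to keep both the URL building and the to_csv call inside the per-file loop, so each source file gets its own output. A minimal sketch of that approach (assuming the same tab-separated layout, that the ISBN column is read cleanly, and that the output name is derived from the input name) could look like:

import glob
import pandas as pd

BASE_URL = ('http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve'
            '&query=pica.isb%3D{}&maximumRecords=10&recordSchema=marcxml')

for file in glob.glob('./Reihe A/Reihe*_extract.csv', recursive=True):
    data = pd.read_csv(file, sep="\t", encoding="utf8")
    # build one URL per ISBN; astype(str) guards against ISBNs parsed as numbers
    data['URL'] = data['ISBN'].astype(str).map(BASE_URL.format)
    # writing inside the loop gives one output CSV per source file
    data.to_csv(f"{file[:-4]}_isbn.csv", sep="\t", encoding="utf8", index=False)

Both versions produce one new CSV per input file; the string version avoids the pandas dependency, while the pandas version keeps the rest of your pipeline unchanged.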