Tags: python, pandas, export-to-csv

Create multiple CSVs based on lists


I have a folder containing four CSVs. Each CSV has three columns: Title, Author, and ISBN.

What I want to do is create a new CSV with an API link for each ISBN from each file. At the end, I should have four CSVs, which will be used later on. This is my code so far:

import glob
import pandas as pd
from urllib.request import urlopen

# import generated csvs from other script
safepath = '.'  # currently the same path as the origin, should be /extract/isbn, wip
filelist = glob.glob('./extract/Reihe A/Reihe*_extract.csv', recursive=True)  # there are currently 4 files in the folder
print(filelist)
for file in filelist:
    # read csv, make a list of all isbns
    data = pd.read_csv(file, sep="\t", encoding='utf8')
    print(file)
    isbnlist = []
    print(isbnlist)
    for row in data['ISBN']:
        isbnlist.append(row)
    # for each isbn in list, get data from api
    apisearch = []
    for isbn in isbnlist:
        url = 'http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D' + isbn + '&maximumRecords=10&recordSchema=marcxml'
        print(url)
    apisearch = []
    for isbn in isbnlist:
        url = 'http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D' + isbn + '&maximumRecords=10&recordSchema=marcxml'
        for column in url:
            apisearch.append(url)
            # create new csv with data from api
            urllinks = pd.DataFrame(apisearch)
            urllinks.to_csv(str(safepath) + "/" + file + "_" + "isbn.csv", sep='\t', encoding='utf8')

The problem I'm facing right now is that all rows get pushed to one CSV, which is not what I want.

What do I have to change to process each file individually and create a new CSV for each source file?

Any help is appreciated.

Edit: link to the files, in case anyone wants to try to reproduce the CSV creation: sync.com


Solution

  • No need to use pandas; this can be done with some string manipulation. (Incidentally, in your code the inner loop "for column in url:" iterates over the characters of the URL string, so the same URL is appended once per character and the output CSV is rewritten on every one of those iterations.)

    import glob


    def add_urls(filename_in: str, filename_out: str):
        with open(filename_in, encoding="utf-8") as infile:
            # OS-agnostic split on newline
            data = infile.read().splitlines()
        with open(filename_out, "w", encoding="utf-8") as outfile:
            # grab header (by removing it from data) and add URL column (and a newline)
            outfile.write(f"{data.pop(0)}\tURL\n")
            for row in data:
                # last element is ISBN
                isbn = row.split("\t")[-1]
                # append tab, URL and newline
                row += f"\thttp://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D{isbn}&maximumRecords=10&recordSchema=marcxml\n"
                outfile.write(row)
    
    
    # iterate over files
    for filename in glob.glob('./Reihe A/Reihe*_extract.csv', recursive=True):
        filename_out = f"{filename[:-4]}_ISBN.csv"
        add_urls(filename, filename_out)
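
  • If you'd rather stay with pandas, as in the question, a per-file version is also possible. This is a minimal sketch under the question's stated setup (tab-separated files with an ISBN column, the same glob pattern); the URL_TEMPLATE name and the output file naming are my own choices for illustration:

    import glob

    import pandas as pd

    # URL template from the question; {isbn} is filled in per row
    URL_TEMPLATE = (
        "http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve"
        "&query=pica.isb%3D{isbn}&maximumRecords=10&recordSchema=marcxml"
    )

    for filename in glob.glob('./Reihe A/Reihe*_extract.csv', recursive=True):
        # read one source file (separator and encoding as in the question)
        data = pd.read_csv(filename, sep="\t", encoding="utf8")
        # build one URL per ISBN
        data["URL"] = data["ISBN"].astype(str).map(
            lambda isbn: URL_TEMPLATE.format(isbn=isbn)
        )
        # write one output CSV per input file (naming is an assumption)
        data.to_csv(f"{filename[:-4]}_ISBN.csv", sep="\t", encoding="utf8", index=False)

    Because each iteration reads one source file and writes its own output before moving on, the rows can never all end up in a single CSV.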