Search code examples
pythoncsvfile

Merge information from two files into one CSV file


For example 1st file contains names and dates separated by colon:

john:01.01.2001
mary:06.03.2016

And then 2nd file contains names and cities:

john:london
mary:new york

I need top merge them by names into csv file like that:

name,town,date
john,london,01.01.2001
mary,new york,06.03.2016 

Also, if information about person is missing, it should be "-" in the output file:

name,town,date
john,-,01.01.2001
mary,new york,-

Solution

  • A rough draft. When I get a chance later I will clean it up some.

    cat name_date.csv                                                                                                                                                                                                       
    john:01.01.2001
    mary:06.03.2016
    sue:
    
    cat name_city.csv                                                                                                                                                                                                       
    john:london
    mary:new york
    bob:
    
    import csv
    
    with open("name_date.csv") as dt_csv:
        new_dict = {}
        dt_dictR = csv.DictReader(dt_csv, fieldnames=["name", "date"],  delimiter=':')
        for row in dt_dictR:
            if not row["date"]:
                row["date"] = '-'
            new_dict.update({row["name"]: {"date": row["date"]}})
        with open("name_city.csv") as city_csv:
            dt_dictC = csv.DictReader(city_csv, fieldnames=["name", "city"],  delimiter=':')
            print(new_dict)
            for row in dt_dictC:
                if not row["city"]: 
                    row["city"] = '-' 
                if new_dict.get(row["name"]):
                    new_dict[row["name"]].update({"city": row["city"]})
                else:
                    new_dict.update({row["name"]: {"date": '-', "city": row["city"]}})
        with open("merged_csv", "w", newline='') as out_file:
            csv_w = csv.writer(out_file)
            csv_w.writerow(["name","town","date"])
            for item in new_dict:
                if not new_dict[item].get("city"):
                    new_dict[item]["city"] = '-'
                csv_w.writerow([item, new_dict[item]["city"], new_dict[item]["date"]])
    
    cat merged_csv                                                                                                                                                                                                          
    name,town,date
    john,london,01.01.2001
    mary,new york,06.03.2016
    sue,-,-
    bob,-,-
    
    

    Simplify somewhat by using defaultdict:

    import csv 
    from collections import defaultdict
    
    with open("name_date.csv") as dt_csv:
        def cityDateDict():
            return {"city": "-", "date": "-"}
        new_dict = defaultdict(cityDateDict)
        dt_dictR = csv.DictReader(dt_csv, fieldnames=["name", "date"],  delimiter=':')
        for row in dt_dictR:
            new_dict[row["name"]]
            if row["date"].strip():
                new_dict[row["name"]]["date"] = row["date"]
        with open("name_city.csv") as city_csv:
            dt_dictC = csv.DictReader(city_csv, fieldnames=["name", "city"],  delimiter=':')
            for row in dt_dictC:
                new_dict[row["name"]] 
                if row["city"].strip():
                    new_dict[row["name"]]["city"] = row["city"] 
        with open("merged_csv", "w", newline='') as out_file:
            csv_w = csv.writer(out_file)
            csv_w.writerow(["name","town","date"])
            for item in new_dict:
                csv_w.writerow([item, new_dict[item]["city"], new_dict[item]["date"]])
    
    

    defaultdict allows you to build a dictionary dynamically using 'default' values. In this case the city/date dict with defaults of -. Then the corresponding key(city/date) can be updated with a non-empty value to override the default.