Search code examples
pythonexceldictionaryxlrd

python - creating dictionary from excel using specific columns


I'm relatively new to python (outside of the world of GIS) and struggling getting the code I won't for a side project I am doing with USGS Earthquake data. The data is in an excel file containing 7 columns as seen below:

USGS earthquake table

I have these tasks which I set myself:

  1. Total number of earthquakes
  2. Create Dictionary of Time of Earthquakes with the Region
  3. Top 3 locations with the highest frequency of earthquakes
  4. Top 3 locations with the highest magnitude of earthquakes
  5. Print the above to text file

I am stuck on second task and once I have that i should be able to do the rest.

Also, I'm aware that nothing gets printed to text file yet!

import xlrd
from xlrd import open_workbook
from collections import Counter

def get_sheet(xl_file):
    wb = open_workbook(xl_file,'r')

    # get first sheet in the workbook
    return wb.sheets()[0]

def number_eq(sheet):
    row_count = len(range(sheet.nrows))
    print ("Total number of earthqaukes = %s") % row_count

def no_earthquake_region():

    #???

#def top_freq_eq(sheet):

    #print (Counter(words).most_common(5))

#def top_mag_eq(sheet):

    #print (Counter(words).most_common(5))

def main(xl_file, out_folder):

    sheet = get_sheet(xl_file)
    row = number_eq(sheet)


if __name__ == "__main__":
    xl_file = r'D:\Projects\Other\data\EarthquakeUSGS.xlsx'
    out_folder = r"D:\Projects\Other\data\output\output.txt"
    main(xl_file, out_folder)
    print("Done!")

Solution

  • Here is the way I'll do it:

        import xlrd
        from xlrd import open_workbook
        from collections import Counter
    
        def get_sheet(xl_file):
            wb = open_workbook(xl_file,'r')
    
            # get first sheet in the workbook
            return wb.sheets()[0]
    
        def number_eq(sheet):
            row_count = len(range(sheet.nrows))
            print ("Total number of earthqaukes = %s") % row_count
    
        def no_earthquake_region(sheet):
            l = []
            num_of_rows = sheet.nrows
            num_of_cols = sheet.ncols
            if(num_of_rows > 1):
                d={}
                for i in range(2,num_of_rows):
                    d = {"Time":sheet.cell(i,1).value , "Region":sheet.cell(i,6).value}
                    l.append(d)
            return l
    
        #def top_freq_eq(sheet):
    
            #print (Counter(words).most_common(5))
    
        #def top_mag_eq(sheet):
    
            #print (Counter(words).most_common(5))
    
        def main(xl_file, out_folder):
    
            sheet = get_sheet(xl_file)
            row = number_eq(sheet)
            print no_earthquake_region(sheet)
    
    
        if __name__ == "__main__":
            xl_file = r'D:\Projects\Other\data\EarthquakeUSGS.xlsx'
            out_folder = r"D:\Projects\Other\data\output\output.txt"
            main(xl_file, out_folder)
            print("Done!")