Search code examples
pythonglob

Perform analysis on multiple files in python


I'm using a module called SHARPpy to perform weather analysis on weather balloon data. The data is in .oax form, so I can't provide a sample here. I know how to derive the values I want from the data for a single data file, but I am unsure of how to do this easily for multiple files (about 50 files). Below is my code, and a sample of what this code returns for a specific file. How can I automate this to make the analysis run for multiple files?

spc_file = open('X:/seabreezestormdays/201412090300_040842.oax', 'r').read()

import sharppy
import sharppy.sharptab.profile as profile
import sharppy.sharptab.interp as interp
import sharppy.sharptab.winds as winds
import sharppy.sharptab.utils as utils
import sharppy.sharptab.params as params
import sharppy.sharptab.thermo as thermo
import numpy as np
from StringIO import StringIO

def parseSPC(spc_file):
    ## read in the file
    data = np.array([l.strip() for l in spc_file.split('\n')])

    ## necessary index points
    title_idx = np.where( data == '%TITLE%')[0][0]
    start_idx = np.where( data == '%RAW%' )[0] + 1
    finish_idx = np.where( data == '%END%')[0]

    ## create the plot title
    data_header = data[title_idx + 1].split()
    location = data_header[0]
    time = data_header[1][:11]

    ## put it all together for StringIO
    full_data = '\n'.join(data[start_idx : finish_idx][:])
    sound_data = StringIO( full_data )

    ## read the data into arrays
    p, h, T, Td, wdir, wspd = np.genfromtxt( sound_data, delimiter=',', comments="%", unpack=True )

    return p, h, T, Td, wdir, wspd

pres, hght, tmpc, dwpc, wdir, wspd = parseSPC(spc_file)

prof = profile.create_profile(profile='default', pres=pres, hght=hght, tmpc=tmpc, \
                                    dwpc=dwpc, wspd=wspd, wdir=wdir, missing=-9999, strictQC=True)
msl_hght = prof.hght[prof.sfc] # Grab the surface height value
#print "SURFACE HEIGHT (m MSL):",msl_hght
agl_hght = interp.to_agl(prof, msl_hght) # Converts to AGL
#print "SURFACE HEIGHT (m AGL):", agl_hght
msl_hght = interp.to_msl(prof, agl_hght) # Converts to MSL
#print "SURFACE HEIGHT (m MSL):",msl_hght
sfcpcl = params.parcelx( prof, flag=1 ) # Surface Parcel
fcstpcl = params.parcelx( prof, flag=2 ) # Forecast Parcel
mupcl = params.parcelx( prof, flag=3 ) # Most-Unstable Parcel
mlpcl = params.parcelx( prof, flag=4 ) # 100 mb Mean Layer Parcel
print "Most-Unstable CAPE:", mupcl.bplus # J/kg
print "Most-Unstable CIN:", mupcl.bminus # J/kg
print "Most-Unstable LCL:", mupcl.lclhght # meters AGL
print "Most-Unstable LFC:", mupcl.lfchght # meters AGL
print "Most-Unstable EL:", mupcl.elhght # meters AGL
print "Most-Unstable LI:", mupcl.li5 # C
sfc = prof.pres[prof.sfc]
p3km = interp.pres(prof, interp.to_msl(prof, 3000.))
p6km = interp.pres(prof, interp.to_msl(prof, 6000.))
p1km = interp.pres(prof, interp.to_msl(prof, 1000.))
mean_3km = winds.mean_wind(prof, pbot=sfc, ptop=p3km)
sfc_6km_shear = winds.wind_shear(prof, pbot=sfc, ptop=p6km)
sfc_3km_shear = winds.wind_shear(prof, pbot=sfc, ptop=p3km)
sfc_1km_shear = winds.wind_shear(prof, pbot=sfc, ptop=p1km)
print "0-3 km Pressure-Weighted Mean Wind (kt):", utils.comp2vec(mean_3km[0], mean_3km[1])[1]
print "0-6 km Shear (kt):", utils.comp2vec(sfc_6km_shear[0], sfc_6km_shear[1])[1]
srwind = params.bunkers_storm_motion(prof)
#print "Bunker's Storm Motion (right-mover) [deg,kts]:", utils.comp2vec(srwind[0], srwind[1])
#print "Bunker's Storm Motion (left-mover) [deg,kts]:", utils.comp2vec(srwind[2], srwind[3])
srh3km = winds.helicity(prof, 0, 3000., stu = srwind[0], stv = srwind[1])
srh1km = winds.helicity(prof, 0, 1000., stu = srwind[0], stv = srwind[1])
print "0-3 km Storm Relative Helicity [m2/s2]:",srh3km[0]
stp_fixed = params.stp_fixed(sfcpcl.bplus, sfcpcl.lclhght, srh1km[0], utils.comp2vec(sfc_6km_shear[0], sfc_6km_shear[1])[1])
ship = params.ship(prof)
eff_inflow = params.effective_inflow_layer(prof)
ebot_hght = interp.to_agl(prof, interp.hght(prof, eff_inflow[0]))
etop_hght = interp.to_agl(prof, interp.hght(prof, eff_inflow[1]))
print "Effective Inflow Layer Bottom Height (m AGL):", ebot_hght
print "Effective Inflow Layer Top Height (m AGL):", etop_hght
effective_srh = winds.helicity(prof, ebot_hght, etop_hght, stu = srwind[0], stv = srwind[1])
print "Effective Inflow Layer SRH (m2/s2):", effective_srh[0]
ebwd = winds.wind_shear(prof, pbot=eff_inflow[0], ptop=eff_inflow[1])
ebwspd = utils.mag( ebwd[0], ebwd[1] )
print "Effective Bulk Wind Difference:", ebwspd
scp = params.scp(mupcl.bplus, effective_srh[0], ebwspd)
stp_cin = params.stp_cin(mlpcl.bplus, effective_srh[0], ebwspd, mlpcl.lclhght, mlpcl.bminus)
#print "Supercell Composite Parameter:", scp
#print "Significant Tornado Parameter (w/CIN):", stp_cin
#print "Significant Tornado Parameter (fixed):", stp_fixed

output:

runfile('C:/Users/kirkj/tryit.py', wdir='C:/Users/kirkj')
Most-Unstable CAPE: 1711.9340703
Most-Unstable CIN: -76.4232980928
Most-Unstable LCL: 936.427493357
Most-Unstable LFC: 2268.41422348
Most-Unstable EL: 11164.0
Most-Unstable LI: -5.1505592459
0-3 km Pressure-Weighted Mean Wind (kt): 3.90277462633
0-6 km Shear (kt): 19.1904316825
0-3 km Storm Relative Helicity [m2/s2]: 67.0917409854
Effective Inflow Layer Bottom Height (m AGL): 0.0
Effective Inflow Layer Top Height (m AGL): 2090.0
Effective Inflow Layer SRH (m2/s2): 65.5651659762
Effective Bulk Wind Difference: 14.2626808205
C:/Users/kirkj/tryit.py:42: DeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  ## put it all together for StringIO

Solution

  • To expand on @PyNEwbie's answer:

    # get all the .oax files from a directory
    from glob import glob
    files = glob.glob('X:/seebreezestromdays/*.oax')
    
    # To get files from the command line, use the following instead:
    #      files = sys.argv[1:]
    
    # load all the data from a list of files.  
    data = [parseSPC(f) for f in files]
    
    # details for each file
    for d in data:
        # The following is shorthand for:
        #    pres, hght, tmpc, dwpc, wdir, wspd = d
        #    show_one_file(pres, hght, tmpc, dwpc, wdir, wspd)
        show_one_file(*d)
    
    # summary for all files
    # The following is shortand for:
    #    # make separate lists for each stat
    #    pres, hght, tmpc, dwpc, wdir, wspd = zip(*data)
    #    show_summary(pres, hght, tmpc, dwpc, wdir, wspd)
    show_summary(*zip(*data))