Search code examples
pythonamazon-web-servicesamazon-s3boto3botocore

Download Amazon AWS-S3 GEFS Ensenble DATA Loop


I would like to shorten the following code such that it downloads the bucket subfolder from the Amazon server

import boto3
import botocore
import datetime
import xarray as xr
import matplotlib.pyplot as plt
import cfgrib

#cfs_bucket = 'noaa-cfs-pds'

cfs_bucket = 'noaa-gefs-pds'

# Want to get 5 days worth of forecast data = 5 days x 4 forecast cycles/day
cycles2get = 4*5 

client = boto3.client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))

paginator = client.get_paginator('list_objects')
result = paginator.paginate(Bucket=cfs_bucket, Delimiter='/')

    count = 0
    
    for prefix in result.search('CommonPrefixes'):
        print(prefix.get('Prefix'))
        count += 1 
        if(count>49):
            break # skip the for loop after reporting 50 values

gefs.20170101/
gefs.20170102/
gefs.20170103/
gefs.20170104/
gefs.20170105/
gefs.20170106/
gefs.20170107/
gefs.20170108/
gefs.20170109/
gefs.20170110/
gefs.20170111/
gefs.20170112/
gefs.20170113/
gefs.20170114/
gefs.20170115/
gefs.20170116/
gefs.20170117/
gefs.20170118/
gefs.20170119/
gefs.20170120/
gefs.20170121/
gefs.20170122/
gefs.20170123/
gefs.20170124/
gefs.20170125/
gefs.20170126/
gefs.20170127/
gefs.20170128/
gefs.20170129/
gefs.20170130/
gefs.20170131/
gefs.20170201/
gefs.20170202/
gefs.20170203/
gefs.20170204/
gefs.20170205/
gefs.20170206/
gefs.20170207/
gefs.20170208/
gefs.20170209/
gefs.20170210/
gefs.20170211/
gefs.20170212/
gefs.20170213/
gefs.20170214/
gefs.20170215/
gefs.20170216/
gefs.20170217/
gefs.20170218/
gefs.20170219/

keys = []
date = datetime.datetime(2017,11,28,hour=0)
prefix = date.strftime('gefs.%Y%m%d/00/')

   print(prefix)

response = client.list_objects_v2(Bucket=cfs_bucket, Prefix=prefix)
response_meta = response.get('ResponseMetadata')

if response_meta.get('HTTPStatusCode') == 200:
    contents = response.get('Contents')
    if contents == None:
        print("No objects are available for %s" % date.strftime('%B %d, %Y'))
    else:
        for obj in contents:
            keys.append(obj.get('Key'))
        print("There are %s objects available for %s\n--" % (len(keys), date.strftime('%B %d, %Y')))
        
        count = 0
        for k in keys:
            print(k)
            
            count += 1 
            if(count>1200):
                break # skip the for loop after reporting 50 values
else:
    print("There was an error with your request.")

gefs_filebase = 'gec'

sdate = date
ensnum = '00' # GEFS Ensemble number
tz = 't00z'
pgr = 'pgrb2af'
pga = 'pgrb2aanl'
fdate = sdate

flx_files = []
    
for i in range(cycles2get):
    
    sdatestr = sdate.strftime('%Y%m%d')
    fdatestr = fdate.strftime('%Y%m%d')
    
    flx_file_A   = gefs_filebase + ensnum + '.' + tz + '.' + pga
    flx_file_0   = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '000'
    flx_file_6   = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '006'
    flx_file_12  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '012'
    flx_file_18  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '018'
    flx_file_24  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '024'
    flx_file_30  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '030'
    flx_file_36  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '036'
    flx_file_42  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '042'
    flx_file_48  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '048'
    flx_file_54  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '054'
    flx_file_60  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '060'
    flx_file_66  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '066'
    flx_file_72  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '072'
    flx_file_78  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '078'
    flx_file_84  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '084'
    flx_file_90  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '090'
    flx_file_96  = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '096'
    flx_file_102 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '102'
    flx_file_108 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '108'
    flx_file_114 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '114'
    flx_file_120 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '120'
    flx_file_126 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '126'
    flx_file_132 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '132'
    flx_file_138 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '138'
    flx_file_144 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '144'
    flx_file_150 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '150'
    flx_file_156 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '156'
    flx_file_162 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '162'
    flx_file_168 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '168'
    flx_file_174 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '174'
    flx_file_180 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '180'
    flx_file_186 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '186'
    flx_file_192 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '192'
    flx_file_198 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '198'
    flx_file_204 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '204'
    flx_file_210 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '210'
    flx_file_216 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '216'
    flx_file_210 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '210'
    flx_file_216 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '216'
    flx_file_222 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '222'
    flx_file_228 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '228'
    flx_file_234 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '234'
    flx_file_240 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '240'
    flx_file_246 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '246'
    flx_file_252 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '252'
    flx_file_258 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '258'
    flx_file_264 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '264'
    flx_file_270 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '270'
    flx_file_276 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '276'
    flx_file_282 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '282'
    flx_file_288 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '288'
    flx_file_288 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '288'
    flx_file_294 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '294'
    flx_file_300 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '300'
    flx_file_306 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '306'
    flx_file_312 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '312'
    flx_file_318 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '318'
    flx_file_324 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '324'
    flx_file_330 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '330'
    flx_file_336 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '336'
    flx_file_342 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '342'
    flx_file_348 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '348'
    flx_file_354 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '354'
    flx_file_360 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '360'
    flx_file_366 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '366'
    flx_file_372 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '372'
    flx_file_378 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '378'
    flx_file_384 = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '384'
    
#   flx_file = gefs_filebase + ensnum + '.' + tz + '.' + pgr +  f'{(fcst)}'
    flx_files.append(flx_file)
    
    # Update the valid datetime by adding six hours from the valid time just used
    fdate = fdate + datetime.timedelta(hours=6)

print('Downloading '+flx_file_A)
client.download_file(cfs_bucket,prefix+flx_file_A,flx_file_A)
print('Downloading '+flx_file_0)
client.download_file(cfs_bucket,prefix+flx_file_0,flx_file_0)
print('Downloading '+flx_file_6)
client.download_file(cfs_bucket,prefix+flx_file_6,flx_file_6)
print('Downloading '+flx_file_12)
client.download_file(cfs_bucket,prefix+flx_file_12,flx_file_12)
print('Downloading '+flx_file_18)
client.download_file(cfs_bucket,prefix+flx_file_18,flx_file_18)
print('Downloading '+flx_file_24)
client.download_file(cfs_bucket,prefix+flx_file_24,flx_file_24)
print('Downloading '+flx_file_30)
client.download_file(cfs_bucket,prefix+flx_file_30,flx_file_30)
print('Downloading '+flx_file_36)
client.download_file(cfs_bucket,prefix+flx_file_36,flx_file_36)
print('Downloading '+flx_file_42)
client.download_file(cfs_bucket,prefix+flx_file_42,flx_file_42)
print('Downloading '+flx_file_48)
client.download_file(cfs_bucket,prefix+flx_file_48,flx_file_48)
print('Downloading '+flx_file_54)
client.download_file(cfs_bucket,prefix+flx_file_54,flx_file_54)
print('Downloading '+flx_file_60)
client.download_file(cfs_bucket,prefix+flx_file_60,flx_file_60)
print('Downloading '+flx_file_66)
client.download_file(cfs_bucket,prefix+flx_file_66,flx_file_66)
print('Downloading '+flx_file_72)
client.download_file(cfs_bucket,prefix+flx_file_72,flx_file_72)
print('Downloading '+flx_file_78)
client.download_file(cfs_bucket,prefix+flx_file_78,flx_file_78)
print('Downloading '+flx_file_84)
client.download_file(cfs_bucket,prefix+flx_file_84,flx_file_84)
print('Downloading '+flx_file_90)
client.download_file(cfs_bucket,prefix+flx_file_90,flx_file_90)
print('Downloading '+flx_file_96)
client.download_file(cfs_bucket,prefix+flx_file_96,flx_file_96)
print('Downloading '+flx_file_102)
client.download_file(cfs_bucket,prefix+flx_file_102,flx_file_102)
print('Downloading '+flx_file_108)
client.download_file(cfs_bucket,prefix+flx_file_108,flx_file_108)
print('Downloading '+flx_file_114)
client.download_file(cfs_bucket,prefix+flx_file_114,flx_file_114)
print('Downloading '+flx_file_120)
client.download_file(cfs_bucket,prefix+flx_file_120,flx_file_120)
print('Downloading '+flx_file_126)
client.download_file(cfs_bucket,prefix+flx_file_126,flx_file_126)
print('Downloading '+flx_file_132)
client.download_file(cfs_bucket,prefix+flx_file_132,flx_file_132)
print('Downloading '+flx_file_138)
client.download_file(cfs_bucket,prefix+flx_file_138,flx_file_138)
print('Downloading '+flx_file_144)
client.download_file(cfs_bucket,prefix+flx_file_144,flx_file_144)
print('Downloading '+flx_file_150)
client.download_file(cfs_bucket,prefix+flx_file_150,flx_file_150)
print('Downloading '+flx_file_156)
client.download_file(cfs_bucket,prefix+flx_file_156,flx_file_156)
print('Downloading '+flx_file_162)
client.download_file(cfs_bucket,prefix+flx_file_162,flx_file_162)
print('Downloading '+flx_file_168)
client.download_file(cfs_bucket,prefix+flx_file_168,flx_file_168)
print('Downloading '+flx_file_174)
client.download_file(cfs_bucket,prefix+flx_file_174,flx_file_174)
print('Downloading '+flx_file_180)
client.download_file(cfs_bucket,prefix+flx_file_180,flx_file_180)
print('Downloading '+flx_file_186)
client.download_file(cfs_bucket,prefix+flx_file_186,flx_file_186)
print('Downloading '+flx_file_192)
client.download_file(cfs_bucket,prefix+flx_file_192,flx_file_192)
print('Downloading '+flx_file_198)
client.download_file(cfs_bucket,prefix+flx_file_198,flx_file_198)
print('Downloading '+flx_file_204)
client.download_file(cfs_bucket,prefix+flx_file_204,flx_file_204)
print('Downloading '+flx_file_210)
client.download_file(cfs_bucket,prefix+flx_file_210,flx_file_210)
print('Downloading '+flx_file_216)
client.download_file(cfs_bucket,prefix+flx_file_216,flx_file_216)
print('Downloading '+flx_file_222)
client.download_file(cfs_bucket,prefix+flx_file_222,flx_file_222)
print('Downloading '+flx_file_228)
client.download_file(cfs_bucket,prefix+flx_file_228,flx_file_228)
print('Downloading '+flx_file_234)
client.download_file(cfs_bucket,prefix+flx_file_234,flx_file_234)
print('Downloading '+flx_file_240)
client.download_file(cfs_bucket,prefix+flx_file_240,flx_file_240)
print('Downloading '+flx_file_246)
client.download_file(cfs_bucket,prefix+flx_file_246,flx_file_246)
print('Downloading '+flx_file_252)
client.download_file(cfs_bucket,prefix+flx_file_252,flx_file_252)
print('Downloading '+flx_file_258)
client.download_file(cfs_bucket,prefix+flx_file_258,flx_file_258)
print('Downloading '+flx_file_264)
client.download_file(cfs_bucket,prefix+flx_file_264,flx_file_264)
print('Downloading '+flx_file_270)
client.download_file(cfs_bucket,prefix+flx_file_270,flx_file_270)
print('Downloading '+flx_file_276)
client.download_file(cfs_bucket,prefix+flx_file_276,flx_file_276)
print('Downloading '+flx_file_282)
client.download_file(cfs_bucket,prefix+flx_file_282,flx_file_282)
print('Downloading '+flx_file_288)
client.download_file(cfs_bucket,prefix+flx_file_288,flx_file_288)
print('Downloading '+flx_file_294)
client.download_file(cfs_bucket,prefix+flx_file_294,flx_file_294)
print('Downloading '+flx_file_300)
client.download_file(cfs_bucket,prefix+flx_file_300,flx_file_300)
print('Downloading '+flx_file_306)
client.download_file(cfs_bucket,prefix+flx_file_306,flx_file_306)
print('Downloading '+flx_file_312)
client.download_file(cfs_bucket,prefix+flx_file_312,flx_file_312)
print('Downloading '+flx_file_318)
client.download_file(cfs_bucket,prefix+flx_file_318,flx_file_318)
print('Downloading '+flx_file_324)
client.download_file(cfs_bucket,prefix+flx_file_324,flx_file_324)
print('Downloading '+flx_file_330)
client.download_file(cfs_bucket,prefix+flx_file_330,flx_file_330)
print('Downloading '+flx_file_336)
client.download_file(cfs_bucket,prefix+flx_file_336,flx_file_336)
print('Downloading '+flx_file_342)
client.download_file(cfs_bucket,prefix+flx_file_342,flx_file_342)
print('Downloading '+flx_file_348)
client.download_file(cfs_bucket,prefix+flx_file_348,flx_file_348)
print('Downloading '+flx_file_354)
client.download_file(cfs_bucket,prefix+flx_file_354,flx_file_354)
print('Downloading '+flx_file_360)
client.download_file(cfs_bucket,prefix+flx_file_360,flx_file_360)
print('Downloading '+flx_file_366)
client.download_file(cfs_bucket,prefix+flx_file_366,flx_file_366)
print('Downloading '+flx_file_372)
client.download_file(cfs_bucket,prefix+flx_file_372,flx_file_372)
print('Downloading '+flx_file_378)
client.download_file(cfs_bucket,prefix+flx_file_378,flx_file_378)
print('Downloading '+flx_file_384)
client.download_file(cfs_bucket,prefix+flx_file_384,flx_file_384)

The above Code works to download the specific files. But I would like to shorten this code so that it loops the forecast time from 000 to 384, how can I modify the code above? And in this case direct these files to a specific directory with the name of the BUCKET


Solution

  • You can use a for..loop structure:

    for i in range(cycles2get):
     sdatestr = sdate.strftime('%Y%m%d')
     fdatestr = fdate.strftime('%Y%m%d')
    
     for i in range(0,384,6):
      flx_file = gefs_filebase + ensnum + '.' + tz + '.' + pgr + '{:0>3}'.format(i)
    
      print('Downloading '+flx_file)
      client.download_file(cfs_bucket,prefix+flx_file,flx_file)
    

    This will work identically, except for the line of the pga variable, because it is different from the pattern.

    And this line

    flx_files.append(flx_file)
    

    isn't necessary.