Search code examples
cloud

Extraction of data


I am trying to extract monthly mean of a vertical profile variable mass mixing ratio(MMR) from a netcdf file for the duration 2000-2003 for a region (longitude -45W to 10E and latitude 30-40E)and a point (lat=37 deg, lon=-40) and also for entire geographical region. Task:

  1. Extract monthly MMR at 400-450, 450-500,500-550,550-600 hpa for the region and the point during 2000-2003. For instance, at 400 hPa get (latitude,longitude,MMR as raster layer)
  2. Extract monthly ozone MMR at between two pressure levels as raster format.
library(ncdf4)
nc = nc_open("E:/ERA5/ERA_2000_2003.nc")
lat=ncvar_get (nc,"latitude") ; lon=ncvar_get(nc,"longitude")
t = ncvar_get(nc, "time")
pres = ncvar_get(nc, "level")
length(lat); length(lon);length(time);length(pres)
t <- as.POSIXct("1900-01-01 00:00")+as.difftime(t,units="hours")
x <- c(400, 450, 500, 550, 600) 

#I want ozone for longitude -45W to 10E and latitude (30-40E) at pressure level 500 
#for all years (12months*4 yrs= 48 months) 

oz = ncvar_get(nc,'o3',start=c(1,1,1,1),count=c(100,-1,x[3],-1))
lonIdx <- which( lon >= -40 & lon <= 10.00);
latIdx <- which( lat >= 30.00 & lat <= 45.00)

I get the following error:

1. Error: cannot allocate vector of size 558.5 Mb
oz = ncvar_get(nc,'o3',start=c(lonIdx,latIdx,1,1),count=c(length(lonIdx),length(latIdx),x[1),-1))
# and sometimes error messages like
2.Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset,  : 
 Error: 
variable has 4 dims, but start has 264 entries.  They must match!

3. Error in ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, : Error: variable has 4 dims, but start has 324 entries.  They must match!
Traceback:

1. ncvar_get(nc, "o3", start = c(lonIdx, latIdx, 1, 1), count = c(length(lonIdx), 
 .     length(latIdx), -1, -1))
2. ncvar_get_inner(ncid2use, varid2use, nc$var[[li]]$missval, addOffset, 
 .     scaleFact, start = start, count = count, verbose = verbose, 
 .     signedbyte = signedbyte, collapse_degen = collapse_degen, 
 .     raw_datavals = raw_datavals)
3. stop(paste("Error: variable has", ndims, "dims, but start has", 
 .     length(start), "entries.  They must match!"))

Any help will be much appreciated.

Thank You

Data Description


Solution

  • Two options to extract the subset of interest from the netcdf object are 1) using the ncvar_get function with start and count defined, or 2) using ncvar_get to extract the entire array and then using standard base R array indexing to subset. I'm not exactly sure how to calculate MMR, so I am showing how to extract o3 values and do some basic summarization. If this doesn't achieve the two tasks specifically, hopefully it is has enough hints to get you started.

    Read in the data

    library(ncdf4)
    
    # read file 
    nc <- nc_open("ERA5_2000_03.nc")
    
    # extract variable levels
    lat <- ncvar_get (nc, "latitude")
    lon <- ncvar_get(nc, "longitude")
    t <- ncvar_get(nc, "time")
    t <- as.POSIXct("1900-01-01 00:00") + as.difftime(t, units = "hours")
    pres <- ncvar_get(nc, "level")
    

    Define the indices of the region of interest

    lonIdx <- which(lon >= -40 & lon <= 10)
    latIdx <- which(lat >= 30 & lat <= 45)
    presIdx <- which(pres >= 475 & pres <= 525) # subset to only 500
    tIdx <- which(t == t) # i.e., no subset of t
    

    Extract o3 for all month and pressure levels

    # Option 1 -- use ncvar_get()
    extract1 <- ncvar_get(nc,
                          'o3',
                          start = c(lonIdx[1],latIdx[1], presIdx[1], tIdx[1]),
                          count = c(length(lonIdx),length(latIdx),length(presIdx), length(tIdx)))
    
    
    # Option 2 -- subset using array indexing
    o3 <- ncvar_get(nc,'o3')
    extract2 <- o3[lonIdx, latIdx, presIdx, ]
    
    
    # Check the outputs are identical
    identical(extract1, extract2)
    #>TRUE
    

    Optionally, you can also name the array dimensions, which is helpful for making results intuitive

    # Name the array dimensions
    dimnames(o3) <- list('lon' = lon, 
                         'lat' = lat,
                         'pres' = pres,
                         'time' = as.character(as.Date(t)))
    
    extract3 <- o3[lonIdx,latIdx, , ]
    
    # Monthly mean for each level of pres across the entire region of interest
    apply(extract3, c('pres', 'time'), mean)
    #       time
    # pres    2000-01-01   2000-02-01   2000-03-01
    #   400 8.627874e-08 8.303606e-08 9.857230e-08
    #   450 7.911534e-08 7.790138e-08 9.043681e-08
    #   500 7.421088e-08 7.398450e-08 8.510393e-08
    #   550 7.074213e-08 7.102885e-08 8.128490e-08
    #   600 6.817185e-08 6.840323e-08 7.853805e-08
    #   ...
    
    # o3 levels at a specific long/lat
    o3['-40', '37', ,]
    #       time
    # pres    2000-01-01   2000-02-01   2000-03-01
    #   400 8.160814e-08 8.172732e-08 1.006738e-07
    #   450 7.688993e-08 7.681632e-08 9.276743e-08
    #   500 7.274836e-08 7.514602e-08 8.791778e-08
    #   550 6.989851e-08 7.359140e-08 8.330298e-08
    #   600 6.867163e-08 7.110260e-08 8.087377e-08