I'm using xarray's very cool pydap back-end (http://xarray.pydata.org/en/stable/io.html#opendap) to read data stored via OPeNDAP at IRI:
import xarray as xr
remote_data = xr.open_dataarray('http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods')
print(remote_data)
#<xarray.DataArray 'zg' (P: 2, S: 6569, M: 3, L: 45, Y: 181, X: 360)>
#[115569730800 values with dtype=float32]
#Coordinates:
# * L (L) timedelta64[ns] 0 days 12:00:00 1 days 12:00:00 ...
# * Y (Y) float32 -90.0 -89.0 -88.0 -87.0 -86.0 -85.0 -84.0 -83.0 ...
# * S (S) datetime64[ns] 1999-01-07 1999-01-08 1999-01-09 1999-01-10 ...
# * M (M) float32 1.0 2.0 3.0
# * X (X) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 ...
# * P (P) int32 500 200
#Attributes:
# level_type: pressure level
# standard_name: geopotential_height
# long_name: Geopotential Height
# units: m
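For scale, that repr implies a very large array, so it helps to sanity-check the size before computing anything. A quick check (my own addition, not from the original output):
print(remote_data.nbytes / 1e9)
# ~462 GB for the full array (115569730800 float32 values x 4 bytes);
# selecting one of the two pressure levels halves that to ~231 GB.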
For reference, it's sub-seasonal forecast data where L is the lead time (45-day forecasts), S is the initialization date, and M is the ensemble member.
I would like to take an ensemble mean, and I'm only interested in the 500 hPa level. However, it crashes out and gives RuntimeError: NetCDF: Access failure:
da = remote_data.sel(P=500)
da_ensmean = da.mean(dim='M')
RuntimeError Traceback (most recent call last)
<ipython-input-46-eca488e9def5> in <module>()
1 remote_data = xr.open_dataarray('http://iridl.ldeo.columbia.edu/SOURCES/.Models' '/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods')
2 da = remote_data.sel(P=500)
----> 3 da_ensmean = da.mean(dim='M')
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/common.py in wrapped_func(self, dim, axis, skipna, keep_attrs, **kwargs)
20 keep_attrs=False, **kwargs):
21 return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
---> 22 skipna=skipna, allow_lazy=True, **kwargs)
23 else:
24 def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/dataarray.py in reduce(self, func, dim, axis, keep_attrs, **kwargs)
1359 summarized data and the indicated dimension(s) removed.
1360 """
-> 1361 var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
1362 return self._replace_maybe_drop_dims(var)
1363
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in reduce(self, func, dim, axis, keep_attrs, allow_lazy, **kwargs)
1264 if dim is not None:
1265 axis = self.get_axis_num(dim)
-> 1266 data = func(self.data if allow_lazy else self.values,
1267 axis=axis, **kwargs)
1268
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in data(self)
293 return self._data
294 else:
--> 295 return self.values
296
297 @data.setter
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in values(self)
385 def values(self):
386 """The variable's data as a numpy.ndarray"""
--> 387 return _as_array_or_item(self._data)
388
389 @values.setter
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in _as_array_or_item(data)
209 TODO: remove this (replace with np.asarray) once these issues are fixed
210 """
--> 211 data = np.asarray(data)
212 if data.ndim == 0:
213 if data.dtype.kind == 'M':
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
622
623 def __array__(self, dtype=None):
--> 624 self._ensure_cached()
625 return np.asarray(self.array, dtype=dtype)
626
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in _ensure_cached(self)
619 def _ensure_cached(self):
620 if not isinstance(self.array, NumpyIndexingAdapter):
--> 621 self.array = NumpyIndexingAdapter(np.asarray(self.array))
622
623 def __array__(self, dtype=None):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
600
601 def __array__(self, dtype=None):
--> 602 return np.asarray(self.array, dtype=dtype)
603
604 def __getitem__(self, key):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
506 def __array__(self, dtype=None):
507 array = as_indexable(self.array)
--> 508 return np.asarray(array[self.key], dtype=None)
509
510 def transpose(self, order):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/coding/variables.py in __getitem__(self, key)
64
65 def __getitem__(self, key):
---> 66 return self.func(self.array[key])
67
68 def __repr__(self):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/coding/variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
133 for fv in encoded_fill_values:
134 condition |= data == fv
--> 135 data = np.asarray(data, dtype=dtype)
136 return np.where(condition, decoded_fill_value, data)
137
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
506 def __array__(self, dtype=None):
507 array = as_indexable(self.array)
--> 508 return np.asarray(array[self.key], dtype=None)
509
510 def transpose(self, order):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/backends/netCDF4_.py in __getitem__(self, key)
63 with self.datastore.ensure_open(autoclose=True):
64 try:
---> 65 array = getitem(self.get_array(), key.tuple)
66 except IndexError:
67 # Catch IndexError in netCDF4 and return a more informative
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/backends/common.py in robust_getitem(array, key, catch, max_retries, initial_delay)
114 for n in range(max_retries + 1):
115 try:
--> 116 return array[key]
117 except catch:
118 if n == max_retries:
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable._get()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
RuntimeError: NetCDF: Access failure
Breaking down the calculation removes the RuntimeError. I guess it was just too hefty a calculation with all the start times. It shouldn't be too difficult to put this in a loop over S (see the sketch after the printed output below):
# Select the first pressure level (500 hPa) and the first start time
da = remote_data.isel(P=0, S=0)
da_ensmean = da.mean(dim='M')
print(da_ensmean)
<xarray.DataArray 'zg' (L: 45, Y: 181, X: 360)>
array([[[5231.1445, 5231.1445, ..., 5231.1445, 5231.1445],
[5231.1445, 5231.1445, ..., 5231.1445, 5231.1445],
...,
[5056.2383, 5056.2383, ..., 5056.2383, 5056.2383],
[5056.2383, 5056.2383, ..., 5056.2383, 5056.2383]],
[[5211.346 , 5211.346 , ..., 5211.346 , 5211.346 ],
[5211.346 , 5211.346 , ..., 5211.346 , 5211.346 ],
...,
[5082.062 , 5082.062 , ..., 5082.062 , 5082.062 ],
[5082.062 , 5082.062 , ..., 5082.062 , 5082.062 ]],
...,
[[5108.8247, 5108.8247, ..., 5108.8247, 5108.8247],
[5108.8247, 5108.8247, ..., 5108.8247, 5108.8247],
...,
[5154.2173, 5154.2173, ..., 5154.2173, 5154.2173],
[5154.2173, 5154.2173, ..., 5154.2173, 5154.2173]],
[[5106.4893, 5106.4893, ..., 5106.4893, 5106.4893],
[5106.4893, 5106.4893, ..., 5106.4893, 5106.4893],
...,
[5226.0063, 5226.0063, ..., 5226.0063, 5226.0063],
[5226.0063, 5226.0063, ..., 5226.0063, 5226.0063]]], dtype=float32)
Coordinates:
* L (L) timedelta64[ns] 0 days 12:00:00 1 days 12:00:00 ...
* Y (Y) float32 -90.0 -89.0 -88.0 -87.0 -86.0 -85.0 -84.0 -83.0 ...
S datetime64[ns] 1999-01-07
* X (X) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 ...
P int32 500
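A minimal sketch of that loop over S, assuming the goal is the 500 hPa ensemble mean for every start time, stitched back together along S (the means list and the concat step are my own, not from the original post):
import xarray as xr

url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods'
remote_data = xr.open_dataarray(url)

# Request one start time at a time so each OPeNDAP transfer stays small,
# then concatenate the per-start-time ensemble means along S.
means = []
for s in range(remote_data.sizes['S']):
    da = remote_data.isel(P=0, S=s)   # P=0 is the 500 hPa level
    means.append(da.mean(dim='M'))
da_ensmean = xr.concat(means, dim='S')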
This is a good use case for chunking with dask, e.g.,
import xarray as xr
url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods'
# Small chunks along S and L keep each remote read to a manageable size
remote_data = xr.open_dataarray(url, chunks={'S': 1, 'L': 1})
da = remote_data.sel(P=500)
da_ensmean = da.mean(dim='M')
This version will access the data server in parallel, using many smaller chunks. It will still be slow to download 231 GB of data, but your request will have much better odds of success.
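Note that the mean above is still lazy; with dask, nothing is downloaded until you compute the result or write it out. A hypothetical follow-up (the output filename is my own):
# Nothing has been downloaded yet; trigger the parallel computation explicitly:
da_ensmean = da_ensmean.compute()

# Alternatively, stream the result straight to a local netCDF file:
# da_ensmean.to_netcdf('zg_500_ensmean.nc')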