I have several nc files, I need to open them all. Filter the desired period and write the result of all in a single txt file.
The nc files correspond to a month (Jan, Feb, Mar...) and have four variables (temperature, dew point, u and v).
I need to assemble a table, with all the variables side by side for a specific period. For example, from January to October. The first column being temperature, the second dew point, third u and lastly v.
from netCDF4 import MFDataset
import pandas as pd
import xarray as xr
import csv
import tempfile
ds=xr.open_mfdataset('/home/milena/Documentos/dados_obs_haroldo/media_horaria/MEDIA_HORARIA_*.nc')
lat = ds.variables['lat'][:]
lon = ds.variables['lon'][:]
t2mj = ds.variables['t2mj'][:]
td2mj = ds.variables['td2mj'][:]
u10mj = ds.variables['u10mj'][:]
v10mj = ds.variables['v10mj'][:]
#Brasilia
t2mj_txt=ds.t2mj.isel(lat=153, lon=117).to_dataframe().to_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/t2mj.csv')
td2mj_txt=ds.td2mj.isel(lat=153, lon=117).to_dataframe().to_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/td2mj.csv')
u10mj_txt=ds.u10mj.isel(lat=153, lon=117).to_dataframe().to_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/u10mj.csv')
v10mj_txt=ds.v10mj.isel(lat=153, lon=117).to_dataframe().to_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/v10mj.csv')
#print(t2mj_txt)
#opem csv
t2mj_csv = pd.read_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/t2mj.csv', skipinitialspace=True)
td2mj_csv = pd.read_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/td2mj.csv', skipinitialspace=True)
u10mj_csv = pd.read_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/u10mj.csv', skipinitialspace=True)
v10mj_csv = pd.read_csv('/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/v10mj.csv', skipinitialspace=True)
#print(t2mj_csv)
#filter desired period
t2mj_date=t2mj_csv[(t2mj_csv['time'])<"2022-12-01"]
td2mj_date=td2mj_csv[(td2mj_csv['time'])<"2022-12-01"]
u10mj_date=u10mj_csv[(u10mj_csv['time'])<"2022-12-01"]
v10mj_date=v10mj_csv[(v10mj_csv['time'])<"2022-12-01"]
arquivo = open("/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/t2mj_filter.txt", "w")
arquivo.write(t2mj_date['t2mj'].to_string())
arquivo.close()
arquivo2 = open("/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/td2mj_filter.txt", "w")
arquivo2.write(td2mj_date['td2mj'].to_string())
arquivo2.close()
arquivo3 = open("/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/u10mj_filter.txt", "w")
arquivo3.write(u10mj_date['u10mj'].to_string())
arquivo3.close()
arquivo4 = open("/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/v10mj_filter.txt", "w")
arquivo4.write(v10mj_date['v10mj'].to_string())
arquivo4.close()
file_list=['/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/t2mj_filter.txt', '/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/td2mj_filter.txt', '/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/u10mj_filter.txt', '/home/milena/Documentos/dados_obs_haroldo/media_horaria/csv/v10mj_filter.txt']
dfe = pd.DataFrame()
for file in file_list:
temp_dfe = pd.read_csv(file, header=None, names=[file[:-4]])
dfe = pd.concat([dfe, temp_dfe], axis=1)
arquivo5 = open("/home/milena/Documentos/dados_obs_haroldo/media_horaria/teste.txt", "w")
arquivo5.write(dfe.to_string())
arquivo5.close()
my result looks like this:
I would like it to look like this:
The beauty of Xarray is that you don't need to do all this extra IO to make this problem work. Just use xarray's selection logic then convert the dataset to a dataframe before writing a multi-column text file.
ds = xr.open_mfdataset(...)
# select point, select time slice, drop lat/lon variables
ds_point = ds.isel(lat=153, lon=117).sel(time=slice(None, '2022-12-01')).drop(['lat', 'lon'])
# convert to dataframe
df_point = ds_point.to_dataframe()
# order columns then write to text file with tab separator
variables = ['t2mj', 'td2mj', 'u10mj', 'v10mj']
df_point[variables].to_csv(..., sep='\t')