Search code examples
pythoninterpolationnetcdf4

How to insert TXT data into netcdf in python


I'm new to python, so I'm sorry if I make any beginner mistakes. I'm trying to insert my text file into a netcdf.

I'm using the netcdf4 package and follow the example in this website: https://pyhogs.github.io/intro_netcdf4.html and I managed to reproduce the example (the example uses random data):

enter image description here

Problem: My text file contains: Lon, Lat , SST and when I try to insert this values, the netcdf file is created, however, it's not correct:

enter image description here

In my code I'm trying to apply a Barnes interpolation (var) or a griddata interpolation (interp). I think this is what has to enter in my variable netcdf (maybe I'm wrong).

Here my code so far:

import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)

# Open file
arq_sst = np.loadtxt(fname = "C:\\Users\\Rodrigo\\XYZ.txt", skiprows=0, delimiter=",")

# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]

# Atmosphere level
z = [1]   

#shapping grid
x_1, y_1 = np.meshgrid(lonf, latf)

#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')

#Or

#Another interpolation
interp = griddata((lonf, latf), sstf, (lonf[None,:], latf[:,None]), method='nearest')

#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4') 

#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')

#Specifying dimensions
tempgrp.createDimension('lon', len(lonf))
tempgrp.createDimension('lat', len(latf))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)

#Building variables
longitude = tempgrp.createVariable('Longitude', 'f4', 'lon')
latitude = tempgrp.createVariable('Latitude', 'f4', 'lat')  
levels = tempgrp.createVariable('Levels', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f4', ('time', 'lon', 'lat', 'z'))
time = tempgrp.createVariable('Time', 'i4', 'time')


#Passing data into variables
longitude[:] = lonf 
latitude[:] = latf
levels[:] = z
sst[0,:,:,:] = var

#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num

#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")

#Add local attributes to variable instances
longitude.units = 'degrees east'
latitude.units = 'degrees north'
time.units = 'days since Jan 01, 0001'
sst.units = 'degrees'
levels.units = 'meters'
sst.warning = 'This data is not real!'

#Closing the dataset
f.close()

Here is my text data(Header: Longitude,Latitude,SST). I decreased the number of lines to fit here:

-42.1870,-22.9940,22.4844
-37.4000,-29.9700,20.2000
-37.4200,-29.9600,20.1000
-39.1800,-30.0000,20.5000
-39.2100,-30.0000,20.4000
-39.2300,-30.0000,20.4000
-39.2200,-29.9800,20.4000
-39.2300,-29.9900,20.4000
-39.2000,-29.9800,20.4000
-39.1900,-30.0000,20.5000
-39.2800,-29.9900,20.5000
-39.2700,-29.9900,20.4000
-39.3400,-29.9700,20.5000
-39.3300,-29.9600,20.4000
-39.3100,-29.9600,20.4000
-39.3600,-29.9700,20.6000
-39.3500,-29.9900,20.4000
-39.3900,-29.9900,20.4000
-38.4600,-30.0000,20.3000
-38.4900,-29.9800,20.7000
-37.4800,-29.8800,20.4000
-37.5000,-29.8600,20.3000
-37.4600,-29.8900,20.3000
-41.3800,-29.9900,20.0000
-41.4000,-29.9900,20.1000
-41.0400,-29.9300,20.1000
-41.0200,-29.9200,20.2000
-41.0600,-29.9300,20.1000
-41.1000,-29.9400,19.9000
-41.0900,-29.9600,19.9000
-41.1100,-29.9800,19.9000
-41.1100,-29.9600,20.0000
-41.1200,-29.9400,20.0000
-41.1400,-29.9400,20.0000
-41.1600,-29.9500,20.1000
-41.1700,-29.9500,20.1000
-41.1900,-29.9700,20.0000
-41.1900,-29.9500,20.1000
-40.6800,-29.9900,20.1000
-40.7400,-29.9600,20.1000
-40.7700,-29.9700,20.1000
-40.7800,-29.9700,20.1000
-40.7100,-29.9000,20.1000
-40.7600,-29.9100,20.1000
-40.7400,-29.9000,20.1000
-40.7200,-29.9000,20.2000
-40.7600,-29.9200,20.1000
-40.7500,-29.9400,20.1000
-40.7800,-29.9100,20.2000
-40.8000,-29.9100,20.2000
-40.8100,-29.9300,20.1000
-40.8200,-29.9200,20.2000
-40.7900,-29.9300,20.2000
-40.7900,-29.9500,20.1000
-40.7700,-29.9300,20.1000
-40.8400,-29.9600,20.2000
-40.8600,-29.9600,20.3000
-40.9000,-29.9100,20.1000
-40.9100,-29.9100,20.0000
-40.3900,-29.9400,20.0000
-40.3900,-29.9200,20.0000
-40.4100,-29.9200,20.0000
-40.4100,-29.9400,20.0000
-40.3800,-29.9000,20.0000
-40.3800,-29.9200,20.0000
-40.4000,-29.9000,20.1000
-40.3700,-29.9600,20.0000
-40.3600,-29.9700,20.0000
-40.3800,-29.9800,20.0000
-40.4200,-29.9000,20.0000
-40.4300,-29.9300,20.1000
-40.4500,-29.9300,20.1000
-40.4700,-29.9300,20.0000
-40.4400,-29.9100,20.0000
-40.4500,-29.9100,20.0000
-40.4700,-29.9100,20.0000
-40.5000,-29.9400,19.9000
-40.5300,-29.9200,20.1000
-40.5100,-29.9200,20.1000
-40.4900,-29.9400,19.9000
-40.4900,-29.9200,20.0000
-40.6200,-30.0000,20.2000
-40.6000,-30.0000,20.1000
-40.6800,-29.9900,20.1000
-40.4000,-29.8400,20.1000
-40.4800,-29.8700,20.1000
-40.4500,-29.8300,20.3000
-40.4600,-29.8900,20.1000
-40.4600,-29.8700,20.0000
-40.5000,-29.8800,20.3000
-40.4900,-29.9000,20.1000
-40.5100,-29.9000,20.3000
-40.5300,-29.9000,20.2000
-40.5600,-29.8500,20.3000
-40.5800,-29.8500,20.3000
-40.6300,-29.9000,19.9000
-40.7100,-29.9000,20.1000
-40.0500,-29.9600,20.3000
-40.1100,-29.9800,20.2000
-40.1100,-30.0000,20.2000

Can anybody help me?


Solution

  • So there are a couple of things. First of all, you are not providing the correct equally spaced dimensions for the interpolation and the resulting netCDF file. This is how I created the space for the meshgrid, (I chose a linear space of 100 but depending on what resolution you want your data you may want to change this to whatever suits your purpose):

    spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
    spacing_y = np.linspace(np.min(latf),np.max(latf),100)
    x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
    

    Then doing the interpolation as follows:

    #Barnes Interpolation
    var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
    
    #Or
    
    #Another interpolation
    interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
    

    Finally you will want to add the linear spaces as the latitude and longitude dimensions since the interpolated data is being broadcasted to them:

    #Passing data into variables
    longitude[:] = x_1[0]
    latitude[:] = y_1[:,0]
    

    Another note is that for Panoply or other software to show your data in a Geo2D format, you will want to name your lat lon dimensions the same as your variables. The full code is below:

    import os
    import numpy as np
    from scipy.interpolate import griddata
    import matplotlib.pyplot as plt
    import numpy.ma as ma
    import netCDF4 as nc4
    from numpy.random import uniform, seed
    from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)
    
    # Open file
    arq_sst = np.loadtxt(fname = r"C:\Users\Rodrigo\XYZ.txt", skiprows=0, delimiter=",")
    
    # Getting the Arrays
    lonf = arq_sst[:, 0]
    latf = arq_sst[:, 1]
    sstf = arq_sst[:, 2]
    
    # Atmosphere level
    z = [1]
    
    #shapping grid
    spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
    spacing_y = np.linspace(np.min(latf),np.max(latf),100)
    x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
    
    #Barnes Interpolation
    var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
    
    #Or
    
    #Another interpolation
    interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
    
    #Open netcdf to write
    f = nc4.Dataset('file_created.nc','w', format='NETCDF4')
    
    #Creating group in netcdf file
    tempgrp = f.createGroup('SAT_DATA')
    
    #Specifying dimensions
    tempgrp.createDimension('longitude', len(spacing_x))
    tempgrp.createDimension('latitude', len(spacing_y))
    tempgrp.createDimension('z', len(z))
    tempgrp.createDimension('time', None)
    
    #Building variables
    longitude = tempgrp.createVariable('longitude', 'f8', 'longitude', fill_value=np.nan)
    latitude = tempgrp.createVariable('latitude', 'f8', 'latitude', fill_value=np.nan)
    levels = tempgrp.createVariable('z', 'i4', 'z')
    sst = tempgrp.createVariable('sst', 'f8', ('time','longitude','latitude','z'), fill_value=np.nan)
    time = tempgrp.createVariable('time', 'f8', 'time', fill_value=np.nan)
    
    #Passing data into variables
    longitude[:] = x_1[0]
    latitude[:] = y_1[:,0]
    levels[:] = z
    sst[0,:,:,:] = var
    
    #get time in days since Jan 01,01
    from datetime import datetime
    today = datetime.today()
    time_num = today.toordinal()
    time[0] = time_num
    
    #Add global attributes
    f.description = "XYZ dataset containing one group"
    f.history = "Created " + today.strftime("%d/%m/%y")
    
    #Add local attributes to variable instances
    longitude.units = 'degrees_east'
    longitude.point_spacing = "even";
    longitude._CoordinateAxisType = "Lon";
    latitude.units = 'degrees_north'
    latitude.point_spacing = "even";
    latitude._CoordinateAxisType = "Lat";
    time.units = "days since Jan 01, 0001";
    time._ChunkSizes = [1]
    sst.long_name = "SEA SURFACE TEMPERATURE"
    sst.history = "From coads_climatology"
    sst.units = "Deg C";
    sst.missing_value = -1.0
    sst._ChunkSizes = [1, 100, 100]
    levels.units = 'meters'
    sst.warning = 'This data is not real!'
    
    #Closing the dataset
    f.close()
    

    Let me know if you have any questions.