What is the most accurate way of creating a raster from netcdf in R?

I am processing netCDF data for multiple years. The netCDF is for air pollutant data, and the latitude and longitude are provided as separate variables, not as part of the original grid.

LINK TO DATE: Sample Netcdf

These netCDF files provide Level 2 Nitrogen Dioxide data and they are downloaded from NASA Earthdata portal. The satellite is Sentinel-5P and the instrument is TROPOMI.

So when processing this data, you have to create variables for NO2, latitude and longitude. I am trying to create raster layers, and then save them as GeoTIFF files for my research.

The problem here is related to the fact that I don't know how best to create these rasters. The latitude and longitude is not equally spaced throughout the dataset, and I haven't figured out a way to accurately create these images. I created a model grid using the number of rows and columns provided by the netCDF files. In the list of variables this is called the scanline and ground_pixel, but when I plot it the cells in the final image don't look right.

This is how I upload the data:

## Open the netcdf
  ncname <- no2files$filename[m]
  ncfname <- paste(ncname, sep = "")
  nc <- nc_open(ncfname)
  
  ## Get the necessary variables. 
  no2tc <-ncvar_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column")
  lat <- ncvar_get(nc, "PRODUCT/latitude")
  lon <- ncvar_get(nc, "PRODUCT/longitude")
  qa <- ncvar_get(nc, "PRODUCT/qa_value")
  
  fillvalue = ncatt_get(nc, "DETAILED_RESULTS/nitrogendioxide_total_column",
                  "_FillValue")
  mfactor <- ncatt_get(nc, "DETAILED_RESULTS/nitrogendioxide_total_column",
                  "multiplication_factor_to_convert_to_molecules_percm2")
  
  fillvalue_qa = ncatt_get(nc,"PRODUCT/qa_value",
                  "_FillValue")
  
  
  
  no2tc[no2tc == fillvalue$value] <- NA
  no2tc <- no2tc * mfactor$value
  
  qa[qa == fillvalue_qa$value] <- NA
  
  nc_close(nc)
  # rm(ncfname)
  
  no2vec <- as.vector(no2tc)
  latvec <- as.vector(lat)
  lonvec <- as.vector(lon)
  qavec <- as.vector(qa)
  
  dfsat <- data.frame(no2vec, lonvec, latvec)
  dfqa <- data.frame(qavec,lonvec,latvec)
  
  colnames(dfsat) <- c('z', 'x', 'y')
  colnames(dfqa) <- c('z', 'x', 'y')
  
  df <- rbind(df, dfsat)
  dfqa <- rbind(df,dfqa)
  rm(lat,lon,no2tc,qa,latvec,lonvec,no2vec,qavec)

This is how I currently create the raster:

  ## Create the raster. The ncol = 3245 and now = 450 are from the scanline and ground_pixel variables. 
  e <- extent(-180,180,-90,90)
  r <- raster(e, ncol = 3245, nrow = 450)
  
  xx <- rasterize(df[, 2:3], r, df[, 1], fun = mean)
  qa_raster <- rasterize(dfqa[, 2:3], r, df[, 1], fun = mean)
  
  crs(xx) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
  crs(qa_raster) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
  
  ## Crop and plot the raster
  ## change shapefile coordinate system 
  # border <- spTransform(ontario, crs(xx))
  aoi <- spTransform(ontario_buffer, crs(xx))
  
  ## Mask values with qa < 0.5 (this is the recommended value)
  
  xx[qa_raster < 0.5 & xx < 0] <- NA

  
  ## This is the final plot
  plot_tif <- crop(xx, extent(aoi))

  ### Use this if you want to view the plot. 
  mask_tif <- mask(plot_tif,aoi)
  # plot(mask_tif)
  # final <- plot(border,add=TRUE)
  
  ## Plot the raster 
  filename <- paste(i,".tif",sep="")
  writeRaster(mask_tif,filename = filename,"GTiff", overwrite=TRUE)

The end results looks something like this:

Then I tried another method I found online, but you have to set a resolution. I COULD do this, but I just want to plot the cells AS THEY ARE, without any modification.

ncfname <- "S5P_OFFL_L2__NO2____20200107T173517_20200107T191647_11582_01_010302_20200109T103930.nc"

nc <- ncdf4::nc_open(ncfname)

mfactor = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","multiplication_factor_to_convert_to_molecules_percm2")
fillvalue = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","_FillValue")
my_unit = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column","units")
my_product_name = ncdf4::ncatt_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column", "long_name")

mfactor <- mfactor$value

fillvalue <- fillvalue$value

vals <- ncdf4::ncvar_get(nc, "PRODUCT/nitrogendioxide_tropospheric_column")
lat <- ncdf4::ncvar_get(nc, "PRODUCT/latitude")
lon <- ncdf4::ncvar_get(nc, "PRODUCT/longitude")
vals[vals == fillvalue] <- NA

vals_df = NULL

vals_df <- rbind(vals_df, data.frame(lat = as.vector(lat), lon = as.vector(lon), vals = as.vector(vals)))

pts <- vals_df

sp::coordinates(pts) <- ~lon + lat

my_projection <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

sp::proj4string(pts) <- sp::CRS(my_projection)

my_aoi <- ontario

crs_test <- raster::compareCRS(pts, my_aoi)

my_aoi <- sp::spTransform(my_aoi, CRS = as.character(raster::crs(pts)))


p <- methods::as(raster::extent(my_aoi), "SpatialPolygons")

sp::proj4string(p) <- sp::CRS(my_projection)

pts <- raster::crop(pts, p)

extent_distance_vertical <- geosphere::distm(c(raster::extent(pts)[1],                  raster::extent(pts)[3]), c(raster::extent(pts)[1], raster::extent(pts)[4]), 
                                               fun = geosphere::distHaversine)
  
vertical_mid_distance <- (raster::extent(pts)[4] - raster::extent(pts)[3])/2

lat_mid <- raster::extent(pts)[3] + vertical_mid_distance

horizontal_distance <- raster::extent(pts)[2] - raster::extent(pts)[1]
  

if (horizontal_distance > 180) {
  one_degree_horizontal_distance <- geosphere::distm(c(1,
                                                       lat_mid), c(2, lat_mid), fun = geosphere::distHaversine)
  extent_distance_horizontal <- one_degree_horizontal_distance *
    horizontal_distance
} else {
  extent_distance_horizontal <-
    geosphere::distm(c(raster::extent(pts)[1],
                       lat_mid),
                     c(raster::extent(pts)[2], lat_mid),
                     fun = geosphere::distHaversine)
}


my_res <- 20000
ncol_rast <- as.integer(extent_distance_horizontal/my_res)
nrow_rast <- as.integer(extent_distance_vertical/my_res)
print(paste0("Create raster file from points"))
rast <- raster::raster(nrows = nrow_rast, ncols = ncol_rast, 
                         crs = as.character(raster::crs(pts)), ext = raster::extent(pts), 
                         vals = NULL)
final <- raster::rasterize(pts, rast, pts$vals, fun = mean)

final <- raster::mask(final, my_aoi)

sp::plot(final)

How can I create these raster layers accurately? Thanks!

Solution

With the example file

f <- "S5P_OFFL_L2__NO2____20200107T173517_20200107T191647_11582_01_010302_20200109T103930.nc"

You can do

library(terra)
r <- rast(f, paste0("/PRODUCT/", c("longitude", "latitude", "nitrogendioxide_tropospheric_column")))
r
#class       : SpatRaster 
#dimensions  : 4172, 450, 3  (nrow, ncol, nlyr)
#resolution  : 1, 1  (x, y)
#extent      : -0.5, 449.5, -0.5, 4171.5  (xmin, xmax, ymin, ymax)
#coord. ref. :  
#sources     : longitude  
#              latitude  
#              nitrogendioxide_tropospheric_column  
#varnames    : longitude (pixel center longitude) 
#              latitude (pixel center latitude) 
#              nitrogendioxide_tropospheric_column (Tropospheric vertical column of nitrogen dioxide) 
#names       :    longitude,      latitude, nitrogendi~ric_column 
#unit        : degrees_east, degrees_north,               mol m-2 
#time        : 2020-01-07 

plot(r, nr=1)

The plot illustrates that the data are not organized as regular raster data (it they were, there would be a N-S gradient for latitude, and a E-W gradient for longitude). Also see plot(r$longitude, r$latitude). You can treat them as points in stead:

dp <- as.data.frame(r)
p <- vect(dp, geom=c("longitude", "latitude"))

Plotting takes a while, as there are > 1.5 million points, so I take a sample

plot(p[sample(nrow(p), 10000)], "nitrogendioxide_tropospheric_column")

If you want your data organized as a regular raster, you can use rasterize

x <- rast(res=1/6)
x <- rasterize(p, x, "nitrogendioxide_tropospheric_column", fun=mean)
plot(x > 0)

And you can get Ontario like this

can <- vect(raster::getData("GADM", country="CAN", level=1))
ontario <- can[can$NAME_1=="Ontario", ]

x <- crop(x, ontario)
x <- mask(x, ontario)
plot(x)