Search code examples
rasciir-grid

Read ascii grid data into matrix format


I am trying to read a file in the ASCII TOMS grid format into R. I have been able to read it in such a way that it opens in R. However, it opens as a linear (single-column) matrix. A summary of what the file contains is available here:

[Link](http://www.temis.nl/docs/README_TOMSASCII.pdf)

A sample of the data set could be downloaded here:

[Link](http://www.temis.nl/airpollution/no2col/no2monthscia.php?Year=2005&Month=04)

The data set is for January 2006 and I just renamed it for easy access as there are quite a few I need to work with. I read it in using:

# Skip the 4 header lines and read the rest as tab-separated text; the result
# comes back with a single column (see the dim() output below).
CCC<-read.csv("no2_200601.asc",header=FALSE,skip=4,sep="\t")
dim(CCC)
[1] 52560    1

How do I read this into R so that data for each latitude is on a single row? I feel this would help build a proper data structure. Note: Let me try to simplify it as I understand it:
The structure is such that one line holds the title, e.g. lat=-89.9, and the next 144 lines with 20 elements each belong to the row lat=-89.9; so my problem now is reading all these elements before the next "lat=..." into one row.

In addition, I just tried to loop it through a set of files using this:

# Collect the .asc files of the working directory (with their paths).
# NOTE(review): `pattern` is a regular expression, so the "." in ".asc" matches
# any character, not only a literal dot.
NO2files<-list.files(pattern=".asc", full.names=TRUE)
# Read every file into a list of character vectors (one element per line).
# NOTE(review): `f` is not used again in the loop below.
f<-lapply(NO2files, function (x) readLines (x))

# Loop over the files; the body is meant to parse one file per iteration.
# NOTE(review): the `function(x)` line together with the line after it only
# creates an anonymous function that is never called, so `i` is still the
# numeric loop index here and `i[3]`, `i[4]`, `i[-(1:4)]` do not refer to file
# lines. Presumably `i` was meant to hold the lines of one file - confirm.
for (i in 1:length (NO2files)) {  
 function(x)
 i<-readLines(x)
# Header lines 3 and 4: take the number that is followed by " bins".
# NOTE(review): the name `dim` shadows base::dim() inside this code.
pattern <- "[[:digit:]]+(?=\\sbins)"
m <- regexpr(pattern, i[3], perl=TRUE)
dim <- regmatches(i[3], m)
m <- regexpr(pattern, i[4], perl=TRUE)
dim[2] <- regmatches(i[4], m)

dim <- as.integer(dim)

# Header line 2: everything after "undef=" is the marker for missing values.
pattern <- "(?<=undef=).*"
m <- regexpr(pattern, i[2], perl=TRUE)
na_string <- regmatches(i[2], m)

# Drop the 4 header lines, then separate the lines containing "=" from the
# remaining (data) lines.
dat1 <- i[-(1:4)]
sep <- grepl("=", dat1, fixed=TRUE)
dat2a <- dat1[sep] 
dat2b <- dat1[!sep] 
# Cut every data line into 4-character fields; the field positions are derived
# from the width of the first data line.
dat2b <- lapply(dat2b, substring, 
            first=seq(1,nchar(dat2b[1]),4), 
            last=  seq(4,nchar(dat2b[1]),4))
dat2b <- unlist(dat2b)
dat2b <- as.numeric(dat2b)
# Replace the missing-value marker by NA.
dat2b[dat2b==as.numeric(na_string)] <- NA
# Reshape to dim[2] rows (filled row by row) and reverse the row order.
# NOTE(review): `dat2b` is overwritten on every iteration and never stored.
dat2b <- matrix(dat2b, nrow=dim[2], byrow=TRUE)
dat2b <- dat2b[nrow(dat2b):1, ]
}

Solution

  • Not nearly as elegant as @Roland's example, and at first I wasn't sure why I got different values - actually I do know now, thanks to the comment below (a different file was used).

    library(stringr)
    library(plyr)
    library(raster)
    
    f <- readLines("totno2_200601.asc")
    
    # how many lat/lon values (taken from header lines 3 and 4)
    bins.lon <- as.numeric(str_match(f[3], "Longitudes *: *([0-9]+) bins")[2])
    bins.lat <- as.numeric(str_match(f[4], "Latitudes *: *([0-9]+) bins")[2])
    
    # number of characters that represent a value
    num.width <- 4
    
    # how many lines do we need to encode the longitude bins: a line holds
    # 80 %/% num.width values, and ceiling() keeps a final, partly filled line
    # when the bin count is not a multiple of that
    bins.lon.lines <- as.integer(ceiling(bins.lon / (80 %/% num.width)))
    
    # one row per latitude bin: latitude in column 1, then the longitude values
    m <- matrix(NA_real_, nrow=bins.lat, ncol=bins.lon+1)
    
    for (curr.lat.bin in seq_len(bins.lat)) {
    
      # the data starts on line 5; every latitude block is one "lat=" line
      # followed by bins.lon.lines lines of values
      curr.lat.line <- 5 + (curr.lat.bin - 1) * (bins.lon.lines + 1)
    
      # get current latitude
      lat <- as.numeric(str_match(f[curr.lat.line], "lat= +([0-9\\.\\-]+)")[2])
    
      # show progress - not necessary
      cat(curr.lat.bin, lat); cat("\n")
    
      # get the values for the longitudes at current latitude
      vals <- paste(f[(curr.lat.line+1):(curr.lat.line+bins.lon.lines)], collapse="")
    
      # split them into num.width-character fields in one vectorised call
      starts <- seq(1, nchar(vals), by=num.width)
      m[curr.lat.bin, ] <- c(lat, as.numeric(substring(vals, starts, starts + num.width - 1)))
    
    }
    
    # reverse the row order; drop=FALSE keeps m a matrix even with a single row
    m <- m[rev(seq_len(nrow(m))), , drop=FALSE]
    
    plot(raster(m))
    

    plot

    Since you added a requirement to have this be able to be used in a loop to read multiple files:

    library(stringr)
    library(plyr)
    library(raster)
    
    # this is the function-ized version 
    
    #' Read a TOMS-style ASCII grid file into a numeric matrix.
    #'
    #' The file is expected to carry four header lines (longitude and latitude
    #' bin counts on lines 3 and 4), followed by one block per latitude: a
    #' "lat=" line and then the longitude values as fixed-width 4-character
    #' fields, at most 80 characters per line.
    #'
    #' @param fname Path of the file to read.
    #' @param verbose If TRUE, print the block index and latitude while parsing.
    #' @return Numeric matrix with one row per latitude bin, in reverse file
    #'   order; column 1 holds the latitude, the other columns the values.
    tomsToMatrix <- function(fname, verbose=FALSE) {
    
      f <- readLines(fname)
    
      # first capture group of `pattern` in `line`, NA when there is no match
      # (base-R equivalent of str_match(line, pattern)[2])
      capture <- function(pattern, line) {
        if (is.na(line)) { return(NA_character_) }
        regmatches(line, regexec(pattern, line, perl=TRUE))[[1]][2]
      }
    
      bins.lon <- as.numeric(capture("Longitudes *: *([0-9]+) bins", f[3]))
      bins.lat <- as.numeric(capture("Latitudes *: *([0-9]+) bins", f[4]))
      if (is.na(bins.lon) || is.na(bins.lat)) {
        stop("cannot find the longitude/latitude bin counts in the header of ",
             fname, call.=FALSE)
      }
    
      num.width <- 4
      # a line holds 80 %/% num.width values; ceiling() keeps a final, partly
      # filled line when the bin count is not a multiple of that
      bins.lon.lines <- as.integer(ceiling(bins.lon / (80 %/% num.width)))
    
      m <- matrix(NA_real_, nrow=bins.lat, ncol=bins.lon+1)
    
      for (curr.lat.bin in seq_len(bins.lat)) {
        # data starts on line 5; each block is one "lat=" line plus its values
        curr.lat.line <- 5 + (curr.lat.bin - 1) * (bins.lon.lines + 1)
        lat <- as.numeric(capture("lat= +([0-9\\.\\-]+)", f[curr.lat.line]))
        if (verbose) { cat(curr.lat.bin, lat); cat("\n") }
        vals <- paste(f[(curr.lat.line+1):(curr.lat.line+bins.lon.lines)], collapse="")
        # cut the joined block into num.width-character fields in one call
        starts <- seq(1, nchar(vals), by=num.width)
        vals <- as.numeric(substring(vals, starts, starts + num.width - 1))
        if (length(vals) != bins.lon) {
          stop("latitude block ", curr.lat.bin, " of ", fname, " holds ",
               length(vals), " values, expected ", bins.lon, call.=FALSE)
        }
        m[curr.lat.bin, ] <- c(lat, vals)
      }
    
      # reverse the row order; drop=FALSE keeps a one-row result a matrix
      m[rev(seq_len(nrow(m))), , drop=FALSE]
    
    }
    
    # NOTE(review): setwd() changes the working directory for the rest of the
    # session; it matters here because list.files() below returns bare file
    # names (see its output), which are then opened relative to this directory.
    setwd("/data/toms") # whatever the source directory is for **your** files
    
    # names of all files in the data directory
    t.files <- list.files("/data/toms")
    t.files
    [1] "totno2_200504.asc" "totno2_200505.asc" "totno2_200506.asc"
    
    # parse every file; the result is a list with one matrix per file
    dat <- lapply(t.files, tomsToMatrix)
    
    str(dat)
    List of 3
    $ : num [1:720, 1:1441] 89.9 89.6 89.4 89.1 88.9 ...
    $ : num [1:720, 1:1441] 89.9 89.6 89.4 89.1 88.9 ...
    $ : num [1:720, 1:1441] 89.9 89.6 89.4 89.1 88.9 ...
    

    If you need them as named entries, that should not be difficult to add.