I have the following data:
EDIT: With smaller sample data
dat <- structure(list(SN = c(198305L, 198305L, 198305L, 198305L,
198305L,198305L, 198305L, 198305L, 198305L, 198305L, 198305L,
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L,
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L,
198305L, 198305L, 198305L, 198305L, 198305L, 198305L, 198305L,
198305L, 198305L, 198305L, 198306L, 198306L, 198306L, 198306L,
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L,
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L,
198308L, 198308L, 198308L, 198308L, 198308L, 198308L, 198308L,
198308L, 198308L, 198308L, 198310L, 198310L, 198310L, 198310L,
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L,
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L,
198310L, 198310L, 198310L, 198310L, 198310L, 198310L, 198310L,
198310L), CY = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), Year = c(1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L, 1983L,
1983L, 1983L), Month = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
), Day = c(9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L,
15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 14L, 14L,
14L, 14L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L, 22L,
22L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L), Hour = c(0L, 6L, 12L,
18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L,
6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L,
18L, 0L, 6L, 12L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L,
0L, 6L, 12L, 18L, 0L, 6L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L,
0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L), Lat = c(17.8, 18.1, 18.7, 18.9, 19.2, 19.5, 19.9,
20.1, 20.6, 21.2, 21.6, 22, 22.5, 22.9, 23.4, 23.9, 24.6, 24.9,
25.4, 26.1, 26.6, 27.2, 27.6, 28.1, 28.5, 29.1, 29.5, 30, 31.1,
31.8, 32.7, 33.8, 34.6, 35.1, 35.6, 19.8, 19.9, 19.9, 20.2, 15.9,
16.1, 16.3, 16.5, 16.9, 17.4, 18, 18.7, 19.3, 20, 23.8, 24.2,
24.9, 25.4, 25.8, 25.5, 25.1, 25.3, 25.8, 26.2, 26.5, 27.1, 27.9,
29.1, 10.3, 10.2, 9, 9.2, 9.2, 9.5, 10, 10.5, 10.9, 11.3, 12.3,
13, 13.7, 14.4, 15, 15.9, 16.8, 17.2, 17.8, 18.3, 18.7, 19, 19.3,
19.5, 19.7, 20), Lon = c(130.8, 130.7, 130.3, 130.4, 130.4, 130.4,
130.5, 130.5, 130.7, 130.8, 130.7, 130.6, 130.7, 130.8, 131.2,
131.5, 131.8, 132.2, 132.6, 133, 133.3, 133.5, 133.5, 133.5,
133.6, 134.1, 134.3, 134.8, 135.1, 135.8, 136.5, 137, 137.3,
138.1, 139.4, 121.5, 122.7, 124.4, 126.2, 133.8, 133.2, 132.8,
132.4, 132.2, 132.5, 133, 133.7, 134.7, 135.6, 140.1, 141.6,
142.6, 143.1, 143.5, 144, 144.3, 144.7, 144.7, 144.1, 143.4,
142.8, 141.8, 141.3, 151.2, 149.2, 143.4, 141.8, 140.2, 138.9,
137.5, 136, 134.4, 133, 131.7, 130.7, 129.6, 128.8, 128, 126.9,
125.8, 125.1, 124.1, 123.2, 122.2, 121.2, 120.2, 119.2, 118.3,
117.5), VMax = c(145L, 135L, 125L, 120L, 120L, 120L, 115L, 110L,
110L, 115L, 120L, 120L, 120L, 120L, 125L, 120L, 115L, 110L, 105L,
105L, 110L, 100L, 100L, 95L, 90L, 85L, 85L, 80L, 75L, 70L, 70L,
70L, 60L, 55L, 45L, 35L, 40L, 45L, 40L, 35L, 40L, 45L, 50L, 55L,
50L, 45L, 40L, 35L, 35L, 35L, 45L, 50L, 50L, 45L, 40L, 40L, 45L,
50L, 50L, 50L, 45L, 40L, 35L, 35L, 35L, 35L, 40L, 45L, 50L, 55L,
60L, 65L, 70L, 75L, 85L, 90L, 95L, 95L, 95L, 95L, 100L, 120L,
125L, 120L, 100L, 90L, 85L, 85L, 80L), Cat = structure(c(5L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
4L, 4L, 4L, 3L, 2L, 2L, 2L, 1L), .Label = c("Cat1", "Cat2", "Cat3",
"Cat4", "Cat5", "TS"), class = "factor")), row.names = c(243L,
244L, 245L, 246L, 247L, 248L, 249L, 250L, 251L, 252L, 253L, 254L,
255L, 256L, 257L, 258L, 259L, 260L, 261L, 262L, 263L, 264L, 265L,
266L, 267L, 268L, 269L, 270L, 271L, 272L, 273L, 274L, 275L, 276L,
277L, 278L, 279L, 280L, 281L, 282L, 283L, 284L, 285L, 286L, 287L,
288L, 289L, 290L, 291L, 292L, 293L, 294L, 295L, 296L, 297L, 298L,
299L, 300L, 301L, 302L, 303L, 304L, 305L, 927L, 928L, 929L, 930L,
931L, 932L, 933L, 934L, 935L, 936L, 937L, 938L, 939L, 940L, 941L,
942L, 943L, 944L, 945L, 946L, 947L, 948L, 949L, 950L, 951L, 952L
), class = "data.frame")
Each lat-lon pair has a unique identifier, the SN column. I created a grid and I want to count the number of unique lat-lon pairs within each grid.
Here's my script:
latmin=0
latmax=50
lonmin=60
lonmax=180
dlat=2.5
dlon=2.5
latint=dlat*0.5
lonint=dlon*0.5
## derive center lat and lon points
x.Lon<-seq((lonmin+lonint),(lonmax-lonint),lonint)
y.Lat<-seq((latmin+latint),(latmax-latint),latint)
df2<-as.data.frame(expand.grid(x.Lon=x.Lon,y.Lat=y.Lat))
df2$count<-"0"
library(data.table)
library(expss)
setDT(dat)
dummy<-matrix(ncol=1,nrow=nrow(df2))
for (i in 1:nrow(df2)){
df_bounds<-data.frame(north=(df2[i,]$y.Lat+latint),south=(df2[i,]$y.Lat-latint),west=(df2[i,]$x.Lon-lonint),east=(df2[i,]$x.Lon+lonint))
dat[,inBounds := Lat >= (df2[i,]$y.Lat-latint) & Lat <= (df2[i,]$y.Lat+latint) & Lon >= (df2[i,]$x.Lon-lonint) & Lon >= (df2[i,]$x.Lon+lonint)]
dat1<-dat[SN %in% dat[inBounds == TRUE, unique(SN)],passesThroughBox := T]
#dat2<-dat[is.na(passesThroughBox),passesThroughBox := F]
#dat3<-dat1[which(passesThroughBox == TRUE),]
dummy[i,]<-count_if("TRUE",dat1$passesThroughBox)
}
PROBLEMS/QUESTIONS
EXPECTED OUTPUTS
Any suggestions on how to do this in R?
I'll appreciate any help.
Given the observations data-frame dat
and grid data-frame df2
. For each observation in dat
find the nearest grid center in df2
to obtain the grid it belongs to.
This is based on assumption that an observation belongs to its nearest grid center. Also, For each SN
only the first entry reported is taken as the unique observation.
df2<-as.data.frame(expand.grid(x.Lon=x.Lon,y.Lat=y.Lat)) # grid box centers
df2$count<- 0L # keeping count as integer
# function to calculate distance between two points
dist <- function(x, y) {diff <- (y - x) ; sqrt(sum(diff^2))}
# Filtering only the unique observation
dat <- dat[!duplicated(dat$SN), ]
# Finding closest grid center for every observation in dat
closest_grid <- apply(dat[,c('Lon','Lat')],1, function(x){
dist_grid <- apply(df2[,c('x.Lon','y.Lat')], 1, function(y) dist(x,y))
return(which.min(dist_grid))
})
# Summarizing no of of counts for each grid center with non zero counts
df2[names(table(closest_grid)),'count'] <- as.integer(table(closest_grid))
df2[names(table(closest_grid)),] # the non-zero counts
# x.Lon y.Lat count
#738 151.25 10.00 1
#1199 133.75 16.25 1
#1292 131.25 17.50 1
#1474 121.25 20.00 1
Use ggplot2
to plot the the count heat-map at grid centers:
library(ggplot2) # fill = counts data
ggplot(data = df2, aes(x = x.Lon, y = y.Lat)) + geom_tile(aes(fill = count))
The below plot is for your full data-set: