I am attempting to make a shiny web app using a function that I found to scrape data off of a NOAA website. So I did not create the function, but I have used it and it works well. I want to create the app for others at work who are not willing/able to use R so they can download the data from the app.
I have created some simple apps and thought this would be simple as well but I am running into a problem. I run the app and get the message "number of items to replace is not a multiple of replacement length". I think this has to do with the fact that the function creates lists of csv files (line 163 of helpers)and is then adding them together, but I am not certain. I am hoping that I am missing something about creating an empty data frame in shiny that can handle the new table and not have to edit the function within helpers at all.
The info already entered into the ui should work if the app does so you don't need to go to the noaa website to find a station name/number. I really appreciate the help. Thanks.
library(XML)
library(RCurl)
library(plyr)
source("helpers.R")
shinyServer(
function(input, output) {
output$stageData <- renderTable({
input$goButton
stage <- isolate(data.frame(noaa(begindate = input$begindate, enddate = input$enddate, station = input$station,
units = input$units, datum = input$datum, interval = input$interval, time = input$time, continuous = "TRUE")))
return(stage)
})
}
)
shinyUI(fluidPage(
titlePanel("NOAA"),
sidebarLayout(
sidebarPanel(
textInput("station", label = h5("Station"), value = "Astoria, OR"),
textInput("begindate", label = h5("Begin date, must be format: YYYYMMDD"), value = "20140101"),
textInput("enddate", label = h5("End date, must be format: YYYYMMDD"), value = "20140201"),
selectInput("units", label = h5("Units"),
choices = list("Feet" = "feet", "Meters" = "meters"),
selected = "feet"),
selectInput("datum", label = h5("Datum"),
choices = list("NAVD" = "NAVD", "Station" = "station", "MLLM" = "MLLW", "MLW" = "MLW", "MSL" = "MSL", "MTL" = "MTL", "MHW" = "MHW", "MHHW" = "MHHW", "IGLD" = "IGLD"),
selected = "NAVD"),
selectInput("interval", label = h5("Interval"),
choices = list("HL" = "HL", "6 minute" = "6 minute", "Hourly" = "hourly", "Monthly" = "monthly"),
selected = "HL"),
selectInput("time", label = h5("Time"),
choices = list("LST" = "LST", "GMT" = "GMT", "LST/LDT" = "LST/LDT"),
selected = "LST"),
actionButton("goButton", "Go!")
),
mainPanel(
tableOutput("stageData")
)
)
))
noaa <- function(begindate = "begindate", enddate = "enddate", station = "9439040",
units = "feet", datum = "NAVD", interval = "hourly", time = "GMT", continuous = "TRUE") {
if(!continuous %in% c("FALSE", "TRUE", "T", "F")) stop("'continuous' must be set to 'TRUE' or 'FALSE'")
# set units
if(units == "meters") {
u.csv <- u <- "metric"
} else if(units == "feet") {
u <- "standard"
u.csv <- "english"
} else stop("invalid units: must be 'feet' or 'meters' ")
# set datum
if(datum %in% c("STND", "MHHW", "MHW", "MTL", "MSL", "MLW", "MLLW", "NAVD", "IGLD")){
datum <- datum
} else stop("invalid datum: must be 'STND', 'MHHW', 'MHW', 'MTL',
'MSL', 'MLW', 'MLLW', 'IGLD', or 'NAVD'")
# set measurement time interval
if(interval == "6 minute") {
ti.csv <- "water_level"
ti.name <- "Verified 6-Minute Water Level"
} else if(interval == "hourly") {
ti.csv <- "hourly_height"
ti.name <- "Verified Hourly Height Water Level"
} else if(interval == "HL") {
ti.csv <- "high_low"
ti.name <- "Verified High/Low Water Level"
} else if(interval == "monthly") {
ti.csv <- "monthly_mean"
ti.name <- "Verified Monthly Mean Water Level"
} else stop("invalid time interval: must be '6 minute', 'hourly', or 'HL'")
# set time zone
if(time %in% c("LST/LDT", "GMT", "LST")) {
tz <- time
} else stop("invalid time zone: must be 'LST/LDT', 'GMT', or 'LST' ")
# set site name/number indicator
if(regexpr("[0-9]{7}", station)[1] == 1) {
site.ind <- c(1)
} else if(regexpr("[a-zA-Z]+", station)[1] == 1) {
site.name <- station
site.ind <- c(0)
} else stop("Invalid station entry: must use station name or number. Check active stations
at: http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels")
suppressWarnings(stns <- readLines("http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels")) # list of active stations
if(site.ind == 1) { # Use station number to identify station
stn1 <- grep(paste(station, " " , sep = ""), stns) # station number is followed by a space, then the station name
if(length(stn1) == 0) {
stop("Station number appears to be invalid. No match found at
http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels")
} else if(length(stn1) > 1) {
stop("Station number appears to be duplicated. Try using site name:
http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels")
} else if(length(stn1) == 1) {
stn2 <- regexpr("[0-9] .*</a>$", stns[stn1])
stn3 <- regmatches(stns[stn1], stn2) # extract matches
site.name <- gsub("[0-9] |</a>", "", stn3) # clean up site name
}
} else if(site.ind == 0) { # Use station name to identify site number
no1 <- grep(site.name, stns)
if(length(no1) == 1){
no2 <- regexpr("[0-9]{7} .*</a>$", stns[no1])
no3 <- regmatches(stns[no1], no2)
station <- site.no <- gsub("[A-Za-z]| |,|</a>", "", no3)
} else if(length(no1) > 1){
stop("Site name found for multiple active NOAA stations. Look up site number at
http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels")
} else if(length(no1) < 1){
stop("Site name not found on list of active NOAA stations. Look up sites at
http://co-ops.nos.noaa.gov/stations.html?type=Water+Levels.
Be attentive to spelling or consider using the station number.")
}
}
doc <- htmlParse(getURL(paste("http://co-ops.nos.noaa.gov/inventory.html?id=", station, sep="")),
useInternalNodes = TRUE)
nodes <- getNodeSet(doc, "//tr")
date.list <- sapply(nodes, function(x) xmlValue(getSibling(x)))
data.line <- grep(ti.name, date.list)
first.record <- regexpr("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}[:punct:][0-9]{2}",
date.list[data.line])
first.rec <- regmatches(date.list[data.line], first.record )
first.rec <- as.Date(substr(first.rec, 1, 10), format = "%Y-%m-%d")
last.record <- regexpr("[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}[:punct:][0-9]{2}$",
date.list[data.line])
last.rec <- regmatches(date.list[data.line], last.record )
last.rec <- as.Date(substr(last.rec, 1, 10), format = "%Y-%m-%d")
if(length(date.list[data.line]) > 1) {
first.rec <- first.rec[1]
last.rec <- last.rec[length(last.rec)]
}
# set start/end dates to full period of record, if left as default
if(begindate == "begindate") {
sdate <- strptime(as.character(first.rec), "%Y-%m-%d")
sdate <- gsub("-", "", sdate)
sdate <- as.Date(as.character(sdate), "%Y%m%d")
sdate <- sdate + 1
} else if(begindate != "begindate") {
sdate <- as.Date(as.character(begindate), "%Y%m%d")
}
if(enddate == "enddate") {
edate <- strptime(as.character(last.rec), "%Y-%m-%d")
edate <- gsub("-", "", edate)
edate <- as.Date(as.character(edate), "%Y%m%d")
edate <- edate - 1
} else if(enddate != "enddate") {
edate <- as.Date(as.character(enddate), "%Y%m%d")
}
# check if date range is within period of record, and check if time period
# requires splitting into smaller units. Interval limit is 1 year for hourly
# and HL data, 31 days for 6-min data, 10 years for monthly data.
dates <- sdate
if(interval == "HL") { if(sdate < first.rec | edate > last.rec) {
stop("invalid time interval")
} else if( as.numeric(edate - sdate) > 364) {
dates <- seq(sdate,edate, 365)
} else(dates <- c(sdate,edate))}
if(interval == "hourly") { if(sdate < first.rec | edate > last.rec) {
stop("invalid time interval")
} else if( as.numeric(edate - sdate) > 364) {
dates <- seq(sdate,edate, 365)
} else(dates <- c(sdate,edate))}
if(interval == "6 minute") { if(sdate < first.rec | edate > last.rec) {
stop("invalid time interval")
} else if( as.numeric(edate - sdate) > 30) {
dates <- seq(sdate,edate, 31)
} else(dates <- c(sdate,edate))}
if(!edate %in% dates[length(dates)]) dates <- c(dates, edate)
dates2 <- format(as.Date(dates), "%Y%m%d") # re-format dates for the url
# old version:
# url.temp <- c(paste0("http://co-ops.nos.noaa.gov/api/datagetter?", "begin_date=", dates2[i],
# "&end_date=", dates2[i+1], "&station=", station, "&product=", ti.csv,
# "&units=", u.csv, "&time_zone=", tz, "&datum=", datum,
# "&application=Tides_and_Currents","&format=csv"))
#
# create list of csv files
for(i in 1:(length(dates2) - 1)) {
url.temp <- c(paste0("http://tidesandcurrents.noaa.gov/api/datagetter?",
"product=", ti.csv,
"&application=NOS.COOPS.TAC.WL",
"&begin_date=", dates2[i],
"&end_date=", dates2[i+1],
"&datum=", datum,
"&station=", station,
"&time_zone=", tz,
"&units=", u.csv,
"&format=csv"))
if (!exists("url.list")){
url.list <- url.temp
}
# if the dataset exists, add to it
if (exists("url.list")){
url.list[i] <- url.temp
rm(url.temp)
}
}
lapply.csv <- lapply(url.list, function(x) read.csv(x))
data.csv <- do.call(rbind, lapply.csv)
data.csv$station <- rep(site.name, times = nrow(data.csv))
label <- paste("ver_wtr_lev_", units,"_", datum, sep="")
t.label <- paste("time (", time, ")", sep = "")
# clean up the data
if(interval == "HL" ) {
data.csv$datetime <- as.POSIXlt(data.csv[, 1], format = "%Y-%m-%d %H:%M")
data.csv <- data.csv[, c(7, 2, 3, 6)]
names(data.csv) <- c(t.label, label, "tide", "station")
levels(data.csv$tide) <- c("H", "HH", "L", "LL")
}
if(interval == "6 minute" ) {
data.csv$datetime <- as.POSIXlt(data.csv[, 1], format = "%Y-%m-%d %H:%M")
data.csv <- data.csv[, c(10, 2, 9)]
names(data.csv) <- c(t.label, label, "station")
}
if(interval == "hourly" ) {
data.csv$datetime <- as.POSIXlt(data.csv[, 1], format = "%Y-%m-%d %H:%M")
data.csv <- data.csv[, c(7, 2, 6)]
names(data.csv) <- c(t.label, label, "station")
}
if(interval == "monthly" ) {
data.csv$datetime <- data.csv$Year + data.csv$Month / 12
}
if(interval == "hourly" & (continuous == "TRUE" | continuous == "T")) {
data.csv <- data.csv[!duplicated(data.csv[, 1]), ]
time.df <- data.frame(seq(from = data.csv[1, 1], to = data.csv[nrow(data.csv), 1], by = 60*60))
names(time.df)[1] <- t.label
data.csv <- join_all(list(time.df, data.csv[!duplicated(data.csv[, 1]), ]))
} else if(interval == "6 minute" & (continuous == "TRUE" | continuous == "T")) {
data.csv <- data.csv[!duplicated(data.csv[, 1]), ]
time.df <- data.frame(seq(from = data.csv[1, 1], to = data.csv[nrow(data.csv), 1], by = 60*6))
names(time.df)[1] <- t.label
data.csv <- join_all(list(time.df, data.csv[!duplicated(data.csv[, 1]), ]))
} else if(interval == "monthly" & (continuous == "TRUE" | continuous == "T" )) {
time.df <- data.frame(seq(from = data.csv$datetime[1], to = data.csv$datetime[nrow(data.csv)], by = 1 / 12))
names(time.df) <- "datetime"
time.df$datetime <- round(time.df$datetime, 2)
data.csv$datetime <- round(data.csv$datetime, 2)
data.csv <- join_all(list(time.df, data.csv))
data.csv$Year <- as.numeric(data.csv$Year)
data.csv$Year[is.na(data.csv$station)] <- as.numeric(substr(data.csv$datetime[is.na(data.csv$station)], 1, 4))
data.csv$Month[is.na(data.csv$station)] <- round((data.csv$datetime[is.na(data.csv$station)] - data.csv$Year[is.na(data.csv$station)]) * 12)
data.csv$station[is.na(data.csv$station)] <- site.name
} else data.csv <- data.csv[!duplicated(data.csv[, 1]), ]
invisible(data.csv)
}
It turns out that renderTable
has problems with the POSIXlt format.
Add stage[,'time..LST.'] <- as.character(stage[,'time..LST.'])
before return(stage)
to fix your problem.