Search code examples
rpercentagesummarizationparameterizationgeneralization

Generalizing my YoY quarterly percentage growth code to suit different data


I have written out a code that works for year over year quarterly percentage growth. However my code only works for the data i was using to write the code. I would like to be able to just run my whole code with data of different lengths and not have to change anything.

Here is my code:

>lastyr<-tail(datan,horiz) #selects the last values from the original data

>percentf<-((Arimab2f/lastyr)-1)*100 #finds the percentage growth of the forecasted data

>percentfr<-round(percentf,digits=2) #rounds the results to 3d.p

>percentf_percent<-paste(percentfr,"%",sep="")

>plot.ts(percentf,xaxt="n",ylab="Percentage growth rate",xlab="Quarter",main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values

>axis(1,at=seq(1,horiz,by=1),las=1) #adds the choosen values onto the x-axis at the points

>points(percentf,col="blue") #puts a blue circle around each of the points

>for(i in 1:length(percentfr))
+{text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=1)
+text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=3)} #adds the percentage values next to the points on both the left and right side

>fulldata<-c(datan,Arimab2f,0,0) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter

>fullmat<-matrix(fulldata,nrow=freqdata) #produces a matrix of the full data with the years seperated into columns

>full1mat<-fullmat[,-1] #removes the first column from the matrix

>full2mat<-matrix(c(full1mat,0,0,0,0),nrow=freqdata) #makes a matrix with a zero column at the end to account for the one removed in the last line

>percent1<-((full2mat[,1]/fullmat[,1])-1)*100
>percent2<-((full2mat[,2]/fullmat[,2])-1)*100
>percent3<-((full2mat[,3]/fullmat[,3])-1)*100
>percent4<-((full2mat[,4]/fullmat[,4])-1)*100
>percent5<-((full2mat[,5]/fullmat[,5])-1)*100
>percent6<-((full2mat[,6]/fullmat[,6])-1)*100
>percent7<-((full2mat[,7]/fullmat[,7])-1)*100
>percent8<-((full2mat[,8]/fullmat[,8])-1)*100
#percent9<-((full2mat[,9]/fullmat[,9])-1)*100 #add as many percents as there is years in the data
#percent10<-((full2mat[,10]/fullmat[,10])-1)*100
#percent11<-((full2mat[,11]/fullmat[,11])-1)*100
#percent12<-((full2mat[,12]/fullmat[,12])-1)*100
#percent13<-((full2mat[,13]/fullmat[,13])-1)*100
#percent14<-((full2mat[,14]/fullmat[,14])-1)*100

>percentagegrowth<-c(percent1,percent2,percent3,percent4,percent5,percent6,percent7,percent8)#,percent9,percent10,percent11,percent12,percent13,percent14) #puts the percentage growths for each year in the same vector

>percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) #removes the unnecessary values from the end of the matrix

>zero<-matrix(,nrow=(length(percentagegrowth1)-length(percentf))) #creates a matrix with no values 

>percentf1<-c(zero,percentf) #creates a vector with the NA values and the percantage growth of the forecast data

>percentagegrowth1r<-round(percentagegrowth1,1)

>names(percentagegrowth1)<-c("09 q1","09 q2","09 q3","09 q4","10 q1","10 q2","10 q3","10 q4","11 q1","11 q2","11 q3","11 q4","12 q1","12 q2","12 q3","12 q4","13 q1","13 q2","13 q3","13 q4","14 q1","14 q2","14 q3","14 q4","15 q1","15 q2")

>percentagegrowth1_percent<-paste(percentagegrowth1r,"%",sep="")

>plot.ts(percentagegrowth1,xaxt="n",xlab="Quarter",ylab="Percentage growth(%)",main="Year-over-Year Quarterly Percentage Growth") #plots all the percentage growth points

>for(i in 1:length(percentagegrowth1r))
+{text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=1)
+ text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=3)}

>lines(percentf1,col="red") #adds the forecasted data as a red line

>points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see

>axis(1,at=seq(1,length(percentagegrowth1),by=1),labels=names(percentagegrowth1),las=2,cex.axis=0.6)

>legend("topright",c("Red=Forecasted data"))

Is there any way to shorten the code where it has percent1,percent2...#percent14? and also at names(percentagegrowth1) taking into consideration the length of the data may change hence the names will change?

Here is the data used in this code so you can see what i have done:

http://s21.postimg.org/t6nldfo13/datan.png (datan)

http://s14.postimg.org/vmn2kjatp/arimab2f.png (Arimab2f (the forcasted data using ARIMA))

horiz=4

freqdata=4

You can run my whole code by copying and pasting all of this (includes data):

datan<-c(79160.56,91759.73,91186.48,106353.82,70346.47,80279.15,82611.60,131392.72,93798.99,105944.78,103913.13,154530.69,110157.40,117416.09,127423.42,156752.00,120097.81,121307.75,115021.12,150657.83,113711.53,115353.14,112701.98,154319.18,116803.54,118352.54)
freqdata<-4 #set frequency of the data (e.g. 4 for quarterly, 12 for monthly etc)  
startdata<-c(8,1) #where the first interval is ((8,1) means first quarter in 2008)  
horiz<-4 #how many predictions to be made (4 predicts a year for the quarterly data)    
datats<-ts(datan,frequency=freqdata,start=startdata) #turns the data into a time series with the frequency of data and where it starts
plot(datats,ylab="Total",xlab="Time",main="Original Time Series Plot") #plots a time series graph of the original data
seasonplot(datats,ylab="Total",xlab="Time",main="Seasonal plot",year.labels=TRUE,year.labels.left=TRUE,col=1:20,pch=19)
fit<-stl(datats,s.window=5)
lines(fit$time.series[,1],col="red",ylab="Trend")
plot(fit)
force.log<-"log"
datadates<-as.character(data[,1]) #creates a character vector of the data column
dataMAT<-matrix(0,ncol=freqdata,nrow=(length(datats)+freqdata),byrow=TRUE) #creates a zero matrix of size specified and 'byrow=TRUE' specifies you want it to fill the matrix row-wise (column-wise is the default)
for(i in 1:freqdata)
{dataMAT[,i]<-c(rep(0,length=i-1),lag(datats,k=-i+1),rep(0,length=freqdata-i+1))} #for every i in 1 to freqdata creates a vector entry in the i column of the zero matrix. 'rep(0,length=i-1)' is 0 repeated i-1 times, 'lag(datan,k=-i+1)' shifts the time space of datan back by -i+1 observations
dataind<-dataMAT[c(-1:(-freqdata+1),-(length(dataMAT[,1])-freqdata+1):-(length(dataMAT[,1]))),] #fills in the zero matrix diagonally with the values from the above input
dataind2<-data.frame(dataind) #creates a data frame of the matrix making it easier for R
lm1<-lm(X1~.,data=dataind2) #creates a full linear model with x1 being the dependant variable using data from dataind2
lm2<-lm(X1~X2+dataind2[,length(dataind2[1,])],data=dataind2) #creates a linear model with dependant variable x1 with x2 and 'dataind2[,length(dataind2[1,])]' being variables in the model (includes 1 lag and 1y lag)  using data from dataind2.
library(lmtest) #activates the lmtest package
library(car) #activates the car package
bptest1<-bptest(lm1) #does a Breusch-Pagan test on lm1 to test for heteroscedasticity (see http://en.wikipedia.org/wiki/Heteroscedasticity for description)
bptest2<-bptest(lm2) #does a Breusch-Pagan test on lm2 to test for heteroscedasticity
gqtest1<-gqtest(lm1) #does a Goldfeld-Quandt test on lm1 to test for homoscedasticity (if all random variable in model have the same finite variance)
ncvtest1<-ncvTest(lm1) #a non-constant error variance test on lm1 (much like the Breusch-Pagan). add depending on model
ncvtest2<-ncvTest(lm2) #a non-constant error variance test on lm2. add depending on model
if(force.log=="level") 
{aslog<-"n"}else
{{if(force.log=="log")
   {aslog<-"y"}else
      {if(bptest1$p.value<0.1|bptest2$p.value<0.1|gqtest1$p.value<0.1|ncvtest1$p<0.1|ncvtest2$p<0.1) #if the p-value of bptest1/bptest2/gqtest1 is < 0.1 name aslog 'y'
         {aslog<-"y"}else
            {aslog<-"n"}}}}
if(aslog=="y")
{dataa<-log(datats)}else
{dataa<-datats} #if there is evidence to show that the data should be transformed then it makes a log of the data if not then it remains the same
startLa<-startdata[1]+trunc((1/freqdata)*(length(dataa)-horiz)) #finds a start year
startLb<-1+((1/freqdata)*(length(dataa)-horiz)-trunc((1/freqdata)*(length(dataa)-horiz)))*freqdata #finds a start quarter
startL<-c(startLa,startLb) #creates a vector of the date
K<-ts(rep(dataa,length=length(dataa)-horiz),frequency=freqdata,start=startdata) #split series into two, K is in sample (the original sample)
L<-ts(dataa[-1:-(length(dataa)-horiz)],frequency=freqdata,start=startL) #split series into two, L is out sample (predictions)
library(strucchange) #activates strucchange package
efp1rc<-efp(lm1,data=dataind2,type="Rec-CUSUM") #returns a one-dimensional empirical process of cumulative sums of residuals from lm1
efp2rc<-efp(lm2,data=dataind2,type="Rec-CUSUM") #returns a one-dimensional empirical process of cumulative sums of residuals from lm2
efp1rm<-efp(lm1,data=dataind2,type="Rec-MOSUM") #returns a one-dimensional empirical process of moving sums of residuals from lm1
efp2rm<-efp(lm2,data=dataind2,type="Rec-MOSUM") #returns a one-dimensional empirical process of moving sums of residuals from lm2
plot(efp2rc) #plots the recursive cumulative sum of residuals for lm2
lines(efp1rc$process,col ="darkblue") #plots the recursive cumulative sum of residuals for lm1 on the same graph
plot(efp2rm)
lines(efp1rm$process,col="darkblue")
gefp2<-gefp(lm2,data=dataind2) #plots a M-fluctuation of lm2
plot(gefp2)
plot(dataa) #plots a graph of dataa
pacf(dataa) #plots a correlogram of dataa
sctest(efp2rc) #tests for structural change in regression model
cat("log series,y/n?:",aslog) #shows if series has been logged or not

########## ARIMA ########

library(tseries) #activates tseries package
library(forecast) #activates forecast package
max.sdiff<-3 #set the maximum seasonal differences allowed
arima.force.seasonality<-"n"
kpssW<-kpss.test(dataa,null="Level") #computes the Kwiatkowski-Phillips-Schmidt-Shin test for the null-hypothesis that dataa is level
#kpssW<-ndiffs(dataa,alpha=0.05,test="kpss") #if the above doesn't work
ppW<-tryCatch({ppW<-pp.test(dataa,alternative="stationary")},error=function(ppW){ppW<-list(error="TRUE",p.value=0.99)}) #performs a Phillips-Perron Unit Root test for the null hypothesis that dataa has a unit root instead of a stationary alternative. if p.value>0.05 then assume unit root
adfW<-adf.test(dataa,alternative="stationary",k=trunc((length(dataa)-1)^(1/3))) #performs the Augmented Dickey-Fuller test that the null of dataa has unit root
if(kpssW$p.value<0.05|ppW$p.value>0.05|adfW$p.value>0.05)
{ndiffsW=1}else
 {ndiffsW=0} #finds the estimate of the number of differences required to make time series stationary
aaW<-auto.arima(dataa,max.D=max.sdiff,d=ndiffsW,seasonal=TRUE,allowdrift=FALSE,stepwise=FALSE,trace=TRUE,seasonal.test="ch") #fits the best ARIMA model
orderWA<-c(aaW$arma[1],aaW$arma[6],aaW$arma[2])
orderWS<-c(aaW$arma[3],aaW$arma[7],aaW$arma[4])
if(sum(aaW$arma[1:2])==0)
{orderWA[1]<-1}else
{NULL}
if(arima.force.seasonality=="y")
{if(sum(aaW$arma[3:4])==0)
{orderWS[1]<-1}else
  {NULL}}else
    {NULL}
Arimab<-Arima(dataa,order=orderWA,seasonal=list(order=orderWS),method="ML")
fArimab<-forecast(Arimab,h=8,simulate=TRUE,fan=TRUE) #returns the forecasts for the Arima model. h=number of periods for forecasting
if(aslog=="y")
{fArimabF<-exp(fArimab$mean[1:horiz])}else
{fArimabF<-fArimab$mean[1:horiz]} #if data was logged then its converted back by using the exponetial function as only interested in original data not transformed data
plot(fArimab,main="ARIMA Forecast",sub="blue=fitted,red=actual") #plots the forecast
lines(dataa,col="red",lwd=2) #changes colour and size of dataa
lines(ts(append(fitted(Arimab),fArimab$mean[1]),frequency=freqdata,start=startdata),col="blue",lwd=2) #shows the fitted arima on the same graph
if(aslog=="y")
{Arimab2f<-exp(fArimab$mean[1:horiz])}else
{Arimab2f<-fArimab$mean[1:horiz]} #if data was logged then its converted back by using the exponetial function as only interested in original data not transformed data
start(fArimab$mean)->startARIMA
ArimaALTf<-ts(prettyNum(Arimab2f,big.interval=3L,big.mark=","),frequency=freqdata,start=startARIMA) #puts forecasts in table
View(ArimaALTf,title="ARIMA2 final forecast") #brings up table of the forecasts
summary(Arimab)

######Percentage growth for Arima######

###when using this feature you will need to change lines 118-132, 137 to suit your data###

lastyr<-tail(datan,horiz) #selects the last values from the original data
percentf<-((Arimab2f/lastyr)-1)*100 #finds the percentage growth of the forecasted data
percentfr<-round(percentf,digits=2) #rounds the results to 3d.p
percentf_percent<-paste(percentfr,"%",sep="")
plot.ts(percentf,xaxt="n",ylab="Percentage growth rate",xlab="Quarter",main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values
axis(1,at=seq(1,horiz,by=1),las=1) #adds the choosen values onto the x-axis at the points
points(percentf,col="blue") #puts a blue circle around each of the points
for(i in 1:length(percentfr))
{text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=1)
 text(x=i,y=percentfr[i],labels=percentf_percent[i],cex=0.5,pos=3)} #adds the percentage values next to the points on both the left and right side
fulldata<-c(datan,Arimab2f,0,0) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter
fullmat<-matrix(fulldata,nrow=freqdata) #produces a matrix of the full data with the years seperated into columns
full1mat<-fullmat[,-1] #removes the first column from the matrix
full2mat<-matrix(c(full1mat,0,0,0,0),nrow=freqdata) #makes a matrix with a zero column at the end to account for the one removed in the last line
percent1<-((full2mat[,1]/fullmat[,1])-1)*100
percent2<-((full2mat[,2]/fullmat[,2])-1)*100
percent3<-((full2mat[,3]/fullmat[,3])-1)*100
percent4<-((full2mat[,4]/fullmat[,4])-1)*100
percent5<-((full2mat[,5]/fullmat[,5])-1)*100
percent6<-((full2mat[,6]/fullmat[,6])-1)*100
percent7<-((full2mat[,7]/fullmat[,7])-1)*100
percent8<-((full2mat[,8]/fullmat[,8])-1)*100
#percent9<-((full2mat[,9]/fullmat[,9])-1)*100 #add as many percents as there is years in the data
#percent10<-((full2mat[,10]/fullmat[,10])-1)*100
#percent11<-((full2mat[,11]/fullmat[,11])-1)*100
#percent12<-((full2mat[,12]/fullmat[,12])-1)*100
#percent13<-((full2mat[,13]/fullmat[,13])-1)*100
#percent14<-((full2mat[,14]/fullmat[,14])-1)*100
percentagegrowth<-c(percent1,percent2,percent3,percent4,percent5,percent6,percent7,percent8)#,percent9,percent10,percent11,percent12,percent13,percent14) #puts the percentage growths for each year in the same vector
percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) #removes the unnecessary values from the end of the matrix
zero<-matrix(,nrow=(length(percentagegrowth1)-length(percentf))) #creates a matrix with no values 
percentf1<-c(zero,percentf) #creates a vector with the NA values and the percantage growth of the forecast data
percentagegrowth1r<-round(percentagegrowth1,1)
names(percentagegrowth1)<-c("09 q1","09 q2","09 q3","09 q4","10 q1","10 q2","10 q3","10 q4","11 q1","11 q2","11 q3","11 q4","12 q1","12 q2","12 q3","12 q4","13 q1","13 q2","13 q3","13 q4","14 q1","14 q2","14 q3","14 q4","15 q1","15 q2")
percentagegrowth1_percent<-paste(percentagegrowth1r,"%",sep="")
plot.ts(percentagegrowth1,xaxt="n",xlab="Quarter",ylab="Percentage growth(%)",main="Year-over-Year Quarterly Percentage Growth") #plots all the percentage growth points
for(i in 1:length(percentagegrowth1r))
{text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=1)
 text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],cex=0.5,font=2,pos=3)}
lines(percentf1,col="red") #adds the forecasted data as a red line
points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see
axis(1,at=seq(1,length(percentagegrowth1),by=1),labels=names(percentagegrowth1),las=2,cex.axis=0.6)
legend("topright",c("Red=Forecasted data"))

Solution

  • Your example is far from the best but try this as a start

    lastyr<-tail(datan, horiz) #selects the last values from the original data
    percentf <- ((arimab2f/lastyr) - 1) * 100 #finds the percentage growth of the forecasted data
    percentfr<-round(percentf,digits = 2) #rounds the results to 3d.p
    percentf_percent<-paste(percentfr, "%", sep="")
    
    plot.ts(percentf, xaxt="n", ylab="Percentage growth rate",
            xlab="Quarter",
            main="Percentage Growth of the Forecasts") #plots the percentage growth of the forecasts and removes the x-axis values
    axis(1, at = seq(1, horiz, by=1), las=1) #adds the choosen values onto the x-axis at the points
    points(percentf, col="blue") #puts a blue circle around each of the points
    
    for(i in 1:length(percentfr)){
      text(x=i,y=percentfr[i], labels=percentf_percent[i], cex=0.5, pos=1)
      text(x=i,y=percentfr[i], labels=percentf_percent[i], cex=0.5, pos=3)
    } #adds the percentage values next to the points on both the left and right side
    
    fulldata<-c(datan, arimab2f) #this makes a vector of all the data with the forecasts, the added zeros make it easier to seperate the years if it doesnt finish in the 4th quarter
    fullmat <- matrix(0, ncol=floor(length(fulldata)/4)+1, nrow = 4) #produces a matrix of the full data with the years seperated into columns
    fullmat[1:length(fulldata)] <- fulldata
    full1mat <- fullmat[ ,-1] #removes the first column from the matrix
    full2mat <- cbind(full1mat, 0) #makes a matrix with a zero column at the end to account for the one removed in the last line
    percent1 <- list()
    for(i in 1:ncol(full2mat)){
      percent1[[i]] <- ((full2mat[ ,i]/fullmat[ ,i])-1) * 100
    } 
    percentagegrowth <- unlist(percent1)
    #removes the unnecessary values from the end of the matrix
    percentagegrowth1<-head(percentagegrowth,-(length(fullmat)-length(datan))) 
    #creates a matrix with no values 
    zero <- matrix( ,nrow = (length(percentagegrowth1) - length(percentf))) 
    #creates a vector with the NA values and the percantage growth of the forecast data
    percentf1 <- c(zero, percentf)
    percentagegrowth1r <- round(percentagegrowth1, 1)
    names(percentagegrowth1) <- paste('yq', 1:length(percentagegrowth1)) # alternative names
    percentagegrowth1_percent <- paste(percentagegrowth1r, "%", sep = "")
    
    #plots all the percentage growth points
    plot.ts(percentagegrowth1, xaxt = "n", xlab = "Quarter",
            ylab="Percentage growth(%)",
            main = "Year-over-Year Quarterly Percentage Growth") 
    
    for(i in 1:length(percentagegrowth1r)){
      text(x = i,y = percentagegrowth1r[i],labels=percentagegrowth1_percent[i],
           cex = 0.5, font = 2, pos = 1)
      text(x=i,y=percentagegrowth1r[i],labels=percentagegrowth1_percent[i],
           cex=0.5,font=2,pos=3)
    }
    lines(percentf1,col="red") #adds the forecasted data as a red line
    points(percentagegrowth1r,col="blue") #circles each of the points making them easier to see
    axis(1, at = seq(1, length(percentagegrowth1), by = 1),
         labels = names(percentagegrowth1),
         las = 2,cex.axis = 0.6)
    legend("topright", c("Red=Forecasted data"))
    

    plot ts