Search code examples
rdataframefinancequantitative-finance

Calculate the differences between cells in the columns of a dataframe


I have a STOXX investment universe that comes from from here:

 head(df)

        Date   SX5P   SX5E  SXXP  SXXE  SXXF  SXXA   DK5F  DKXF
1 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
2 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
3 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81
4 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82
5 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06
6 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20

To understand the principles of allocation of actions. I have to determine, at the end of each month, the allocation so that each share contributes the same risk to the total portfolio.

Then I follow this tutorial that makes you for Python.

But, on the one hand, I have problems calculating daily returns. In fact I have all the data thanks to:

url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- as.Date(sub('(.{2}).(.{2}).(.{4})', "\\3-\\2-\\1", df$Date))

And then I have to calculate them. I have seen that there is a function, Delt that says to be able to do it between two columns. But I have to make the differences between each cell. I do not know how to do it:

new = df[2:9]
# How to calculate the returns ?
Delt(df.a_given_day,df.a_given_day_plus_1,k=0:2)  #... Delt do it 0,1 y 2 periods between two columns.

After, I could calculate the covariance with cov_matrix_df <- cov(data.matrix(new, rownames.force = NA)) and continue my search to calculate the risk.

On the other hand I do not know how to modify it to determine, at the end of each month the risk to decide at the end of each month, the allocation.

my attempt:

From this answer I tried from this answer:

dr_df = cbind(df[-1,1],apply(df[,-1],2,function(x) diff(x)/head(x,-1)))

Which returns:

> head(dr_df)
                  SX5P         SX5E         SXXP         SXXE         SXXF         SXXA
[1,] 6209  0.000000000  0.000000000  0.000000000  0.000000000  0.000000000  0.000000000
[2,] 6210 -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638  0.004488850
[3,] 6213  0.001297202  0.007344861  0.003027734  0.008179959  0.007800472 -0.003027245
[4,] 6214  0.005220951  0.004441575  0.005554214  0.005983773  0.006517975  0.004916136
[5,] 6215  0.006817713 -0.003513166  0.006003842 -0.002318782 -0.002124861  0.015683453
[6,] 6216 -0.004595435 -0.013101262 -0.003103366 -0.011014551 -0.009531535  0.005949851
              DK5F          DKXF
[1,]  0.0000000000  0.0000000000
[2,]  0.0036574404  0.0038133008
[3,]  0.0035823477  0.0001519526
[4,]  0.0039234391  0.0036463081
[5,] -0.0007969471  0.0021192855
[6,] -0.0098164026 -0.0087613293

Which seems quite good but I don't understand the code :/ And when I try to create the covariance matrix I have some issues:

> cov(dr_df[2:8])
Error in cov(dr_df[2:8]) : supply both 'x' and 'y' or a matrix-like 'x'
> cov(dr_df)
             SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
     9886513   NA   NA   NA   NA   NA   NA   NA   NA
SX5P      NA   NA   NA   NA   NA   NA   NA   NA   NA
SX5E      NA   NA   NA   NA   NA   NA   NA   NA   NA
SXXP      NA   NA   NA   NA   NA   NA   NA   NA   NA
SXXE      NA   NA   NA   NA   NA   NA   NA   NA   NA
SXXF      NA   NA   NA   NA   NA   NA   NA   NA   NA
SXXA      NA   NA   NA   NA   NA   NA   NA   NA   NA
DK5F      NA   NA   NA   NA   NA   NA   NA   NA   NA
DKXF      NA   NA   NA   NA   NA   NA   NA   NA   NA

Soren's attempt

It seems that I have non-numeric argument to binary operator for SX5P - SX5P1d :

> library(lubridate)
Attaching package: ‘lubridate’

The following objects are masked from ‘package:data.table’:

    hour, mday, month, quarter, wday, week, yday, year

The following object is masked from ‘package:base’:

    date

> library(data.table)
> 
> 
> url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
> df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
> names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
> df$Date <- dmy(df$Date)
> df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
> 
> dt <- as.data.table(df)
> 
> #daily returns
> dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
Warning messages:
1: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'last_date' (recycled leaving remainder of 5 items).
2: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SX5P1d' (recycled leaving remainder of 5 items).
3: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SX5E1d' (recycled leaving remainder of 5 items).
4: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SXXP1d' (recycled leaving remainder of 5 items).
5: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SXXE1d' (recycled leaving remainder of 5 items).
6: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SXXF1d' (recycled leaving remainder of 5 items).
7: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'SXXA1d' (recycled leaving remainder of 5 items).
8: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'DK5F1d' (recycled leaving remainder of 5 items).
9: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d",  :
  Supplied 9 items to be assigned to 7673 items of column 'DKXF1d' (recycled leaving remainder of 5 items).
> dt[,`:=`(SX5P_r=SX5P-SX5P1d,
+          SX5E_r=SX5E-SX5E1d,
+          SXXP_r=SXXP-SXXP1d,
+          SXXE_r=SXXE-SXXE1d,
+          SXXF_r=SXXF-SXXF1d,
+          SXXA_r=SXXA-SXXA1d,
+          DK5F_r=DK5F-DK5F1d,
+          DKXF_r=DKXF-DKXF1d)]
Error in SX5P - SX5P1d : non-numeric argument to binary operator
> #monthly returns
> returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
+                     SX5E=sum(SX5E_r,na.rm=T),
+                     SXXP=sum(SXXP_r,na.rm=T),
+                     SXXE=sum(SXXE_r,na.rm=T),
+                     SXXF=sum(SXXF_r,na.rm=T),
+                     SXXA=sum(SXXA_r,na.rm=T),
+                     DK5F=sum(DK5F_r,na.rm=T),
+                     DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
Error in `[.data.table`(dt, , list(SX5P = sum(SX5P_r, na.rm = T), SX5E = sum(SX5E_r,  : 
  object 'SX5P_r' not found

Here is dt after shift operation that generated the warning messages :

> head(dt)
         Date   SX5P   SX5E  SXXP  SXXE  SXXF  SXXA   DK5F  DKXF End_month_date
1: 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56     1986-12-31
2: 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56     1987-01-31
3: 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81     1987-01-31
4: 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82     1987-01-31
5: 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06     1987-01-31
6: 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20     1987-01-31
   last_date SX5P1d SX5E1d SXXP1d SXXE1d SXXF1d SXXA1d DK5F1d DKXF1d
1:        NA     NA     NA     NA     NA     NA     NA     NA     NA
2:      Date   Date   Date   Date   Date   Date   Date   Date   Date
3:      SX5P   SX5P   SX5P   SX5P   SX5P   SX5P   SX5P   SX5P   SX5P
4:      SX5E   SX5E   SX5E   SX5E   SX5E   SX5E   SX5E   SX5E   SX5E
5:      SXXP   SXXP   SXXP   SXXP   SXXP   SXXP   SXXP   SXXP   SXXP
6:      SXXE   SXXE   SXXE   SXXE   SXXE   SXXE   SXXE   SXXE   SXXE

Solution

  • The data has some erratic values on dates 2016-03-25 and 2016-03-28.

    library(dplyr)
    df <- filter(df, SX5P>0)            # drop erratic data points
    percent_change <- function(x) (x - lag(x)) / lag(x) # function that calculates percentage change 
    daily_return <- df %>% 
      mutate_at(vars(-Date), percent_change) %>%     # for each column excluding Date, apply percent_change function
      filter(complete.cases(.)) %>%                  # filter out NAs
      select(-Date) %>%                              # drop Date variable 
      as.matrix()                                    # convert to matrix                   
    
    head(daily_return, 5)
    #             SX5P         SX5E         SXXP         SXXE         SXXF         SXXA          DK5F         DKXF
    #[1,]  0.000000000  0.000000000  0.000000000  0.000000000  0.000000000  0.000000000  0.0000000000 0.0000000000
    #[2,] -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638  0.004488850  0.0036574404 0.0038133008
    #[3,]  0.001297202  0.007344861  0.003027734  0.008179959  0.007800472 -0.003027245  0.0035823477 0.0001519526
    #[4,]  0.005220951  0.004441575  0.005554214  0.005983773  0.006517975  0.004916136  0.0039234391 0.0036463081
    #[5,]  0.006817713 -0.003513166  0.006003842 -0.002318782 -0.002124861  0.015683453 -0.0007969471 0.0021192855
    
    cov(daily_return)  
    #             SX5P         SX5E         SXXP         SXXE         SXXF         SXXA         DK5F         DKXF
    #SX5P 0.0001458898 0.0001531675 0.0001339905 0.0001400356 0.0001335696 0.0001283412 0.0001355236 0.0001410957
    #SX5E 0.0001531675 0.0001781671 0.0001431415 0.0001622366 0.0001519764 0.0001252829 0.0001497803 0.0001561299
    #SXXP 0.0001339905 0.0001431415 0.0001267415 0.0001328073 0.0001265858 0.0001210988 0.0001314346 0.0001359420
    #SXXE 0.0001400356 0.0001622366 0.0001328073 0.0001502001 0.0001410354 0.0001165071 0.0001412857 0.0001471070
    #SXXF 0.0001335696 0.0001519764 0.0001265858 0.0001410354 0.0001343114 0.0001130397 0.0001380515 0.0001432671
    #SXXA 0.0001283412 0.0001252829 0.0001210988 0.0001165071 0.0001130397 0.0001257977 0.0001221743 0.0001254364
    #DK5F 0.0001355236 0.0001497803 0.0001314346 0.0001412857 0.0001380515 0.0001221743 0.0001914781 0.0001946354
    #DKXF 0.0001410957 0.0001561299 0.0001359420 0.0001471070 0.0001432671 0.0001254364 0.0001946354 0.0002103559 
    

    monthly return

    library(lubridate)
    percent_change2 <- function(x)last(x)/first(x) - 1
    monthly_return <- df %>% 
      group_by(gr = floor_date(Date, unit = "month")) %>%
      summarize_at(vars(-Date, -gr), percent_change2) %>%
      ungroup() %>%
      select(-gr) %>% 
      as.matrix()  
    head(monthly_return, 5)
    
                SX5P         SX5E        SXXP         SXXE         SXXF       SXXA        DK5F        DKXF
    [1,]  0.00000000  0.000000000 0.000000000  0.000000000  0.000000000 0.00000000  0.00000000  0.00000000
    [2,] -0.01089032 -0.046335561 0.005316578 -0.025867316 -0.025494595 0.04170287 -0.02977095 -0.01281269
    [3,]  0.03167912 -0.009493186 0.032518367 -0.011141476 -0.011708861 0.07918740  0.05577361  0.04355828
    [4,]  0.02633308  0.031731340 0.025284157  0.027359491  0.027197099 0.02322630  0.04121760  0.03157433
    [5,]  0.02660200 -0.002816901 0.023347620 -0.003767437 -0.002362366 0.05061867  0.03758165  0.03917672
    
    cov(monthly_return)
                SX5P        SX5E        SXXP        SXXE        SXXF        SXXA        DK5F        DKXF
    SX5P 0.002068415 0.002243488 0.002011784 0.002160762 0.002076261 0.001867744 0.002282369 0.002381529
    SX5E 0.002243488 0.002712719 0.002225923 0.002605715 0.002448324 0.001857319 0.002549326 0.002671546
    SXXP 0.002011784 0.002225923 0.002025003 0.002182308 0.002095078 0.001873543 0.002321951 0.002407614
    SXXE 0.002160762 0.002605715 0.002182308 0.002548197 0.002399266 0.001826243 0.002514475 0.002629281
    SXXF 0.002076261 0.002448324 0.002095078 0.002399266 0.002291523 0.001797954 0.002458753 0.002558314
    SXXA 0.001867744 0.001857319 0.001873543 0.001826243 0.001797954 0.001927949 0.002134767 0.002189677
    DK5F 0.002282369 0.002549326 0.002321951 0.002514475 0.002458753 0.002134767 0.003414248 0.003523391
    DKXF 0.002381529 0.002671546 0.002407614 0.002629281 0.002558314 0.002189677 0.003523391 0.003813587
    

    data.table version

    library(data.table)
    per_change <- function(x)x/shift(x) - 1
    setDT(df)
    df <- df[SX5P>0]
    daily <- df[, lapply(.SD, per_change), .SDcols=-"Date"][-1, ]
    daily
    cov(daily)
    monthly <- df[, lapply(.SD, percent_change2), by = .(gr=floor_date(Date, unit = "month")), .SDcols=-"Date"][-1, -"gr" ]
    cov(monthly)