I have a STOXX investment universe that comes from from here:
head(df)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
1 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
2 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56
3 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81
4 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82
5 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06
6 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20
To understand the principles of allocation of actions. I have to determine, at the end of each month, the allocation so that each share contributes the same risk to the total portfolio.
Then I follow this tutorial that makes you for Python.
But, on the one hand, I have problems calculating daily returns. In fact I have all the data thanks to:
url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
df$Date <- as.Date(sub('(.{2}).(.{2}).(.{4})', "\\3-\\2-\\1", df$Date))
And then I have to calculate them. I have seen that there is a function, Delt
that says to be able to do it between two columns. But I have to make the differences between each cell. I do not know how to do it:
new = df[2:9]
# How to calculate the returns ?
Delt(df.a_given_day,df.a_given_day_plus_1,k=0:2) #... Delt do it 0,1 y 2 periods between two columns.
After, I could calculate the covariance with cov_matrix_df <- cov(data.matrix(new, rownames.force = NA))
and continue my search to calculate the risk.
On the other hand I do not know how to modify it to determine, at the end of each month the risk to decide at the end of each month, the allocation.
From this answer I tried from this answer:
dr_df = cbind(df[-1,1],apply(df[,-1],2,function(x) diff(x)/head(x,-1)))
Which returns:
> head(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA
[1,] 6209 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
[2,] 6210 -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850
[3,] 6213 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245
[4,] 6214 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136
[5,] 6215 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453
[6,] 6216 -0.004595435 -0.013101262 -0.003103366 -0.011014551 -0.009531535 0.005949851
DK5F DKXF
[1,] 0.0000000000 0.0000000000
[2,] 0.0036574404 0.0038133008
[3,] 0.0035823477 0.0001519526
[4,] 0.0039234391 0.0036463081
[5,] -0.0007969471 0.0021192855
[6,] -0.0098164026 -0.0087613293
Which seems quite good but I don't understand the code :/ And when I try to create the covariance matrix I have some issues:
> cov(dr_df[2:8])
Error in cov(dr_df[2:8]) : supply both 'x' and 'y' or a matrix-like 'x'
> cov(dr_df)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
9886513 NA NA NA NA NA NA NA NA
SX5P NA NA NA NA NA NA NA NA NA
SX5E NA NA NA NA NA NA NA NA NA
SXXP NA NA NA NA NA NA NA NA NA
SXXE NA NA NA NA NA NA NA NA NA
SXXF NA NA NA NA NA NA NA NA NA
SXXA NA NA NA NA NA NA NA NA NA
DK5F NA NA NA NA NA NA NA NA NA
DKXF NA NA NA NA NA NA NA NA NA
It seems that I have non-numeric argument to binary operator for SX5P - SX5P1d
:
> library(lubridate)
Attaching package: ‘lubridate’
The following objects are masked from ‘package:data.table’:
hour, mday, month, quarter, wday, week, yday, year
The following object is masked from ‘package:base’:
date
> library(data.table)
>
>
> url <- 'https://www.stoxx.com/document/Indices/Current/HistoricalData/hbrbcpe.txt'
> df <- read.table(url, sep = ';', skip = 4, stringsAsFactors = FALSE)
> names(df) <- c('Date','SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')
> df$Date <- dmy(df$Date)
> df$End_month_date <- ceiling_date(df$Date,unit="month") - days(1)
>
> dt <- as.data.table(df)
>
> #daily returns
> dt[, c("last_date",'SX5P1d','SX5E1d','SXXP1d','SXXE1d','SXXF1d','SXXA1d','DK5F1d','DKXF1d') := shift(.SD[,c("Date",'SX5P','SX5E','SXXP','SXXE','SXXF','SXXA','DK5F','DKXF')], n=1, fill=NA, type=c("lag")),]
Warning messages:
1: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'last_date' (recycled leaving remainder of 5 items).
2: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5P1d' (recycled leaving remainder of 5 items).
3: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SX5E1d' (recycled leaving remainder of 5 items).
4: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXP1d' (recycled leaving remainder of 5 items).
5: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXE1d' (recycled leaving remainder of 5 items).
6: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXF1d' (recycled leaving remainder of 5 items).
7: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'SXXA1d' (recycled leaving remainder of 5 items).
8: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DK5F1d' (recycled leaving remainder of 5 items).
9: In `[.data.table`(dt, , `:=`(c("last_date", "SX5P1d", "SX5E1d", :
Supplied 9 items to be assigned to 7673 items of column 'DKXF1d' (recycled leaving remainder of 5 items).
> dt[,`:=`(SX5P_r=SX5P-SX5P1d,
+ SX5E_r=SX5E-SX5E1d,
+ SXXP_r=SXXP-SXXP1d,
+ SXXE_r=SXXE-SXXE1d,
+ SXXF_r=SXXF-SXXF1d,
+ SXXA_r=SXXA-SXXA1d,
+ DK5F_r=DK5F-DK5F1d,
+ DKXF_r=DKXF-DKXF1d)]
Error in SX5P - SX5P1d : non-numeric argument to binary operator
> #monthly returns
> returns <- dt[,list(SX5P=sum(SX5P_r,na.rm=T),
+ SX5E=sum(SX5E_r,na.rm=T),
+ SXXP=sum(SXXP_r,na.rm=T),
+ SXXE=sum(SXXE_r,na.rm=T),
+ SXXF=sum(SXXF_r,na.rm=T),
+ SXXA=sum(SXXA_r,na.rm=T),
+ DK5F=sum(DK5F_r,na.rm=T),
+ DKXF=sum(DKXF_r,na.rm=T)),by="End_month_date"]
Error in `[.data.table`(dt, , list(SX5P = sum(SX5P_r, na.rm = T), SX5E = sum(SX5E_r, :
object 'SX5P_r' not found
Here is dt
after shift
operation that generated the warning messages :
> head(dt)
Date SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF End_month_date
1: 1986-12-31 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1986-12-31
2: 1987-01-01 775.00 900.82 82.76 98.58 98.06 69.06 645.26 65.56 1987-01-31
3: 1987-01-02 770.89 891.78 82.57 97.80 97.43 69.37 647.62 65.81 1987-01-31
4: 1987-01-05 771.89 898.33 82.82 98.60 98.19 69.16 649.94 65.82 1987-01-31
5: 1987-01-06 775.92 902.32 83.28 99.19 98.83 69.50 652.49 66.06 1987-01-31
6: 1987-01-07 781.21 899.15 83.78 98.96 98.62 70.59 651.97 66.20 1987-01-31
last_date SX5P1d SX5E1d SXXP1d SXXE1d SXXF1d SXXA1d DK5F1d DKXF1d
1: NA NA NA NA NA NA NA NA NA
2: Date Date Date Date Date Date Date Date Date
3: SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P SX5P
4: SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E SX5E
5: SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP SXXP
6: SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE SXXE
The data has some erratic values on dates 2016-03-25 and 2016-03-28.
library(dplyr)
df <- filter(df, SX5P>0) # drop erratic data points
percent_change <- function(x) (x - lag(x)) / lag(x) # function that calculates percentage change
daily_return <- df %>%
mutate_at(vars(-Date), percent_change) %>% # for each column excluding Date, apply percent_change function
filter(complete.cases(.)) %>% # filter out NAs
select(-Date) %>% # drop Date variable
as.matrix() # convert to matrix
head(daily_return, 5)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#[1,] 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.0000000000 0.0000000000
#[2,] -0.005303226 -0.010035301 -0.002295795 -0.007912355 -0.006424638 0.004488850 0.0036574404 0.0038133008
#[3,] 0.001297202 0.007344861 0.003027734 0.008179959 0.007800472 -0.003027245 0.0035823477 0.0001519526
#[4,] 0.005220951 0.004441575 0.005554214 0.005983773 0.006517975 0.004916136 0.0039234391 0.0036463081
#[5,] 0.006817713 -0.003513166 0.006003842 -0.002318782 -0.002124861 0.015683453 -0.0007969471 0.0021192855
cov(daily_return)
# SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
#SX5P 0.0001458898 0.0001531675 0.0001339905 0.0001400356 0.0001335696 0.0001283412 0.0001355236 0.0001410957
#SX5E 0.0001531675 0.0001781671 0.0001431415 0.0001622366 0.0001519764 0.0001252829 0.0001497803 0.0001561299
#SXXP 0.0001339905 0.0001431415 0.0001267415 0.0001328073 0.0001265858 0.0001210988 0.0001314346 0.0001359420
#SXXE 0.0001400356 0.0001622366 0.0001328073 0.0001502001 0.0001410354 0.0001165071 0.0001412857 0.0001471070
#SXXF 0.0001335696 0.0001519764 0.0001265858 0.0001410354 0.0001343114 0.0001130397 0.0001380515 0.0001432671
#SXXA 0.0001283412 0.0001252829 0.0001210988 0.0001165071 0.0001130397 0.0001257977 0.0001221743 0.0001254364
#DK5F 0.0001355236 0.0001497803 0.0001314346 0.0001412857 0.0001380515 0.0001221743 0.0001914781 0.0001946354
#DKXF 0.0001410957 0.0001561299 0.0001359420 0.0001471070 0.0001432671 0.0001254364 0.0001946354 0.0002103559
library(lubridate)
percent_change2 <- function(x)last(x)/first(x) - 1
monthly_return <- df %>%
group_by(gr = floor_date(Date, unit = "month")) %>%
summarize_at(vars(-Date, -gr), percent_change2) %>%
ungroup() %>%
select(-gr) %>%
as.matrix()
head(monthly_return, 5)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
[1,] 0.00000000 0.000000000 0.000000000 0.000000000 0.000000000 0.00000000 0.00000000 0.00000000
[2,] -0.01089032 -0.046335561 0.005316578 -0.025867316 -0.025494595 0.04170287 -0.02977095 -0.01281269
[3,] 0.03167912 -0.009493186 0.032518367 -0.011141476 -0.011708861 0.07918740 0.05577361 0.04355828
[4,] 0.02633308 0.031731340 0.025284157 0.027359491 0.027197099 0.02322630 0.04121760 0.03157433
[5,] 0.02660200 -0.002816901 0.023347620 -0.003767437 -0.002362366 0.05061867 0.03758165 0.03917672
cov(monthly_return)
SX5P SX5E SXXP SXXE SXXF SXXA DK5F DKXF
SX5P 0.002068415 0.002243488 0.002011784 0.002160762 0.002076261 0.001867744 0.002282369 0.002381529
SX5E 0.002243488 0.002712719 0.002225923 0.002605715 0.002448324 0.001857319 0.002549326 0.002671546
SXXP 0.002011784 0.002225923 0.002025003 0.002182308 0.002095078 0.001873543 0.002321951 0.002407614
SXXE 0.002160762 0.002605715 0.002182308 0.002548197 0.002399266 0.001826243 0.002514475 0.002629281
SXXF 0.002076261 0.002448324 0.002095078 0.002399266 0.002291523 0.001797954 0.002458753 0.002558314
SXXA 0.001867744 0.001857319 0.001873543 0.001826243 0.001797954 0.001927949 0.002134767 0.002189677
DK5F 0.002282369 0.002549326 0.002321951 0.002514475 0.002458753 0.002134767 0.003414248 0.003523391
DKXF 0.002381529 0.002671546 0.002407614 0.002629281 0.002558314 0.002189677 0.003523391 0.003813587
library(data.table)
per_change <- function(x)x/shift(x) - 1
setDT(df)
df <- df[SX5P>0]
daily <- df[, lapply(.SD, per_change), .SDcols=-"Date"][-1, ]
daily
cov(daily)
monthly <- df[, lapply(.SD, percent_change2), by = .(gr=floor_date(Date, unit = "month")), .SDcols=-"Date"][-1, -"gr" ]
cov(monthly)