Search code examples
r analysis cryptocurrency

Market analysis project - cryptocurrencies


I just started coding some cryptocurrency analysis, but I have run into an issue with the code.
    _____________________________________________________________________________

        library(magrittr)
        library(dplyr)
        library(tidyverse)
        library(crypto)
        library(lubridate)
        library(xts)
        library(quantmod)
        library(tidyr)

        # Fetch the price table that every later step of the script works on.
        # NOTE(review): getCoins() presumably comes from the crypto package
        # loaded above, and limit = 50 presumably caps the number of coins
        # returned -- confirm against the package documentation.
        df <- getCoins(limit = 50)

        # Aggregate the per-coin rows of `df` to a coarser period and return
        # the result as an xts object indexed by the rounded date.
        #
        # df:        data.frame with at least the columns date, slug, symbol,
        #            name, ranknow, open, high, low, close, volume and market.
        # frequency: rounding unit handed to lubridate::round_date(), e.g.
        #            "week" or "month".
        #
        # NOTE(review): the payload passed to xts::xts() still contains the
        # text columns slug/symbol/name, and the str() output further down
        # this page shows the resulting xts data as an all-character matrix.
        yourfunction <- function(df, frequency = NULL) {
          # Round every date to the nearest boundary of the requested unit so
          # that all rows of one period share the same grouping key.
          df$date <- lubridate::round_date(df$date, frequency)

          grouped <- dplyr::group_by(df, date, slug, symbol, name, ranknow)
          bars <- dplyr::summarise(
            grouped,
            open   = dplyr::first(open),
            high   = max(high),
            low    = min(low),
            close  = dplyr::last(close),
            volume = sum(volume),
            market = dplyr::last(market))

          bars$volume <- round(bars$volume, digits = 0)
          bars$market <- round(bars$market, digits = 0)
          bars        <- as.data.frame(bars)

          # Column 1 is the rounded date; it becomes the time index and all
          # remaining columns become the xts payload.
          stamps <- as.POSIXct(bars[, 1], format = "%d.%m.%Y %H:%M:%S")
          xts::xts(bars[, 2:ncol(bars)], stamps)
        }

        #### IF NO ERRORS IN ABOVE RUN THESE INDIVIDUALLY------
# Weekly aggregation: build it, inspect its structure, then preview the open
# prices reshaped to one column per coin (last rows only).
week_xts <- yourfunction(df, frequency = "week")
str(week_xts)

week_xts %>%
  as.tbl() %>%
  select(date, slug, open) %>%
  spread(key = slug, value = open) %>%
  tail()

#### IF NO ERRORS IN ABOVE RUN THESE INDIVIDUALLY------
# Monthly aggregation: same steps as for the weekly data.
month_xts <- yourfunction(df, frequency = "month")
str(month_xts)

month_xts %>%
  as.tbl() %>%
  select(date, slug, open) %>%
  spread(key = slug, value = open) %>%
  tail()
########################



#Making daily, weekly and monthly open for ten biggest coins
# Each section below builds three wide tables for one measure (one column per
# coin, one row per date) and then keeps only the ten biggest coins.
# Columns are picked by position. For df, the dput() sample of df further down
# this page lists the columns as slug, symbol, name, date, ranknow, open, high,
# low, close, volume, market, ..., so c(1, 4, 6) is slug/date/open.
# NOTE(review): for week_xts/month_xts, c(1, 2, 6) only means date/slug/open if
# the object is a data.frame laid out as date, slug, symbol, name, ranknow,
# open, high, low, close, volume, market. As built by yourfunction() above it
# is an xts object whose dates live in the index rather than in column 1 --
# confirm the layout before relying on these indices.

daily_open <- spread(df[,c(1,4,6)],slug,open)
weekly_open <- spread(week_xts[,c(1, 2, 6)],slug,open)
monthly_open <- spread(month_xts[,c(1, 2, 6)],slug,open)

# Slugs taken from the first ten rows of list_of_coins.
# NOTE(review): list_of_coins is not defined anywhere in this listing;
# presumably a coin listing ordered by rank -- confirm.
ten_biggest <- c(list_of_coins[1:10,"slug"])

# Keep the date column plus one column per selected coin.
daily_open <- daily_open[,c("date",ten_biggest)]
weekly_open <- weekly_open[,c("date",ten_biggest)]
monthly_open <- monthly_open[,c("date",ten_biggest)]


#Making daily, weekly and monthly high for ten biggest coins
# Same pattern as for open; position 7 is `high` in both assumed layouts.

daily_high <- spread(df[,c(1,4,7)],slug,high)
weekly_high <- spread(week_xts[,c(1,2,7)],slug,high)
monthly_high <- spread(month_xts[,c(1,2,7)],slug,high)

daily_high <- daily_high[,c("date",ten_biggest)]
weekly_high <- weekly_high[,c("date",ten_biggest)]
monthly_high <- monthly_high[,c("date",ten_biggest)]

#Making daily, weekly and monthly low for ten biggest coins
# Same pattern; position 8 is `low` in both assumed layouts.

daily_low <- spread(df[,c(1,4,8)],slug,low)
weekly_low <- spread(week_xts[,c(1,2,8)],slug,low)
monthly_low <- spread(month_xts[,c(1,2,8)],slug,low)

daily_low <- daily_low[,c("date",ten_biggest)]
weekly_low <- weekly_low[,c("date",ten_biggest)]
monthly_low <- monthly_low[,c("date",ten_biggest)]

#Making daily, weekly and monthly close for ten biggest coins
# Same pattern; position 9 is `close` in both assumed layouts.

daily_close <- spread(df[,c(1,4,9)],slug,close)
weekly_close <- spread(week_xts[,c(1,2,9)],slug,close)
monthly_close <- spread(month_xts[,c(1,2,9)],slug,close) 

daily_close <- daily_close[,c("date",ten_biggest)]
weekly_close <- weekly_close[,c("date",ten_biggest)]
monthly_close <- monthly_close[,c("date",ten_biggest)]


#Making daily, weekly and monthly volume for ten biggest coins
# Same pattern; position 10 is `volume` in both assumed layouts.

daily_volume <- spread(df[,c(1,4,10)],slug,volume)
weekly_volume <- spread(week_xts[,c(1,2,10)],slug,volume)
monthly_volume <- spread(month_xts[,c(1,2,10)],slug,volume)

daily_volume <- daily_volume[,c("date",ten_biggest)]
weekly_volume <- weekly_volume[,c("date",ten_biggest)]
monthly_volume <- monthly_volume[,c("date",ten_biggest)]


#Making daily,weekly and monthly market for ten biggest coins
# Same pattern; position 11 is `market` in both assumed layouts.

daily_market <- spread(df[,c(1,4,11)],slug,market)
weekly_market <- spread(week_xts[,c(1,2,11)],slug,market)
monthly_market <- spread(month_xts[,c(1,2,11)],slug,market)

daily_market <- daily_market[,c("date",ten_biggest)]
weekly_market <- weekly_market[,c("date",ten_biggest)]
monthly_market <- monthly_market[,c("date",ten_biggest)]


#Doesn't work
# Two attempts to compute returns from the weekly open table. Both fail with
# the errors quoted further down, which report that the input could not be
# converted to a time series.
# NOTE(review): period="daily" is requested on weekly data, and leading is
# passed as the string "TRUE" rather than the logical TRUE -- confirm intent.
weekly_open_returns <- periodReturn(weekly_open, period="daily", subset=NULL, type="arithmetic",leading="TRUE")
# NOTE(review): Return.calculate() is not provided by any package attached in
# the library() calls shown in this listing -- presumably PerformanceAnalytics;
# confirm it is attached.
weekly_open_returns <- Return.calculate(weekly_open)

I would like to make a data frame for each data type (open, high, close, volume, market, close_ratio and spread) for daily, monthly and weekly frequencies. These data frames should be constructed in a way that each column represents one currency and the rows would represent observations. However, when I try to use spread function for weekly and monthly frequencies in order to do this, it gives an error. How should I proceed?

Thank you in advance for help. Have a pleasant day.

The errors are:

    > weekly_open_returns <- periodReturn(weekly_open, period="daily", subset=NULL, type="arithmetic",leading="TRUE")
Error in try.xts(x) : 
  Error in as.POSIXlt.character(x, tz, ...) :   character string is not in a standard unambiguous format
> weekly_open_returns <- Return.calculate(weekly_open)
Error in checkData(prices, method = "xts") : 
  The data cannot be converted into a time series.  If you are trying to pass in names from a data object with one column, you should use the form 'data[rows, columns, drop = FALSE]'.  Rownames should have standard date formats, such as '1985-03-15'.

(From comment:)

I would like to make a data frame for each data type (open, high, close, volume, market, close_ratio and spread) for daily, monthly and weekly frequencies. These data frames should be constructed in a way that each column represents one currency and the rows would represent observations. However, when I try to use spread function for weekly and monthly frequencies in order to do this, it gives an error. How should I proceed? Thank you in advance for help. Have a pleasant day. – MP PM 28 mins ago


Solution

  • After testing the code, the error is fairly clear: tidyr::spread works on tibbles, but what you have is class [1] "xts" "zoo". Additionally, likely because of the xts step, it is all character, meaning all of your numbers in week_xts are strings. I see what you think you are doing with daily_xts (post-spread), but most of the data will be NA since most of the currencies are not present until late in the data. Is this intentional?

    Part of the problem is your use of xts: it does not return a data.frame, it returns a vector or matrix, as shown here:

    str(week_xts)
    # An 'xts' object on 2013-04-27 17:00:00/2018-04-28 17:00:00 containing:
    #   Data: chr [1:4593, 1:10] "bitcoin" "litecoin" "bitcoin" "litecoin" "bitcoin" ...
    #  - attr(*, "dimnames")=List of 2
    #   ..$ : NULL
    #   ..$ : chr [1:10] "slug" "symbol" "name" "ranknow" ...
    #   Indexed by objects of class: [POSIXct,POSIXt] TZ: 
    #   xts Attributes:  
    #  NULL
    

    This says that everything is character. So if you do

    head(week_xts[,1:5])
    #                     slug       symbol name       ranknow open          
    # 2013-04-27 17:00:00 "bitcoin"  "BTC"  "Bitcoin"  " 1"    "  135.300000"
    # 2013-04-27 17:00:00 "litecoin" "LTC"  "Litecoin" " 7"    "    4.300000"
    # 2013-05-04 17:00:00 "bitcoin"  "BTC"  "Bitcoin"  " 1"    "  116.380000"
    # 2013-05-04 17:00:00 "litecoin" "LTC"  "Litecoin" " 7"    "    3.780000"
    # 2013-05-11 17:00:00 "bitcoin"  "BTC"  "Bitcoin"  " 1"    "  113.200000"
    # 2013-05-11 17:00:00 "litecoin" "LTC"  "Litecoin" " 7"    "    3.400000"
    

    you'll see that what you want as numbers are actually strings.

    I suggest that perhaps yourfunction should not be calling xts::xts before you spread things. Instead:

    # Sketch of the suggested change: the function body stays as in the
    # question, but the aggregated data.frame is returned directly instead of
    # being wrapped in xts::xts().
    yourfunction <- function(df, frequency = NULL) {
      # ... (aggregation steps from the question, unchanged: round the dates,
      # group, summarise, round volume/market, as.data.frame(data)) ...
      return(data)
    }
    # Despite its name, week_xts now holds a plain data.frame (see str() output).
    week_xts  <- yourfunction(df, frequency = "week")
    str(week_xts)
    # 'data.frame': 4593 obs. of  11 variables:
    #  $ date   : Date, format: "2013-04-28" "2013-04-28" ...
    #  $ slug   : chr  "bitcoin" "litecoin" "bitcoin" "litecoin" ...
    #  $ symbol : chr  "BTC" "LTC" "BTC" "LTC" ...
    #  $ name   : chr  "Bitcoin" "Litecoin" "Bitcoin" "Litecoin" ...
    #  $ ranknow: num  1 7 1 7 1 7 1 7 1 7 ...
    #  $ open   : num  135.3 4.3 116.38 3.78 113.2 ...
    #  $ high   : num  147.49 4.57 125.6 4.04 122 ...
    #  $ low    : num  107.72 3.52 79.1 2.4 103.5 ...
    #  $ close  : num  116.99 3.8 113.57 3.41 114.22 ...
    #  $ volume : num  0 0 0 0 0 0 0 0 0 0 ...
    #  $ market : num  1542820000 73901200 1219450000 57196300 1242760000 ...
    

    From there:

    # One column per coin holding its weekly open; show only the latest rows.
    week_xts %>%
      as.tbl() %>%
      select(date, slug, open) %>%
      spread(key = slug, value = open) %>%
      tail()
    # # A tibble: 6 x 51
    #   date        `0x`  aelf aeternity `binance-coin` bitcoin `bitcoin-cash`
    #   <date>     <dbl> <dbl>     <dbl>          <dbl>   <dbl>          <dbl>
    # 1 2018-03-25 0.583 0.633      1.75           10.0   8939.          1033.
    # 2 2018-04-01 0.635 0.625      1.64           11.7   7979.           862.
    # 3 2018-04-08 0.521 0.519      1.43           12.2   6849.           649.
    # 4 2018-04-15 0.597 0.895      1.47           12.4   6955.           666.
    # 5 2018-04-22 0.923 1.11       1.86           12.3   8159.           891.
    # 6 2018-04-29 1.01  1.02       2.3            13.4   8867.          1290.
    # # ... with 44 more variables: `bitcoin-diamond` <dbl>, `bitcoin-gold` <dbl>,
    # #   `bitcoin-private` <dbl>, bitshares <dbl>, `bytecoin-bcn` <dbl>,
    # #   bytom <dbl>, cardano <dbl>, dash <dbl>, decred <dbl>, digixdao <dbl>,
    # #   dogecoin <dbl>, eos <dbl>, ethereum <dbl>, `ethereum-classic` <dbl>,
    # #   icon <dbl>, iota <dbl>, lisk <dbl>, litecoin <dbl>, loopring <dbl>,
    # #   maker <dbl>, mixin <dbl>, monero <dbl>, nano <dbl>, nem <dbl>, neo <dbl>,
    # #   omisego <dbl>, ontology <dbl>, populous <dbl>, qtum <dbl>, rchain <dbl>,
    # #   ripple <dbl>, siacoin <dbl>, status <dbl>, steem <dbl>, stellar <dbl>,
    # #   stratis <dbl>, tether <dbl>, tron <dbl>, vechain <dbl>, verge <dbl>,
    # #   wanchain <dbl>, waves <dbl>, zcash <dbl>, zilliqa <dbl>
    

    (I show the tail of it because the majority of early dates are empty for most currencies.)

    Side note: I suggest you use select(...) and column names instead of indices; you used week_xts[,c(1,5)] which matches date and open, yes, but it is not abundantly clear without looking at the data. Also, by skipping the xts conversion, it's now c(1,2,6) to capture date, slug, and open.

    I wonder if this is the point at which you should consider using xts::xts, though with slug in the data, you'll likely still be converting all numbers to character.


    20 of the last date's entries, if somebody else wants a quick gander at this:

    > dput(head(filter(df, date==tail(date,1)),n=20))
    structure(list(slug = c("bitcoin", "ethereum", "ripple", "bitcoin-cash", 
    "eos", "cardano", "litecoin", "stellar", "tron", "neo", "iota", 
    "dash", "monero", "nem", "tether", "vechain", "ethereum-classic", 
    "qtum", "omisego", "icon"), symbol = c("BTC", "ETH", "XRP", "BCH", 
    "EOS", "ADA", "LTC", "XLM", "TRX", "NEO", "MIOTA", "DASH", "XMR", 
    "XEM", "USDT", "VEN", "ETC", "QTUM", "OMG", "ICX"), name = c("Bitcoin", 
    "Ethereum", "Ripple", "Bitcoin Cash", "EOS", "Cardano", "Litecoin", 
    "Stellar", "TRON", "NEO", "IOTA", "Dash", "Monero", "NEM", "Tether", 
    "VeChain", "Ethereum Classic", "Qtum", "OmiseGO", "ICON"), date = structure(c(17651, 
    17651, 17651, 17651, 17651, 17651, 17651, 17651, 17651, 17651, 
    17651, 17651, 17651, 17651, 17651, 17651, 17651, 17651, 17651, 
    17651), class = "Date"), ranknow = c(1, 2, 3, 4, 5, 6, 7, 8, 
    9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20), open = c(9426.11, 
    689.76, 0.871404, 1440.96, 21.64, 0.364297, 153.65, 0.458688, 
    0.089656, 92.15, 2.04, 498.44, 256.35, 0.42658, 0.997553, 4.83, 
    21.84, 23.15, 18.05, 4.69), high = c(9477.14, 694.44, 0.876788, 
    1440.96, 21.64, 0.364297, 154.08, 0.460128, 0.101197, 94.76, 
    2.05, 499.43, 259.07, 0.42658, 1.01, 4.83, 22.95, 25.6, 18.06, 
    4.69), low = c(9166.81, 666.12, 0.831208, 1339.36, 16.86, 0.336625, 
    147.87, 0.417446, 0.086102, 82.86, 1.94, 472.02, 239.08, 0.403889, 
    0.992921, 4.37, 21.42, 22.12, 16.81, 4.26), close = c(9240.55, 
    669.92, 0.837938, 1350.05, 17.58, 0.343318, 148.48, 0.424659, 
    0.093777, 84.27, 1.96, 472.77, 242.46, 0.40888, 0.998919, 4.49, 
    21.68, 22.64, 16.95, 4.34), volume = c(8673920000, 2853100000, 
    575364000, 753114000, 4073370000, 298712000, 341397000, 81453300, 
    1749640000, 377385000, 61762500, 118497000, 103574000, 25523800, 
    4498440000, 109782000, 351049000, 414455000, 68967800, 94284100
    ), market = c(160302000000, 68376400000, 34112200000, 24642000000, 
    17849100000, 9445160000, 8651810000, 8518430000, 5894710000, 
    5990010000, 5676530000, 4006280000, 4096790000, 3839220000, 2411230000, 
    2539730000, 2215850000, 2050300000, 1841910000, 1815430000), 
        close_ratio = c(0.2376, 0.1342, 0.1477, 0.1052, 0.1506, 0.2419, 
        0.0982, 0.169, 0.5084, 0.1185, 0.1818, 0.0274, 0.1691, 0.22, 
        0.3512, 0.2609, 0.1699, 0.1494, 0.112, 0.186), spread = c(310.33, 
        28.32, 0.05, 101.6, 4.78, 0.03, 6.21, 0.04, 0.02, 11.9, 0.11, 
        27.41, 19.99, 0.02, 0.02, 0.46, 1.53, 3.48, 1.25, 0.43)), .Names = c("slug", 
    "symbol", "name", "date", "ranknow", "open", "high", "low", "close", 
    "volume", "market", "close_ratio", "spread"), row.names = c(NA, 
    20L), class = "data.frame")