Search code examples
r dplyr traminer

How do I use state data from BORIS with TraMineR?


I am struggling to figure out how to convert the BORIS output into one of the state sequence analysis formats that I can analyze with TraMineR.

The BORIS outputs are basically tables that look like this:

                File    Time     Behavior Status
1  K8121319_feed3_01   0.000     Approach  START
2  K8121319_feed3_01 393.225     Approach   STOP
3  K8121319_feed3_01 393.226 Out-of-Frame  START
4  K8121319_feed3_01 426.003 Out-of-Frame   STOP
5  K8121319_feed3_01 442.006     Approach  START
6  K8121319_feed3_01 465.755     Approach   STOP
7  K8121319_feed3_01 465.756        Avoid  START
8  K8121319_feed3_01 513.255        Avoid   STOP
9  K8121319_feed3_01 513.256      Explore  START
10 K8121319_feed3_01 746.577      Explore   STOP

It seems like it should be possible to convert this to the SPELL sequence format using dplyr, but I can't figure out how. Has anyone used these two software packages together?

The SPELL format would look like this:

                File Behavior     Start     Stop
1  K8121319_feed3_01 Approach      0.000    393.225
2  K8121319_feed3_01 OOF          393.226   426.003
3  K8121319_feed3_01 Approach     442.006   465.755
4  K8121319_feed3_01 Avoid        465.756   513.255
5  K8121319_feed3_01 Explore      513.256   746.577

I have been trying to use tidyr::spread to do this.

Edit: here is the result of dput(data1[1:20,])

structure(list(File = c("K8121319_feed3_01", "K8121319_feed3_01", 
"K8121319_feed3_01", "K8121319_feed3_01", "K8121319_feed3_01", 
"K8121319_feed3_01", "K8121319_feed3_01", "K8121319_feed3_01", 
"K8121319_feed3_01", "K8121319_feed3_01", "K8121319_feed3_02", 
"K8121319_feed3_02", "K8121319_feed3_02", "K8121319_feed3_02", 
"K8121319_feed3_02", "K8121319_feed3_02", "K8121319_feed3_02", 
"K8121319_feed3_02", "K8121319_feed3_02", "K8121319_feed3_02"
), Time = c(0, 393.225, 393.226, 426.003, 442.006, 465.755, 465.756, 
513.255, 513.256, 746.577, 0, 29.85, 29.851, 66.6, 66.601, 292.646, 
292.647, 362.208, 362.209, 442.456), Behavior = c("Approach", 
"Approach", "Out-of-Frame", "Out-of-Frame", "Approach", "Approach", 
"Avoid", "Avoid", "Explore", "Explore", "Approach", "Approach", 
"Avoid", "Avoid", "Approach", "Approach", "Avoid", "Avoid", "Approach", 
"Approach"), Status = c("START", "STOP", "START", "STOP", "START", 
"STOP", "START", "STOP", "START", "STOP", "START", "STOP", "START", 
"STOP", "START", "STOP", "START", "STOP", "START", "STOP")), row.names = c(NA, 
20L), class = "data.frame")

Edit: dput for part of df with repeated states

dput(data1[360:370,])

structure(list(File = c("K8121819_feed3_13", "K8121819_feed3_13", 
"K8121819_feed3_13", "K8121819_feed3_13", "K8121819_feed3_13", 
"K8121819_feed3_14", "K8121819_feed3_14", "K8121819_feed3_14", 
"K8121819_feed3_14", "K8121819_feed3_14", "K8121819_feed3_14"
), Time = c(700.311, 700.312, 720.311, 742.851, 754.339, 0, 32.124, 
32.125, 47.14, 47.141, 84.671), Behavior = c("Approach", "Avoid", 
"Avoid", "Avoid", "Avoid", "Avoid", "Avoid", "Explore", "Explore", 
"Approach", "Approach"), Status = c("STOP", "START", "STOP", 
"START", "STOP", "START", "STOP", "START", "STOP", "START", "STOP"
)), row.names = 360:370, class = "data.frame")

Solution

  • I question your statement that the SPELL format can be used with continuous data, because providing a double to seqdef results in an error that the beginning and end columns must be integer.

    Hopefully this will get you started though:

    Edit: Now to potentially fix duplicated Behavior states:

    library(TraMineR)
    library(tidyverse)
    library(data.table)
    # Reshape the BORIS event log (one row per START/STOP event) into one row
    # per spell, with integer START/STOP columns.
    data.long <- data1 %>%
      mutate(
        # Run id: changes whenever Behavior differs from the previous row.
        # Computed on the raw labels, before the hyphens are stripped below.
        id = rleid(Behavior),
        # Drop hyphens from the labels ("Out-of-Frame" -> "OutofFrame").
        Behavior = str_remove_all(Behavior, "-")
      ) %>%
      group_by(File, id) %>%
      # Within each run keep only the earliest and the latest event, so a
      # behaviour recorded as several back-to-back bouts collapses to one spell.
      dplyr::filter(Time == max(Time) | Time == min(Time)) %>%
      # Spread the Status values ("START"/"STOP") into columns holding Time.
      pivot_wider(
        id_cols = c(File, Behavior, id),
        names_from = Status,
        values_from = Time
      ) %>%
      # Truncate to whole time units and shift by one so the first position
      # is 1 rather than 0.
      mutate(across(c(START, STOP), ~ 1L + as.integer(floor(.x)))) %>%
      as.data.frame()

    data.long
    #                File   Behavior id START STOP
    #1  K8121319_feed3_01   Approach  1     1  394
    #2  K8121319_feed3_01 OutofFrame  2   394  427
    #3  K8121319_feed3_01   Approach  3   443  466
    #4  K8121319_feed3_01      Avoid  4   466  514
    #5  K8121319_feed3_01    Explore  5   514  747
    #6  K8121319_feed3_02   Approach  6     1   30
    #7  K8121319_feed3_02      Avoid  7    30   67
    #8  K8121319_feed3_02   Approach  8    67  293
    #9  K8121319_feed3_02      Avoid  9   293  363
    #10 K8121319_feed3_02   Approach 10   363  443
    

    I removed the - because it was causing problems with seqstatl, and I added 1 because the package apparently does not allow a position of 0. I used rleid from the data.table package because it saved a lot of typing compared with using base R's rle.

    Now we can use seqdef:

    # Distinct behaviours found in the spell table. Used as the long state
    # labels and, through its length, for the numeric short state names.
    behavior.states <- seqstatl(data.long$Behavior)

    # Build the state-sequence object from the SPELL-formatted table, passing
    # the File, START, STOP and Behavior columns of data.long.
    data.SPELL <- seqdef(
      data = data.long,
      var = c("File", "START", "STOP", "Behavior"),
      informat = "SPELL",
      states = seq_along(behavior.states),
      labels = behavior.states,
      process = FALSE
    )
    data.SPELL
    #K8121319_feed3_01 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-4-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
    #K8121319_feed3_02 1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-2-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1-1