I am trying to create interaction matrices from multiple files. With some of my files, I am receiving the following error:
Error in interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] :
subscript out of bounds
This is the code I am using.
create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10, nsel = 25) {
NNdatafull <- read_excel(nm, sheet = 1)
colnames(NNdatafull) <- c("Date", "Time", "Pen", "Pig_ID", "Distance", "Closest_Pig", "Behavior", "Orientation", "Location")
datelevels <- unique(NNdatafull$Date)
interaction_matrices <- list()
for (date in datelevels) {
mdat <- filter(NNdatafull, Date == date)
mdat <- filter(mdat, Behavior == behavior_code)
# Get unique pig IDs from both Pig_ID and Closest_Pig columns
unique_pigs <- union(unique(mdat$Pig_ID), unique(mdat$Closest_Pig))
# Initialize interaction matrix with the size based on the number of unique pigs
interaction_matrix <- matrix(0, nrow = length(unique_pigs), ncol = length(unique_pigs))
for (i in 1:nsel) {
time <- min(mdat$Time) + (i - 1) * (60 * select_every)
time_data <- filter(mdat, Time == time)
interacting_pigs <- unique(time_data$Pig_ID)
closest_pigs <- unique(time_data$Closest_Pig)
# Update interaction matrix based on pig ID to closest pig interactions
for (pig_id in interacting_pigs) {
closest_pig <- time_data$Closest_Pig[time_data$Pig_ID == pig_id]
# Increment the value in the interaction matrix
interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] <- interaction_matrix[as.numeric(pig_id), as.numeric(closest_pig)] + 1
}
}
interaction_matrices[[as.character(date)]] <- interaction_matrix
}
return(interaction_matrices)
}
This is an example of data I am using: data example
I think it is because the code has the matrix as length(unique_pigs) but some of the ID numbers are greater than the number of pigs. For example, a pen has 13 pigs, but one pig has 16 as an ID number.
How would I edit the code to include only the ID numbers present in the datasheet?
Thanks!
You were correct in assuming that the issue arises because the pig IDs and the closest pig IDs do not necessarily correspond to the row and column numbers of the matrix. You would need to create a mapping between pig IDs and the matrix indices.
Here's the how I did it:
create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10,
nsel = 25) {
# you can keep doing as you had here, I just did this for myself
file_path <- file.path(getwd(), paste0(nm, ".xlsx"))
NNdatafull <- read_excel(file_path, sheet = 1)
colnames(NNdatafull) <- c("Date", "Time", "Pen", "Pig_ID", "Distance",
"Closest_Pig", "Behavior", "Orientation", "Location")
datelevels <- unique(NNdatafull$Date)
interaction_matrices <- list()
for (date in datelevels) {
mdat <- filter(NNdatafull, Date == date)
mdat <- filter(mdat, Behavior == behavior_code)
# Get unique pig IDs from both Pig_ID and Closest_Pig columns
unique_pigs <- union(unique(mdat$Pig_ID), unique(mdat$Closest_Pig))
# create mapping of pig IDs to matrix indices
pig_id_to_matrix_idx <- match(unique_pigs, sort(unique_pigs))
# Initialize interaction matrix with the size based on the number of unique pigs
interaction_matrix <- matrix(0, nrow = length(unique_pigs),
ncol = length(unique_pigs))
for (i in 1:nsel) {
time <- min(mdat$Time) + (i - 1) * (60 * select_every)
time_data <- filter(mdat, Time == time)
# Update interaction matrix based on pig ID to closest pig interactions
for (row in 1:nrow(time_data)) {
pig_id <- time_data$Pig_ID[row]
closest_pig <- time_data$Closest_Pig[row]
# convert pig IDs to matrix indices
pig_idx <- which(unique_pigs == pig_id)
closest_pig_idx <- which(unique_pigs == closest_pig)
# check if the indices are valid; then update the interaction matrix
if (length(pig_idx) == 1 && length(closest_pig_idx) ==
1) {
interaction_matrix[pig_idx, closest_pig_idx] <- interaction_matrix[pig_idx,
closest_pig_idx] + 1
}
}
}
interaction_matrices[[as.character(date)]] <- interaction_matrix
}
return(interaction_matrices)
}
this doesn't return any errors on my end, and after running it through a tester function to graph it, it seems to work:
test_and_plot_interaction_matrices <- function(filename) {
library(ggplot2)
library(reshape2)
interaction_matrices <- create_interaction_matrices(filename)
some_date <- names(interaction_matrices)[1]
matrix_to_plot <- interaction_matrices[[some_date]]
matrix_long <- melt(matrix_to_plot)
names(matrix_long) <- c("Pig_ID", "Closest_Pig", "Frequency")
ggplot(matrix_long, aes(x = Pig_ID, y = Closest_Pig, fill = Frequency)) +
geom_tile() + scale_fill_gradient(low = "white", high = "steelblue") +
theme_minimal() + labs(title = paste("Interaction Matrix for Date:", some_date),
x = "Pig ID", y = "Closest Pig ID", fill = "Interaction\nFrequency") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
}
image of heatmap on stack.imgur.com
I don't know if I graphed the data correctly, but it shouldn't matter since your function works now [I hope :)]
edit:
I thought of a way to do it with vectorized operations:
library(dplyr)
create_interaction_matrices <- function(nm, behavior_code = "AFF", select_every = 10,
nsel = 25) {
file_path <- file.path(getwd(), paste0(nm, ".xlsx"))
NNdatafull <- read_excel(file_path, sheet = 1)
# preprocess
NNdatafull <- NNdatafull |>
filter(Behavior == behavior_code) |>
mutate(Date = as.character(Date))
# store interaction matrices for each date
interaction_matrices <- list()
# process each date
for (date in unique(NNdatafull$Date)) {
mdat <- filter(NNdatafull, Date == date)
# Get unique pig IDs and create a mapping to matrix indices
unique_pigs <- sort(unique(c(mdat$Pig_ID, mdat$Closest_Pig)))
pig_id_to_matrix_idx <- match(unique_pigs, unique_pigs)
# Initialize the interaction matrix with the size based on the number of unique pigs
interaction_matrix <- matrix(0, nrow = length(unique_pigs),
ncol = length(unique_pigs), dimnames = list(unique_pigs,
unique_pigs))
# iterate through selected dates
for (i in seq_len(nsel)) {
time <- min(mdat$Time) + (i - 1) * (60 * select_every)
time_data <- filter(mdat, Time == time)
# update interaction matrix for each row in time_data
apply(time_data, 1, function(row) {
pig_idx <- match(row["Pig_ID"], unique_pigs)
closest_pig_idx <- match(row["Closest_Pig"], unique_pigs)
if (!is.na(pig_idx) && !is.na(closest_pig_idx)) {
interaction_matrix[pig_idx, closest_pig_idx] <- interaction_matrix[pig_idx,
closest_pig_idx] + 1
}
})
}
interaction_matrices[[date]] <- interaction_matrix
}
return(interaction_matrices)
}
I'm not sure if it's the best way to do it, but it doesn't give any errors either,on my end at least, we'll have to see how it goes for you :)