Search code examples
r gis igraph

How to resolve this IGraph error: "Some vertex names in edge list are not listed in vertex data frame"


I have created a dataframe for nodes and edges as follows:

# Vertex table: the 59 country centroids followed by Mecca as row 60.
# Column order is lon, lat, id, name, so `id` is the third column here.
points <- data.frame(
  "lon" = c(hajj_xy$lon, mecca$lon),
  "lat" = c(hajj_xy$lat, mecca$lat),
  "id" = 1:60,
  "name" = c(hajj_geom$COUNTRY, "Mecca")
)
# Edge table: one edge from each country id (1-59) to the city id (60);
# the scalar `to` is recycled across all 59 rows.
edges <- data.frame(
  "from" = 1:59,
  "to" = 60,
  # NOTE(review): `Quota` does not appear among the hajj_geom columns in the
  # data dump shown for reproduction -- presumably omitted; confirm.
  "weight" = hajj_geom$Quota
)
# Build an undirected graph from the edge list, passing `points` as vertex data.
network <- graph_from_data_frame(edges, directed = FALSE, vertices = points)

The longitude and latitude represent country centroids and I use an id where 1 through 59 represent a country and 60 represents a city. I want to create edges from these 59 countries to this city:

From: 1, 2, 3, 4 To: 60, 60, 60, 60

This, however, consistently results in the error: "Some vertex names in edge list are not listed in vertex data frame"

As far as I know, every vertex in the edge list is also present in the points$id column. My two theories for why I am encountering this problem are that the technique I use is incompatible with an egocentric graph, where every node connects to just one other node, or that igraph incorrectly interprets the "name" and "id" columns. I tried converting the ids currently used for "from" and "to" in the edges dataframe to names, but the error persisted.

Edit: Data for reproduction. The hajj_xy dataframe:

structure(list(lon = c(65.9479669237535, 2.64627591851302, 134.301675336053, 
50.559740079786, 90.2774496745135, 4.67120680615151, 12.7378973231993, 
-96.5151903184251, -15.6779756706145, 18.6543043886044, 103.47404963678, 
29.8882108687771, 39.6249885904802, 2.57607841738202, 10.3733677926846, 
-10.9354410214561, 79.5677823835835, 117.240109217723, 54.3703083971922, 
43.8023614719206, 12.2021633466777, 36.7732630026489, 66.5242262906917, 
37.8579259912893, 47.5916172583958, 35.8855654481423, 109.709896258184, 
-3.57225496144005, -9.22396620324199, 96.5133408322776, 83.969553803746, 
172.887106865069, 9.34371838735596, 8.09537716523291, 56.0793582943553, 
69.2121051275642, 35.2032751468566, 51.1910642073789, 95.5207429227211, 
-14.4635439326227, 103.808051264695, 45.8403143133632, 25.2021063215796, 
30.3246716935576, -3.49984093678964, 80.7052712549262, 29.9279569045922, 
16.2610113889828, 38.4844962331616, 34.8121811983606, 9.56479507925492, 
35.1836671448445, 32.3858023843978, 54.3291144962912, -2.78544980942346, 
-103.653184801265, 63.3315134443397, -66.1582835594826, 47.5815038390001
), lat = c(33.8223926119716, 28.0602306656642, -25.7660383223791, 
26.0222416357322, 23.8364271015654, 50.6421503303993, 5.67607783880337, 
60.4371344083858, 28.3430009172033, 15.3027008939933, 36.64663743772, 
26.458188051087, 8.61780988456197, 46.5157605712005, 51.0501892897143, 
10.4393711388205, 22.7740773594071, -2.27968957959564, 32.6848865193055, 
33.0183506494569, 42.7147338731206, 31.2463171818873, 48.2500725050767, 
0.529599870066816, 29.3398343693073, 33.9187854190576, 3.81369770557425, 
17.3195378615238, 29.016208130024, 21.080636958992, 28.2620878317739, 
-41.7274247693474, 17.4188384125821, 9.59073204611439, 20.5826361524773, 
29.9008939656735, 31.9134231526049, 25.3139236554331, 66.0030903910723, 
14.3655524566031, 1.35161751178752, 6.04845819078977, -28.9720370359756, 
7.27885724378438, 40.3571655250189, 7.60641956825879, 15.9775064603477, 
62.3941286813784, 35.0066609354285, -6.26290261225646, 34.0803348597117, 
39.1362233427041, 1.27950304842164, 23.9126156648872, 54.0415069640199, 
44.768175410617, 41.7709037388632, 7.12051690117722, 15.9140418997466
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-59L))

hajj_geom (omitted geometry column due to size):

structure(list(FID = c(1L, 3L, 13L, 18L, 19L, 22L, 41L, 42L, 
43L, 46L, 48L, 68L, 74L, 79L, 86L, 97L, 105L, 106L, 107L, 108L, 
112L, 116L, 118L, 119L, 121L, 125L, 135L, 137L, 151L, 153L, 156L, 
159L, 161L, 162L, 169L, 170L, 172L, 182L, 185L, 200L, 204L, 209L, 
210L, 213L, 214L, 215L, 216L, 219L, 221L, 223L, 230L, 231L, 235L, 
237L, 238L, 239L, 243L, 246L, 249L), COUNTRY = c("Afghanistan", 
"Algeria", "Australia", "Bahrain", "Bangladesh", "Belgium", "Cameroon", 
"Canada", "Canarias", "Chad", "China", "Egypt", "Ethiopia", "France", 
"Germany", "Guinea", "India", "Indonesia", "Iran", "Iraq", "Italy", 
"Jordan", "Kazakhstan", "Kenya", "Kuwait", "Lebanon", "Malaysia", 
"Mali", "Morocco", "Myanmar", "Nepal", "New Zealand", "Niger", 
"Nigeria", "Oman", "Pakistan", "Palestinian Territory", "Qatar", 
"Russian Federation", "Senegal", "Singapore", "Somalia", "South Africa", 
"South Sudan", "Spain", "Sri Lanka", "Sudan", "Sweden", "Syria", 
"Tanzania", "Tunisia", "Turkiye", "Uganda", "United Arab Emirates", 
"United Kingdom", "United States", "Uzbekistan", "Venezuela", 
"Yemen"), ISO = c("AF", "DZ", "AU", "BH", "BD", "BE", "CM", "CA", 
"ES", "TD", "CN", "EG", "ET", "FR", "DE", "GN", "IN", "ID", "IR", 
"IQ", "IT", "JO", "KZ", "KE", "KW", "LB", "MY", "ML", "MA", "MM", 
"NP", "NZ", "NE", "NG", "OM", "PK", "PS", "QA", "RU", "SN", "SG", 
"SO", "ZA", "SS", "ES", "LK", "SD", "SE", "SY", "TZ", "TN", "TR", 
"UG", "AE", "GB", "US", "UZ", "VE", "YE"), COUNTRYAFF = c("Afghanistan", 
"Algeria", "Australia", "Bahrain", "Bangladesh", "Belgium", "Cameroon", 
"Canada", "Spain", "Chad", "China", "Egypt", "Ethiopia", "France", 
"Germany", "Guinea", "India", "Indonesia", "Iran, Islamic Republic of", 
"Iraq", "Italy", "Jordan", "Kazakhstan", "Kenya", "Kuwait", "Lebanon", 
"Malaysia", "Mali", "Morocco", "Myanmar", "Nepal", "New Zealand", 
"Niger", "Nigeria", "Oman", "Pakistan", "Palestine, State of", 
"Qatar", "Russian Federation", "Senegal", "Singapore", "Somalia", 
"South Africa", "South Sudan", "Spain", "Sri Lanka", "Sudan", 
"Sweden", "Syrian Arab Republic", "Tanzania, United Republic of", 
"Tunisia", "Turkiye", "Uganda", "United Arab Emirates", "United Kingdom", 
"United States", "Uzbekistan", "Venezuela, Bolivarian Republic of", 
"Yemen"), AFF_ISO = c("AF", "DZ", "AU", "BH", "BD", "BE", "CM", 
"CA", "ES", "TD", "CN", "EG", "ET", "FR", "DE", "GN", "IN", "ID", 
"IR", "IQ", "IT", "JO", "KZ", "KE", "KW", "LB", "MY", "ML", "MA", 
"MM", "NP", "NZ", "NE", "NG", "OM", "PK", "PS", "QA", "RU", "SN", 
"SG", "SO", "ZA", "SS", "ES", "LK", "SD", "SE", "SY", "TZ", "TN", 
"TR", "UG", "AE", "GB", "US", "UZ", "VE", "YE"), SHAPE_Leng = c(50.8032097216606, 
70.5400821564478, 252.165237695418, 1.65372595275656, 42.011938392116, 
11.9990027190426, 41.9605955652992, 2210.68482601386, 9.49147417107331, 
51.6719899775874, 312.493593845668, 53.2304769311098, 46.8103153863514, 
65.8470426672871, 56.1150376523079, 31.7201924739149, 176.501522091413, 
416.25500787155, 75.7732135392343, 33.6200149198639, 68.044300922069, 
16.1779616152639, 140.347747504746, 33.6126787257929, 7.54876822741623, 
5.61156883768039, 60.7667623241101, 64.2045167752772, 56.1032566069788, 
87.3471888591459, 23.5332666615078, 80.3321209534038, 50.803637221269, 
48.108082151029, 32.3352139020106, 71.729793755934, 4.75789758672638, 
4.78981111664242, 1536.28714977792, 31.010946782367, 0.858102626091241, 
48.9953616048232, 68.4548253215778, 41.8962017150055, 51.7249563726083, 
13.9984198710169, 65.2824905939917, 92.1859283329707, 22.535536993499, 
45.4054960712079, 25.0143018445706, 76.9907310297634, 21.248227720151, 
16.7596763569019, 115.97423585184, 726.106055708415, 60.3220685063147, 
76.9474475013067, 34.0113268468468)), class = c("sf", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -59L))

mecca dataframe:

structure(list(query = "Mecca", lat = 21.420847, lon = 39.826869, 
    address = "محافظة مكة المكرمة, منطقة مكة المكرمة, السعودية"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))

Solution

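  • The error is caused by id not being the first column in the vertices data frame (points). graph_from_data_frame() treats the first column of vertices as the vertex names and matches the first two columns of the edge list against it; in your points that first column is lon, so the ids 1 to 60 used in edges are not found. Also note that the vertex attribute name has a special meaning: it holds the symbolic vertex names. Unless your intention is to use the country names as the vertex ids, you'd better rename that column, for example country = paste0("n", sample(n)).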

    To read the documentation, try:

    >help(graph_from_data_frame)

    The simplified example below works without errors.

    # Minimal star graph: vertices 1..(n - 1) each connect to vertex n.
    library(igraph)
    n <- 60
    # graph_from_data_frame() takes the FIRST column of `vertices` as the
    # vertex names, so `id` must come first to match edges$from / edges$to.
    points <- data.frame(
      id      = seq_len(n),
      lon     = "lon",  # placeholder; substitute the real longitudes
      lat     = "lat",  # placeholder; substitute the real latitudes
      # Called `country` rather than `name`: `name` is the attribute igraph
      # uses for vertex names, so a column with that name would clash with
      # the names taken from `id`.
      country = paste0("n", sample(n))
    )
    # One edge from every other vertex to vertex n; the scalar `to` is recycled.
    edges <- data.frame(
      from   = seq_len(n - 1),
      to     = n,
      weight = sample(n - 1)
    )
    network <- graph_from_data_frame(edges, directed = FALSE, vertices = points)
    # List the vertex attributes to check what was attached to the graph.
    vertex_attr(network)