SASpencer SASpencer - 29 days ago 6
R Question

Accessing nested information in data frame

I'm a little lost on how to access the nested information in the data frame below in order to plot `draw` against `mpg`.

library(rstanarm)
library(ggplot2)
require(tidyr)

# Work on a copy of the built-in mtcars data so the original stays untouched.
mycars <- mtcars
# Row key used to link each row of mycars to its column in the matrix `draws`
# below; afterwards mycars is a 'data.frame' with 32 obs. of 12 variables.
# seq_len() is the safe form of 1:nrow(): it yields an empty vector for 0 rows.
mycars$key <- seq_len(nrow(mycars))

# Fit a model of mpg on wt, qsec and am with a `(1 | cyl)` grouping term,
# using a single chain of 1000 iterations and a fixed seed for reproducibility.
fit <- stan_lmer(mpg ~ wt + qsec + am + (1 | cyl), data = mycars,
chains = 1, iter = 1000, seed = 12345)

# Predictions from the fitted model for the rows of mycars; per the note
# below, one row per draw and one column per row of mycars.
draws <- posterior_predict(fit, mycars) # matrix num [1:500, 1:32]

# Label each column of draws with the key of the corresponding mycars row.
colnames(draws) <- mycars$key

# Transpose so that each observation is a row, then collapse every column
# except `key` into a single nested column named `draw`.
# NOTE(review): `.key` with unnamed columns is the older tidyr nest()
# interface - confirm behaviour against the installed tidyr version.
new.draws <- nest(data.frame(key=mycars$key, t(draws)), -key, .key=draw)
# Attach the nested draws to the original columns by key, keeping row order.
result <- merge(mycars, new.draws, by="key", sort = FALSE)

# Goal: plot all the draws against mpg, per cyl group.
# NOTE(review): `draw` is a nested (list) column at this point, which is
# presumably why this call does not yield the desired plot - this is the
# problem the question asks about.
ggplot(result, aes(x = mpg, y = draw, group = cyl)) + geom_violin()


The result I want is equivalent to treating each value in `result[,"draw"]` as a separate observation in that row of the data frame. Also, if there's a better approach, I'm all ears.

Here's how I would expect the graph to look:
enter image description here

Here's a small version of `draws` with the key already added:

# Small sample of `draws`: a 5 x 32 numeric matrix (see .Dim) with unnamed
# rows and columns named "1" to "32", i.e. the key values already applied as
# column names.
draws <- structure(c(44.9550897623431, 46.9574946075541, 50.6257323964234,
50.0031952811056, 57.6322623502873, 42.279785160287, 37.6285099543378,
34.3629595207816, 46.9697972139608, 40.2009794005213, 21.630084704092,
25.2162712723008, 37.3211931224694, 23.7999171251995, 21.2302233407838,
-36.8461193595574, -39.0244897070139, -35.6664210659805, -34.5704931842508,
-39.8731521661833, -12.9430641715912, -9.44198662533142, -20.935754511249,
-19.8027662001992, -9.42582727890284, -43.6633523231745, -34.6555858158487,
-49.6090662962726, -48.4239742344322, -35.3575849921295, -4.11891117890216,
-10.9495440674842, -1.86986228692283, 1.04944608379319, 2.68853060587935,
-41.5599207956663, -31.6734212028181, -32.6595573264561, -50.7714581349866,
-48.747883177673, -66.1831938460601, -62.5858575915011, -64.8294869549527,
-76.0185102307402, -66.4890455174192, -31.8152536041983, -25.5314880027914,
-18.3613451368219, -24.7996773753553, -31.3883062670062, -23.5534809583603,
-30.4137478198723, -23.3406092032618, -25.232305205219, -32.6684929803068,
-37.8986821655283, -21.0428325455143, -20.1679180798068, -8.8558706482521,
-19.569991652331, -9.64665181291452, -15.5754428707762, -19.5247965378013,
-14.0109034924601, -25.4027455887613, -23.1744226929195, -21.0806041334146,
-19.2518361690643, -28.3098578036366, -20.9599368166869, -40.7598960725687,
-25.9534493623183, -31.5763184126101, -29.3302105023077, -34.4263349620411,
-22.6005703678329, -27.1070627509852, -25.9070161090915, -32.4883502737357,
-28.0217699439445, -32.6647435432891, -25.3694454582095, -18.5111182675911,
-12.789998792761, -23.0157832304767, 10.9871157808473, 9.15941241791976,
9.64165664846513, 8.3955755223663, 21.1288875830197, 21.8417479141679,
23.123692683766, 30.8894495805228, 30.0114123953862, 24.4622163786097,
13.726611821335, 14.900583440238, 16.9479851066342, 13.208245802755,
10.7617694112498, -43.9180512533282, -33.8499808522102, -41.8000078349458,
-33.2535511985488, -30.6431572056385, -20.6114139777757, -15.5762504548734,
-8.26763825391994, -12.3409716955566, -9.18481830890617, -16.4514139155318,
-15.3106886451986, -14.7770168167629, -24.2815042348966, -22.1182422295905,
-6.52941652901941, -0.555864926094891, 8.56550091146217, 9.03591481925792,
1.18280776802875, -9.99523906861024, -8.98161424104466, -19.9140442613001,
-16.7758036160869, -10.4761950996821, 25.8386010342749, 32.3311554674313,
19.877931974953, 21.9957855763085, 19.1998642117212, 51.6580161027565,
51.0081894682624, 52.3665903658103, 48.2488512056915, 43.2540065729433,
34.9886903131088, 50.4945347064368, 42.2806769307072, 42.8596372711001,
35.1207584532066, 54.3819322026358, 56.23761001748, 62.0808778016103,
62.0715726103241, 65.5742574444192, 61.2790994127163, 56.3045899471206,
51.8497227880728, 42.3728137399616, 52.5963246897359, 68.0882868435083,
63.8685610913712, 52.3724149884809, 57.7359628738795, 56.6425178382738,
16.2054981275189, 26.0474506173286, 8.4753588627518, 34.8278403341958,
23.0634429028774), .Dim = c(5L, 32L), .Dimnames = list(NULL,
c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21",
"22", "23", "24", "25", "26", "27", "28", "29", "30", "31",
"32")))

Answer

This approach doesn't access the nested information directly. Instead, since you are plotting with ggplot(), I think it is better to release the values from the nested column using unnest() and then reshape the data into long format using gather().

# (I used the `draws` that the first code block made)

# Flatten the nested `draw` column and reshape to long format so that every
# draw becomes its own row, then draw one violin per cyl group.
result %>%
  # Name the nested column explicitly: a bare unnest() is deprecated in
  # tidyr >= 1.0 and warns that `cols` is required.
  unnest(draw) %>%
  # Melt every column that did not come from mycars (i.e. the draws) into
  # name/value pairs: `cols_name` holds the old column name, `draw` the value.
  gather(cols_name, draw, -one_of(colnames(mycars))) %>%
  ggplot(aes(mpg, draw, group = cyl)) +
  geom_violin()

enter image description here

Comments