Morpheu5 Morpheu5 - 23 days ago 3
R Question

How do I melt a dataframe that includes pre-computed confidence intervals for plotting?

I have the a dataframe that includes +/- confidence intervals. I assembled the dataset myself from secondary open datasets that only had +/- CIs, so there is not much that I can do. I understand that the quickest way to plot multiple series with

is to
the dataframe, which I can easily do like

melt(df, id.vars = c("Year"))

except that that turns CI columns into proper series. Now, I'd eventually like to produce a plot like this.

enter image description here

which I produced with

ggplot(df, aes(x = Year)) +
geom_line(aes(y = Total.inflow), color="red") +
geom_ribbon(aes(ymin = Total.inflow-Total.inflow.CI, ymax = Total.inflow+Total.inflow.CI), colour="red", fill="red", alpha=0.1) +
geom_line(aes(y = EU.inflow), color="blue") +
geom_ribbon(aes(ymin = EU.inflow-EU.inflow.CI, ymax = EU.inflow+EU.inflow.CI), colour="blue", fill="blue", alpha=0.1) +
geom_line(aes(y = ROW.inflow), color="green") +
geom_ribbon(aes(ymin = ROW.inflow-ROW.inflow.CI, ymax = ROW.inflow+ROW.inflow.CI), colour="green", fill="green", alpha=0.1)



Thanks to @lukeA for pointing me towards the right method. For some reason, his solution produced an empty data frame, but I managed to figure out what he was trying to do and found a reasonable solution myself.

First of all, let's separate the GDP column from the flows dataset. I suspected this was necessary from the beginning, but I was confident I could filter it out while plotting. Turns out it's just easier to separate the two. Also, I'll normalise its values for later, because tens of billions…

df <- read.csv('stats.csv', header=T)
gdp <- data.frame(Year = df$Year, GDP = df$GDP/10000000000)
df <- within(df, rm(GDP))

The goal is to get the CI values side by side with their corresponding series. This was the code inside the
in @lukeA answer. Once I took it apart, the path to the solution became clearer to me.

var_value <- df %>%
select(-ends_with("CI")) %>%
gather(var, value, -Year)

var_conf <- df %>%
select(Year, ends_with("CI")) %>%
setNames(sub("(.*)\\sCI$", "\\1", names(.))) %>%
gather(var, conf, -Year)

final.df = data.frame(var_value, conf = var_conf$conf)

Finally, @lukeA's ggplot code does produce the chart he shows.

ggplot(final.df, aes(
x = Year,
y = value,
ymin = value - conf,
ymax = value + conf,
color = var,
fill = var
)) +
geom_ribbon(alpha = .2) +


df <- structure(list(Year = 1991:2014, Total.inflow = c(329L, 268L,
266L, 315L, 312L, 318L, 327L, 391L, 454L, 479L, 481L, 516L, 511L,
589L, 567L, 596L, 574L, 590L, 567L, 591L, 566L, 498L, 526L, 632L
), Total.inflow.CI = c(23L, 20L, 19L, 23L, 22L, 25L, 27L, 27L,
31L, 31L, 30L, 32L, 33L, 40L, 37L, 39L, 40L, 39L, 30L, 31L, 28L,
27L, 29L, 36L), Total.outflow = c(-285L, -281L, -266L, -238L,
-236L, -264L, -279L, -251L, -291L, -321L, -309L, -363L, -363L,
-344L, -361L, -398L, -341L, -427L, -368L, -339L, -351L, -321L,
-317L, -319L), Total.outflow.CI = c(23L, 21L, 20L, 20L, 19L,
28L, 24L, 22L, 24L, 27L, 25L, 29L, 32L, 28L, 31L, 34L, 27L, 41L,
22L, 20L, 22L, 20L, 19L, 22L), UK.inflow = c(93L, 81L, 75L, 91L,
67L, 75L, 79L, 90L, 92L, 83L, 89L, 74L, 85L, 73L, 82L, 66L, 60L,
71L, 82L, 84L, 69L, 73L, 70L, 68L), UK.inflow.CI = c(15L, 15L,
12L, 16L, 13L, 15L, 14L, 15L, 16L, 16L, 16L, 14L, 16L, 12L, 16L,
14L, 12L, 14L, 13L, 14L, 11L, 11L, 12L, 11L), UK.outflow = c(-142L,
-146L, -141L, -112L, -130L, -141L, -140L, -121L, -133L, -151L,
-150L, -172L, -184L, -189L, -175L, -200L, -158L, -159L, -130L,
-125L, -133L, -131L, -125L, -128L), UK.outflow.CI = c(17L, 16L,
16L, 14L, 15L, 22L, 19L, 18L, 16L, 18L, 18L, 22L, 22L, 23L, 22L,
26L, 19L, 22L, 11L, 11L, 12L, 14L, 11L, 13L), EU.inflow = c(60L,
49L, 48L, 53L, 60L, 74L, 70L, 75L, 64L, 55L, 54L, 57L, 58L, 128L,
149L, 173L, 189L, 186L, 162L, 171L, 168L, 148L, 193L, 256L),
EU.inflow.CI = c(12L, 10L, 8L, 10L, 11L, 14L, 18L, 14L, 16L,
13L, 15L, 16L, 17L, 22L, 23L, 26L, 28L, 27L, 19L, 21L, 18L,
17L, 20L, 25L), EU.outflow = c(-51L, -39L, -40L, -46L, -38L,
-50L, -51L, -52L, -57L, -55L, -50L, -54L, -47L, -45L, -56L,
-63L, -66L, -126L, -104L, -92L, -92L, -75L, -78L, -86L),
EU.outflow.CI = c(10L, 6L, 7L, 8L, 7L, 13L, 10L, 9L, 13L,
12L, 13L, 13L, 16L, 10L, 14L, 15L, 15L, 31L, 16L, 13L, 14L,
12L, 12L, 15L), ROW.inflow = c(175L, 138L, 143L, 171L, 185L,
169L, 178L, 226L, 298L, 340L, 338L, 385L, 368L, 388L, 336L,
358L, 325L, 333L, 323L, 336L, 329L, 277L, 264L, 308L), ROW.inflow.CI = c(13L,
10L, 11L, 13L, 15L, 14L, 14L, 17L, 21L, 23L, 20L, 24L, 22L,
31L, 25L, 25L, 25L, 25L, 19L, 18L, 19L, 18L, 18L, 24L), ROW.outflow = c(-91L,
-96L, -85L, -80L, -69L, -73L, -88L, -78L, -101L, -114L, -109L,
-136L, -133L, -109L, -129L, -135L, -117L, -142L, -134L, -122L,
-126L, -115L, -114L, -105L), ROW.outflow.CI = c(12L, 12L,
10L, 11L, 8L, 10L, 11L, 9L, 14L, 15L, 13L, 15L, 16L, 13L,
17L, 16L, 12L, 16L, 9L, 9L, 11L, 9L, 9L, 11L), GDP = c(1142797178130.51,
1179659529659.53, 1061388722255.55, 1140489745944.29, 1237561937825.47,
1306575663026.52, 1446444007858.55, 1537103345478.64, 1565408509949.85,
1554801028899.98, 1535942133294.95, 1680256294964.03, 1943025306122.45,
2297889051629.44, 2418941818181.82, 2588077276908.92, 2969733893557.42,
2793376838235.29, 2314577036921.64, 2403504326328.8, 2594904662714.31,
2630472981169.65, 2712296271989.99, 2990201431078.23)), .Names = c("Year",
"Total.inflow", "Total.inflow.CI", "Total.outflow", "Total.outflow.CI",
"UK.inflow", "UK.inflow.CI", "UK.outflow", "UK.outflow.CI", "EU.inflow",
"EU.inflow.CI", "EU.outflow", "EU.outflow.CI", "ROW.inflow",
"ROW.inflow.CI", "ROW.outflow", "ROW.outflow.CI", "GDP"), row.names = c(NA,
-24L), class = "data.frame")


For example

  tf <- tempfile(), 
  method = "libcurl"
df <- read_csv(tf)
  df %>% 
    select(-ends_with("CI")) %>% 
    gather(var, value, -Year),
  df %>% 
    select(Year, ends_with("CI")) %>% 
    setNames(sub("(.*)\\sCI$", "\\1", names(.))) %>% 
    gather(var, conf, -Year),
  by = c("Year", "var")
) %>% 
    x = Year, 
    y = value, 
    ymin = value - conf,
    ymax = value + conf, 
    color = var, 
    fill = var
  )) + 
  geom_ribbon(alpha = .2) + 

gives you

enter image description here

(I'm using the latest development version of ggplot2)