Gaurav Bansal - 23 days ago 5
R Question

Adding legend to ggplot made from multiple data frames with controlled colors

I have a `ggplot2` line chart made from three data frames, for which I have set the color of each line manually. Since color is already fixed, I've instead used `linetype` to distinguish between the lines from each data frame. This leads to a situation in which a legend is not automatically generated. How can I create a legend for this plot?

# Sample data frame `tpAct`: one row per month start from 2017-09-01 through
# 2018-01-01 (five rows), with normally distributed random values (sd 5) in
# columns Reg1-Reg5 and Total. No seed is set, so values differ between runs.
tpAct <- data.frame(
  Date  = seq(as.Date("2017-09-01"), as.Date("2018-01-01"), by = "month"),
  Reg1  = rnorm(n = 5, mean = 10, sd = 5),
  Reg2  = rnorm(n = 5, mean = 15, sd = 5),
  Reg3  = rnorm(n = 5, mean = 20, sd = 5),
  Reg4  = rnorm(n = 5, mean = 25, sd = 5),
  Reg5  = rnorm(n = 5, mean = 30, sd = 5),
  Total = rnorm(n = 5, mean = 60, sd = 5)
)

# Sample data frame `tpOL`: same five monthly dates (2017-09-01 through
# 2018-01-01) and the same columns as the other sample frames, filled with
# normally distributed random values (sd 5). No seed is set, so values differ
# between runs.
tpOL <- data.frame(
  Date  = seq(as.Date("2017-09-01"), as.Date("2018-01-01"), by = "month"),
  Reg1  = rnorm(n = 5, mean = 10, sd = 5),
  Reg2  = rnorm(n = 5, mean = 25, sd = 5),
  Reg3  = rnorm(n = 5, mean = 20, sd = 5),
  Reg4  = rnorm(n = 5, mean = 25, sd = 5),
  Reg5  = rnorm(n = 5, mean = 30, sd = 5),
  Total = rnorm(n = 5, mean = 60, sd = 5)
)

# Sample data frame `tpModL2`: same five monthly dates (2017-09-01 through
# 2018-01-01) and the same columns as the other sample frames, filled with
# normally distributed random values (sd 5). No seed is set, so values differ
# between runs.
tpModL2 <- data.frame(
  Date  = seq(as.Date("2017-09-01"), as.Date("2018-01-01"), by = "month"),
  Reg1  = rnorm(n = 5, mean = 10, sd = 5),
  Reg2  = rnorm(n = 5, mean = 25, sd = 5),
  Reg3  = rnorm(n = 5, mean = 20, sd = 5),
  Reg4  = rnorm(n = 5, mean = 25, sd = 5),
  Reg5  = rnorm(n = 5, mean = 30, sd = 5),
  Total = rnorm(n = 5, mean = 60, sd = 5)
)

# One line per value column for each of the three data frames (18 layers).
# Colour identifies the column (Reg1-Reg5, Total); line type identifies the
# data frame (default for tpAct, 5 for tpOL, 4 for tpModL2). Colour, line type
# and size are all fixed settings outside aes(), so no legend is generated.
ggplot() +
  # tpAct: default line type
  geom_line(aes(x = Date, y = Reg1), data = tpAct,
            colour = "red", size = 1.25) +
  geom_line(aes(x = Date, y = Reg2), data = tpAct,
            colour = "blue", size = 1.25) +
  geom_line(aes(x = Date, y = Reg3), data = tpAct,
            colour = "green", size = 1.25) +
  geom_line(aes(x = Date, y = Reg4), data = tpAct,
            colour = "pink", size = 1.25) +
  geom_line(aes(x = Date, y = Reg5), data = tpAct,
            colour = "yellow", size = 1.25) +
  geom_line(aes(x = Date, y = Total), data = tpAct,
            colour = "black", size = 1.25) +
  # tpOL: line type 5
  geom_line(aes(x = Date, y = Reg1), data = tpOL,
            colour = "red", linetype = 5, size = 1.25) +
  geom_line(aes(x = Date, y = Reg2), data = tpOL,
            colour = "blue", linetype = 5, size = 1.25) +
  geom_line(aes(x = Date, y = Reg3), data = tpOL,
            colour = "green", linetype = 5, size = 1.25) +
  geom_line(aes(x = Date, y = Reg4), data = tpOL,
            colour = "pink", linetype = 5, size = 1.25) +
  geom_line(aes(x = Date, y = Reg5), data = tpOL,
            colour = "yellow", linetype = 5, size = 1.25) +
  geom_line(aes(x = Date, y = Total), data = tpOL,
            colour = "black", linetype = 5, size = 1.25) +
  # tpModL2: line type 4
  geom_line(aes(x = Date, y = Reg1), data = tpModL2,
            colour = "red", linetype = 4, size = 1.25) +
  geom_line(aes(x = Date, y = Reg2), data = tpModL2,
            colour = "blue", linetype = 4, size = 1.25) +
  geom_line(aes(x = Date, y = Reg3), data = tpModL2,
            colour = "green", linetype = 4, size = 1.25) +
  geom_line(aes(x = Date, y = Reg4), data = tpModL2,
            colour = "pink", linetype = 4, size = 1.25) +
  geom_line(aes(x = Date, y = Reg5), data = tpModL2,
            colour = "yellow", linetype = 4, size = 1.25) +
  geom_line(aes(x = Date, y = Total), data = tpModL2,
            colour = "black", linetype = 4, size = 1.25) +
  # Blank x-axis title; y-axis title gives the unit.
  labs(x = "", y = "Total Balances ($B)")


enter image description here

Answer Source

Here's how to stack and plot the data using the sample data frames you provided:

library(tidyverse)

# Stack the three data frames into one long-format data frame, then plot it
# with colour mapped to the original column name (`key`) and line type mapped
# to the originating data frame (`source`). Because both are mapped inside
# aes(), ggplot2 generates a legend for each.
setNames(list(tpAct, tpOL, tpModL2), c("tpAct","tpOL","tpModL2")) %>%
  map_df(~ .x %>% gather(key, value, -Date), .id="source") %>%
  ggplot(aes(Date, value, colour=key, linetype=source)) +
    geom_line() +
    # Name every colour so it stays tied to its column. An unnamed vector is
    # matched by position to the sorted levels of `key`, so adding or dropping
    # a column would silently shift the colours.
    scale_colour_manual(values=c(Reg1='red', Reg2='blue', Reg3='green',
                                 Reg4='pink', Reg5='yellow', Total='black')) +
    theme_classic()

`setNames(list(tpAct, tpOL, tpModL2), c("tpAct","tpOL","tpModL2"))` puts the three data frames in a list and assigns the data frame names as the names of the list elements.

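`map_df(~ .x %>% gather(key, value, -Date), .id="source")` converts the individual data frames to long format and stacks them into a single long-format data frame; the `.id="source"` argument adds a `source` column holding the list-element name, so each row records which data frame it came from.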

Here's what the plot looks like:

enter image description here

A faceted plot might be easier to read:

# Same stacked long-format data as the single-panel plot, but with one facet
# per originating data frame (`source`) instead of distinguishing the data
# frames by line type.
setNames(list(tpAct, tpOL, tpModL2), c("tpAct","tpOL","tpModL2")) %>%
  map_df(~ .x %>% gather(key, value, -Date), .id="source") %>%
  ggplot(aes(Date, value, colour=key)) +
    geom_line() +
    # Name every colour so it stays tied to its column. An unnamed vector is
    # matched by position to the sorted levels of `key`, so adding or dropping
    # a column would silently shift the colours.
    scale_colour_manual(values=c(Reg1='red', Reg2='blue', Reg3='green',
                                 Reg4='pink', Reg5='yellow', Total='black')) +
    theme_classic() +
    # One panel (column) per data frame.
    facet_grid(~ source)

enter image description here