user3022875 user3022875 - 2 months ago 7
R Question

unable to add column with dplyr mutate

I currently have this data frame

dat = data.frame(time= c("Q1","Q2","Q3"),
measure1 = c(1,2,9 ),
measure2 = c(4,5,6 ))
dat2 = dat %>% gather(key= Metric, value = Value, c(measure1, measure2)) %>% group_by(Metric, time)
dat2 = as.data.frame(dat2)

time Metric Value
1 Q1 measure1 1
2 Q2 measure1 2
3 Q3 measure1 9
4 Q1 measure2 4
5 Q2 measure2 5
6 Q3 measure2 6


I can add this "test" column like this

dat2$test= ifelse( dat2$Metric =="measure1" & dat2$Value > dat2$Value[ dat2$Metric=="measure2"] ,1,
ifelse( dat2$Metric == "measure2" & dat2$Value > dat2$Value[ dat2$Metric=="measure1"] ,1,-1)
) #end ifelse

time Metric Value test
1 Q1 measure1 1 -1
2 Q2 measure1 2 -1
3 Q3 measure1 9 1
4 Q1 measure2 4 1
5 Q2 measure2 5 1
6 Q3 measure2 6 -1


BUT I would like to use dplyr and mutate like this

dat = data.frame(time= c("Q1","Q2","Q3"),
measure1 = c(1,2,9 ),
measure2 = c(4,5,6 ))
dat %>% gather(key= Metric, value = Value, c(measure1, measure2)) %>% group_by(Metric, time) %>%
mutate(test= ifelse( Metric =="measure1" & Value > Value[Metric=="measure2"] ,1,
ifelse( Metric =="measure2" & Value > Value[Metric=="measure1"] ,1,-1)
) #end ifelse
)#end mutate
dat2 = as.data.frame(dat2)
dat2


but the "test" column is all NA

time Metric Value test
1 Q1 measure1 1 NA
2 Q2 measure1 2 NA
3 Q3 measure1 9 NA
4 Q1 measure2 4 NA
5 Q2 measure2 5 NA
6 Q3 measure2 6 NA


why can't you use mutate to add the column? does it have something to do with using as.data.frame and adding it manually...mutate doesn't recognize the column name?

Thank you.

Answer

The problem lies in the fact that when you group by Metric, for groups where Metric is measure1, Metric == "measure2" will always return FALSE, and Value[FALSE] == numeric(0), zero length objects are transformed to NA after the mutate:

dat %>% 
        gather(key= Metric, value = Value, c(measure1, measure2)) %>% 
        group_by(time) %>%        # <<<<<<<<<<<< here
        mutate(test= ifelse(Metric =="measure1" & Value > Value[Metric=="measure2"] ,1,
                            ifelse(Metric =="measure2" & Value > Value[Metric=="measure1"],1,-1)
                            ) #end ifelse
              )

# Source: local data frame [6 x 4]
# Groups: time [3]

#     time   Metric Value  test
#   <fctr>    <chr> <dbl> <dbl>
# 1     Q1 measure1     1    -1
# 2     Q2 measure1     2    -1
# 3     Q3 measure1     9     1
# 4     Q1 measure2     4     1
# 5     Q2 measure2     5     1
# 6     Q3 measure2     6    -1

You can see more clearly what is happening when you group by Metric with this example:

dat %>% 
        gather(key= Metric, value = Value, c(measure1, measure2)) %>% 
        group_by(Metric, time) %>%
        mutate(test= Value[Metric == "measure2"])

# Source: local data frame [6 x 4]
# Groups: Metric, time [6]

#     time   Metric Value  test
#   <fctr>    <chr> <dbl> <dbl>
# 1     Q1 measure1     1    NA
# 2     Q2 measure1     2    NA
# 3     Q3 measure1     9    NA
# 4     Q1 measure2     4     4
# 5     Q2 measure2     5     5
# 6     Q3 measure2     6     6