hvollmeier hvollmeier - 1 month ago 21
R Question

Calculate `cumsum` by run of signal value

I would like to calculate

cumsum
of some value starting for every run of signals where
signal == 1
.

data:

set.seed(123)
seq.Date(as.Date('2016-09-01'),as.Date('2016-09-30'),by = 'days')
sample(1:10,size=30,replace = TRUE)
c(rep(0,3),rep(1,2),rep(0,1),rep(1,5),rep(0,6),rep(1,3),rep(0,5),rep(1,5))
df <- data.frame(Date = seq.Date(as.Date('2016-09-01'),as.Date('2016-09-30'),by = 'days'),
value = sample(1:10,size=30,replace = TRUE),
signal = c(rep(0,3),rep(1,2),rep(0,1),rep(1,5),rep(0,6),rep(1,3),rep(0,5),rep(1,5)))

> head(df,12)
Date value signal
1 2016-09-01 10 0
2 2016-09-02 10 0
3 2016-09-03 7 0
4 2016-09-04 8 1
5 2016-09-05 1 1
6 2016-09-06 5 0
7 2016-09-07 8 1
8 2016-09-08 3 1
9 2016-09-09 4 1
10 2016-09-10 3 1
11 2016-09-11 2 1
12 2016-09-12 5 0


what I have done so far:

My solution is working, but I think there is a more efficient and elegant way to do it using
dplyr
or
data.table
.

df$pl <- rep(0,length(df))
# calculating the indices of start/end of runs where signal == 1
runs <- rle(df$signal)
start <- cumsum(runs$lengths) +1
start <- start[seq(1, length(start), 2)]
end <- cumsum(runs$lengths)[-1]
end <- end[seq(1, length(end), 2)]
for(i in 1:length(start))
{
df$pl[start[i]:end[i]] <- cumsum(df$value[start[i]:end[i]])
}

> head(df,12)
Date value signal pl
1 2016-09-01 10 0 0
2 2016-09-02 10 0 0
3 2016-09-03 7 0 0
4 2016-09-04 8 1 8
5 2016-09-05 1 1 9
6 2016-09-06 5 0 0
7 2016-09-07 8 1 8
8 2016-09-08 3 1 11
9 2016-09-09 4 1 15
10 2016-09-10 3 1 18
11 2016-09-11 2 1 20
12 2016-09-12 5 0 0

Answer

Using data.table, you could do this

library(data.table)
set.seed(123)
seq.Date(as.Date('2016-09-01'),as.Date('2016-09-30'),by = 'days')
sample(1:10,size=30,replace = TRUE)
c(rep(0,3),rep(1,2),rep(0,1),rep(1,5),rep(0,6),rep(1,3),rep(0,5),rep(1,5))
df <- data.table(Date = seq.Date(as.Date('2016-09-01'),as.Date('2016-09-30'),by = 'days'),
                 value = sample(1:10,size=30,replace = TRUE),
                 signal = c(rep(0,3),rep(1,2),rep(0,1),rep(1,5),rep(0,6),rep(1,3),rep(0,5),rep(1,5)))

df[, pl := cumsum(value)*signal, by = .(signal, rleid(signal))]
#>           Date value signal pl
#>  1: 2016-09-01    10      0  0
#>  2: 2016-09-02    10      0  0
#>  3: 2016-09-03     7      0  0
#>  4: 2016-09-04     8      1  8
#>  5: 2016-09-05     1      1  9
#>  6: 2016-09-06     5      0  0
#>  7: 2016-09-07     8      1  8
#>  8: 2016-09-08     3      1 11
#>  9: 2016-09-09     4      1 15
#> 10: 2016-09-10     3      1 18
#> 11: 2016-09-11     2      1 20
#> 12: 2016-09-12     5      0  0
#> 13: 2016-09-13     5      0  0
#> 14: 2016-09-14     4      0  0
#> 15: 2016-09-15     2      0  0
#> 16: 2016-09-16     2      0  0
#> 17: 2016-09-17     3      0  0
#> 18: 2016-09-18     5      1  5
#> 19: 2016-09-19     3      1  8
#> 20: 2016-09-20     9      1 17
#> 21: 2016-09-21     1      0  0
#> 22: 2016-09-22     5      0  0
#> 23: 2016-09-23     8      0  0
#> 24: 2016-09-24     2      0  0
#> 25: 2016-09-25     6      0  0
#> 26: 2016-09-26     3      1  3
#> 27: 2016-09-27     2      1  5
#> 28: 2016-09-28     8      1 13
#> 29: 2016-09-29     9      1 22
#> 30: 2016-09-30     4      1 26
#>           Date value signal pl