BWRT BWRT - 3 months ago 16
R Question

R Loop for matched data.table columns

I am really struggling to create a function that runs a model where all the variables

a
,
b
,
d
,
g
&
N
have multiple versions as shown in the data.table below which I've named
crm
:

structure(list(East = c(265000, 265000, 265000, 265000, 265000,
265000), North = c(115000, 120000, 125000, 130000, 135000, 140000
), rain = c(1049.61, 1114.31, 1361.61, 1407.2, 1499.56, 1654.13
), crop = c("Wheat", "Wheat", "Wheat", "Wheat", "Wheat", "Wheat"
), area = c(0.1718, 0.1629, 0.1082, 0.0494, 0.02, 0.004), rn = c("10007",
"10018", "10023", "10024", "10025", "10026"), N1 = c(184.262648839489,
184.262648839489, 184.262648839489, 184.262648839489, 184.262648839489,
184.262648839489), N2 = c(180.312874871521, 180.312874871521,
180.312874871521, 180.312874871521, 180.312874871521, 180.312874871521
), N3 = c(178.615847839997, 178.615847839997, 178.615847839997,
178.615847839997, 178.615847839997, 178.615847839997), N4 = c(182.531626054579,
182.531626054579, 182.531626054579, 182.531626054579, 182.531626054579,
182.531626054579), a1 = c(0.186117715072018, 0.186117715072018,
0.186117715072018, 0.186117715072018, 0.186117715072018, 0.186117715072018
), a2 = c(-0.0232731908915799, -0.0232731908915799, -0.0232731908915799,
-0.0232731908915799, -0.0232731908915799, -0.0232731908915799
), a3 = c(0.227017532149122, 0.227017532149122, 0.227017532149122,
0.227017532149122, 0.227017532149122, 0.227017532149122), a4 = c(0.162943230565506,
0.162943230565506, 0.162943230565506, 0.162943230565506, 0.162943230565506,
0.162943230565506), b1 = c(0.000478900233700419, 0.000478900233700419,
0.000478900233700419, 0.000478900233700419, 0.000478900233700419,
0.000478900233700419), b2 = c(0.000787931973696371, 0.000787931973696371,
0.000787931973696371, 0.000787931973696371, 0.000787931973696371,
0.000787931973696371), b3 = c(0.000458478256537521, 0.000458478256537521,
0.000458478256537521, 0.000458478256537521, 0.000458478256537521,
0.000458478256537521), b4 = c(0.000517304324750896, 0.000517304324750896,
0.000517304324750896, 0.000517304324750896, 0.000517304324750896,
0.000517304324750896), d1 = c(-0.000328164576390286, -0.000328164576390286,
-0.000328164576390286, -0.000328164576390286, -0.000328164576390286,
-0.000328164576390286), d2 = c(-0.000112122093240884, -0.000112122093240884,
-0.000112122093240884, -0.000112122093240884, -0.000112122093240884,
-0.000112122093240884), d3 = c(0.000112702113716146, 0.000112702113716146,
0.000112702113716146, 0.000112702113716146, 0.000112702113716146,
0.000112702113716146), d4 = c(7.40875908059628e-05, 7.40875908059628e-05,
7.40875908059628e-05, 7.40875908059628e-05, 7.40875908059628e-05,
7.40875908059628e-05), g1 = c(4.04709473710477e-06, 4.04709473710477e-06,
4.04709473710477e-06, 4.04709473710477e-06, 4.04709473710477e-06,
4.04709473710477e-06), g2 = c(3.68724096485995e-06, 3.68724096485995e-06,
3.68724096485995e-06, 3.68724096485995e-06, 3.68724096485995e-06,
3.68724096485995e-06), g3 = c(3.47214450131546e-06, 3.47214450131546e-06,
3.47214450131546e-06, 3.47214450131546e-06, 3.47214450131546e-06,
3.47214450131546e-06), g4 = c(3.55825543257538e-06, 3.55825543257538e-06,
3.55825543257538e-06, 3.55825543257538e-06, 3.55825543257538e-06,
3.55825543257538e-06)), .Names = c("East", "North", "rain", "crop",
"area", "rn", "N1", "N2", "N3", "N4", "a1", "a2", "a3", "a4",
"b1", "b2", "b3", "b4", "d1", "d2", "d3", "d4", "g1", "g2", "g3",
"g4"), sorted = "rn", class = c("data.table", "data.frame"), row.names = c(NA,
-6L), .internal.selfref = <pointer: 0x0000000000200788>)


What I'm trying to do is run the function below to calculate a value for
lnN
and put it into a column that has the same number in the heading as the variables entered into the model. I.e. Using
a1
,
b1
,
d1
,
g1
&
N1
will produce the column
lnN1
and so on for all the 2s, 3s and 4s.

n <- 1:4
cols <- paste0("lnN",n)
for(i in 1:length(n)){
crm[,(cols) := lapply(.SD ,function (x) {
N = crm[,7+i]
a = crm[,11+i]
b = crm[,15+i]
d = crm[,19+i]
g = crm[,23+i]
a + (b*crm[,rain]) + (g*N) + (d*crm[,rain]*N)}), .SDcols = paste0("N",n)]


}

I've yet to find an example anywhere on how to accomplish this. I've tried using
mapply
but I can't see how to iterate mapply through all the iterations of each variable. Thank you for your help!

Answer

How about:

library(dplyr)
cbind(crm, do.call(cbind, 
  lapply(1:4, function(x) {
    select(crm, c(contains(as.character(x)), rain)) %>% 
      setnames(gsub("[0-9]", "", names(.))) %>%
      transmute(lnN = a + (b*rain) + (g*N) + (d*rain*N)) %>%
      setnames(paste0("lnN", x))
  })
))

The main idea is, for each number, select only the columns that contain the number (and also rain), rename the columns to remove the numbers, apply the formula, rename the resulting column to append the number, and then cbind the result onto the original table.