Hjorvik Hjorvik - 1 month ago 7
R Question

Creating a matrix of vectors with apply (or other iterative function) in R

I need to run a orthogonal encoding function over a set of octamers (sets of 8 letters), and return them as a matrix of nx160 numbers (where n is the number of octamers on the data).

The orthogonal coding functions is:

orthocode <- function(octamer){
matcode <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
octamer_char <- as.character(octamer)
octamer_split <- strsplit(octamer_char,"")[[1]]
for (letter in octamer_split){
ifelse (letter == "A", (matcode = rbind(matcode,c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "R", (matcode = rbind(matcode,c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "N", (matcode = rbind(matcode,c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "D", (matcode = rbind(matcode,c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "C", (matcode = rbind(matcode,c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "Q", (matcode = rbind(matcode,c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "E", (matcode = rbind(matcode,c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "G", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "H", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "I", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "L", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0))),
ifelse (letter == "K", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0))),
ifelse (letter == "M", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0))),
ifelse (letter == "F", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0))),
ifelse (letter == "P", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0))),
ifelse (letter == "S", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0))),
ifelse (letter == "T", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0))),
ifelse (letter == "W", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0))),
ifelse (letter == "Y", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0))),
ifelse (letter == "V", (matcode = rbind(matcode,c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)))
))))))))))))))))))))
}
matcode <- matcode[-1,]
matcode <- c(matcode)
return(matcode)
}


AS some have asked, here is an example, even if this is not the part that isn't working:

orthocode("ARNDCQEG")
[1] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


The function is working on individual octamers, but when I try to use lapply on it, the result is just a 160 numbers vector, this time with the code altered (and meaningless).

lapply(data[1], orthocode)


The result looks like:

$V1
[1] 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[81] 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


The orthocode function is actually working. What I need to know is how do I take the octamers from a dataframe, run the fucntion on them and as a result end up with a matrix that looks like this one:

rbind(orthocode("ARNDCQEG"),orthocode("NGJKAEPS"),orthocode("ABGSWKLA"))
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
[2,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[3,] 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50] [,51] [,52] [,53] [,54]
[1,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62] [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74] [,75] [,76] [,77] [,78] [,79] [,80]
[1,] 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[,81] [,82] [,83] [,84] [,85] [,86] [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98] [,99] [,100] [,101] [,102] [,103] [,104] [,105]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
[,106] [,107] [,108] [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118] [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[,128] [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138] [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148] [,149]
[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[2,] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
[,150] [,151] [,152] [,153] [,154] [,155] [,156] [,157] [,158] [,159] [,160]
[1,] 0 0 0 0 0 0 0 0 0 0 0
[2,] 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 0 0 0 0 0 0 0 0 0


The output hase to be a matrix of n rows, 160 columns. On the data I have to run it, the result matrix should be a 947x160 one.

Any ideas?

Answer

We can simplify ifelse with match, and drop the forloop:

orthocode <- function(octamer){
  matcode <- rep(0, 20)
  octamer_char <- as.character(octamer)
  octamer_split <- strsplit(octamer_char,"")[[1]]

  t(sapply(octamer_split, function(letter){
    res <- matcode
    res[ match(letter, c("A","R","N","D","C","Q","E","G","H","I",
                         "L","K","M","F","P","S","T","W","Y","V"))] <- 1
    res
  }))
}