Nik Bernou Nik Bernou - 3 months ago 7
R Question

How can I reshape a data frame based on the strings in several columns?

My data looks like this:

# Example input, written in dput()-style structure() form: a data.frame with
# 45 rows and 8 columns. col1..col4 are factors of identifier strings and
# res1..res4 are numeric; the columns are interleaved as col1, res1, col2,
# res2, ... (see .Names at the end).
# The columns hold different numbers of real entries (9, 12, 9 and 11), so the
# remaining rows are padding: the empty-string level "" (factor code 1L) in
# the colN column and NA in the matching resN column.
df <- structure(list(col1 = structure(c(2L, 3L, 4L, 5L, 10L, 6L, 9L,
8L, 7L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "ADliba1", "ADNIL2",
"DFGH2", "GDH76", "ML2IS5", "QSEA12", "RR2JDG", "T2HDY3", "TR5421"
), class = "factor"), res1 = c(3.59e-08, 2.15e-08, 1.52e-07,
1.24e-07, 4.53e-08, 3.11e-08, 7.08e-08, 1.98e-08, 1.46e-08, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), col2 = structure(c(12L, 6L, 2L, 7L, 3L, 8L, 4L,
13L, 5L, 9L, 10L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "A8WHS3", "A9D0C6",
"A9D649", "A9UJN4", "ADliba1", "ADNIL2", "GDH76", "Q9XXN2", "Q9XXQ4",
"Q9XXQ6", "QSEA12", "T2HDY3"), class = "factor"), res2 = c(1.46e-08,
8.11e-07, 3.86e-08, 7.21e-08, 2.68e-08, 4.02e-08, 2.7e-08, 2.32e-08,
7.76e-08, 7.76e-08, 7.76e-08, 7.76e-08, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), col3 = structure(c(2L,
3L, 4L, 5L, 8L, 10L, 6L, 7L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"A8WIT0", "A9D0C6", "A9D4S6", "A9D8E6", "A9Z1L6", "ADliba1",
"ADNIL2", "B0M0N9", "GDH76"), class = "factor"), res3 = c(2.13e-08,
7.85e-08, 3.57e-08, 1.46e-07, 1.4e-07, 2.8e-08, 5.23e-08, 8.76e-08,
6.44e-08, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), col4 = structure(c(7L, 2L, 3L, 4L,
8L, 9L, 5L, 12L, 6L, 10L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "A8WFJ8",
"A8WFK2", "A8WHR6", "A8XQE0", "A9D0C6", "ADliba1", "ADNIL2",
"GDH76", "Q9XXL6", "Q9XXN0", "T2HDY3"), class = "factor"), res4 = c(1.42e-07,
1.26e-05, 8.58e-08, 2.83e-08, 8.66e-08, 7.64e-08, 3.7e-08, 6.28e-07,
7.25e-07, 1.26e-05, 8.58e-08, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("col1",
"res1", "col2", "res2", "col3", "res3", "col4", "res4"), class = "data.frame", row.names = c(NA,
-45L))


is like this

I want to collect all the strings from col1, col2, col3 and col4 into a single column of unique values, and then place each string's res1–res4 values beside it (with 0 where the string does not occur in that column).

I want the output to look like this:

# Desired result, written in dput()-style structure() form: a data.frame with
# 27 rows and 5 columns. col1 is a factor with 27 levels in which every
# identifier occurs exactly once; res1..res4 are numeric.
# NOTE(review): the 0 entries appear to mark identifiers that have no value
# in the corresponding input column - confirm with the asker.
output <- structure(list(col1 = structure(c(13L, 14L, 16L, 17L, 27L, 18L,
26L, 25L, 24L, 4L, 7L, 9L, 11L, 21L, 22L, 23L, 5L, 8L, 10L, 12L,
15L, 1L, 2L, 3L, 6L, 19L, 20L), .Label = c("A8WFJ8", "A8WFK2",
"A8WHR6", "A8WHS3", "A8WIT0", "A8XQE0", "A9D0C6", "A9D4S6", "A9D649",
"A9D8E6", "A9UJN4", "A9Z1L6", "ADliba1", "ADNIL2", "B0M0N9",
"DFGH2", "GDH76", "ML2IS5", "Q9XXL6", "Q9XXN0", "Q9XXN2", "Q9XXQ4",
"Q9XXQ6", "QSEA12", "RR2JDG", "T2HDY3", "TR5421"), class = "factor"),
res1 = c(3.59e-08, 2.15e-08, 1.52e-07, 1.24e-07, 4.53e-08,
3.11e-08, 7.08e-08, 1.98e-08, 1.46e-08, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), res2 = c(8.11e-07, 7.21e-08,
0, 4.02e-08, 0, 0, 2.32e-08, 0, 1.46e-08, 3.86e-08, 2.68e-08,
2.7e-08, 7.76e-08, 7.76e-08, 7.76e-08, 7.76e-08, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), res3 = c(8.76e-08, 1.4e-07, 0, 2.8e-08,
0, 0, 0, 0, 0, 0, 7.85e-08, 0, 0, 0, 0, 0, 2.13e-08, 3.57e-08,
1.46e-07, 5.23e-08, 6.44e-08, 0, 0, 0, 0, 0, 0), res4 = c(1.42e-07,
8.66e-08, 0, 7.64e-08, 0, 0, 6.28e-07, 0, 0, 0, 7.25e-07,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.26e-05, 8.58e-08, 2.83e-08,
3.7e-08, 1.26e-05, 8.58e-08)), .Names = c("col1", "res1",
"res2", "res3", "res4"), class = "data.frame", row.names = c(NA,
-27L))

Answer

Begin by cleaning up the data

# Split df into one two-column data frame per (colN, resN) pair: the odd
# column positions 1, 3, 5, ... each start a pair.
splitDF <- lapply(seq(1, ncol(df), by = 2),
                  function(x) df[x:(x + 1)])
# Rename the first column of every piece to "col1" so the pieces share a
# common key for the merge. The function has to return x explicitly: the
# value of an assignment is its right-hand side, so without the trailing x
# every piece would be replaced by the string "col1".
splitDF <- lapply(splitDF, function(x) {
  names(x)[1] <- "col1"
  x
})
# Drop the padding rows, i.e. every row that contains an NA.
splitDF <- lapply(splitDF, function(x) x[complete.cases(x), ])

Then you can use the great `Reduce()` + `merge()` solution found here to gather the pieces into one data frame.

# Full outer join of all pieces on "col1": Reduce() merges them pairwise, and
# all = TRUE keeps identifiers that occur in only some pieces (their missing
# res values become NA). TRUE is spelled out because T is an ordinary
# variable that can be reassigned; the key is named explicitly so the join
# never depends on which column names the pieces happen to share.
output <- Reduce(function(x, y) merge(x, y, by = "col1", all = TRUE),
                 splitDF)

Finally you can set all of the NA values to zero and reorder the rows.

# Identifiers that are absent from a piece come out of the merge as NA; the
# desired output shows 0 in those cells.
output[is.na(output)] <- 0
# Row order of the desired output: identifiers in order of first appearance
# in col1, then col2, col3 and col4 of the input data.
varOrder <- c("ADliba1", "ADNIL2", "DFGH2", "GDH76", "TR5421", "ML2IS5",
              "T2HDY3", "RR2JDG", "QSEA12", "A8WHS3", "A9D0C6", "A9D649",
              "A9UJN4", "Q9XXN2", "Q9XXQ4", "Q9XXQ6", "A8WIT0", "A9D4S6",
              "A9D8E6", "A9Z1L6", "B0M0N9", "A8WFJ8", "A8WFK2", "A8WHR6",
              "A8XQE0", "Q9XXL6", "Q9XXN0")
output <- output[match(varOrder, output[["col1"]]), ]
# Row subsetting keeps the pre-sort row names, which are now out of order;
# reset them to 1..n so the result has the sequential row names of the
# desired output.
rownames(output) <- NULL
Comments