I have two data frames. The first one is df1:
# First example data frame: nine rows and a single factor column V1 holding
# semicolon-separated identifier groups.
df1 <- local({
  # Factor levels, in the order used by the original encoding.
  id_levels <- c(
    "A0A061ACH4;Q95Q10;Q9U1W6",
    "A0A061ACL3;Q965I6;O76618",
    "A0A061ACR1;Q2XN02;F5GUA3;Q22498",
    "A0A061AJJ3;A0A061AEA8",
    "A0A061AL01",
    "C1P641",
    "H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
    "Q22501;A0A061AE05",
    "Q86CZ7"
  )
  # Position of each row's value within id_levels.
  row_codes <- c(1L, 2L, 3L, 7L, 5L, 6L, 4L, 9L, 8L)
  data.frame(V1 = factor(id_levels[row_codes], levels = id_levels))
})
# Second example data frame: nine rows and a single factor column V1 holding
# semicolon-separated identifier groups.
df2 <- local({
  # Factor levels, in the order used by the original encoding.
  id_levels <- c(
    "A0A061ACH4;Q95Q10;Q9U1W6",
    "A0A061ACL3;Q965I6;O76618",
    "A0A061ACR1;Q2XN02;F5GUA3;Q22498",
    "A0A061AJJ3;A0A061AEA8",
    "A0A061AL01",
    "H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
    "Q22501;A0A061AE05",
    "Q27GQ4",
    "Q86CZ7"
  )
  # Position of each row's value within id_levels.
  row_codes <- c(1L, 2L, 3L, 6L, 5L, 4L, 8L, 9L, 7L)
  data.frame(V1 = factor(id_levels[row_codes], levels = id_levels))
})
# Desired result: eleven rows and three factor columns. The first row carries
# the values "", "df1" and "df2" (apparently a header row kept as data); in the
# remaining rows V2 and V3 are either empty or "0".
output <- local({
  # V1: identifier groups, with an empty string as the first level.
  id_levels <- c(
    "",
    "A0A061ACH4;Q95Q10;Q9U1W6",
    "A0A061ACL3;Q965I6;O76618",
    "A0A061ACR1;Q2XN02;F5GUA3;Q22498",
    "A0A061AJJ3;A0A061AEA8",
    "A0A061AL01",
    "C1P641",
    "H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5",
    "Q22501;A0A061AE05",
    "Q27GQ4",
    "Q86CZ7"
  )
  id_codes <- c(1L, 2L, 3L, 4L, 8L, 6L, 7L, 5L, 10L, 11L, 9L)

  # V2: "0" only on the Q27GQ4 row (row 9).
  v2_levels <- c("", "0", "df1")
  v2_codes <- c(3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L)

  # V3: "0" only on the C1P641 row (row 7).
  v3_levels <- c("", "0", "df2")
  v3_codes <- c(3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L)

  data.frame(
    V1 = factor(id_levels[id_codes], levels = id_levels),
    V2 = factor(v2_levels[v2_codes], levels = v2_levels),
    V3 = factor(v3_levels[v3_codes], levels = v3_levels)
  )
})
Q27GQ4
C1P641
Try this out:
# Full outer join of df1 and df2 on their shared column(s), keeping rows that
# appear in either data frame.
op <- merge(df1, df2, all = TRUE)
# Membership flags: 1 when the row's V1 value occurs in that data frame, else 0.
op[["df1"]] <- as.numeric(op[["V1"]] %in% df1[["V1"]])
op[["df2"]] <- as.numeric(op[["V1"]] %in% df2[["V1"]])
> op
V1 df1 df2
1 A0A061ACH4;Q95Q10;Q9U1W6 1 1
2 A0A061ACL3;Q965I6;O76618 1 1
3 A0A061ACR1;Q2XN02;F5GUA3;Q22498 1 1
4 A0A061AJJ3;A0A061AEA8 1 1
5 A0A061AL01 1 1
6 C1P641 1 0
7 H2FLH3;H2FLH2;A0A061ACT3;A0A061AE24;Q23551-2;Q23551;Q23551-4;Q23551-3;Q23551-5 1 1
8 Q22501;A0A061AE05 1 1
9 Q86CZ7 1 1
10 Q27GQ4 0 1
Or, using dplyr:
library(dplyr)
# Same result via dplyr: full outer join of df1 and df2, then add one 0/1
# membership flag per source data frame.
# NOTE(review): the new columns are named like the data frames they test
# against; the right-hand sides rely on `df1`/`df2` still resolving to the data
# frames when each expression is evaluated, so keep this column order.
op <- mutate(
  merge(df1, df2, all = TRUE),
  df1 = as.numeric(V1 %in% df1[["V1"]]),
  df2 = as.numeric(V1 %in% df2[["V1"]])
)
And here are the answers for your extra questions:
- How many rows of df1 and df2 are the same?
# Count the rows of df1 whose V1 value also occurs in df2.
sum(!is.na(match(df1[["V1"]], df2[["V1"]])))
- How many rows of df1 do not exist in df2?
# Count the rows of df1 whose V1 value has no match in df2.
sum(is.na(match(df1[["V1"]], df2[["V1"]])))
- How many rows of df2 do not exist in df1?
# Count the rows of df2 whose V1 value has no match in df1.
sum(is.na(match(df2[["V1"]], df1[["V1"]])))