在 R 中,我们可以编写如下函数:它统计每一行在指定列范围内与输入字符串逐位匹配的字符个数,并按匹配数从高到低排序。
#' Rank rows by positional matches against a query string
#'
#' Splits `input_string` into single characters, compares them position by
#' position with the columns of `df` running from `ColF` to `ColT`, and counts
#' the matching positions in every row.
#'
#' @param df A data frame holding a `Speci` column plus the columns to compare.
#' @param ColF,ColT Names of the first and last column of the range. They are
#'   looked up in `names(df)` with `match()`, so a number such as `46` finds
#'   the column named "46".
#' @param input_string A single string with exactly one character per column
#'   in the selected range.
#' @return A data frame with the columns `Species` and `value` (number of
#'   matching positions), sorted by `value` in decreasing order.
test_func <- function(df, ColF, ColT, input_string) {
  stopifnot(
    is.data.frame(df),
    is.character(input_string),
    length(input_string) == 1L,
    !is.na(input_string)
  )

  # Split the query into its individual characters.
  query <- strsplit(input_string, "")[[1]]

  # Translate the two column names into positions and fail early with a clear
  # message instead of letting seq() choke on an NA.
  bounds <- match(c(ColF, ColT), names(df))
  if (length(bounds) != 2L || anyNA(bounds)) {
    stop(
      "`ColF` and `ColT` must each name one existing column of `df`.",
      call. = FALSE
    )
  }
  cols <- seq(bounds[1], bounds[2])

  # A length mismatch would be silently recycled by `==` and yield
  # meaningless counts, so reject it.
  if (length(query) != length(cols)) {
    stop(
      sprintf(
        "`input_string` has %d character(s) but %d column(s) were selected.",
        length(query), length(cols)
      ),
      call. = FALSE
    )
  }

  species <- df$Speci
  if (is.null(species)) {
    stop("`df` must contain a `Speci` column.", call. = FALSE)
  }

  # Coerce the selected columns to a matrix once (factors become their
  # labels), then count the exact positional matches in each row.
  cells <- as.matrix(df[cols])
  vals <- vapply(
    seq_len(nrow(cells)),
    function(i) sum(cells[i, ] == query),
    integer(1)
  )
  names(vals) <- rownames(cells)

  # Best-matching rows first.
  ord_vals <- order(vals, decreasing = TRUE)
  data.frame(Species = species[ord_vals], value = vals[ord_vals])
}
# Example: score every row of `df` against the query "CAAGGA" over the
# columns named "46" through "51".
ColF <- 46
ColT <- 51
input_string <- "CAAGGA"
test_func(df, ColF = ColF, ColT = ColT, input_string = input_string)
# Species value
#1 LL 6
#2 MM 5
#3 PP 4
#4 KK 3
#5 JJ 1
#6 NN 0
数据
# Example data: a `Speci` label column followed by one factor column for each
# of the columns named "45" to "51". Levels are spelled out explicitly so that
# they do not depend on the locale's sort order.
df <- data.frame(
  Speci = factor(
    c("PP", "JJ", "KK", "LL", "MM", "NN"),
    levels = c("JJ", "KK", "LL", "MM", "NN", "PP")
  ),
  `45` = factor(
    c("A", "G", "A", "A", "G", "C"),
    levels = c("A", "C", "G")
  ),
  `46` = factor(
    c("T", "T", "T", "C", "C", "-"),
    levels = c("-", "C", "T")
  ),
  `47` = factor(
    c("A", "A", "-", "A", "A", "G"),
    levels = c("-", "A", "G")
  ),
  `48` = factor(
    c("-", "-", "A", "A", "A", "T"),
    levels = c("-", "A", "T")
  ),
  `49` = factor(
    c("G", "T", "G", "G", "G", "C"),
    levels = c("C", "G", "T")
  ),
  `50` = factor(
    c("G", "A", "-", "G", "G", "C"),
    levels = c("-", "A", "C", "G")
  ),
  `51` = factor(
    c("A", "G", "A", "A", "G", "T"),
    levels = c("A", "G", "T")
  ),
  check.names = FALSE
)