另一个使用Rfast::rowTabulate的选项
v <- c('Hard', 'Match', 'Easy', NA)
DT[, (v) := as.data.table(Rfast::rowTabulate(matrix(match(as.matrix(.SD), v), nrow=.N))),
.SDcols=Task_Alpha:Task_Delta]
输出:
userID Score Task_Alpha Task_Beta Task_Charlie Task_Delta Hard Match Easy NA
1: 3108 -8.00 Easy Easy Easy Easy 0 0 4 0
2: 3207 3.00 Hard Easy Match Match 1 2 1 0
3: 3350 5.78 Hard Easy Hard Hard 3 0 1 0
4: 3961 10.00 Easy <NA> Hard Hard 2 0 1 1
5: 4021 10.00 Easy Easy <NA> Hard 1 0 2 1
Wimpel 的数据:
library(data.table)
DT <- fread("userID Score Task_Alpha Task_Beta Task_Charlie Task_Delta
3108 -8.00 Easy Easy Easy Easy
3207 3.00 Hard Easy Match Match
3350 5.78 Hard Easy Hard Hard
3961 10.00 Easy NA Hard Hard
4021 10.00 Easy Easy NA Hard
")
想知道这种方法在实际数据集上的运行速度有多快,以及实际数据集是否很大。
编辑:添加时间
library(data.table)
set.seed(0L)
nr <- 1e6
v <- c('Hard', 'Match', 'Easy', NA)
DT <- data.table(userID=1:nr, Task_Alpha=sample(v, nr, TRUE),
Task_Beta=sample(v, nr, TRUE), Task_Charlie=sample(v, nr, TRUE),
Task_Delta=sample(v, nr, TRUE))
df <- as.data.frame(DT)
mtd0 <- function() {
t(apply(df[-1L], 1L, function(x)
table(factor(x, levels = c("Easy", "Hard", "Match")))))
}
mtd1 <- function() {
DT.melt <- melt( DT, id.vars = "userID", measure.vars = patterns( task = "^Task_") )
dcast( DT.melt, userID ~ value, fun.aggregate = length )
}
mtd2 <- function() {
DT[, Rfast::rowTabulate(matrix(match(as.matrix(.SD), v), nrow=.N)),
.SDcols=Task_Alpha:Task_Delta]
}
bench::mark(mtd0(), mtd1(), mtd2(), check=FALSE)
时间安排:
# A tibble: 3 x 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 mtd0() 54.7s 54.7s 0.0183 137MB 1.70 1 93 54.7s <int[,3] [1,000,000 x 3]> <df[,3] [107,168 x 3]> <bch:tm> <tibble [1 x 3]>
2 mtd1() 2.4s 2.4s 0.417 398MB 0.833 1 2 2.4s <df[,5] [1,000,000 x 5]> <df[,3] [12,517 x 3]> <bch:tm> <tibble [1 x 3]>
3 mtd2() 252.8ms 264.4ms 3.78 107MB 3.78 2 2 528.7ms <int[,4] [1,000,000 x 4]> <df[,3] [6,509 x 3]> <bch:tm> <tibble [2 x 3]>