使用 `append()`:在每一对列之前依次插入一个全为 NA 的空列。
# Insert an all-NA column named `emp` in front of every pair of columns.
# After each insertion the next pair starts three positions further right
# (one filler column plus two data columns), so the `after` offsets passed to
# append() are 0, 3, 6, ...  The offsets are derived from the column count
# rather than a literal `0:2`, so the loop is not tied to a six-column input.
# The offset sequence is evaluated once, before `dat` starts growing.
for (i in 3 * (seq_len(ceiling(ncol(dat) / 2)) - 1)) {
  # as.data.frame() makes the repeated name unique: emp, emp.1, emp.2, ...
  dat <- as.data.frame(append(dat, list(emp = NA), i))
}
dat
# emp col1 col2 emp.1 col3 col4 emp.2 col5 col6
# 1 NA 1 2 NA 2 3 NA 5 7
# 2 NA 2 4 NA 6 4 NA 8 2
# 3 NA 5 9 NA 7 3 NA 2 5
# 4 NA 3 4 NA 5 6 NA 8 1
数据:
# Example input: six integer columns (col1 .. col6), four rows.
dat <- data.frame(
  col1 = c(1L, 2L, 5L, 3L),
  col2 = c(2L, 4L, 9L, 4L),
  col3 = c(2L, 6L, 7L, 5L),
  col4 = c(3L, 4L, 3L, 6L),
  col5 = c(5L, 8L, 2L, 8L),
  col6 = c(7L, 2L, 5L, 1L)
)
下面是各个方案的微基准测试(microbenchmark)结果:
# Unit: microseconds
# expr min lq mean median uq max neval cld
# ronak() 969.707 990.9945 1001.4807 1012.282 1017.368 1022.453 3 d
# user() 349.937 358.0145 364.3877 366.092 371.613 377.134 3 a
# jay() 2098.003 2100.8540 2115.7640 2103.705 2124.644 2145.584 3 e
# groth1() 2164.896 2262.5745 2363.6133 2360.253 2462.972 2565.691 3 f
# groth2() 424.546 438.0185 455.0820 451.491 470.350 489.209 3 ab
# groth3() 722.551 728.0910 733.1910 733.631 738.511 743.391 3 c
# r.user() 612.432 619.6570 636.9573 626.882 649.220 671.558 3 bc
## and with the usual expanded data frame:
# Fix the RNG so the resampled data frame is reproducible.
set.seed(42)
# Expand the example to one million rows by sampling row indices with
# replacement. `TRUE` is spelled out because `T` is an ordinary, reassignable
# variable.
dat <- dat[sample(nrow(dat), 1e6, replace = TRUE), ]
# Time every candidate three times on the expanded data.
microbenchmark::microbenchmark(
  ronak(), user(), jay(), groth1(), groth2(), groth3(), r.user(),
  times = 3L
)
# Unit: milliseconds
# expr min lq mean median uq max neval cld
# ronak() 1375.139030 1456.858743 1564.509886 1538.578457 1659.19531 1779.81217 3 c
# user() 89.017416 200.845539 251.548652 312.673662 332.81427 352.95488 3 a
# jay() 7.655812 8.382333 9.941684 9.108855 11.08462 13.06039 3 a
# groth1() 501.263785 514.097103 621.755474 526.930421 682.00132 837.07222 3 b
# groth2() 143.438836 147.783741 189.033391 152.128645 211.83067 271.53269 3 a
# groth3() 1387.314877 1406.898863 1469.493158 1426.482849 1510.58230 1594.68175 3 c
# r.user() 1469.543881 1472.770464 1483.834022 1475.997046 1490.97909 1505.96114 3 c
代码:
# Split the global `dat` into consecutive two-column chunks, put a one-column
# NA data frame named empty<k> in front of chunk k, and bind everything back
# together column-wise.
ronak <- \() {
  pair_id <- rep(seq_along(dat), each = 2, length.out = ncol(dat))
  pairs <- split.default(dat, pair_id)
  padded <- lapply(seq_along(pairs), function(k) {
    filler <- data.frame(NA)
    names(filler) <- paste0('empty', k)
    # cbind() recycles the single NA to the chunk's row count.
    cbind(filler, pairs[[k]])
  })
  do.call(cbind, padded)
}
# Build an all-NA data frame with one extra column per pair of columns in the
# global `dat`, then copy the data columns into the non-filler slots.
# Filler columns sit at positions 1, 4, 7, ... and are named empty1, empty2, ...
# Returns the padded data frame; `dat` itself is not modified.
user <- \() {
  # One filler column per (possibly incomplete) pair of data columns.
  n_empty <- ceiling(ncol(dat) / 2)
  n_total <- ncol(dat) + n_empty
  # Derived from the column count instead of the literal 9, so the function
  # is not limited to six-column inputs.
  empty_pos <- seq(1, by = 3, length.out = n_empty)
  is_data <- !(seq_len(n_total) %in% empty_pos)
  dat2 <- data.frame(matrix(NA, nrow(dat), n_total))
  dat2[is_data] <- dat
  colnames(dat2)[empty_pos] <- paste0("empty", seq_along(empty_pos))
  colnames(dat2)[is_data] <- colnames(dat)
  dat2
}
# Insert an NA column named `emp` before every pair of columns of the global
# `dat` by repeated append(), working on a local copy, and return the result.
# as.data.frame() makes the repeated name unique (emp, emp.1, emp.2, ...).
jay <- \() {
  # Each insertion shifts the next pair three positions to the right, so the
  # `after` offsets are 0, 3, 6, ... Computed from the column count instead of
  # a literal `0:2`, which was only correct for six columns.
  offsets <- 3 * (seq_len(ceiling(ncol(dat) / 2)) - 1)
  out <- dat
  for (i in offsets) {
    out <- as.data.frame(append(out, list(emp = NA), i))
  }
  out
}
# dplyr/purrr variant: split the global `dat` into two-column chunks, prepend
# an `empty` NA column to each chunk, and bind the chunks column-wise.
# The whole body runs inside suppressMessages(), which silences messages
# emitted while attaching the packages and while binding the columns.
groth1 <- \() suppressMessages({
  # Two separate library() calls: the original `require(a):require(b)` built
  # the sequence TRUE:TRUE by accident, and require() only returns FALSE when
  # a package is missing instead of failing at the point of loading.
  library(dplyr)
  library(purrr)
  dat %>%
    split.default(as.numeric(gl(ncol(.), 2, ncol(.)))) %>%
    map(\(pair) bind_cols(empty = NA, pair)) %>%
    bind_cols()
})
# Index-based variant: compute the target position of every column of the
# global `dat` (2 3 5 6 8 9 ...), drop the columns into an all-NA data frame
# of the right width, and label the untouched columns "empty".
groth2 <- \() {
  # Odd source columns advance by 2 (skipping a filler slot), even ones by 1.
  slots <- cumsum(seq_along(dat) %% 2 + 1)
  padded <- data.frame(matrix(NA, nrow(dat), max(slots)))
  padded[slots] <- dat
  labels <- rep("empty", ncol(padded))
  labels[slots] <- names(dat)
  names(padded) <- labels
  padded
}
# Base-R variant: split the global `dat` into two-column chunks, put an
# `empty` NA column in front of each chunk, and bind the chunks column-wise.
groth3 <- \() {
  pair_of <- as.numeric(gl(ncol(dat), 2, ncol(dat))) # 1 1 2 2 3 3
  chunks <- split.default(dat, pair_of)
  # Drop the chunk names so the final cbind() does not prefix column names.
  with_gap <- lapply(unname(chunks), function(chunk) cbind(empty = NA, chunk))
  do.call("cbind", with_gap)
}
# Walk over the global `dat` two columns at a time, prefix each pair with a
# full-length NA column named `empty`, and bind the pieces column-wise.
r.user <- \() {
  starts <- seq(1, ncol(dat), by = 2)
  blocks <- vector("list", length(starts))
  for (k in seq_along(starts)) {
    first <- starts[k]
    blocks[[k]] <- cbind(empty = rep(NA, nrow(dat)), dat[, seq(first, first + 1)])
  }
  do.call(cbind, blocks)
}