您可以先对数据框的行号进行随机采样,再把这些行号排成每列 4 行(3 + 1 行)的 matrix,从而轻松做到这一点。如果 nrow(df) %% 4 == 0 为 FALSE,matrix 会循环补齐(recycling)数据并给出一条警告,因此最后一列的末尾可能出现重复的行号。我们可以用 duplicated 找出这些重复值并把它们设为 NA 来消除它们。
# Fix the RNG seed so the permutation below is reproducible.
set.seed(42)
# Shuffle the row indices of `df` and lay them out column-wise, 4 per column
# (one column = one draw of 3 + 1 rows). seq_len() is used instead of
# 1:nrow(df) so an empty data frame yields no indices rather than c(1, 0).
# When nrow(df) is not a multiple of 4, matrix() recycles the indices (with a
# warning) to fill the last column.
m <- matrix(sample(seq_len(nrow(df))), nrow = 4)
# Recycled indices are repeats of earlier ones; blank them out so that every
# row of `df` is used at most once.
m[duplicated(as.vector(m))] <- NA
m
# [,1] [,2] [,3]
# [1,] 1 2 7
# [2,] 5 4 3
# [3,] 10 6 NA
# [4,] 8 9 NA
现在我们对 m 的每一列应用(apply)普通的子集提取:每列第一行的值用于单行样本(samp.2),其余三行的值用于三行样本(samp.1)。当然,如果 nrow(df) 不能被 4 整除,m 中的 NA 会使最后一个三行样本变小,甚至为空。
# Build one named draw per column of `m`. Within a column, the first entry
# indexes the single-row sample (samp.2) and the remaining entries index the
# three-row sample (samp.1). NA padding is dropped with na.omit(), so the last
# three-row sample may be shorter or empty.
res <- setNames(
  # lapply() over column indices always returns a list, one element per column.
  lapply(seq_len(ncol(m)), function(j) {
    idx <- m[, j]
    list(
      # drop = FALSE keeps a data frame even if `df` has a single column.
      samp.1 = df[na.omit(idx[-1]), , drop = FALSE],
      samp.2 = df[idx[1], , drop = FALSE]
    )
  }),
  # seq_len() gives zero names (not c(1, 0)) when `m` has no columns.
  paste0("draw.", seq_len(ncol(m)))
)
res
# $draw.1
# $draw.1$samp.1
# X1 X2
# 5 0.40426832 -0.1333213
# 10 -0.06271410 1.3201133
# 8 -0.09465904 -2.6564554
#
# $draw.1$samp.2
# X1 X2
# 1 1.370958 1.30487
#
#
# $draw.2
# $draw.2$samp.1
# X1 X2
# 4 0.6328626 -0.2787888
# 6 -0.1061245 0.6359504
# 9 2.0184237 -2.4404669
#
# $draw.2$samp.2
# X1 X2
# 2 -0.5646982 2.286645
#
#
# $draw.3
# $draw.3$samp.1
# X1 X2
# 3 0.3631284 -1.388861
#
# $draw.3$samp.2
# X1 X2
# 7 1.511522 -0.2842529
数据:
# Example data: 10 rows, two numeric columns (X1, X2), default row names 1..10.
df <- data.frame(
  X1 = c(
    1.37095844714667, -0.564698171396089, 0.363128411337339,
    0.63286260496104, 0.404268323140999, -0.106124516091484,
    1.51152199743894, -0.0946590384130976, 2.01842371387704,
    -0.062714099052421
  ),
  X2 = c(
    1.30486965422349, 2.28664539270111, -1.38886070111234,
    -0.278788766817371, -0.133321336393658, 0.635950398070074,
    -0.284252921416072, -2.65645542090478, -2.44046692857552,
    1.32011334573019
  )
)