【发布时间】:2019-06-15 20:35:01
【问题描述】:
我有 5 个条件,每个条件可以是存在 (=1) 或不存在 (=0):
# Reproducible toy data: five 0/1 condition flags with 30 observations each.
# The columns are drawn one after another, in the listed order, from the
# same seeded random stream.
set.seed(101)
df <- as.data.frame(
  lapply(
    c(alfa = 1, beta = 2, gamma = 3, delta = 4, epsilon = 5),
    function(unused) sample(c(0, 1), size = 30, replace = TRUE)
  )
)
我想从这些条件存在的所有可能组合 (2^5) 中生成一组虚拟变量。具体来说,我想验证以下条件是否同时存在:
- 1) alfa + beta;
- 2) alfa + gamma;
- 3) alfa + delta;
- 4) alfa + epsilon;
- 5) alfa + beta + gamma;
- [...]
- 27) alfa + beta + gamma + delta + epsilon;
生成所有可能组合的矩阵
# Every TRUE/FALSE pattern over the columns of df, one pattern per row.
v <- as.matrix(
  do.call(expand.grid, replicate(ncol(df), c(FALSE, TRUE), simplify = FALSE))
)
所有组合:
head(v)
Var1 Var2 Var3 Var4 Var5
[1,] TRUE FALSE FALSE FALSE FALSE
[2,] FALSE TRUE FALSE FALSE FALSE
[3,] TRUE TRUE FALSE FALSE FALSE
[4,] FALSE FALSE TRUE FALSE FALSE
[5,] TRUE FALSE TRUE FALSE FALSE
[6,] FALSE TRUE TRUE FALSE FALSE
将矩阵转换为列索引列表
# Turn each row of v into a logical column selector, labelled with the
# dot-joined names of the df columns that the row selects.
indexes <- setNames(
  lapply(seq_len(nrow(v)), function(i) v[i, ]),
  apply(v, 1, function(keep) paste(names(df)[keep], collapse = "."))
)
不幸的是,我被困在这里。
我需要根据上述组合生成27个虚拟变量(32-5)。
EDIT:我用这种肮脏的方式解决了问题:
# Example data: five binary conditions (1 = present, 0 = absent), 30 rows.
set.seed(101)
df <- data.frame(
  alfa = sample(c(0, 1), 30, replace = TRUE),
  beta = sample(c(0, 1), 30, replace = TRUE),
  gamma = sample(c(0, 1), 30, replace = TRUE),
  delta = sample(c(0, 1), 30, replace = TRUE),
  epsilon = sample(c(0, 1), 30, replace = TRUE)
)

# Count the number of coexisting conditions in each row.
df$n <- rowSums(df[1:5], na.rm = TRUE)

# Add one dummy column per combination of two or more conditions.
# A dummy is 1 only when a row matches the combination exactly: the listed
# conditions are 1 and every other condition is 0. The patterns are generated
# with expand.grid() instead of being typed out by hand, so the first
# condition varies fastest (alfa.beta, alfa.gamma, beta.gamma, ...).
# local() keeps the helper variables out of the workspace; only df is updated.
df <- local({
  conditions <- names(df)[1:5]
  patterns <- as.matrix(expand.grid(rep(list(c(0, 1)), length(conditions))))
  # Drop the empty pattern and the single-condition patterns.
  patterns <- patterns[rowSums(patterns) >= 2, , drop = FALSE]

  for (i in seq_len(nrow(patterns))) {
    wanted <- patterns[i, ]
    # Elementwise AND of "column == wanted value" across all conditions.
    hits <- Reduce(
      `&`,
      Map(function(column, value) df[[column]] == value, conditions, wanted)
    )
    df[[paste(conditions[wanted == 1], collapse = ".")]] <- ifelse(hits, 1, 0)
  }
  df
})
【问题讨论】:
-
我不确定你的问题是什么。我认为 expand.grid() 这一步已经回答了您的问题。如果不是,那么您的问题/疑问究竟是什么? -
@petermeissner 我需要根据上述组合生成额外的 27 个虚拟变量。我相应地更新了问题。
标签: r dataframe combinations dummy-variable