【发布时间】:2021-11-02 06:59:51
【问题描述】:
我目前正在制作欧拉图。我设法使用这些代码制作了一个包含 6 个变量的欧拉图,尽管我认为这些代码效率不高:
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
euler_primary <- c("A" = sum(dataset$A == 1, na.rm = TRUE),
"B" = sum(dataset$B == 1, na.rm = TRUE),
"C" = sum(dataset$C == 1, na.rm = TRUE),
"D" = sum(dataset$D == 1, na.rm = TRUE),
"E" = sum(dataset$E == 1, na.rm = TRUE),
"F" = sum(dataset$F == 1, na.rm = TRUE),
"A&B" = sum(dataset$B == 1 & dataset$A == 1, na.rm=TRUE),
"A&C" = sum(dataset$C == 1 & dataset$A == 1, na.rm=TRUE),
"A&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm = TRUE),
"A&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm = TRUE),
"A&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm = TRUE),
"B&C" = sum(dataset$B == 1 & dataset$C == 1, na.rm=TRUE),
"B&D" = sum(dataset$B == 1 & dataset$D == 1, na.rm=TRUE),
"B&E" = sum(dataset$B == 1 & dataset$E == 1, na.rm=TRUE),
"B&F" = sum(dataset$B == 1 & dataset$F == 1, na.rm=TRUE),
"C&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"C&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"C&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"D&E" = sum(dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"D&F" = sum(dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"E&F" = sum(dataset$F == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1, na.rm=TRUE),
"A&B&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&D&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&E&F" = sum(dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"B&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"B&C&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"B&D&E" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"B&D&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"B&E&F" = sum(dataset$B == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"C&D&E" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"C&D&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"C&E&F" = sum(dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"D&E&F" = sum(dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&C&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&D&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&E&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E, na.rm=TRUE),
"B&C&D&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F, na.rm=TRUE),
"B&C&E&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"B&D&E&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"C&D&E&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"A&B&C&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E&F" = sum(dataset$C == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E&F" = sum(dataset$C == 1 & dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE)
)
venn_primary <- euler(euler_primary)
plot(venn_primary6,
quantities = list(cex = .75),
fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))
上面的代码导致如下图:
但是,现在我需要制作一个包含 11 个变量的欧拉图。用 11 个变量制作这样的图表似乎是不可能的,因为变量的组合将达到数百甚至数千。我认为创建一个函数来分配字母并创建列表可能是解决方案。但是,由于我是 R 中数据清理和条件的新手,我无法提出这样的功能。 谁能帮我创建一个函数,我可以在其中输入数据集名称和将包含在图表中的列,然后该函数将完成其余的清理工作?
#p.s.:我注意到eulerr 包要求我们在变量之间添加& 以表示重叠情况。例如,如果我们想查看变量A 和B 之间的交集,我们需要创建一个精确的变量A&B。
非常感谢您
【问题讨论】:
-
如果您包含一个简单的reproducible example 以及可用于测试和验证可能的解决方案的示例输入,则更容易为您提供帮助。如果所有可能的重叠都存在,我真的不明白拥有一个包含 11 个不同变量的欧拉图会有什么实际意义。您是否计算出许多重叠部分是空的?比如 A&F 的值会在样本图中的什么位置?
-
感谢您的建议。我添加了一个示例并相应地更新了分析。是的,我知道这不实用,也不利于读者,但由于我的主管要求我创建这样的图表,我想在我证明他的数字有误之前,我无法说出这样的话。
标签: r function for-loop data-cleaning eulerr