【发布时间】:2020-10-29 13:27:07
【问题描述】:
我想基于 if/else if/else 语句创建一个新的变量/变量(取决于数据是宽格式还是长格式)。我熟悉使用 mutate 但似乎无法让它适用于多个变量。我看到有很多类似的帖子,并尝试了提供的解决方案,但没有成功。
以下是宽格式数据示例:
structure(list(distribution = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("bimodal",
"extreme_left", "extreme_right", "left_skew", "right_skew", "trunc_norm_left",
"trunc_norm_right", "trunc_uni_left", "trunc_uni_right", "uniform"
), class = "factor"), Rating_1 = c(25L, 15L, 15L, 5L, 5L, 4L,
4L, 18L, 5L, 9L, 25L, 7L, 2L, 1L, 5L, 12L, 5L, 13L, 7L, 11L),
Rating_2 = c(0L, 8L, 6L, 4L, 5L, 11L, 8L, 7L, 12L, 6L, 0L,
11L, 10L, 1L, 11L, 10L, 10L, 8L, 10L, 5L), Rating_3 = c(0L,
4L, 9L, 9L, 13L, 20L, 10L, 0L, 16L, 8L, 0L, 14L, 26L, 5L,
16L, 6L, 20L, 8L, 13L, 9L), Rating_4 = c(0L, 8L, 6L, 19L,
19L, 11L, 13L, 7L, 12L, 12L, 0L, 11L, 10L, 9L, 13L, 10L,
10L, 8L, 12L, 9L), Rating_5 = c(25L, 15L, 14L, 13L, 8L, 4L,
15L, 18L, 5L, 15L, 25L, 7L, 2L, 34L, 5L, 12L, 5L, 13L, 8L,
16L)), row.names = c(NA, 20L), class = "data.frame")
这是我尝试过的策略之一:
df %>%
arrange(distribution) %>%
if (distribution == "bimodal") {
mutate(Dist1 = Rating_1 * 20 - 5,
Dist2 = Rating_2 * 20 - 0,
Dist3 = Rating_3 * 20 - 0,
Dist4 = Rating_4 * 20 - 0,
Dist5 = Rating_5 * 20 - 0)
} else if (distribution == "extreme_left") {
mutate(Dist1 = Rating_1 * 20 - 0,
Dist2 = Rating_2 * 20 - 0,
Dist3 = Rating_3 * 20 - 1,
Dist4 = Rating_4 * 20 - 2,
Dist5 = Rating_5 * 20 - 6)
} else if (distribution == "extreme_right") {
mutate (Dist1 = Rating_1 * 20 - 6,
Dist2 = Rating_2 * 20 - 3,
Dist3 = Rating_3 * 20 - 1,
Dist4 = Rating_4 * 20 - 0,
Dist5 = Rating_5 * 20 - 0)
} else if (distribution == "left_skew") {
mutate (Dist1 = Rating_1 * 20 - 1,
Dist2 = Rating_2 * 20 - 1,
Dist3 = Rating_3 * 20 - 2,
Dist4 = Rating_4 * 20 - 2,
Dist5 = Rating_5 * 20 - 4)
} else if (distribution == "right_skew") {
mutate (Dist1 = Rating_1 * 20 - 4,
Dist2 = Rating_2 * 20 - 2,
Dist3 = Rating_3 * 20 - 2,
Dist4 = Rating_4 * 20 - 1,
Dist5 = Rating_5 * 20 - 1)
} else if (distribution == "trunc_norm_left") {
mutate (Dist1 = Rating_1 * 20 - 0,
Dist2 = Rating_2 * 20 - 0,
Dist3 = Rating_3 * 20 - 6,
Dist4 = Rating_4 * 20 - 3,
Dist5 = Rating_5 * 20 - 1)
} else if (distribution == "trunc_norm_right") {
mutate (Dist1 = Rating_1 * 20 - 1,
Dist2 = Rating_2 * 20 - 3,
Dist3 = Rating_3 * 20 - 6,
Dist4 = Rating_4 * 20 - 0,
Dist5 = Rating_5 * 20 - 0)
} else if (distribution == "trunc_uni_left") {
mutate (Dist1 = Rating_1 * 20 - 0,
Dist2 = Rating_2 * 20 - 0,
Dist3 = Rating_3 * 20 - 4,
Dist4 = Rating_4 * 20 - 3,
Dist5 = Rating_5 * 20 - 3)
} else if (distribution "trunc_uni_right") {
mutate(Dist1 = Rating_1 * 20 - 3,
Dist2 = Rating_2 * 20 - 3,
Dist3 = Rating_3 * 20 - 4,
Dist4 = Rating_4 * 20 - 0,
Dist5 = Rating_5 * 20 - 0)
} else {
mutate (Dist1 = Rating_1 * 20 - 2,
Dist2 = Rating_2 * 20 - 2,
Dist3 = Rating_3 * 20 - 2,
Dist4 = Rating_4 * 20 - 2,
Dist5 = Rating_5 * 20 - 2)
}
我对长格式数据和宽格式数据都进行了尝试,尽管我意识到长格式数据缺少一些东西。
如果数据是宽格式的,我正在寻找类似的东西。
structure(list(distribution = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("bimodal",
"extreme_left", "extreme_right", "left_skew", "right_skew", "trunc_norm_left",
"trunc_norm_right", "trunc_uni_left", "trunc_uni_right", "uniform"
), class = "factor"), Rating_1 = c(25L, 15L, 15L, 5L, 5L, 4L,
4L, 18L, 5L, 9L, 25L, 7L, 2L, 1L, 5L, 12L, 5L, 13L, 7L, 11L),
Rating_2 = c(0L, 8L, 6L, 4L, 5L, 11L, 8L, 7L, 12L, 6L, 0L,
11L, 10L, 1L, 11L, 10L, 10L, 8L, 10L, 5L), Rating_3 = c(0L,
4L, 9L, 9L, 13L, 20L, 10L, 0L, 16L, 8L, 0L, 14L, 26L, 5L,
16L, 6L, 20L, 8L, 13L, 9L), Rating_4 = c(0L, 8L, 6L, 19L,
19L, 11L, 13L, 7L, 12L, 12L, 0L, 11L, 10L, 9L, 13L, 10L,
10L, 8L, 12L, 9L), Rating_5 = c(25L, 15L, 14L, 13L, 8L, 4L,
15L, 18L, 5L, 15L, 25L, 7L, 2L, 34L, 5L, 12L, 5L, 13L, 8L,
16L), Dist1 = c(495, 295, 295, 95, 95, 75, 75, 355, 95, 175,
495, 135, 35, 15, 95, 235, 95, 255, 135, 215), Dist2 = c(0,
160, 120, 80, 100, 220, 160, 140, 240, 120, 0, 220, 200,
20, 220, 200, 200, 160, 200, 100), Dist3 = c(0, 80, 180,
180, 260, 400, 200, 0, 320, 160, 0, 280, 520, 100, 320, 120,
400, 160, 260, 180), Dist4 = c(0, 160, 120, 380, 380, 220,
260, 140, 240, 240, 0, 220, 200, 180, 260, 200, 200, 160,
240, 180), Dist5 = c(495, 295, 275, 255, 155, 75, 295, 355,
95, 295, 495, 135, 35, 675, 95, 235, 95, 255, 155, 315)), row.names = c(NA,
20L), class = "data.frame")
任何帮助将不胜感激。
【问题讨论】:
-
您是从每列中减去的那些常量值还是涉及一些逻辑?如果您仅以一种格式共享数据并显示与该格式对应的预期输出,那将很有帮助。目前,您的宽、长和预期输出不匹配。
-
我很抱歉造成混乱。是的,有一个逻辑,这些都是常数。这些值因分布和等级(1、2、3...)而异。我已经编辑了共享数据,只是以宽格式给出。
标签: r conditional-statements tidyr dplyr