【发布时间】:2020-09-17 17:31:02
【问题描述】:
我有一个现有的 df,想在其中添加一个新列,其值为特定列范围(本例中为 [,28:30])内每行最小值所在的列索引。
我已经能够添加两列来获得这个范围的平均值和标准差,但是,当我使用 which.min 来填充我的最后一列时,它不起作用。
这是我迄今为止尝试过的:
# Row-wise mean of columns 28:30, stored as the new column m1.
df$m1 <- apply(df[,28:30], 1, mean)
# Row-wise standard deviation of the same three columns.
df$sd <-apply(df[,28:30], 1, sd)
# Row-wise position (1..3) of the smallest value within columns 28:30.
# which.min() keeps the name of the winning element, so each result carries
# the source column name along with the index rather than a bare integer.
df$indxcol <- apply(df[,28:30],1, which.min)
上面的代码可以运行,但结果中同时包含了列名和索引值,而我只想要整数形式的索引。
我也用 mutate 试过,但 df 中没有添加任何新列:
# Same row-wise mean and standard deviation as in the first attempt.
df$m1 <- apply(df[,28:30], 1, mean)
df$sd <-apply(df[,28:30], 1, sd)
# The value returned by mutate() is not assigned to anything here, so df
# itself is left unchanged and no indxcol column shows up in it.
df%>%mutate(indxcol = apply(.[,28:30],1, which.min))
这是我的 df 样本
df <- structure(list(YY = c(2007, 2007, 2007, 2007, 2007, 2007), DD = c(4,
4, 4, 4, 4, 4), MM = c("Aug", "Aug", "Aug", "Aug", "Aug", "Aug"
), Date = structure(c(13729, 13729, 13729, 13729, 13729, 13729
), class = "Date"), `ID (FIFA)` = c("FRA D1", "FRA D1", "FRA D1",
"FRA D1", "FRA D1", "FRA D1"), Country = c("France", "France",
"France", "France", "France", "France"), League = c("Ligue 1",
"Ligue 1", "Ligue 1", "Ligue 1", "Ligue 1", "Ligue 1"), Season = c("2007/2008",
"2007/2008", "2007/2008", "2007/2008", "2007/2008", "2007/2008"
), HOME = c("Bordeaux", "Caen", "Lille", "Monaco", "Paris SG",
"Rennes"), AWAY = c("Lens", "Nice", "Lorient", "St Etienne",
"Sochaux", "Nancy"), `Final Scores` = c(1, 1, 0, 1, 0, 0), ...12 = c(0,
0, 0, 1, 0, 2), ...13 = c("H", "H", "D", "D", "D", "A"), ...14 = c("U",
"U", "U", "U", "U", "U"), `ET/Pen/Awd` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
`1st Half Scores` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ...17 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), ...18 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), ...19 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `2nd Half Scores` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...21 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...22 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), ...23 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), FTMoneyline...24 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...25 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...26 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `Payout, %...27` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), FTMoneyline...28 = c(2.2,
2.4, 1.72, 1.9, 1.72, 1.72), ...29 = c(2.75, 2.75, 3, 2.9,
3, 3.2), ...30 = c(3.4, 3, 5, 4, 5, 4.5), `Payout, %...31` = c(89.9038461538462,
89.795918367347, 89.7079276773296, 89.1946580331849, 89.7079276773296,
89.596295760382), `FT TG 2.5...32` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...33 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `FT TG 2.5...34` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...35 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), OUTCOME = c(1,
1, 2, 2, 2, 3), REFGAME = c("G423", "G424", "G425", "G426",
"G427", "G428"), m1 = c(2.78333333333333, 2.71666666666667,
3.24, 2.93333333333333, 3.24, 3.14)), row.names = c(NA, -6L
), class = c("tbl_df", "tbl", "data.frame"))
这是我使用第一段代码得到的 df 输出
df <-structure(list(YY = c(2007, 2007, 2007, 2007, 2007, 2007), DD = c(4,
4, 4, 4, 4, 4), MM = c("Aug", "Aug", "Aug", "Aug", "Aug", "Aug"
), Date = structure(c(13729, 13729, 13729, 13729, 13729, 13729
), class = "Date"), `ID (FIFA)` = c("FRA D1", "FRA D1", "FRA D1",
"FRA D1", "FRA D1", "FRA D1"), Country = c("France", "France",
"France", "France", "France", "France"), League = c("Ligue 1",
"Ligue 1", "Ligue 1", "Ligue 1", "Ligue 1", "Ligue 1"), Season = c("2007/2008",
"2007/2008", "2007/2008", "2007/2008", "2007/2008", "2007/2008"
), HOME = c("Bordeaux", "Caen", "Lille", "Monaco", "Paris SG",
"Rennes"), AWAY = c("Lens", "Nice", "Lorient", "St Etienne",
"Sochaux", "Nancy"), `Final Scores` = c(1, 1, 0, 1, 0, 0), ...12 = c(0,
0, 0, 1, 0, 2), ...13 = c("H", "H", "D", "D", "D", "A"), ...14 = c("U",
"U", "U", "U", "U", "U"), `ET/Pen/Awd` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
`1st Half Scores` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), ...17 = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), ...18 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), ...19 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `2nd Half Scores` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...21 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...22 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), ...23 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), FTMoneyline...24 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...25 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...26 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `Payout, %...27` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), FTMoneyline...28 = c(2.2,
2.4, 1.72, 1.9, 1.72, 1.72), ...29 = c(2.75, 2.75, 3, 2.9,
3, 3.2), ...30 = c(3.4, 3, 5, 4, 5, 4.5), `Payout, %...31` = c(89.9038461538462,
89.795918367347, 89.7079276773296, 89.1946580331849, 89.7079276773296,
89.596295760382), `FT TG 2.5...32` = c(NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...33 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `FT TG 2.5...34` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ...35 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), OUTCOME = c(1,
1, 2, 2, 2, 3), REFGAME = c("G423", "G424", "G425", "G426",
"G427", "G428"), m1 = c(2.78333333333333, 2.71666666666667,
3.24, 2.93333333333333, 3.24, 3.14), sd = c(0.600694043031337,
0.301385688667085, 1.65311826558175, 1.05039675043925, 1.65311826558175,
1.39097088395121), indxcol = list(c(FTMoneyline...28 = 1L),
c(FTMoneyline...28 = 1L), c(FTMoneyline...28 = 1L), c(FTMoneyline...28 = 1L),
c(FTMoneyline...28 = 1L), c(FTMoneyline...28 = 1L))), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -6L))
感谢您的帮助
【问题讨论】:
-
您需要把结果重新赋值给原始对象:
df <- df%>%mutate(indxcol = apply(.[,7:9],1, which.min))。第 7 列和第 8 列是 character 类型,在执行此操作之前,您可能需要先将它们转换为数值型。可以用 str(df[7:9]) 查看各列的类型 -
更快的选择是使用 max.col:
df <- df %>% mutate(across(7:9, as.numeric)) %>% mutate(indxcol = max.col(-1 * .[7:9], 'first')) -
非常感谢您的回答@akrun。它对这个 df 很有效,但实际上我正在另一个数据帧上尝试这个,结果也包括列的名称,我不明白为什么。这是一个
-
您能否用 dput 给出一个能够重现该问题的小示例?
您的第一段代码似乎有笔误:apply(l[,7:9],1, unlist(which.min)),也就是多用了 unlist -
我得到的结果是:
df %>% mutate(indxcol = max.col(-1 * .[28:30], 'first')) %>% .$indxcol
# [1] 1 1 1 1 1 1
标签: r