【发布时间】:2020-07-13 19:54:12
【问题描述】:
我有以下代码:
library(tidyverse)
# Reduce `astronauts` to one row per distinct (name, nationality,
# total_hrs_sum) combination: summarise() called with no summary expressions
# keeps only the grouping columns. summarise() peels off the last grouping
# level, so the result is still grouped by name and nationality.
summarise(
  group_by(astronauts, name, nationality, total_hrs_sum)
)
输出:
name nationality total_hrs_sum
<chr> <chr> <dbl>
Acaba, Joseph M. U.S. 7272.23
Acton, Loren Wilbur U.S. 190.94
Adamson, James C. U.S. 334.00
Afanasyev, Viktor Mikhaylovich U.S.S.R/Russia 13338.55
Aidyn (Aydyn) Akanovich Aimbetov Kazakhstan 236.23
Akers, Thomas D. U.S. 814.00
Akiyama, Toyohiro Japan 189.90
Aksyonov, Vladimir U.S.S.R/Russia 284.18
Al Mansoori, Hazzaa UAE 189.00
Al-saud, Sultan bin Salman Saudi Arabia 170.00
我的问题:
我想进一步筛选这个数据框,使每个国籍只保留 1 个姓名,即该国籍中 total_hrs_sum 列的值最高的那一个。我更倾向于 dplyr 的解决方案,但也欢迎其他可行的方案。
样本数据:
# Sample data for the question: a 50-row tibble with the columns `name`,
# `nationality` and `total_hrs_sum`. It is stored as a `grouped_df` whose
# grouping variables are `name` and `nationality`; the `groups` attribute
# lists 50 groups, each pointing at exactly one row.
# Presumably produced with dput() -- evaluating the expression rebuilds the
# object, but it is not assigned to a name here.
structure(list(name = c("Acaba, Joseph M.", "Acton, Loren Wilbur",
"Adamson, James C.", "Afanasyev, Viktor Mikhaylovich", "Aidyn (Aydyn) Akanovich Aimbetov",
"Akers, Thomas D.", "Akiyama, Toyohiro", "Aksyonov, Vladimir",
"Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman", "Aldrin, Edwin Eugene, Jr.",
"Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr", "Allen, Andrew M.",
"Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), total_hrs_sum = c(7272.23,
190.94, 334, 13338.55, 236.23, 814, 189.9, 284.18, 189, 170,
289, 47, 7434.03, 904, 314, 1224, 147, 4046, 593, 614.37, 261.525,
579, 847, 639.5, 206, 188.71, 307, 8784, 3471.35, 377.5, 664,
5686.82, 190.2, 4722, 17942.23, 338, 686, 965, 4297.28, 5085,
734, 190.94, 473.75, 169.63, 1671.75, 708, 190, 26.03, 94.83,
5073.07)), row.names = c(NA, -50L), groups = structure(list(name = c("Acaba, Joseph M.",
"Acton, Loren Wilbur", "Adamson, James C.", "Afanasyev, Viktor Mikhaylovich",
"Aidyn (Aydyn) Akanovich Aimbetov", "Akers, Thomas D.", "Akiyama, Toyohiro",
"Aksyonov, Vladimir", "Al Mansoori, Hazzaa", "Al-saud, Sultan bin Salman",
"Aldrin, Edwin Eugene, Jr.", "Aleksandrov, Aleksandr", "Aleksandrov, Aleksandr",
"Allen, Andrew M.", "Allen, Joseph P.", "Altman, Scott D.", "Anders, William Alison",
"Anderson, Clayton C.", "Anderson, Michael P.", "André-Deshays, Claudie (Haigneré)",
"Ansari, Anousheh", "Antonelli, Dominic A.", "Apt, Jerome", "Archambault, Lee J.",
"Armstrong, Neil A.", "Arnaldo Tamayo Mendez", "Arnold, Richard R., II",
"Artemyev, Oleg", "Artsebarsky, Anatoly", "Artyukhin, Yuri",
"Ashby, Jeffrey S.", "Atkov, Oleg", "Aubakirov, Toktar", "Auñón-Chancellor, Serena",
"Avdeyev, Sergei", "Bagian, James P.", "Baker, Ellen S.", "Baker, Michael A.",
"Balandin, Aleksandr", "Barratt, Michael R.", "Barry, Daniel T.",
"Bartoe, John-David Francis", "Baturin, Yuri", "Baudry, Patrick",
"Bean, Alan Lavern", "Behnken, Robert L.", "Bella, Ivan", "Belyayev, Pavel",
"Beregovoi, Georgi", "Berezovoy, Anatoly"), nationality = c("U.S.",
"U.S.", "U.S.", "U.S.S.R/Russia", "Kazakhstan", "U.S.", "Japan",
"U.S.S.R/Russia", "UAE", "Saudi Arabia", "U.S.", "Bulgaria",
"U.S.S.R/Russia", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.",
"France", "U.S.", "U.S.", "U.S.", "U.S.", "U.S.", "Cuba", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.",
"U.S.S.R/Russia", "U.S.S.R/Russia", "U.S.", "U.S.S.R/Russia",
"U.S.", "U.S.", "U.S.", "U.S.S.R/Russia", "U.S.", "U.S.", "U.S.",
"U.S.S.R/Russia", "France", "U.S.", "U.S.", "Slovakia", "U.S.S.R/Russia",
"U.S.S.R/Russia", "U.S.S.R/Russia"), .rows = structure(list(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L,
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 50L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
【问题讨论】:
标签: r dataframe dplyr tidyverse