1) 我们可以在dplyr 本身内执行此操作。按“物种”分组,summarise 我们需要的列,即“Sepal.length”,通过使用cur_group_id() 连接列的mean 和完整数据列的子集(除@987654325 外没有其他包使用@)
library(dplyr)
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'), MeanSepalLength = c(mean(Sepal.Length),
mean(.$Sepal.Length[as.numeric(.$Species) != cur_group_id()])),
.groups = 'drop')
# A tibble: 6 x 3
# Species InGroup MeanSepalLength
# <fct> <chr> <dbl>
#1 setosa Yes 5.01
#2 setosa No 6.26
#3 versicolor Yes 5.94
#4 versicolor No 5.80
#5 virginica Yes 6.59
#6 virginica No 5.47
2)如果我们想在多列中执行此操作,请使用across
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'),
across(where(is.numeric), ~ c(mean(.),
mean(iris[[cur_column()]][
as.numeric(iris$Species) != cur_group_id()])), .names = 'Mean{.col}'),
.groups = 'drop')
-输出
# A tibble: 6 x 6
# Species InGroup MeanSepal.Length MeanSepal.Width MeanPetal.Length MeanPetal.Width
# <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#1 setosa Yes 5.01 3.43 1.46 0.246
#2 setosa No 6.26 2.87 4.91 1.68
#3 versicolor Yes 5.94 2.77 4.26 1.33
#4 versicolor No 5.80 3.20 3.51 1.14
#5 virginica Yes 6.59 2.97 5.55 2.03
#6 virginica No 5.47 3.10 2.86 0.786
3)如果我们需要一个函数,也可以创建它
f1 <- function(dat, grp) {
grp_str <- rlang::as_string(rlang::ensym(grp))
dat %>%
group_by({{grp}}) %>%
summarise(InGroup = c('Yes', 'No'),
across(where(is.numeric), ~ c(mean(.),
mean(dat[[cur_column()]][
as.numeric(dat[[grp_str]]) != cur_group_id()])),
.names = 'Mean{.col}'), .groups = 'drop')
}
-测试
f1(iris, Species)
# A tibble: 6 x 6
# Species InGroup MeanSepal.Length MeanSepal.Width MeanPetal.Length MeanPetal.Width
# <fct> <chr> <dbl> <dbl> <dbl> <dbl>
#1 setosa Yes 5.01 3.43 1.46 0.246
#2 setosa No 6.26 2.87 4.91 1.68
#3 versicolor Yes 5.94 2.77 4.26 1.33
#4 versicolor No 5.80 3.20 3.51 1.14
#5 virginica Yes 6.59 2.97 5.55 2.03
#6 virginica No 5.47 3.10 2.86 0.786
或diamonds
f1(diamonds, cut)
# A tibble: 10 x 9
# cut InGroup Meancarat Meandepth Meantable Meanprice Meanx Meany Meanz
# <ord> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 Fair Yes 1.05 64.0 59.1 4359. 6.25 6.18 3.98
# 2 Fair No 0.790 61.7 57.4 3920. 5.72 5.72 3.53
# 3 Good Yes 0.849 62.4 58.7 3929. 5.84 5.85 3.64
# 4 Good No 0.793 61.7 57.3 3933. 5.72 5.72 3.53
# 5 Very Good Yes 0.806 61.8 58.0 3982. 5.74 5.77 3.56
# 6 Very Good No 0.796 61.7 57.3 3919. 5.73 5.72 3.53
# 7 Premium Yes 0.892 61.3 58.7 4584. 5.97 5.94 3.65
# 8 Premium No 0.766 61.9 57.0 3709. 5.65 5.66 3.50
# 9 Ideal Yes 0.703 61.7 56.0 3458. 5.51 5.52 3.40
#10 Ideal No 0.861 61.8 58.5 4249. 5.88 5.88 3.63
4) 或者另一种选择是将sum 的差异除以行数的差异
iris %>%
group_by(Species) %>%
summarise(InGroup = c('Yes', 'No'), across(where(is.numeric),
~ c(mean(.), (sum(iris[[cur_column()]]) -
sum(.))/(nrow(iris) - n())), .names = 'Mean{.col}'), .groups = 'drop')