【问题标题】:tidyverse: append rows of totals in summary output(tidyverse:在汇总输出中附加总计行)
【发布时间】:2021-08-23 08:21:04
【问题描述】:

我想在与group_by 一起使用的summarise 的输出中附加总计行。

# Example data: 22 observations of Y, each classified by category (CT),
# sub-category (SCT) and sub-sub-category (SSCT). Category is the numeric
# code formed by the three keys side by side (e.g. "1","2","1" -> 121).
Data <- data.frame(
  CT = rep(c("1", "2"), times = c(12L, 10L)),
  SCT = rep(c("1", "2", "1", "2"), times = c(6L, 6L, 4L, 6L)),
  SSCT = c(
    "1", "2", "3", "1", "2", "3",
    "1", "2", "1", "2", "1", "2",
    "1", "2", "1", "2",
    "1", "2", "3", "1", "2", "3"
  ),
  Category = c(
    111L, 112L, 113L, 111L, 112L, 113L,
    121L, 122L, 121L, 122L, 121L, 122L,
    211L, 212L, 211L, 212L,
    221L, 222L, 223L, 221L, 222L, 223L
  ),
  Y = c(
    10L, 12L, 15L, 11L, 10L, 13L,
    21L, 22L, 20L, 25L, 23L, 24L,
    31L, 33L, 36L, 39L,
    32L, 31L, 36L, 41L, 44L, 45L
  ),
  stringsAsFactors = FALSE  # keep the key columns as character
)
head(Data)
  CT SCT SSCT Category  Y
1  1   1    1      111 10
2  1   1    2      112 12
3  1   1    3      113 15
4  1   1    1      111 11
5  1   1    2      112 10
6  1   1    3      113 13

其中:

CT:类别,SCT:子类别,SSCT:子子类别

# Total of Y for each category (CT).
group_by(Data, CT) %>%
  summarise(Total = sum(Y))

# A tibble: 2 x 2
  CT    Total
  <chr> <int>
1 1       206
2 2       368


# Total of Y for each category / sub-category pair (CT, SCT).
group_by(Data, CT, SCT) %>%
  summarise(Total = sum(Y))

# A tibble: 4 x 3
# Groups:   CT [2]
  CT    SCT   Total
  <chr> <chr> <int>
1 1     1        71
2 1     2       135
3 2     1       139
4 2     2       229

# Total of Y at the finest level (CT, SCT, SSCT).
group_by(Data, CT, SCT, SSCT) %>%
  summarise(Total = sum(Y))
# A tibble: 10 x 4
# Groups:   CT, SCT [4]
   CT    SCT   SSCT  Total
   <chr> <chr> <chr> <int>
 1 1     1     1        21
 2 1     1     2        22
 3 1     1     3        28
 4 1     2     1        64
 5 1     2     2        71
 6 2     1     1        67
 7 2     1     2        72
 8 2     2     1        73
 9 2     2     2        75
10 2     2     3        81

所需输出

需要输出以下内容:

CT    SCT   SSCT  Total
Total               206
Total                71
1     1     1        21
1     1     2        22
1     1     3        28
Total               135
1     2     1        64
1     2     2        71
Total               368
Total               139
2     1     1        67
2     1     2        72
Total               229
2     2     1        73
2     2     2        75
2     2     3        81

【问题讨论】:

  • 查看 this vignette,了解如何得到类似 Excel 数据透视表的结果
  • 感谢@AnilGoyal 指向一个非常方便的 R 包。
  • 与 this question 相同,只是那个问题没有专门要求 tidyverse 解决方案

标签: r dplyr tidyverse summarize


【解决方案1】:

如果您希望最后一列是数字而不是字符,则必须接受 NA 而不是 ""

# Compute the totals at each of the three nesting levels, stack them, and
# sort so that every "Total" row sits directly above the rows it summarises.
# Needs dplyr for the pipeline verbs and forcats for fct_relevel().
library(dplyr)
library(forcats)

bind_rows(
  # Level 1: one row per CT; both lower-level keys are marked "Total".
  Data %>%
    group_by(CT) %>%
    summarise(Total = sum(Y), .groups = "drop") %>%
    mutate(SCT = "Total", SSCT = "Total"),

  # Level 2: one row per (CT, SCT); the innermost key is marked "Total".
  Data %>%
    group_by(CT, SCT) %>%
    summarise(Total = sum(Y), .groups = "drop") %>%
    mutate(SSCT = "Total"),

  # Level 3: the detail rows.
  Data %>%
    group_by(CT, SCT, SSCT) %>%
    summarise(Total = sum(Y), .groups = "drop")
) %>%
  # Make "Total" the first factor level so arrange() sorts total rows first.
  mutate(across(c("SCT", "SSCT"), ~ fct_relevel(.x, "Total"))) %>%
  arrange(CT, SCT, SSCT) %>%
  # Blank out the keys on total rows. SSCT is rewritten last because the
  # first two expressions still need its "Total" marker. Labels are taken
  # with as.character() rather than from the factor's integer codes, so
  # SCT values that are not "1", "2", ... are preserved correctly.
  mutate(
    CT = if_else(SSCT == "Total", "Total", CT),
    SCT = if_else(SSCT == "Total", "", as.character(SCT)),
    SSCT = if_else(SSCT == "Total", "", as.character(SSCT))
  ) %>%
  select(CT, SCT, SSCT, Total)
#  A tibble: 16 x 4
    CT    SCT   SSCT  Total
   <chr> <chr> <chr> <int>
 1 Total ""    ""      206
 2 Total ""    ""       71
 3 1     "1"   "1"      21
 4 1     "1"   "2"      22
 5 1     "1"   "3"      28
 6 Total ""    ""      135
 7 1     "2"   "1"      64
 8 1     "2"   "2"      71
 9 Total ""    ""      368
10 Total ""    ""      139
11 2     "1"   "1"      67
12 2     "1"   "2"      72
13 Total ""    ""      229
14 2     "2"   "1"      73
15 2     "2"   "2"      75
16 2     "2"   "3"      81

【讨论】:

    【解决方案2】:

    1) 我们可以使用 janitor 包中的 adorn_totals。在 janitor 中,总计行通常排在被汇总的分组之后,但我们可以先用名称“0”代替 Total 并排序,使总计行排在前面,最后再把“0”替换为单词 Total。filter 会删除有多个字段等于该总计标记的行。

    library(dplyr)
    library(janitor)

    # Add total rows with janitor::adorn_totals(), first within each CT and
    # then within each (CT, SCT). The totals are temporarily named "0" so that
    # arrange() sorts them ahead of the detail rows; the marker is renamed to
    # "Total" at the very end.
    Data %>%
      group_by(CT, SCT, SSCT) %>%
      summarize(Y = sum(Y), .groups = "drop") %>%
      group_by(CT) %>%
      group_modify(~ adorn_totals(.x, name = "0")) %>%
      group_by(CT, SCT) %>%
      group_modify(~ adorn_totals(.x, name = "0")) %>%
      ungroup() %>%
      # Drop rows where more than one key carries the "0" marker (a total of
      # a total row). Only character columns are inspected, so a numeric Y
      # that happens to equal 0 is never mistaken for the marker.
      filter(rowSums(across(where(is.character), ~ .x == "0")) <= 1) %>%
      arrange(CT, SCT, SSCT) %>%
      mutate(across(where(is.character), ~ replace(.x, .x == "0", "Total")))
    

    给予:

    # A tibble: 16 x 4
       CT    SCT   SSCT      Y
       <chr> <chr> <chr> <int>
     1 1     Total -       206
     2 1     1     Total    71
     3 1     1     1        21
     4 1     1     2        22
     5 1     1     3        28
     6 1     2     Total   135
     7 1     2     1        64
     8 1     2     2        71
     9 2     Total -       368
    10 2     1     Total   139
    11 2     1     1        67
    12 2     1     2        72
    13 2     2     Total   229
    14 2     2     1        73
    15 2     2     2        75
    16 2     2     3        81
    

    2)如果可以将总数放在组之后而不是之前,则可以简化。

    # Simpler variant: keep janitor's default "Total" name, so each total row
    # stays after the rows it summarises and no re-sorting is needed.
    # Uses dplyr and janitor.
    Data %>%
      group_by(CT, SCT, SSCT) %>%
      summarize(Y = sum(Y), .groups = "drop") %>%
      group_by(CT) %>%
      group_modify(~ adorn_totals(.x)) %>%
      group_by(CT, SCT) %>%
      group_modify(~ adorn_totals(.x)) %>%
      ungroup() %>%
      # Drop rows where more than one key column reads "Total" (a total of a
      # total row). across() is given explicit columns and a function, because
      # calling it with no arguments is deprecated in recent dplyr releases.
      filter(rowSums(across(where(is.character), ~ .x == "Total")) <= 1)
    

    给予:

    # A tibble: 16 x 4
       CT    SCT   SSCT      Y
       <chr> <chr> <chr> <int>
     1 1     1     1        21
     2 1     1     2        22
     3 1     1     3        28
     4 1     1     Total    71
     5 1     2     1        64
     6 1     2     2        71
     7 1     2     Total   135
     8 1     Total -       206
     9 2     1     1        67
    10 2     1     2        72
    11 2     1     Total   139
    12 2     2     1        73
    13 2     2     2        75
    14 2     2     3        81
    15 2     2     Total   229
    16 2     Total -       368
    

    【讨论】:

      【解决方案3】:

      你可以在data.table包中使用rollup

      library(data.table)
      # Convert Data to a data.table by reference (Data itself is modified).
      setDT(Data)

      # Grouping columns, ordered from the outermost to the innermost level.
      group_vars <- c("CT", "SCT", "SSCT")

      # rollup() sums Y at each level of group_vars; the keys that are
      # aggregated away come back as NA.
      piv <- rollup(Data, j = list(Total = sum(Y)), by = group_vars)

      # Sort by the keys (NA keys come first, so each subtotal precedes its
      # detail rows), then drop the first row, presumably the all-NA
      # grand-total row.
      setorderv(piv, group_vars)
      piv[-1]
      #>     CT  SCT SSCT Total
      #>  1:  1 <NA> <NA>   206
      #>  2:  1    1 <NA>    71
      #>  3:  1    1    1    21
      #>  4:  1    1    2    22
      #>  5:  1    1    3    28
      #>  6:  1    2 <NA>   135
      #>  7:  1    2    1    64
      #>  8:  1    2    2    71
      #>  9:  2 <NA> <NA>   368
      #> 10:  2    1 <NA>   139
      #> 11:  2    1    1    67
      #> 12:  2    1    2    72
      #> 13:  2    2 <NA>   229
      #> 14:  2    2    1    73
      #> 15:  2    2    2    75
      #> 16:  2    2    3    81
      

      reprex package (v2.0.0) 于 2021-06-05 创建

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 2023-02-10
        • 2020-11-24
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2016-05-09
        • 1970-01-01
        相关资源
        最近更新 更多