【问题标题】:R mean of unique values in data.table(data.table 中唯一值的 R 平均值)
【发布时间】:2021-02-16 00:42:35
【问题描述】:

我有一个如下的data.table

structure(list(group = c("A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "A", "A", "A", "A", "A", "A", "A", "A", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B", "B", "B", "B", 
                          "B", "B", "B", "B", "B"), 
               V1 = c(6.38, 6.38, 6.38, 6.38, -1.53, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, 
                      -24.93, -24.93, -24.93, -6.8, -6.8, -6.8, -6.8, -6.8, -1.71, 
                      -1.71, -1.71, -1.71, -1.71, -1.06, -1.06, -1.06, -1.06, -1.06, 
                      -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, 
                      -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, 
                      -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, 
                      -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, 8.42, 8.42, 8.42, 4.34, 
                      4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 
                      4.34, 4.34, 4.34, 4.34, 4.34, 4.34), 
               V2 = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, -0.11, -0.11, -11, -11, -11, -11, -11, 
                      -11, -11, -11, -11, -11, 1.6, 1.6, 1.6, 1.6, -0.55, -0.55, -0.55, 
                      -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, 
                      -0.19, -0.19, -0.19, -0.19, -0.19, 2.63, 2.63, 2.63, 2.63, 2.63, 
                      2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 
                      -3.86, -3.86, 0.48, 0.48, 0.48, 0.48, 0.48, 0.48, 0.48, -1.38, 
                      -1.38, -1.38, -1.38, -5.15, -11.58, -11.58, -11.58, -11.58, -11.58, 
                      -11.58, -11.58, -11.58, -11.58, -0.46, -7.32, -7.32, -7.32, -7.32, 
                      -7.32, -7.32, -7.32, -7.32, 2.67, 4.88, 4.88, 4.88, 4.88, 4.88, 
                      4.88, 4.88, 4.88, -11.57, -11.57, -11.57, 1.67, 1.55, 1.55, 2.3, 
                      2.3, 2.3, 2.3, 2.3, 2.3, 2.3, 2.3, -1.42, 21.88, 21.88, 21.88, 
                      21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 
                      21.88, 21.88, 21.88, 21.88, 21.88, 21.88, -0.59, -0.59, -0.59, 
                      -0.59, -0.59, -1.87, -1.87, -1.87)), row.names = c(NA, -138L), 
          class = c("data.table", "data.frame"))

我想按组查找每列中唯一值的总和。

我尝试了以下方法,它给出了每列中所有值的总和(但不是唯一值)。

library(data.table)
dt[, lapply(.SD, sum, na.rm = T), by=group, .SDcols = c("V1", "V2")]

   group       V1     V2
1:     A -1571.53 -88.67
2:     B    20.55 245.64

但是,我只想找到唯一值的总和。

答案应该是这样的 -

group       V1     V2
1:     A  -269.38 -12.43
2:     B    -4.47  27.17

谢谢!

【问题讨论】:

    标签: r data.table


    【解决方案1】:

    唯一值的总和(每列独立)可以这样计算

    dt[, .(sum(unique(V1)), sum(unique(V2))), group]
    #>    group     V1     V2
    #> 1:     A -20.08 -13.83
    #> 2:     B   3.19  -5.01
    

    你给出的预期答案,其实是先对整行去重(只保留唯一的行)之后,再对每一列求和的结果,即

    unique(dt)[, .(sum(V1), sum(V2)), group]
    #>    group      V1     V2
    #> 1:     A -269.38 -12.43
    #> 2:     B   -4.47  27.17
    

    如果想用列名向量(通过 .SDcols)来指定要汇总的列,可以这样写

    unique(dt)[, lapply(.SD, sum), group, .SDcols = c('V1', 'V2')]
    

    或者,如果您想要的是第一种去重方式(每列各自独立去重),可以这样写

    dt[, lapply(.SD, function(x) sum(unique(x))), group, .SDcols = c('V1', 'V2')]
    

    【讨论】:

      猜你喜欢
      • 2017-10-04
      • 2021-12-05
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2022-01-05
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多