【发布时间】:2021-02-16 00:42:35
【问题描述】:
我有一个如下所示的 data.table:
# Example data (dput output): 138 rows, a two-level `group` key ("A"/"B")
# and two numeric columns V1 and V2. Assigned to `dt` so the snippets
# that reference `dt` can be run directly.
dt <- structure(list(group = c("A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B"),
V1 = c(6.38, 6.38, 6.38, 6.38, -1.53, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93, -24.93,
-24.93, -24.93, -24.93, -6.8, -6.8, -6.8, -6.8, -6.8, -1.71,
-1.71, -1.71, -1.71, -1.71, -1.06, -1.06, -1.06, -1.06, -1.06,
-1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06,
-1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06,
-1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06, -1.06,
-1.06, -1.06, -1.06, -1.06, -1.06, -1.06, 8.42, 8.42, 8.42, 4.34,
4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34, 4.34,
4.34, 4.34, 4.34, 4.34, 4.34, 4.34),
V2 = c(0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, -0.11, -0.11, -11, -11, -11, -11, -11,
-11, -11, -11, -11, -11, 1.6, 1.6, 1.6, 1.6, -0.55, -0.55, -0.55,
-2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15, -2.15,
-0.19, -0.19, -0.19, -0.19, -0.19, 2.63, 2.63, 2.63, 2.63, 2.63,
2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63, 2.63,
-3.86, -3.86, 0.48, 0.48, 0.48, 0.48, 0.48, 0.48, 0.48, -1.38,
-1.38, -1.38, -1.38, -5.15, -11.58, -11.58, -11.58, -11.58, -11.58,
-11.58, -11.58, -11.58, -11.58, -0.46, -7.32, -7.32, -7.32, -7.32,
-7.32, -7.32, -7.32, -7.32, 2.67, 4.88, 4.88, 4.88, 4.88, 4.88,
4.88, 4.88, 4.88, -11.57, -11.57, -11.57, 1.67, 1.55, 1.55, 2.3,
2.3, 2.3, 2.3, 2.3, 2.3, 2.3, 2.3, -1.42, 21.88, 21.88, 21.88,
21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88, 21.88,
21.88, 21.88, 21.88, 21.88, 21.88, 21.88, -0.59, -0.59, -0.59,
-0.59, -0.59, -1.87, -1.87, -1.87)), row.names = c(NA, -138L),
class = c("data.table", "data.frame"))
我想按组查找每列中唯一值的总和。
我尝试了以下方法,但它给出的是每列中所有值的总和(而不是唯一值的总和)。
library(data.table)
# Per-group totals of V1 and V2 over ALL rows (duplicates are included).
# na.rm is spelled TRUE because T is an ordinary variable that can be reassigned.
dt[, lapply(.SD, sum, na.rm = TRUE), by = group, .SDcols = c("V1", "V2")]
group V1 V2
1: A -1571.53 -88.67
2: B 20.55 245.64
但是,我只想对唯一值求和。
期望的结果如下(这些数值相当于先在每个组内对 (V1, V2) 的行去重,再对各列求和):
group V1 V2
1: A -269.38 -12.43
2: B -4.47 27.17
谢谢!
【问题讨论】:
标签: r data.table