【问题标题】:Subtract data.table from another data.table when unique row ID and column names match(当唯一行 ID 和列名匹配时,从一个 data.table 中减去另一个 data.table)
【发布时间】:2019-07-11 17:09:38
【问题描述】:

我想从 dt1 中减去 dt2,得到 dt3 所示的输出。然后我想对 dt3 取子集,只保留至少包含一个负值的行(如 dt4)。

# Minuend: one row per UID, four numeric value columns.
dt1 <- data.table(
  UID = c(
    "A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"
  ),
  Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
  Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
  Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Subtrahend: covers only some of the UIDs and has no Var4 column.
dt2 <- data.table(
  UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
  Var1 = c(10, 20, 30, 40, 50, 950),
  Var2 = c(100, 2500, 300, 400, 500, 600),
  Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
)

# Hand-written expected result of dt1 - dt2, matched on UID and column name.
# NOTE(review): for UID A003, Var2 is 3000 - 2500 = 500, so the -500 below
# looks like a typo in the expected output -- confirm before relying on it.
dt3 <- data.table(
  UID = c(
    "A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"
  ),
  Var1 = c(90, 200, 280, 370, 500, 560, 650, 800, -50),
  Var2 = c(900, 2000, -500, 3700, 5000, 5600, 6500, 8000, 8400),
  Var3 = c(9000, 20000, 28000, 37000, 50000, 56000, 65000, 80000, 84000),
  Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
)

# Expected subset: the rows of dt3 listed above that hold a negative value.
dt4 <- dt3[c(3, 9)]

【问题讨论】:

  • 我认为 UID=A003 时的 Var2 有笔误:3000 - 2500 = 500,而不是 -500。

标签: r data.table subtraction


【解决方案1】:

另一种可能的方法:

# Work on a copy so the update-by-reference below leaves dt1 untouched.
DT <- copy(dt1)

# Subtracting: for every UID present in dt2, subtract dt2's value from the
# column of the same name in DT. Only the value columns that dt2 actually has
# are touched; inside the join, `i.<col>` refers to dt2's column `<col>`.
cols <- setdiff(names(dt2), "UID")
DT[dt2, on = .(UID), (cols) := .SD - mget(paste0("i.", cols)), .SDcols = cols]

# Subsetting: keep the rows where at least one numeric column is negative.
# The test is restricted to numeric columns so that the character UID column
# is never compared with 0 (that would be a string comparison against "0").
num_cols <- names(DT)[vapply(DT, is.numeric, logical(1))]
DT[DT[, Reduce(`|`, lapply(.SD, `<`, 0)), .SDcols = num_cols]]

输出:

    UID Var1 Var2  Var3  Var4
1: C003  -50 8400 84000 95000

【讨论】:

    【解决方案2】:

    我发现先将其转换为长格式更容易。

    library(data.table)
    
    # Minuend: one row per UID, four numeric value columns.
    dt1 <- data.table(
      UID = c(
        "A001", "A002", "A003", "B001", "B002", "B003", "C001", "C002", "C003"
      ),
      Var1 = c(100, 200, 300, 400, 500, 600, 700, 800, 900),
      Var2 = c(1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000),
      Var3 = c(10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000),
      Var4 = c(15000, 25000, 35000, 45000, 55000, 65000, 75000, 85000, 95000)
    )

    # Subtrahend: covers only some of the UIDs and has no Var4 column.
    dt2 <- data.table(
      UID = c("A001", "A003", "B001", "B003", "C001", "C003"),
      Var1 = c(10, 20, 30, 40, 50, 950),
      Var2 = c(100, 2500, 300, 400, 500, 600),
      Var3 = c(1000, 2000, 3000, 4000, 5000, 6000)
    )
    
    # Reshape both tables to long format (one row per UID/variable pair) and
    # join dt2's long form onto dt1's, so every cell of dt1 is kept. Pairs that
    # are absent from dt2 get value = NA, which na.rm = TRUE treats as
    # "subtract nothing". The result is stored under its own name instead of
    # overwriting the input dt2, so the snippet can be re-run safely.
    long_delta <- melt(dt2, "UID")[
      melt(dt1, "UID"),
      on = c("UID", "variable")
    ][
      , .(delta = sum(i.value, -1 * value, na.rm = TRUE)),
      by = .(UID, variable)
    ]

    # Back to wide format; value.var is given explicitly rather than guessed.
    dt3 <- dcast(long_delta, UID ~ variable, value.var = "delta")

    # Keep every UID that has at least one negative difference, then widen.
    negative_uids <- long_delta[delta < 0, UID]
    dt4 <- dcast(
      long_delta[UID %in% negative_uids],
      UID ~ variable,
      value.var = "delta"
    )
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2019-08-07
      • 2021-04-09
      • 1970-01-01
      • 1970-01-01
      • 2022-01-07
      • 2023-03-27
      相关资源
      最近更新 更多