【问题标题】:subtracting multiple columns from each other相互减去多列
【发布时间】:2020-09-15 20:06:49
【问题描述】:

我有一个大型数据集,我想根据它们的位置相互减去特定的列。我想从第 8 列中减去第 2 列,从第 9 列中减去第 3 列,从第 10 列中减去第 4 列。

非常感谢

马格纳斯

structure(list(Stamp_summertime = structure(c(1546684744, 1546685858, 
                                              1546687004, 1547030061, 1547030835, 1547031816), tzone = "UTC", class = c("POSIXct", 
                                                                                                                        "POSIXt")), X26.013 = c(0.138461, 0.138461, 0.138461, 0.144421, 
                                                                                                                                                0.144421, 0.144421), X27.024 = c(0.0752111, 0.0752111, 0.0752111, 
                                                                                                                                                                                 0.0426819, 0.0426819, 0.0426819), X33.031 = c(3.75788, 3.75788, 
                                                                                                                                                                                                                               3.75788, 3.12581, 3.12581, 3.12581), jar_camp = c("1_pf1.1", 
                                                                                                                                                                                                                                                                                 "2_pf1.1", "3_pf1.1", "1_pf2.1", "2_pf2.1", "3_pf2.1"), jar = structure(c(1L, 
                                                                                                                                                                                                                                                                                                                                                           12L, 23L, 1L, 12L, 23L), .Label = c("1", "10_blank", "11", "12", 
                                                                                                                                                                                                                                                                                                                                                                                               "13", "14", "15", "16_blank", "17", "18", "19", "2", "20_blank", 
                                                                                                                                                                                                                                                                                                                                                                                               "21", "22", "23", "24", "25", "26", "27", "28", "29", "3", "30_blank", 
                                                                                                                                                                                                                                                                                                                                                                                               "31", "32", "33", "34", "35", "36", "37", "38_blank", "39", "4", 
                                                                                                                                                                                                                                                                                                                                                                                               "40", "41", "42", "43", "44_blank", "45", "46", "47", "48", "49", 
                                                                                                                                                                                                                                                                                                                                                                                               "5_blank", "blank_50", "51", "52", "53", "54", "55", "56", "57", 
                                                                                                                                                                                                                                                                                                                                                                                               "6", "7", "8", "9", "X_blank"), class = "factor"), campaign = c("pf1.1", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                               "pf1.1", "pf1.1", "pf2.1", "pf2.1", "pf2.1"), i.X26.013 = c(0.144658, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           0.21502, 0.458296, 0.191571, 0.0789067, 0.711814), i.X27.024 = c(0.0595547, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            0.0651149, 0.146772, 0.0997815, 0.0539976, 0.185398), i.X33.031 = c(5.4066, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                3.30406, 18.0479, 6.13854, 1.3028, 22.2226)), sorted = "Stamp_summertime", class = c("data.table", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     "data.frame"), row.names = c(NA, -6L), .internal.selfref = <pointer: 0x00000237a3d91ef0>)

【问题讨论】:

    标签: r tidyverse


    【解决方案1】:

    我们可以创建 2 个位置向量并直接减去列。由于您有data.table,我们使用..column_number 按位置选择列。

    library(data.table)

    # Column positions of the two groups; element k of one vector is paired
    # with element k of the other (2 with 8, 3 with 9, 4 with 10).
    col1group <- 2:4
    col2group <- 8:10

    # The `..` prefix tells data.table to take the variable from the calling
    # scope and use its value to select columns.
    # The result is col1group minus col2group; swap the operands to get the
    # opposite sign.
    df[, ..col1group] - df[, ..col2group]
    

    如果您想将它们作为新列添加到原始数据中,您可以重命名它们并cbind

    # Append the differences to the original table; each new column is named
    # after its col1group source column with a "_diff" suffix.
    cbind(
      df,
      setNames(
        object = df[, ..col1group] - df[, ..col2group],
        nm = paste0(names(df)[col1group], "_diff")
      )
    )
    

    【讨论】:

      【解决方案2】:

      类似下面的内容计算问题中的减法。

      library(data.table)

      # Pair every "i."-prefixed column with the column that has the same
      # name without the prefix.
      nms <- names(df1)
      iCols <- grep(pattern = "^i\\.", x = nms, value = TRUE)
      Cols <- sub(pattern = "^i\\.", replacement = "", x = iCols)

      # For each pair, subtract the "i." column from its unprefixed partner.
      # get() resolves a column name held in a string inside the j expression.
      df1[, lapply(
        X = seq_along(Cols),
        FUN = function(k) {
          get(Cols[k]) - get(iCols[k])
        }
      )]
      #           V1         V2        V3
      #1: -0.0061970  0.0156564  -1.64872
      #2: -0.0765590  0.0100962   0.45382
      #3: -0.3198350 -0.0715609 -14.29002
      #4: -0.0471500 -0.0570996  -3.01273
      #5:  0.0655143 -0.0113157   1.82301
      #6: -0.5673930 -0.1427161 -19.09679
      

      参考 Ronak Shah 的回答,我意识到下面的代码也可以工作。

      # Same subtraction without an explicit loop: select each column group by
      # name (Cols and iCols are character vectors) and subtract the two tables.
      df1[, ..Cols] - df1[, ..iCols]
      

      数值结果相同,但列名是向量Cols

      要创建新列,请尝试

      # Build one "<name>_diff" column name per pair, then add the differences
      # to df1 as new columns with `:=`.
      newCols <- paste(Cols, "diff", sep = "_")
      df1[, (newCols) := lapply(
        X = seq_along(Cols),
        FUN = function(k) {
          get(Cols[k]) - get(iCols[k])
        }
      )]
      

      【讨论】:

        【解决方案3】:

        基础 R 解决方案:

        # Column positions: idx holds the columns to subtract, jdx the columns
        # they are subtracted from. The vectors are paired element by element.
        idx <- c(2, 3, 4)
        jdx <- c(8, 9, 10)
        

        使用lapply() 和列绑定列表:

        # For each pair, compute column jdx minus column idx as a one-column
        # data.frame, bind the pieces side by side, and name them x_<jdx>_<idx>.
        setNames(
          do.call(
            "cbind",
            lapply(seq_along(idx), function(k) {
              minuend <- df[, jdx[k], drop = FALSE]
              subtrahend <- df[, idx[k], drop = FALSE]
              minuend - subtrahend
            })
          ),
          paste("x", jdx, idx, sep = "_")
        )
        

        使用 sapply() 并将向量强制转换为 data.frame:

        # Same per-pair subtraction (column jdx minus column idx), collected
        # with sapply(); data.frame() converts the collected result and the
        # columns are then named x_<jdx>_<idx>.
        setNames(
          data.frame(
            sapply(seq_along(idx), function(k) {
              minuend <- df[, jdx[k], drop = FALSE]
              subtrahend <- df[, idx[k], drop = FALSE]
              minuend - subtrahend
            })
          ),
          paste("x", jdx, idx, sep = "_")
        )
        

        使用 Map() 和 Reduce(),并将结果列绑定到原始 data.frame:

        # Map() builds one single-column difference (column jdx minus column
        # idx) per pair, Reduce(cbind, ...) joins them, and the named result is
        # appended to the original data.frame.
        cbind(
          df,
          setNames(
            Reduce(
              cbind,
              Map(function(k) {
                df[, jdx[k], drop = FALSE] - df[, idx[k], drop = FALSE]
              }, seq_along(idx))
            ),
            paste("x", jdx, idx, sep = "_")
          )
        )
        

        【讨论】:

          猜你喜欢
          • 2019-06-28
          • 2018-01-27
          • 2017-04-22
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2017-06-17
          相关资源
          最近更新 更多