【问题标题】:Add rows to a data-frame based on values in one of the columns根据其中一列中的值将行添加到数据框
【发布时间】:2019-02-07 02:34:42
【问题描述】:

目前数据框看起来像这样:

   Scenario Month            A            B            C
         1     1 -0.593186301  1.045550808 -0.593816304
         1     2  0.178626141  2.043084432  0.111370583
         1     3  1.205779717 -0.324083723 -1.397716949
         2     1  0.933615199  0.052647056 -0.656486153
         2     2  1.647291688 -1.065793671  0.799040546
         2     3  1.613663101 -1.955567231 -1.817457972
         3     1 -0.621991775  1.634069402 -1.404981646
         3     2 -1.899326887 -0.836322394 -1.826351541
         3     3  0.164235141 -1.160701812  1.238246459 

我想在每个 Month = 1 的行上方插入一行,如下所示。我知道有一个 add_row 函数(由 tibble 包提供,随 tidyverse 一起加载),但我想根据条件添加行。非常感谢任何帮助。


    Scenario Month       A            B            C
      0                                             
      1     1 -0.593186301  1.045550808 -0.593816304
      1     2  0.178626141  2.043084432  0.111370583
      1     3  1.205779717 -0.324083723 -1.397716949
      0                                             
      2     1  0.933615199  0.052647056 -0.656486153
      2     2  1.647291688 -1.065793671  0.799040546
      2     3  1.613663101 -1.955567231 -1.817457972
      0                                             
      3     1 -0.621991775  1.634069402 -1.404981646
      3     2 -1.899326887 -0.836322394 -1.826351541
      3     3  0.164235141 -1.160701812  1.238246459

【问题讨论】:

    标签: r dplyr tidyverse


    【解决方案1】:

    使用tidyverse 的解决方案。

    library(tidyverse)
    
    # Cut the data into one piece per Scenario value, put a one-row tibble
    # holding only Scenario = 0 in front of each piece (bind_rows() fills the
    # other columns with NA), and stack all pieces back into a single table.
    dat2 <- map_dfr(
      split(dat, f = dat$Scenario),
      function(piece) {
        bind_rows(tibble(Scenario = 0), piece)
      }
    )
    dat2
    # # A tibble: 12 x 5
    #    Scenario Month       A        B       C
    #       <dbl> <int>   <dbl>    <dbl>   <dbl>
    #  1        0    NA  NA      NA       NA    
    #  2        1     1  -0.593   1.05    -0.594
    #  3        1     2   0.179   2.04     0.111
    #  4        1     3   1.21   -0.324   -1.40 
    #  5        0    NA  NA      NA       NA    
    #  6        2     1   0.934   0.0526  -0.656
    #  7        2     2   1.65   -1.07     0.799
    #  8        2     3   1.61   -1.96    -1.82 
    #  9        0    NA  NA      NA       NA    
    # 10        3     1  -0.622   1.63    -1.40 
    # 11        3     2  -1.90   -0.836   -1.83 
    # 12        3     3   0.164  -1.16     1.24 
    

    数据

    # Example data from the question, parsed from inline text. The first line
    # of the text supplies the column names (Scenario, Month, A, B, C).
    dat <- read.table(text = "Scenario Month            A            B            C
             1     1 -0.593186301  1.045550808 -0.593816304
             1     2  0.178626141  2.043084432  0.111370583
             1     3  1.205779717 -0.324083723 -1.397716949
             2     1  0.933615199  0.052647056 -0.656486153
             2     2  1.647291688 -1.065793671  0.799040546
             2     3  1.613663101 -1.955567231 -1.817457972
             3     1 -0.621991775  1.634069402 -1.404981646
             3     2 -1.899326887 -0.836322394 -1.826351541
             3     3  0.164235141 -1.160701812  1.238246459 ",
                      header = TRUE)
    

    【讨论】:

      【解决方案2】:

      不知为何,add_row 的 .before 参数不接受多个值。

      一种方法是在每个 Month = 1 的位置用 split 把数据框拆分成若干子数据框,然后对每个子数据框用 add_row 在 Month = 1 的那一行上方添加一行。

      library(tidyverse)
      # A new chunk starts at every Month == 1 row (running count of such rows).
      # In each chunk, insert a row with Scenario = 0 directly above the
      # Month == 1 row; the chunks are then combined into one data frame.
      map_df(
        split(df, cumsum(df$Month == 1)),
        function(chunk) {
          add_row(chunk, Scenario = 0, .before = which(chunk$Month == 1))
        }
      )
      
      #   Scenario Month          A           B          C
      #1         0    NA         NA          NA         NA
      #2         1     1 -0.5931863  1.04555081 -0.5938163
      #3         1     2  0.1786261  2.04308443  0.1113706
      #4         1     3  1.2057797 -0.32408372 -1.3977169
      #5         0    NA         NA          NA         NA
      #6         2     1  0.9336152  0.05264706 -0.6564862
      #7         2     2  1.6472917 -1.06579367  0.7990405
      #8         2     3  1.6136631 -1.95556723 -1.8174580
      #9         0    NA         NA          NA         NA
      #10        3     1 -0.6219918  1.63406940 -1.4049816
      #11        3     2 -1.8993269 -0.83632239 -1.8263515
      #12        3     3  0.1642351 -1.16070181  1.2382465
      

      【讨论】:

        【解决方案3】:

        这是data.table的一个选项

        library(data.table)
        # Convert df1 to a data.table, then work group-wise by Scenario:
        #  1. Index each group's rows (.SD) with c(.N+1, seq_len(.N)), i.e. the
        #     position one past the group's last row first, followed by rows
        #     1..N. Judging by the printed result, the out-of-range position
        #     produces an all-NA row at the top of every group.
        #  2. In the second bracket, the first row of each Scenario value
        #     (!duplicated(Scenario)) -- the NA row just added -- gets
        #     Scenario set to 0.
        # The trailing [] is presumably there so the result is printed after
        # the := assignment.
        setDT(df1)[, .SD[c(.N+1, seq_len(.N))], Scenario][
                           !duplicated(Scenario), Scenario := 0][]
        #   Scenario Month          A           B          C
        # 1:        0    NA         NA          NA         NA
        # 2:        1     1 -0.5931863  1.04555081 -0.5938163
        # 3:        1     2  0.1786261  2.04308443  0.1113706
        # 4:        1     3  1.2057797 -0.32408372 -1.3977169
        # 5:        0    NA         NA          NA         NA
        # 6:        2     1  0.9336152  0.05264706 -0.6564862
        # 7:        2     2  1.6472917 -1.06579367  0.7990405
        # 8:        2     3  1.6136631 -1.95556723 -1.8174580
        # 9:        0    NA         NA          NA         NA
        #10:        3     1 -0.6219918  1.63406940 -1.4049816
        #11:        3     2 -1.8993269 -0.83632239 -1.8263515
        #12:        3     3  0.1642351 -1.16070181  1.2382465
        

        或者像 @chinsoon12 在评论中提到的那样

        # For each Scenario group, stack two pieces with rbindlist():
        #  - a one-entry list holding only Scenario = 0L (the marker row), and
        #  - the group's own columns (.SD) with a Scenario column, repeated .N
        #    times, put back in front of them.
        # use.names=TRUE matches the pieces by column name; fill=TRUE presumably
        # pads the columns missing from the marker row with NA.
        # The final [, -1L] drops the first column of the grouped result --
        # presumably the grouping column added by `by`, so that only the
        # Scenario column built inside rbindlist() remains.
        setDT(df1)[, rbindlist(.(.(Scenario=0L), c(.(Scenario=rep(Scenario, .N)), 
                .SD)), use.names=TRUE, fill=TRUE), by=.(Scenario)][, -1L]
        

        数据

        # Example data as a plain data.frame with 9 rows: integer columns
        # Scenario and Month, and numeric columns A, B and C.
        df1 <- structure(list(Scenario = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L
        ), Month = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), A = c(-0.593186301, 
        0.178626141, 1.205779717, 0.933615199, 1.647291688, 1.613663101, 
        -0.621991775, -1.899326887, 0.164235141), B = c(1.045550808, 
        2.043084432, -0.324083723, 0.052647056, -1.065793671, -1.955567231, 
        1.634069402, -0.836322394, -1.160701812), C = c(-0.593816304, 
        0.111370583, -1.397716949, -0.656486153, 0.799040546, -1.817457972, 
        -1.404981646, -1.826351541, 1.238246459)), class = "data.frame", 
        row.names = c(NA, 
        -9L))
        

        【讨论】:

        • 另一种可能性:df1[, rbindlist(.(.(Scenario=0L), c(.(Scenario=rep(Scenario, .N)), .SD)), use.names=TRUE, fill=TRUE), by=.(Scenario)][, -1L]
        【解决方案4】:

        这是一个使用base R的简单方法(没有循环)-

        # Repeat every Month == 1 row twice and every other row once. The first
        # copy of each repeated row will become the inserted marker row.
        # seq_len() keeps this valid for a 0-row data frame, and %in% treats an
        # NA in Month as "not 1" instead of handing rep() an NA repeat count.
        row_idx <- rep(seq_len(nrow(df)), (df$Month %in% 1) + 1L)
        df1 <- df[row_idx, ]
        
        # Identify the inserted rows by position (an index that occurs again
        # later), not by comparing row contents, so rows that are genuinely
        # duplicated in the input are left untouched.
        is_marker <- duplicated(row_idx, fromLast = TRUE)
        df1[is_marker, ] <- NA
        
        # Only the inserted rows get Scenario = 0; Scenario values that were
        # already NA in the input stay NA.
        df1$Scenario[is_marker] <- 0
        
        df1
            Scenario Month          A           B          C
        1          0    NA         NA          NA         NA
        1.1        1     1 -0.5931863  1.04555081 -0.5938163
        2          1     2  0.1786261  2.04308443  0.1113706
        3          1     3  1.2057797 -0.32408372 -1.3977169
        4          0    NA         NA          NA         NA
        4.1        2     1  0.9336152  0.05264706 -0.6564862
        5          2     2  1.6472917 -1.06579367  0.7990405
        6          2     3  1.6136631 -1.95556723 -1.8174580
        7          0    NA         NA          NA         NA
        7.1        3     1 -0.6219918  1.63406940 -1.4049816
        8          3     2 -1.8993269 -0.83632239 -1.8263515
        9          3     3  0.1642351 -1.16070181  1.2382465
        

        数据 -

        # Example data as a plain data.frame with 9 rows: integer columns
        # Scenario and Month, and numeric columns A, B and C.
        df <- structure(list(Scenario = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L
        ), Month = c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), A = c(-0.593186301, 
        0.178626141, 1.205779717, 0.933615199, 1.647291688, 1.613663101, 
        -0.621991775, -1.899326887, 0.164235141), B = c(1.045550808, 
        2.043084432, -0.324083723, 0.052647056, -1.065793671, -1.955567231, 
        1.634069402, -0.836322394, -1.160701812), C = c(-0.593816304, 
        0.111370583, -1.397716949, -0.656486153, 0.799040546, -1.817457972, 
        -1.404981646, -1.826351541, 1.238246459)), class = "data.frame", row.names = c(NA, 
        -9L))
        

        【讨论】:

          猜你喜欢
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 1970-01-01
          • 2022-08-13
          • 1970-01-01
          • 1970-01-01
          相关资源
          最近更新 更多