【问题标题】:Replicate each row and change one column into binary values复制每一行并将一列更改为二进制值
【发布时间】:2017-04-23 09:08:43
【问题描述】:
# Example input: each row should be repeated `n` times, with `survive` of
# those copies flagged 1 and the remaining copies flagged 0.
df <- data.frame(
  n = c(3, 2, 2),
  survive = c(2, 1, 2),
  a = c(1, 1, 0),
  b = c(0, 0, 1)
)

如何扩展上面 data.frame 的最后两列,使每一行重复出现“n”列所指定的次数,并让第二列“survive”根据其数值变成 0/1 二进制值(每行的前 survive 个副本为 1,其余副本为 0)?

换句话说:

n  survive a  b
3  2       1  0
2  1       1  0
2  2       0  1

变成这样:

survive a  b
1       1  0
1       1  0
0       1  0
1       1  0
0       1  0
1       0  1
1       0  1

【问题讨论】:

  • @Sotos 这个重复标记不是我加的,是别人在评论里提出的
  • 示例数据与代码块中的数据不同。
  • @Sotos 我重新打开了它。我以为是基于复制的

标签: r


【解决方案1】:

几种替代解决方案:

1) 使用基础 R:

# Repeat each row index `n` times. seq_len() stays empty for a zero-row data
# frame, whereas 1:nrow(df) would yield c(1, 0) and make rep() fail.
rn <- rep(seq_len(nrow(df)), df$n)
df2 <- df[rn, ]
# sequence() numbers the copies 1..n within each original row; the first
# `survive` copies get 1 and the remaining copies get 0.
df2$survive <- as.integer(df2$survive >= sequence(df$n))

给出:

> df2[,-1]
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

2)使用data.table-包:

library(data.table)
# Tag every row with its index so that each row forms its own group, expand
# each group into `survive` ones followed by `n - survive` zeros (a and b are
# recycled to the group length), then drop the helper index again.
df2 <- setDT(df)[
  , rid := .I
][
  , .(survive = rep(c(1, 0), times = c(survive, n - survive)), a, b),
  by = rid
][
  , rid := NULL
][]

给出:

> df2
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

或者更短一点:

# Shorter variant: group directly on the row index instead of adding an
# explicit id column. Each group expands to `survive` ones followed by
# `n - survive` zeros; the second step removes the column called `nrow`
# (presumably the grouping column created by `by = 1:nrow(df)` -- confirm).
df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
                 ][, nrow := NULL]

3) 使用 dplyr-package:

library(dplyr)
# Number the rows, repeat each row `n` times, then within each original row
# write `survive` ones followed by `n - survive` zeros and drop the helpers.
df %>%
  mutate(rid = row_number()) %>%
  .[rep(1:nrow(df), times = df$n), ] %>%
  group_by(rid) %>%
  mutate(
    survive = rep(
      c(1, 0),
      times = c(unique(survive), unique(n) - unique(survive))
    )
  ) %>%
  ungroup() %>%
  select(-n, -rid)

给出:

# A tibble: 7 × 3
  survive     a     b
    <dbl> <dbl> <dbl>
1       1     1     0
2       1     1     0
3       0     1     0
4       1     1     0
5       0     1     0
6       1     0     1
7       1     0     1

使用的数据:

# Input used by the solutions above: `n` is the number of copies wanted for a
# row and `survive` is how many of those copies are flagged 1.
df <- data.frame(
  n = c(3, 2, 2),
  survive = c(2, 1, 2),
  a = c(1, 1, 0),
  b = c(0, 0, 1)
)

【讨论】:

    【解决方案2】:

    一种使用 splitstackshape(用于扩展行)和 dplyr 的解决方案:

    library(splitstackshape)
    library(dplyr)
    
    df %>% 
      # Tag each original row so its copies stay together after expansion;
      # grouping on changes in `survive` would merge adjacent rows that share
      # the same `survive` value.
      mutate(grp = row_number()) %>% 
      expandRows("n") %>% 
      group_by(grp) %>% 
      # The first `survive` copies of each row get 1, every remaining copy 0.
      # (Indexing with replace()/tail() only ever zeroed a single element.)
      mutate(survive = as.numeric(row_number() <= survive)) %>% 
      ungroup() %>% 
      select(-grp)
    
    # A tibble: 7 × 3
    #  survive     a     b
    #    <dbl> <dbl> <dbl>
    #1       1     1     0
    #2       1     1     0
    #3       0     1     0
    #4       1     1     0
    #5       0     1     0
    #6       1     0     1
    #7       1     0     1
    

    【讨论】:

      【解决方案3】:

      我们可以使用base R

      # Repeat every row of the input `df` `n` times, keeping only the columns
      # after the first two (n and survive). seq_len() is safe for zero rows.
      df2 <- df[rep(seq_len(nrow(df)), df$n), -(1:2)]
      row.names(df2) <- NULL
      # For each original row emit `survive` ones followed by `n - survive`
      # zeros, and bind the result in front of the repeated columns.
      df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1, 0),
                   c(y, x - y)), df$n, df$survive)), df2)
      df2
      #  Survive a b
      #1       1 1 0
      #2       1 1 0
      #3       0 1 0
      #4       1 1 0
      #5       0 1 0
      #6       1 0 1
      #7       1 0 1
      

      或者更矢量化的方法是

      # Repeat each row `n` times and drop the first two columns.
      df1 <- df[rep(seq_len(nrow(df)), times = df$n), -(1:2)]
      # Interleave the per-row counts as (survive, n - survive, survive, ...)
      # and use them to repeat the alternating pattern 1, 0, 1, 0, ...
      df1$survive <- rep(
        rep(c(1, 0), times = nrow(df)),
        times = as.vector(rbind(df$survive, df$n - df$survive))
      )
      

      【讨论】:

        【解决方案4】:

        这是在基础 R 中使用拆分/应用/组合方法的解决方案:

        # Split the data frame into one-row pieces, expand each piece to `n`
        # rows carrying `survive` ones followed by zeros, then stack the pieces.
        df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(piece) {
          flags <- rep(c(1, 0), times = c(piece$survive, piece$n - piece$survive))
          cbind(survive = flags, piece[rep(1, times = piece$n), c("a", "b")])
        }))
        

        结果:

              survive a b
        1.1         1 1 0
        1.1.1       1 1 0
        1.1.2       0 1 0
        2.2         1 1 0
        2.2.1       0 1 0
        3.3         1 0 1
        3.3.1       1 0 1
        

        【讨论】:

          猜你喜欢
          • 2018-12-03
          • 1970-01-01
          • 1970-01-01
          • 2020-12-07
          • 2010-11-29
          • 1970-01-01
          • 2019-05-08
          • 2013-04-10
          • 1970-01-01
          相关资源
          最近更新 更多