复制每一行并将一列更改为二进制值答案

【问题标题】：Replicate each row and change one column into binary values复制每一行并将一列更改为二进制值
【发布时间】：2017-04-23 09:08:43
【问题描述】：

# Example input: one row per group.
#   n       - how many times the row must be repeated
#   survive - how many of those repeats should be flagged 1 (the rest 0)
#   a, b    - columns carried over unchanged
df <- data.frame(
  n       = c(3, 2, 2),
  survive = c(2, 1, 2),
  a       = c(1, 1, 0),
  b       = c(0, 0, 1)
)

如何扩展上面的 data.frame，使每一行重复出现“n”列中指定的次数（保留最后两列“a”和“b”），同时第二列“survive”根据其原值变成二进制值 0/1：每组中前“survive”行为 1，其余行为 0？

换句话说：

n  survive a  b
3  2       1  0
2  1       1  0
2  2       0  1

变成这样：

survive a  b
1       1  0
1       1  0
0       1  0
1       1  0
0       1  0
1       0  1
1       0  1

【问题讨论】：

@Sotos 那个重复问题（dupe）不是我标记的，是别人在评论里提出的
示例数据与代码块中的数据不同。
@Sotos 我已经重新打开了这个问题。我原以为它只是一个按次数复制行的问题

标签： r

【解决方案1】：

几种替代解决方案：

1) 使用基础 R：

# Repeat each row index n times, e.g. n = c(3, 2, 2) -> 1 1 1 2 2 3 3.
# seq_len() (rather than 1:nrow(df)) keeps this valid for a zero-row df.
rn <- rep(seq_len(nrow(df)), df$n)
df2 <- df[rn, ]
# sequence(df$n) numbers the copies of every original row (1..n). A copy is
# flagged 1 while its position does not exceed that row's survive count.
df2$survive <- as.integer(df2$survive >= sequence(df$n))

给出：

> df2[,-1]
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

2) 使用 data.table 包：

library(data.table)
# as.data.table() works on a copy. setDT(df) would convert `df` itself by
# reference and leave the helper column `rid` attached to it.
# rid identifies each original row, so every group holds exactly one row and
# `survive` / `n` are scalars inside j; a and b are recycled to group length.
df2 <- as.data.table(df)[, rid := .I][
  , .(survive = rep(c(1, 0), c(survive, n - survive)), a, b), by = rid
][, rid := NULL][]

给出：

> df2
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

或者更短一点：

# Group by an explicitly named per-row id instead of relying on the name that
# data.table derives from the `by` expression, and use as.data.table() so the
# original `df` is not converted in place.
# The trailing [] makes df2 print normally after the := call.
df2 <- as.data.table(df)[
  , .(survive = rep(c(1, 0), c(survive, n - survive)), a, b),
  by = .(rid = seq_len(nrow(df)))
][, rid := NULL][]

3) 使用 dplyr 包：

library(dplyr)
df %>%
  # Tag every original row so its copies can be grouped after expansion.
  mutate(rid = row_number()) %>%
  # Repeat row i n[i] times; uses the piped data, not the outer `df`.
  slice(rep(rid, n)) %>%
  group_by(rid) %>%
  # Within a group survive and n are constant: emit `survive` ones followed
  # by `n - survive` zeros.
  mutate(survive = rep(c(1, 0), c(survive[1], n[1] - survive[1]))) %>%
  ungroup() %>%
  select(-n, -rid)

给出：

# A tibble: 7 × 3
  survive     a     b
    <dbl> <dbl> <dbl>
1       1     1     0
2       1     1     0
3       0     1     0
4       1     1     0
5       0     1     0
6       1     0     1
7       1     0     1

使用的数据：

# Input used by the solutions above: repeat each row `n` times and flag the
# first `survive` copies with 1; `a` and `b` are carried over as-is.
df <- data.frame(
  n       = c(3, 2, 2),
  survive = c(2, 1, 2),
  a       = c(1, 1, 0),
  b       = c(0, 0, 1)
)

【讨论】：

【解决方案2】：

一种使用 splitstackshape（用于扩展行）和 dplyr 的解决方案：

library(splitstackshape)
library(dplyr)

df %>%
  # Tag each original row. Grouping on runs of equal `survive` would merge
  # neighbouring rows that happen to share the same survive value.
  mutate(rid = row_number()) %>%
  # Repeat every row n times; expandRows() drops the count column afterwards.
  expandRows("n") %>%
  group_by(rid) %>%
  # The first `survive` copies of a row become 1, all remaining copies 0.
  mutate(survive = as.numeric(row_number() <= survive)) %>%
  ungroup() %>%
  select(-rid)

# A tibble: 7 × 3
#  survive     a     b
#    <dbl> <dbl> <dbl>
#1       1     1     0
#2       1     1     0
#3       0     1     0
#4       1     1     0
#5       0     1     0
#6       1     0     1
#7       1     0     1

【讨论】：

【解决方案3】：

我们可以使用base R

# Repeat each row of df n times, keeping only the columns after n and survive.
# seq_len() (rather than 1:nrow(df)) is safe for a zero-row data frame.
df2 <- df[rep(seq_len(nrow(df)), df$n), -(1:2)]
row.names(df2) <- NULL
# For every original row build `survive` ones followed by `n - survive` zeros,
# then put the flattened vector in front as the Survive column.
df2 <- cbind(
  Survive = unlist(Map(function(x, y) rep(c(1, 0), c(y, x - y)),
                       df$n, df$survive)),
  df2
)
df2
#  Survive a b
#1       1 1 0
#2       1 1 0
#3       0 1 0
#4       1 1 0
#5       0 1 0
#6       1 0 1
#7       1 0 1

或者更矢量化的方法是

# Expand: row i of df is repeated df$n[i] times; the n and survive columns
# (positions 1 and 2) are dropped.
df1 <- df[rep(seq_len(nrow(df)), times = df$n), -(1:2)]
# rbind() interleaves the run lengths column-wise (survive_1, n_1 - survive_1,
# survive_2, ...), matching the alternating 1, 0, 1, 0, ... value vector.
df1$survive <- rep(
  rep(c(1, 0), times = nrow(df)),
  times = c(rbind(df$survive, df$n - df$survive))
)

【讨论】：

【解决方案4】：

这是在基础 R 中使用拆分/应用/组合方法的解决方案：

# Split df into one-row pieces, expand each piece, then stack the results.
df2 <- do.call(
  rbind,
  lapply(split(df, seq_len(nrow(df))), function(row) {
    # `survive` ones followed by `n - survive` zeros for this row.
    outcome <- rep(c(1, 0), times = c(row$survive, row$n - row$survive))
    # Repeat the single row n times, keeping only a and b.
    cbind(survive = outcome, row[rep(1, row$n), c("a", "b")])
  })
)

结果：

      survive a b
1.1         1 1 0
1.1.1       1 1 0
1.1.2       0 1 0
2.2         1 1 0
2.2.1       0 1 0
3.3         1 0 1
3.3.1       1 0 1

【讨论】：