【问题标题】:data transformation using R [closed]使用 R 进行数据转换
【发布时间】:2022-01-22 05:59:13
【问题描述】:

我得到了一些这样的数据

# Sample input (dput output): 12 rows for ids 1-3, one row per event, with
# `event` alternating "hosp"/"out". `dead` and `futime` repeat on every row of
# an id. Note that the last two event_time values for id 3 (387, 377.9) are
# not in increasing order.
structure(list(id = c(1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3), dead = c(1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), futime = c(2062, 2062, 2062, 
2062, 2151, 2151, 388, 388, 388, 388, 388, 388), event = c("hosp", 
"out", "hosp", "out", "hosp", "out", "hosp", "out", "hosp", "out", 
"hosp", "out"), event_time = c(36, 52, 775, 776, 1268, 1283, 
178, 192, 271, 272, 387, 377.9)), class = "data.frame", row.names = c(NA, 
-12L))

我想让它看起来像这样

# Desired output (dput output): the 12 input rows in their original order plus
# one appended row per id ("death" for ids 1 and 2, "censored" for id 3) whose
# event_time2 equals that id's futime. dead2 is 1 only on the "death" rows and
# futime2 is filled only on the appended rows; every other entry is NA.
structure(list(id2 = c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 
3, 3), dead2 = c(NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, 
NA, NA, NA), futime2 = c(NA, NA, NA, NA, 2062, NA, NA, 2151, 
NA, NA, NA, NA, NA, NA, 388), event2 = c("hosp", "out", "hosp", 
"out", "death", "hosp", "out", "death", "hosp", "out", "hosp", 
"out", "hosp", "out", "censored"), event_time2 = c(36, 52, 775, 
776, 2062, 1268, 1283, 2151, 178, 192, 271, 272, 387, 377.9, 
388)), class = "data.frame", row.names = c(NA, -15L))

所以基本上，我希望 dead 和 futime 列的值只出现在每个 id 的最后一行（即新增的一行）中，其余行为 NA；同时 event 列按顺序列出所有事件，并在最后追加一个终止事件：dead == 1 时为 "death"，否则为 "censored"，其 event_time 等于 futime。谢谢

【问题讨论】:

    标签: r dplyr


    【解决方案1】:

    我没有在结果的列名中添加“2”,但如果需要,您可以轻松地进行更改。

    library(dplyr)
    # Build one terminal row per id from the first row of that id: the event
    # is "death" when dead == 1 and "censored" otherwise, and its event_time
    # is the follow-up time. The grouping is dropped right after slice() so
    # that `last_rows` does not stay grouped for later steps.
    last_rows <- df %>%
      select(id, dead, futime) %>%
      group_by(id) %>%
      slice(1) %>%
      ungroup() %>%
      mutate(
        event = ifelse(dead == 1, "death", "censored"),
        event_time = futime
      )

    # Blank out dead/futime on the original event rows, append the terminal
    # rows, and sort each id's rows by event_time. Note that the sort
    # reorders original rows whose event_time values are not increasing.
    result <- df %>%
      mutate(
        dead = NA,
        futime = NA
      ) %>%
      bind_rows(last_rows) %>%
      arrange(id, event_time)

    result
    #    id dead futime    event event_time
    # 1   1   NA     NA     hosp       36.0
    # 2   1   NA     NA      out       52.0
    # 3   1   NA     NA     hosp      775.0
    # 4   1   NA     NA      out      776.0
    # 5   1    1   2062    death     2062.0
    # 6   2   NA     NA     hosp     1268.0
    # 7   2   NA     NA      out     1283.0
    # 8   2    1   2151    death     2151.0
    # 9   3   NA     NA     hosp      178.0
    # 10  3   NA     NA      out      192.0
    # 11  3   NA     NA     hosp      271.0
    # 12  3   NA     NA      out      272.0
    # 13  3   NA     NA      out      377.9
    # 14  3   NA     NA     hosp      387.0
    # 15  3    0    388 censored      388.0
    

    【讨论】:

      【解决方案2】:

      这是使用 group_modify 和 add_row 的一种方法

      library(dplyr)
      library(tibble)
      # For every id/futime group, append a closing row and then keep dead and
      # futime only where the steps below leave them.
      df1 %>%
        group_by(id, futime) %>%
        group_modify(function(rows, key) {
          final_dead <- last(rows$dead)
          add_row(
            rows,
            # 1 when the last dead value is non-zero, NA otherwise
            dead = if (final_dead != 0) 1 else NA_real_,
            # `key` is the one-row tibble of grouping values (id, futime)
            event_time = key$futime,
            event = if (final_dead == 1) "death" else "censored"
          )
        }) %>%
        # Still grouped by id/futime: keep dead only on each group's last row.
        mutate(dead = replace(dead, row_number() != n(), NA)) %>%
        group_by(id) %>%
        # Within an id, keep only the last occurrence of each futime value.
        mutate(
          futime = replace(futime, duplicated(futime, fromLast = TRUE), NA)
        ) %>%
        ungroup()
      

      -输出

      # A tibble: 15 × 5
            id futime  dead event    event_time
         <dbl>  <dbl> <dbl> <chr>         <dbl>
       1     1     NA    NA hosp            36 
       2     1     NA    NA out             52 
       3     1     NA    NA hosp           775 
       4     1     NA    NA out            776 
       5     1   2062     1 death         2062 
       6     2     NA    NA hosp          1268 
       7     2     NA    NA out           1283 
       8     2   2151     1 death         2151 
       9     3     NA    NA hosp           178 
      10     3     NA    NA out            192 
      11     3     NA    NA hosp           271 
      12     3     NA    NA out            272 
      13     3     NA    NA hosp           387 
      14     3     NA    NA out            378.
      15     3    388    NA censored       388 
      

      【讨论】:

        猜你喜欢
        • 2021-07-01
        • 2013-06-25
        • 2019-06-29
        • 2019-05-12
        • 1970-01-01
        • 2021-10-18
        • 1970-01-01
        • 2022-01-26
        • 1970-01-01
        相关资源
        最近更新 更多