【问题标题】:Subtraction based on rows(基于行的减法)
【发布时间】:2021-10-04 03:06:42
【问题描述】:
# Example data: one row per patient and timepoint. `PatientID` stays a
# character vector (leading zeros are kept), `Timepoint` is "A" or "B", and
# columns A-G hold the numeric measurements (NA = missing).
df <- data.frame(
  PatientID = c(
    "0002", "0002", "0005", "0005", "0009", "0009", "0018", "0018", "0020",
    "0027", "0039", "0039", "0042", "0043", "0043", "0045", "0046", "0046",
    "0048", "0048", "0055"
  ),
  Timepoint = c(
    "A", "B", "A", "B", "A", "B", "A", "B", "A", "A", "A", "B", "A", "A",
    "B", "A", "A", "B", "A", "B", "A"
  ),
  A = c(
    NA, 977.146, NA, 964.315, NA, 952.311, NA, 950.797, 958.975, 960.712,
    NA, 947.465, 902.852, NA, 985.124, NA, 930.141, 1007.790, 948.848,
    1027.110, 999.414
  ),
  B = c(
    998.988, NA, 998.680, NA, NA, 1020.560, 947.751, 1029.560, 955.540,
    911.606, 964.039, NA, 988.087, 902.367, 959.338, 1029.050, 925.162,
    987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    NA, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752, 969.469,
    943.398, 936.034, 965.292, 996.404, 920.610, 967.047, 986.565, 913.517,
    893.428, 921.606, NA, 929.590, 950.493
  ),
  D = c(
    975.634, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752, 969.469,
    943.398, NA, 965.292, 996.404, NA, 967.047, 986.565, NA, 893.428,
    921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, NA, 954.274, 1005.910, 949.969, 992.820, 977.048,
    934.407, 948.913, NA, NA, NA, 961.375, 955.296, 961.128, 998.119,
    1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    NA, 958.990, NA, NA, 924.680, 955.927, NA, 949.384, 973.348, 984.392,
    943.894, 961.468, 995.368, 994.997, NA, 979.454, 952.605, NA, NA, NA,
    956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

我需要为 df$Timepoint 新增一个水平 C,其各列的值由 Timepoint 为 B 的行减去同一患者 Timepoint 为 A 的行得到(即 C = B - A)。

为了清楚起见,我在下面粘贴了一个小示例,其中包含来自上述数据集的 3 名患者。

感谢您的帮助!

【问题讨论】:

    标签: r row operation arithmetic-expressions


    【解决方案1】:
    library(tidyverse)

    # Reshape so that each (patient, measurement) pair has its A and B values
    # side by side, compute C = B - A, then reshape back to one row per
    # patient and timepoint with the measurements as columns again.
    df %>%
      pivot_longer(-c(PatientID, Timepoint), names_to = "COLS") %>%
      # `id_cols` is named explicitly; recent tidyr releases deprecate
      # supplying it by position.
      pivot_wider(
        id_cols = c(PatientID, COLS),
        names_from = Timepoint,
        values_from = value
      ) %>%
      mutate(C = B - A) %>%
      pivot_longer(-c(PatientID, COLS), names_to = "Timepoint") %>%
      pivot_wider(id_cols = c(PatientID, Timepoint), names_from = COLS)
    
    # A tibble: 39 x 8
       PatientID Timepoint     A     B      C      D      E     G
       <chr>     <chr>     <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
     1 0002      A           NA   999.   NA    976.  1006.   NA  
     2 0002      B          977.   NA   987.   987.  1028.  959. 
     3 0002      C           NA    NA    NA     11.5   21.7  NA  
     4 0005      A           NA   999.  962.   962.    NA    NA  
     5 0005      B          964.   NA   929.   929.   954.   NA  
     6 0005      C           NA    NA   -32.3  -32.3   NA    NA  
     7 0009      A           NA    NA   978.   978.  1006.  925. 
     8 0009      B          952. 1021. 1006.  1006.   950.  956. 
     9 0009      C           NA    NA    27.7   27.7  -55.9  31.2
    10 0018      A           NA   948.  926.   926.   993.   NA  
    # ... with 29 more rows
    

    【讨论】:

      【解决方案2】:

      这应该可以解决问题:

      library(dplyr)

      # Append one "C" row per patient that has a "B" row, holding B - A for
      # every measurement column, then sort so A, B, C sit together.
      bind_rows(
        df,
        df %>%
          # lag() subtracts the previous row, so A must come right before B
          # within each patient; sort instead of trusting the input order.
          arrange(PatientID, Timepoint) %>%
          group_by(PatientID) %>%
          mutate(across(A:G, ~ .x - lag(.x))) %>%
          # Drop the grouping so it does not leak into later steps.
          ungroup() %>%
          filter(Timepoint == "B") %>%
          mutate(Timepoint = "C")
      ) %>%
        arrange(PatientID, Timepoint)
      

      输出:

         PatientID Timepoint        A        B        C        D        E       G
      1       0002         A       NA  998.988       NA  975.634 1006.330      NA
      2       0002         B  977.146       NA  987.140  987.140 1028.070 958.990
      3       0002         C       NA       NA       NA   11.506   21.740      NA
      4       0005         A       NA  998.680  961.810  961.810       NA      NA
      5       0005         B  964.315       NA  929.466  929.466  954.274      NA
      6       0005         C       NA       NA  -32.344  -32.344       NA      NA
      7       0009         A       NA       NA  978.166  978.166 1005.910 924.680
      8       0009         B  952.311 1020.560 1005.820 1005.820  949.969 955.927
      9       0009         C       NA       NA   27.654   27.654  -55.941  31.247
      10      0018         A       NA  947.751  925.752  925.752  992.820      NA
      11      0018         B  950.797 1029.560  969.469  969.469  977.048 949.384
      12      0018         C       NA   81.809   43.717   43.717  -15.772      NA
      

      【讨论】:

        【解决方案3】:

        您可以汇总数据框并将其绑定到原始数据框。

        library(dplyr)

        # Compute B - A per patient for every measurement column, label the
        # result as timepoint "C" and append it to the original rows.
        df %>%
          arrange(PatientID, Timepoint) %>%
          group_by(PatientID) %>%
          # Keep only patients that have both timepoints, so diff() yields
          # exactly one value per group, which is what summarise() expects.
          filter(n() == 2) %>%
          # summarise/mutate are namespace-qualified because plyr, when
          # attached as well, masks them with incompatible versions.
          dplyr::summarise(across(A:G, .fns = diff)) %>%
          ungroup() %>%
          dplyr::mutate(Timepoint = "C", .before = 2) %>%
          bind_rows(df) %>%
          arrange(PatientID, Timepoint)
        
        # PatientID Timepoint     A     B      C      D      E     G
        #   <chr>     <chr>     <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
        # 1 0002      A           NA   999.   NA    976.  1006.   NA  
        # 2 0002      B          977.   NA   987.   987.  1028.  959. 
        # 3 0002      C           NA    NA    NA     11.5   21.7  NA  
        # 4 0005      A           NA   999.  962.   962.    NA    NA  
        # 5 0005      B          964.   NA   929.   929.   954.   NA  
        # 6 0005      C           NA    NA   -32.3  -32.3   NA    NA  
        # 7 0009      A           NA    NA   978.   978.  1006.  925. 
        # 8 0009      B          952. 1021. 1006.  1006.   950.  956. 
        # 9 0009      C           NA    NA    27.7   27.7  -55.9  31.2
        #10 0018      A           NA   948.  926.   926.   993.   NA  
        # … with 19 more rows
        

        【讨论】:

        • 感谢您总是这么快,Ronak!它给了我这个错误:“across() 只能在 dplyr 动词中使用。运行 rlang::last_error() 以查看错误发生的位置。”我的数据集的列要多得多,这可能是错误的原因吗? - 谢谢!
        • 您可能还加载了 plyr。尝试使用 dplyr::summarise 和 dplyr::mutate。
        • 现在可以使用了!它有效(减法和创建额外的行)但它删除了列df$Timepoint!你认为这是为什么?
        • 这很奇怪。尝试从mutate 中删除.before = 2
        • 对不起!它确实有效!太好了,非常感谢!
        猜你喜欢
        • 1970-01-01
        • 2018-09-21
        • 1970-01-01
        • 2015-05-08
        • 1970-01-01
        • 1970-01-01
        • 2021-11-21
        • 2017-07-19
        • 1970-01-01
        相关资源
        最近更新 更多