【问题标题】:How to convert lists nested in a tibble into a single tibble with each list item on a new row?如何将嵌套在 tibble 中的列表转换为单个 tibble,每个列表项位于新行上?
【发布时间】:2020-03-05 16:37:59
【问题描述】:

我有一个具有如下结构的 tibble(dput 输出见下文):

# A tibble: 7 x 3
  Date  CC            Variance_Type
  <fct> <list>        <list>       
1 2-17  <dbl [8 x 2]> <chr [8]>    
2 2-18  <dbl [8 x 2]> <chr [8]>    
3 2-19  <dbl [8 x 2]> <chr [8]>    
4 2-20  <dbl [8 x 2]> <chr [8]>    
5 2-21  <dbl [8 x 2]> <chr [8]>    
6 2-22  <dbl [8 x 2]> <chr [8]>    
7 2-23  <dbl [8 x 2]> <chr [8]>    

我想要的输出是一个 56 行 x 4 列的 tibble,其中:

  1. 列表中的项目已取消嵌套到各自的行中。
  2. Date 中的值填满该列以匹配当前所在行中的项目。
  3. CC 列中嵌套表的列被拆分为单独的列。

我一直在尝试结合 mutate()、map() 和 bind_rows() 在 tidyverse 中执行此操作,但我无法使其工作。非常感谢任何帮助。

tibble 的 dput 输出:

structure(list(Date = structure(1:7, .Label = c("2-17", "2-18", 
"2-19", "2-20", "2-21", "2-22", "2-23"), class = "factor"), CC = list(
    structure(c(0.0292, 0.1803, 0.1063, 0.3555, 0.0227, 0.0471, 
    0.2588, 1, 2.92, 18.03, 10.63, 35.55, 2.27, 4.71, 25.88, 
    100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0414, 0.2307, 0.1813, 0.3428, 0.0408, 
    0.0137, 0.1494, 1, 4.14, 23.07, 18.13, 34.28, 4.08, 1.37, 
    14.94, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0348, 0.2298, 0.4534, 0.0453, 0.1252, 
    0.0406, 0.0709, 1, 3.48, 22.98, 45.34, 4.53, 12.52, 4.06, 
    7.09, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0693, 0.1241, 0.3104, 0.0885, 0.1706, 
    0.0563, 0.1807, 1, 6.93, 12.41, 31.04, 8.85, 17.06, 5.63, 
    18.07, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0431, 0.1621, 0.3896, 0.2243, 0.1005, 
    -0.0339, 0.1143, 1, 4.31, 16.21, 38.96, 22.43, 10.05, -3.39, 
    11.43, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0656, 0.1021, 0.2638, 0.1299, 0.2102, 
    0.0061, 0.2223, 1, 6.56, 10.21, 26.38, 12.99, 21.02, 0.61, 
    22.23, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    ))), structure(c(0.0802, 0.1114, 0.6962, 0.0845, 0.1544, 
    -0.0958, -0.0309, 1, 8.02, 11.14, 69.62, 8.45, 15.44, -9.58, 
    -3.09, 100), .Dim = c(8L, 2L), .Dimnames = list(c("Unique to Jsc             ", 
    "Unique to Uoc             ", "Unique to FF              ", 
    "Common to Jsc, and Uoc    ", "Common to Jsc, and FF     ", 
    "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
    "Total                     "), c("Coefficient", "    % Total"
    )))), Variance_Type = list(c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "), c("Unique to Jsc             ", 
"Unique to Uoc             ", "Unique to FF              ", "Common to Jsc, and Uoc    ", 
"Common to Jsc, and FF     ", "Common to Uoc, and FF     ", "Common to Jsc, Uoc, and FF", 
"Total                     "))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -7L))

【问题讨论】:

    标签: r tidyverse


    【解决方案1】:

    这是基于data.table包的另一种解决方案:

    # data.table-based approach: build one long table with a row per
    # (Date, variance type) pair.
    library(data.table)
    # Group by Date and convert the matrix stored in CC for that group into a
    # data.table; the per-group results are stacked with Date as the first column.
    # NOTE(review): the second positional argument of as.data.table() on a matrix
    # is presumably `keep.rownames`; passing the Variance_Type list here appears to
    # only switch row-name retention on (yielding the "rn" column renamed below).
    # Confirm against the data.table docs before relying on it.
    # NOTE(review): setDT() presumably converts `df` in place -- verify if the
    # original tibble is still needed afterwards.
    df2 <- setDT(df)[, as.data.table(.SD$CC[[1]], .SD[["Variance_Type"]]), Date]
    # Rename the "rn" column to the name used in the question.
    setnames(df2, "rn", "Variance_Type")
    
    df2
    #       Date              Variance_Type Coefficient     % Total
    #  1:   2-17 Unique to Jsc                   0.0292        2.92
    #  2:   2-17 Unique to Uoc                   0.1803       18.03
    #  3:   2-17 Unique to FF                    0.1063       10.63
    #  4:   2-17 Common to Jsc, and Uoc          0.3555       35.55
    #  5:   2-17 Common to Jsc, and FF           0.0227        2.27
    #  6:   2-17 Common to Uoc, and FF           0.0471        4.71
    #  7:   2-17 Common to Jsc, Uoc, and FF      0.2588       25.88
    #  8:   2-17 Total                           1.0000      100.00
    #  9:   2-18 Unique to Jsc                   0.0414        4.14
    # 10:   2-18 Unique to Uoc                   0.2307       23.07
    # 11:   2-18 Unique to FF                    0.1813       18.13
    # 12:   2-18 Common to Jsc, and Uoc          0.3428       34.28
    # 13:   2-18 Common to Jsc, and FF           0.0408        4.08
    # 14:   2-18 Common to Uoc, and FF           0.0137        1.37
    # 15:   2-18 Common to Jsc, Uoc, and FF      0.1494       14.94
    # 16:   2-18 Total                           1.0000      100.00
    # 17:   2-19 Unique to Jsc                   0.0348        3.48
    # 18:   2-19 Unique to Uoc                   0.2298       22.98
    # 19:   2-19 Unique to FF                    0.4534       45.34
    # 20:   2-19 Common to Jsc, and Uoc          0.0453        4.53
    # 21:   2-19 Common to Jsc, and FF           0.1252       12.52
    # 22:   2-19 Common to Uoc, and FF           0.0406        4.06
    # 23:   2-19 Common to Jsc, Uoc, and FF      0.0709        7.09
    # ....
    

    【讨论】:

      【解决方案2】:

      虽然这通常是 tidyr 的 unnest() 可以帮助解决的问题类型,但您的 CC 变量是矩阵而不是 data.frame(或 tibble)。这会导致一些问题。将矩阵转换为 data.frame 后,我想您会发现 unnest() 是首选工具。

      因此,我的第一步是在 mutate() 中使用 purrr::map() 遍历 CC 中的矩阵,并将它们转换为 data.frame。然后我使用 unnest() 一次性取消嵌套这两个变量。

      以下是 tidyr 1.0.0 及之后版本中的写法。

      # tidyr >= 1.0.0 syntax: the columns to unnest are passed via `cols`.
      library(tidyr)
      library(purrr)
      library(dplyr)
      dat %>%
           # Convert each matrix in the CC list-column to a data.frame so that
           # its columns can be unnested.
           mutate(CC = map(CC, as.data.frame) ) %>%
           # Unnest both list-columns in one call.
           unnest(cols = c(CC, Variance_Type) )
      
      # A tibble: 56 x 4
         Date  Coefficient `    % Total` Variance_Type               
         <fct>       <dbl>         <dbl> <chr>                       
       1 2-17       0.0292          2.92 "Unique to Jsc             "
       2 2-17       0.180          18.0  "Unique to Uoc             "
       3 2-17       0.106          10.6  "Unique to FF              "
       4 2-17       0.356          35.6  "Common to Jsc, and Uoc    "
       5 2-17       0.0227          2.27 "Common to Jsc, and FF     "
       6 2-17       0.0471          4.71 "Common to Uoc, and FF     "
       7 2-17       0.259          25.9  Common to Jsc, Uoc, and FF  
       8 2-17       1             100    "Total                     "
       9 2-18       0.0414          4.14 "Unique to Jsc             "
      10 2-18       0.231          23.1  "Unique to Uoc             "
      

      对于 tidyr 的早期版本,unnest() 代码看起来略有不同。

      # Pre-1.0.0 tidyr syntax: columns to unnest are passed as bare arguments
      # rather than through `cols`.
      dat %>%
           # Convert each matrix in the CC list-column to a data.frame first.
           mutate(CC = map(CC, as.data.frame) ) %>%
           unnest(CC, Variance_Type )
      

      【讨论】:

      • 用你的 unnest(cols = c(CC, Variance_Type)) 我得到 Error: Column `cols` must be length 7 (the number of rows) or one, not 14。但如果我删除 cols 参数,它会完美运行。
      • 我会再检查一次。你的 tidyr 版本是最新的吗?
      • 啊,我有 tidyr 0.8.3,所以实际上没有。我正在使用 Conda 管理包,所以我认为我落后了。
      • 哦,一定是这样。 Unnest 在 tidyr 版本 1.0.0 中发生了一些变化。
      猜你喜欢
      • 1970-01-01
      • 2020-08-02
      • 2020-08-13
      • 2020-12-16
      • 2018-05-18
      • 1970-01-01
      • 2022-07-07
      • 2021-10-14
      • 2022-01-11
      相关资源
      最近更新 更多