【问题标题】:How to concatenate data.frame inside lists by using names?如何使用名称连接列表内的 data.frame?
【发布时间】:2019-04-07 20:31:30
【问题描述】:

我必须导入超过 1,000 个 excel 文件,每个 excel 包含多个工作表(有些工作表名称相同,有些工作表名称不同)。

下面举个小例子吧

# Workbook 1: sheets "games", "weather" and "cars".
games <- data.frame(index = c(1, 2, 3), player = c("John", "Sam", "Mary"))
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy")
)
cars <- data.frame(index = c(1, 2, 3), car = c("honda", "toyota", "bmw"))
list1 <- list(games = games, weather = weather, cars = cars)

# Workbook 2: shares "games" and "weather" with workbook 1, adds "sport".
games <- data.frame(index = c(1, 2, 3), player = c("AA", "BB", "CC"))
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot")
)
sport <- data.frame(
  index = c(1, 2, 3),
  interest = c("swim", "soccer", "rugby")
)
list2 <- list(games = games, weather = weather, sport = sport)

# Workbook 3: the same "games" and "weather" sheets as workbook 2.
list3 <- list2[c("games", "weather")]

# Remove the temporaries so that only list1, list2 and list3 remain.
rm(list = c("games", "sport", "weather", "cars"))

我正在寻找按列表名称合并这些列表的方法。我曾尝试使用 merge() 和 mapply(),但它们都没有返回我想要的结果。

我期望得到的结果如下:

   $`games`
# A tibble: 6 x 2
  index player
  <dbl> <chr> 
1     1 John  
2     2 Sam   
3     3 Mary  
4     1 AA    
5     2 BB    
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
  <dbl> <chr>      
1     1 hot        
2     2 cold       
3     3 rainy      
4     1 cold       
5     2 rainy      
6     3 hot        

$cars
# A tibble: 3 x 2
  index car   
  <dbl> <chr> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
  index interest
1     1     swim
2     2   soccer
3     3    rugby

编辑:我遇到过 list2 中有 data.frame sport 的情况(不在 list1 中)

【问题讨论】:

    标签: r list dataframe merge mapply


    【解决方案1】:

    您可以使用 purrr 来帮助操作列表。我添加 stringsAsFactors = FALSE 只是为了让 data.frame 能够顺利绑定。如果您已经在使用 tibble,则不会有这个问题。

    • 我创建了一个列表。
    • transpose 更改列表以按名称重新组合元素。基本上,x[[1]][[2]] 等价于 transpose(x)[[2]][[1]]
    • 我使用 map 遍历列表,并使用 dplyr::bind_rows 把同名的 data.frame 按行合并,得到最终结果。
    # Keep string columns as character vectors (not factors) for the binding
    # step below. NOTE: this changes a session-wide option.
    options(stringsAsFactors = FALSE)
    library(purrr)

    # Sheets of the first workbook.
    games <- data.frame(index = c(1, 2, 3), player = c("John", "Sam", "Mary"))
    weather <- data.frame(
      index = c(1, 2, 3),
      temperature = c("hot", "cold", "rainy")
    )
    cars <- data.frame(index = c(1, 2, 3), car = c("honda", "toyota", "bmw"))
    list1 <- list(games = games, weather = weather, cars = cars)

    # Sheets of the second workbook (no "cars" sheet here).
    games <- data.frame(index = c(1, 2, 3), player = c("AA", "BB", "CC"))
    weather <- data.frame(
      index = c(1, 2, 3),
      temperature = c("cold", "rainy", "hot")
    )
    list2 <- list(games = games, weather = weather)

    # transpose() regroups the elements by name, so that all "games" sheets end
    # up together, all "weather" sheets together, and so on; bind_rows() then
    # stacks each group into a single data frame.
    map(transpose(list(list1, list2)), dplyr::bind_rows)
    #> $games
    #>   index player
    #> 1     1   John
    #> 2     2    Sam
    #> 3     3   Mary
    #> 4     1     AA
    #> 5     2     BB
    #> 6     3     CC
    #> 
    #> $weather
    #>   index temperature
    #> 1     1         hot
    #> 2     2        cold
    #> 3     3       rainy
    #> 4     1        cold
    #> 5     2       rainy
    #> 6     3         hot
    #> 
    #> $cars
    #>   index    car
    #> 1     1  honda
    #> 2     2 toyota
    #> 3     3    bmw
    

    reprex package (v0.2.1) 于 2018 年 11 月 4 日创建

    如果第一个列表没有包含您想要的所有元素,则需要在 transpose() 中提供 .names 参数。见 help("transpose", package = "purrr")。我为此构建了一个示例。

    # Keep string columns as character vectors (not factors) for the binding
    # step below. NOTE: this changes a session-wide option.
    options(stringsAsFactors = FALSE)
    library(purrr)

    # First workbook: only "games" and "weather".
    games <- data.frame(index = c(1, 2, 3), player = c("John", "Sam", "Mary"))
    weather <- data.frame(
      index = c(1, 2, 3),
      temperature = c("hot", "cold", "rainy")
    )
    list1 <- list(games = games, weather = weather)

    # Second workbook: additionally carries a "cars" sheet.
    games <- data.frame(index = c(1, 2, 3), player = c("AA", "BB", "CC"))
    weather <- data.frame(
      index = c(1, 2, 3),
      temperature = c("cold", "rainy", "hot")
    )
    cars <- data.frame(index = c(1, 2, 3), car = c("honda", "toyota", "bmw"))
    list2 <- list(games = games, weather = weather, cars = cars)

    all_list <- list(list1, list2)
    # Union of the sheet names found in any of the lists.
    all_names <- reduce(map(all_list, names), union)

    # Passing .names makes transpose() regroup by every sheet name rather than
    # only those of the first list; bind_rows() then stacks each group.
    map(transpose(all_list, .names = all_names), dplyr::bind_rows)
    #> $games
    #>   index player
    #> 1     1   John
    #> 2     2    Sam
    #> 3     3   Mary
    #> 4     1     AA
    #> 5     2     BB
    #> 6     3     CC
    #> 
    #> $weather
    #>   index temperature
    #> 1     1         hot
    #> 2     2        cold
    #> 3     3       rainy
    #> 4     1        cold
    #> 5     2       rainy
    #> 6     3         hot
    #> 
    #> $cars
    #>   index    car
    #> 1     1  honda
    #> 2     2 toyota
    #> 3     3    bmw
    

    reprex package (v0.2.1) 于 2018 年 11 月 4 日创建

    【讨论】:

    • 如果list2包含不在list1中的data.frame怎么办?我试过你的代码。它只返回 list1 中的 data.frame。您能否提供更多建议?谢谢
    • 您需要通过 transpose() 的 .names 参数提供所有列表名称的并集。正如帮助页面中所解释的,它默认只检查第一个元素。我已更新答案;这一点在 ?transpose 的帮助页面中也有说明。
    【解决方案2】:

    lapply() 有一个简单的方法。

    # For every sheet name that occurs in list1, list2 or list3, stack the
    # matching data frames (a list without that sheet contributes NULL, which
    # rbind() ignores) and drop duplicated rows.
    # The names are taken from the same three lists that are bound below, not
    # from every object in the workspace whose name happens to contain "list";
    # otherwise unrelated objects would add spurious NULL entries to the result.
    lapply(
      unique(unlist(lapply(list(list1, list2, list3), names))),
      function(x) unique(rbind(list1[[x]], list2[[x]], list3[[x]]))
    )
    

    使用 setNames() 和 dplyr::as_tibble 可以得到带名称的列表,并把每个元素转换为 tibble。

    像这样:

    # All sheet names that occur in any of the three lists. They are derived
    # from list1/list2/list3 directly, so this snippet does not depend on a
    # helper object having been created beforehand.
    nms <- unique(unlist(lapply(list(list1, list2, list3), names)))

    # Per sheet name: stack the matching data frames, drop duplicated rows and
    # convert to a tibble; setNames() labels each result with its sheet name.
    setNames(
      lapply(nms, function(x) {
        dplyr::as_tibble(unique(rbind(list1[[x]], list2[[x]], list3[[x]])))
      }),
      nms
    )
    

    输出

    $`games`
    # A tibble: 6 x 2
      index player
    * <dbl> <fct> 
    1     1 John  
    2     2 Sam   
    3     3 Mary  
    4     1 AA    
    5     2 BB    
    6     3 CC    
    
    $weather
    # A tibble: 6 x 2
      index temperature
    * <dbl> <fct>      
    1     1 hot        
    2     2 cold       
    3     3 rainy      
    4     1 cold       
    5     2 rainy      
    6     3 hot        
    
    $cars
    # A tibble: 3 x 2
      index car   
    * <dbl> <fct> 
    1     1 honda 
    2     2 toyota
    3     3 bmw   
    
    $sport
    # A tibble: 3 x 2
      index interest
    * <dbl> <fct>   
    1     1 swim    
    2     2 soccer  
    3     3 rugby  
    

    然而,如果列表的数量未知,假设全局环境中所有列表的名称都符合 “list” 模式(即以 list 开头),您可以采用以下方法。

    # List of lists: every object in the current environment whose name starts
    # with "list". "^list" matches the same names as the earlier "^list+" (the
    # "+" only quantified the final "t"). Non-list objects that merely share the
    # prefix are dropped so they cannot break the merging step afterwards.
    Lol <- Filter(is.list, mget(ls(pattern = "^list")))
    
    # Combine a list of named lists of data frames into one data frame per
    # sub-list name.
    #
    # Args:
    #   z: a list whose elements are *named* lists of data frames, e.g.
    #      list(list1, list2). Data frames stored under the same name are
    #      expected to share their columns. Input columns must not themselves
    #      be called "list", because that name is used for a helper column.
    #
    # Returns:
    #   A named list with one data frame per distinct sub-list name. Rows come
    #   back in merge() order (not input order), rows that are identical
    #   across inputs appear once, and row names are reset. An input without
    #   any data frame yields an empty list.
    mergeFun <- function(z) {
      # Tag every data frame with the name it is stored under. seq_along()
      # rather than 1:length() so that an empty sub-list contributes nothing
      # instead of failing on index 1.
      l1 <- lapply(z, function(y) {
        lapply(seq_along(y), function(x) cbind(y[[x]], list = names(y)[x]))
      })
      l2 <- unlist(l1, recursive = FALSE)  # flatten to one list of data frames
      if (length(l2) == 0L) {
        return(list())
      }
      # Full outer join over all tagged data frames, on their shared columns.
      l3 <- Reduce(function(...) merge(..., all = TRUE), l2)
      l4 <- split(l3, l3$list)  # back to one data frame per sub-list name
      l5 <- lapply(l4, function(w) {
        # Drop the helper column by name: its position depends on which
        # columns the merged inputs had in common, so it is not always the
        # second one. drop = FALSE keeps a data frame even if one column is left.
        w <- w[, names(w) != "list", drop = FALSE]
        # Remove columns that belong to other sheets (all NA for this one).
        Filter(function(v) !all(is.na(v)), w)
      })
      lapply(l5, function(u) `rownames<-`(u, NULL))  # reset row names
    }
    

    如果需要 tibble,请使用 lapply(mergeFun(Lol), dplyr::as_tibble);否则只需 mergeFun(Lol)。

    输出

    > lapply(mergeFun(Lol), dplyr::as_tibble)
    $`games`
    # A tibble: 6 x 2
      index player
      <dbl> <fct> 
    1     1 John  
    2     1 AA    
    3     2 Sam   
    4     2 BB    
    5     3 Mary  
    6     3 CC    
    
    $weather
    # A tibble: 6 x 2
      index temperature
      <dbl> <fct>      
    1     1 cold       
    2     1 hot        
    3     2 cold       
    4     2 rainy      
    5     3 hot        
    6     3 rainy      
    
    $cars
    # A tibble: 3 x 2
      index car   
      <dbl> <fct> 
    1     1 honda 
    2     2 toyota
    3     3 bmw   
    
    $sport
    # A tibble: 3 x 2
      index interest
      <dbl> <fct>   
    1     1 swim    
    2     2 soccer  
    3     3 rugby   
    

    数据

    # Reproducible input data: three named lists of data frames. The string
    # columns are stored as factors (stringsAsFactors = TRUE) and `index` is a
    # double vector.
    list1 <- list(
      games = data.frame(
        index = c(1, 2, 3),
        player = c("John", "Sam", "Mary"),
        stringsAsFactors = TRUE
      ),
      weather = data.frame(
        index = c(1, 2, 3),
        temperature = c("hot", "cold", "rainy"),
        stringsAsFactors = TRUE
      ),
      cars = data.frame(
        index = c(1, 2, 3),
        car = c("honda", "toyota", "bmw"),
        stringsAsFactors = TRUE
      )
    )
    list2 <- list(
      games = data.frame(
        index = c(1, 2, 3),
        player = c("AA", "BB", "CC"),
        stringsAsFactors = TRUE
      ),
      weather = data.frame(
        index = c(1, 2, 3),
        temperature = c("cold", "rainy", "hot"),
        stringsAsFactors = TRUE
      ),
      sport = data.frame(
        index = c(1, 2, 3),
        interest = c("swim", "soccer", "rugby"),
        stringsAsFactors = TRUE
      )
    )
    # list3 holds the same "games" and "weather" sheets as list2.
    list3 <- list2[c("games", "weather")]
    

    【讨论】:

    • 如果list2包含不在list1中的data.frame怎么办?我试过你的代码。它只返回 list1 中的 data.frame。您能否提供更多建议?谢谢
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2012-02-18
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多