【问题标题】:R: write list of lists of lists to a text file with names(R:将列表的列表的列表写入带名称的文本文件)
【发布时间】:2018-10-18 17:01:40
【问题描述】:

我有一个具有这种结构的列表列表:

# Three sample records; each one holds a list of "t" strings and a list of
# "c" strings, and the lists differ in length from record to record.
a <- list(t = list("abc"), c = list("def", "ghi"))
b <- list(t = list("jk", "kl"), c = list("lmn"))
c <- list(t = list("op", "pq", "z"), c = list("qrs", "tuv", "wxy"))
# Top-level list keyed by record name
ls <- list(one = a, two = b, three = c)

我需要将它存储在一个文本文件中。由于并非所有元素的长度都相同,因此在将其转换为数据框并写入文件之前,我需要先填充 nans。我想要一个看起来像这样的文件(这里大致显示为 .csv,但 .txt 很好):

 t1, t2, t3, c1, c2, c3
one,abc,nan,nan,def,ghi,nan
two,jk,kl,nan,lmn,nan,nan
three,op,pq,z,qrs,tuv,wxy

我是 R 新手,所以我只能大致了解如何执行此操作,但无法弄清楚语法。类似这样的伪代码:

lapply(ls, fill nans)
lapply(ls, unlist)
lapply(ls, names=[t1,t2,t3,c1,c2,c3])
df=data.frame(ls)
write.table(df)

有人可以帮我完成这个吗?

编辑:我能够在以下方面取得一些进展:

# Length of the element of `ls` selected by `i` (extracted with `[[`).
ellength <- function(ls, i) {
  element <- ls[[i]]
  length(element)
}

# Return element `i` of `ls`, padded at the end with empty (NULL) list slots
# until it has `m` entries. Elements that already have at least `m` entries
# are returned unchanged.
fillna <- function(ls, i, m) {
  item <- ls[[i]]
  shortfall <- m - length(item)
  if (shortfall <= 0) {
    return(item)
  }
  c(item, vector("list", length = shortfall))
}

# For every record in `ls`, pad its `i` entry (via fillna) to the length of
# the longest `i` entry found across all records (measured via ellength).
make_col <- function(ls, i) {
  lengths_i <- lapply(ls, ellength, i = i)
  longest <- max(unlist(lengths_i))
  lapply(ls, fillna, i = i, m = longest)
}

> matrix(list(make_col(ls,'t'),make_col(ls,'c')))
     [,1]  
[1,] List,3
[2,] List,3

但我仍然无法以任何连贯的方式将其写入文件。这在 Python 中非常简单;我肯定错过了什么。帮忙?

【问题讨论】:

    标签: r list write.table


    【解决方案1】:

    这有帮助吗?

    library(tidyr)
    library(dplyr)

    # Sample input: three named records, each with a scalar "t" and a list "c"
    a <- list(t = "abc", c = list("def", "ghi"))
    b <- list(t = "jk", c = list("lmn"))
    c <- list(t = "op", c = list("qrs", "tuv", "wxy"))
    ls <- list(one = a, two = b, three = c)

    # Flatten the nested list into a one-column data frame
    lx <- as.data.frame(unlist(ls), stringsAsFactors = FALSE)
    # Keep the row names as a regular column
    lx$nms <- rownames(lx)

    # Split the names at the dot into record (v1) and field (v2), then move the
    # values into a column called v3 and drop the auto-named `unlist(ls)` column
    lx <- lx %>%
      separate(nms, c("v1", "v2"), sep = "[.]") %>%
      mutate(v3 = `unlist(ls)`) %>%
      select(-`unlist(ls)`)

    # Reshape to one row per record and one column per field; gaps become NA.
    # NaN is numeric, so it cannot be used to fill character columns.
    lx2 <- spread(lx, v2, v3)
    
    

    更新

    如果您想折叠变量,您可以使用ifelse 替换某些列中的NAs:

    # Collapse the unsuffixed columns into the numbered ones: take c (or t)
    # where it is present, otherwise fall back to c1 (or t1)
    lx2 <- lx2 %>%
      mutate(c_1 = coalesce(c, c1)) %>%
      mutate(t_1 = coalesce(t, t1))
    # Keep the record name plus the collapsed and remaining numbered columns
    lx3 <- lx2[c("v1", "c_1", "c2", "c3", "t_1", "t2", "t3")]
    

    【讨论】:

    • 好的,现在我有: > lx2 v1 c c1 c2 c3 t t1 t2 t3 1 one def ghi abc 2 three qrs tuv wxy op pq z 3 two lmn jk kl 如何把 c 合并到 c1、把 t 合并到 t1?
    • 你可以使用ifelse折叠c和c1,我会更新我的答案来展示一个例子
    • 为 c 工作,但当我这样做时没有工作: lx2
    • 我相信你必须像这样将is.na(c)更改为is.na(t):lx2
    • 是的,你说得对,我刚刚意识到并且正要发表同样的评论
    【解决方案2】:

    考虑在取消列出嵌套列表后构建数据框列表:

    # Flatten the nested list into a named character vector; the names encode
    # the nesting path with dots (e.g. "one.t", "three.c2").
    char_vec <- unlist(ls)

    # Build one single-row data frame per top-level record of `ls`.
    df_list <- lapply(names(ls), function(x) {
      # Select this record's entries by literal name prefix. Searching with
      # grep(x, ...) treats `x` as an unanchored regex, so a record named "t"
      # would also pick up "two.t1" or "three.c2".
      nm <- names(char_vec)
      belongs <- nm == x | startsWith(nm, paste0(x, "."))
      tmp <- data.frame(t(char_vec[belongs]), stringsAsFactors = FALSE)
      # Keep only the part after the last dot ("one.c1" -> "c1")
      names(tmp) <- gsub(".*\\.", "", names(tmp))

      tmp
    })
    
    df_list
    # [[1]]
    #     t  c1  c2
    # 1 abc def ghi
    
    # [[2]]
    #   t1 t2   c
    # 1 jk kl lmn
    
    # [[3]]
    #   t1 t2 t3  c1  c2  c3
    # 1 op pq  z qrs tuv wxy
    

    要将所有数据框项绑定在一起,可以使用 dplyr、data.table,甚至 base R:

    dplyr(使用 bind_rows)

    # bind_rows() is provided by dplyr; load it so this snippet runs on its own
    library(dplyr)

    # Stack the single-row data frames in df_list into one data frame
    final_df1 <- bind_rows(df_list)
    
    # CLEAN UP AND RE-ORDER COLUMNS
    # Where the unsuffixed column t (or c) has a value, copy it into t1 (or c1),
    # then drop the unsuffixed column by setting it to NULL.
    final_df1 <- transform(final_df1, 
                           t1 = ifelse(is.na(t), t1, t),
                           t = NULL,
                           c1 = ifelse(is.na(c), c1, c),
                           c = NULL
                  )
    
    # Sort the columns alphabetically by name
    final_df1 <- final_df1[order(names(final_df1))]
    

    data.table(使用 rbindlist)

    # rbindlist() is provided by data.table, which is not loaded anywhere else
    library(data.table)

    # Stack the single-row data frames; fill = TRUE lets frames with different
    # column sets be combined. The result is converted back to a data.frame.
    final_df2 <- data.frame(rbindlist(df_list, fill = TRUE))
    
    # CLEAN UP AND RE-ORDER COLUMNS
    # Where the unsuffixed column t (or c) has a value, copy it into t1 (or c1),
    # then drop the unsuffixed column by setting it to NULL.
    final_df2 <- transform(final_df2, 
                           t1 = ifelse(is.na(t), t1, t),
                           t = NULL,
                           c1 = ifelse(is.na(c), c1, c),
                           c = NULL
                  )
    
    # Sort the columns alphabetically by name
    final_df2 <- final_df2[order(names(final_df2))]
    

    基础 R(使用 do.call)

    # Gather the column names found across all data frames in df_list
    nms <- names(unlist(df_list))
    
    df_list <- lapply(df_list, function(df) {
      # Add an NA column for every name this data frame lacks, so that all
      # frames share the same columns before rbind
      for (absent in setdiff(nms, names(df))) {
        df[[absent]] <- NA
      }
    
      # Where the unsuffixed column t (or c) has a value, copy it into t1
      # (or c1), then remove the unsuffixed column
      df[["t1"]] <- ifelse(is.na(df[["t"]]), df[["t1"]], df[["t"]])
      df[["t"]] <- NULL
      df[["c1"]] <- ifelse(is.na(df[["c"]]), df[["c1"]], df[["c"]])
      df[["c"]] <- NULL
    
      # Return the columns sorted alphabetically by name
      col_order <- order(names(df))
      df[col_order]
    })
    
    # Stack the aligned single-row data frames
    final_df3 <- do.call(rbind, df_list)
    

    输出

    final_df1
    #    c1   c2   c3  t1   t2   t3
    # 1 def  ghi <NA> abc <NA> <NA>
    # 2 lmn <NA> <NA>  jk   kl <NA>
    # 3 qrs  tuv  wxy  op   pq    z
    
    identical(final_df1, final_df2)
    # [1] TRUE
    identical(final_df1, final_df3)
    # [1] TRUE
    identical(final_df2, final_df3)
    # [1] TRUE
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2020-04-20
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2018-06-01
      相关资源
      最近更新 更多