【问题标题】:Combining items from a data.frame list into a single data.table(将 data.frame 列表中的项目组合成单个 data.table)
【发布时间】:2013-05-11 13:06:04
【问题描述】:

我有一个带名称的 data.frame 列表,其中包含 10 个项目,每个项目有三列。列表中的每个项目如下所示:

$RandArcBo1
$RandArcBo1$x
 [1]  -97.5 -174.5  156.5 -172.5  111.5 -132.5  142.5  115.5  146.5  171.5  151.5  162.5  168.5 -153.5  129.5  123.5

$RandArcBo1$y
 [1] 86.56732 79.56732 59.56732 77.56732 77.56732 75.56732 58.56732 82.56732 84.56732 74.56732 78.56732 74.56732 76.56732
[14] 77.56732 78.56732 84.56732

$RandArcBo1$Species
[1] "RandArcBo1"

我想将其转换为一个重新排列列的 data.table。我目前的做法是:

# Accumulator for the per-run tables; c() with no arguments evaluates to NULL.
tempList <- c()
for (j in seq_along(dfList)){
   # Build a data.table from element 3 (Species) followed by elements 1 and 2
   # (x, y) of the j-th item.
   # NOTE(review): append() concatenates list elements, so the data.table's
   # columns are presumably spliced into tempList one by one instead of the
   # table being stored as a single item -- consistent with the rbindlist()
   # error quoted below.
   tempList <- append(tempList, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
}
# Bind the collected items and store the result in a variable whose name is
# the value of `name` followed by "RandPoints".
assign(paste(name, "RandPoints", sep = ""), rbindlist(tempList))

但是,当我运行它时,我收到以下错误:

Error in rbindlist(tempList) : 
  Item 1 of list input is not a data.frame, data.table or list

有没有办法解决这个错误并使用 rbindlist,或者可能是一种迭代地将行添加到 data.table 的方法?我也尝试过这种方法,但无法解决如何创建一个空的 data.table 来添加行的问题。

更多信息,这里是我的完整脚本:

#You will need to collect the following in a folder specified as the working directory:
#   ASCII layers representing the Ms for each species to use as a mask
#   a file named "NumbLoc.csv" with names of ASCII layers and the number of points to be randomly generated in each
require(raster)
require(dismo)
require(SDMTools)
require(data.table)
# Collect the .asc files in the working directory and read the table that
# gives the number of random points to generate for each species.
fileList <- list.files(pattern="*.asc", full.names=TRUE)
numbPoints <- read.csv("NumbLoc.csv", header = TRUE, sep = ",", stringsAsFactors=FALSE)
nPoints<-as.data.frame(numbPoints)
# Loop through the files found above.
for (a in 1:length(fileList)){
  # Characters 3-7 of the file path are used as the species name.
  # NOTE(review): presumably this skips a leading "./" added by
  # full.names=TRUE and assumes a five-character name -- confirm.
  name <- substr(fileList[a], 3, 7 )
  print(name)
  # Read the ASCII file as a raster and store it in a variable named after the
  # species; the raster serves as the mask for generating random points.
  assign(name,raster(fileList[a]))
  # Start/clear the collection of random-point sets for this species.
  dfList <- c()
  # Ten iterations; each generates the number of random points listed for this
  # species in the csv file.
  for(i in 1:10){
    # Run label: "Rand" + species name + iteration number.
    nameRand <- paste("Rand", substr(fileList[a], 3, 7),i, sep = "")
    # NOTE(review): wrapping the data frame in c() appears to turn it into a
    # plain list, which matches the dput() output shown further down.
    dfList[[nameRand]]=c(as.data.frame(randomPoints(get(name), nPoints$Number[nPoints$Species==name])))
  }
  # Add a Species element holding the run name to each item, then try to
  # convert each item to a data.table with Species as the first column.
  tempList <- c()
  for (j in seq_along(dfList)){
    dfList[[j]]$Species <- names(dfList[j])
    #assign(tempListName, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
    # NOTE(review): append() concatenates list elements, so the data.table's
    # columns are presumably added to tempList individually rather than as one
    # table -- consistent with the rbindlist() error reported above.
    tempList <- append(tempList, as.data.table(c(dfList[[j]][3], dfList[[j]][1], dfList[[j]][2])))
  }
  # Bind the collected items and store the result as "<name>RandPoints".
  assign(paste(name, "RandPoints", sep = ""), rbindlist(tempList))
}

数据框如下所示:

> dput(head(dfList))
structure(list(RandArcBo1 = structure(list(x = c(-97.5, -174.5, 
156.5, -172.5, 111.5, -132.5, 142.5, 115.5, 146.5, 171.5, 151.5, 
162.5, 168.5, -153.5, 129.5, 123.5), y = c(86.567321777, 79.567321777, 
59.567321777, 77.567321777, 77.567321777, 75.567321777, 58.567321777, 
82.567321777, 84.567321777, 74.567321777, 78.567321777, 74.567321777, 
76.567321777, 77.567321777, 78.567321777, 84.567321777), Species = "RandArcBo1"), .Names = c("x", 
"y", "Species")), RandArcBo2 = structure(list(x = c(170.5, -160.5, 
150.5, 165.5, 78.5, 74.5, -161.5, -129.5, -134.5, -164.5, 166.5, 
-169.5, -156.5, -163.5, 89.5, 150.5), y = c(86.567321777, 76.567321777, 
57.567321777, 70.567321777, 77.567321777, 78.567321777, 74.567321777, 
74.567321777, 72.567321777, 80.567321777, 84.567321777, 85.567321777, 
78.567321777, 80.567321777, 76.567321777, 83.567321777), Species = "RandArcBo2"), .Names = c("x", 
"y", "Species")), RandArcBo3 = structure(list(x = c(179.5, 149.5, 
-153.5, 129.5, 158.5, 169.5, 131.5, -147.5, -140.5, 128.5, 173.5, 
-172.5, -177.5, -91.5, -143.5, 87.5), y = c(63.567321777, 57.567321777, 
72.567321777, 80.567321777, 50.567321777, 58.567321777, 76.567321777, 
72.567321777, 72.567321777, 81.567321777, 58.567321777, 88.567321777, 
88.567321777, 72.567321777, 73.567321777, 82.567321777), Species = "RandArcBo3"), .Names = c("x", 
"y", "Species")), RandArcBo4 = structure(list(x = c(114.5, 156.5, 
76.5, 171.5, -137.5, -140.5, -142.5, 135.5, 152.5, -136.5, -167.5, 
-131.5, 94.5, 154.5, -78.5, -124.5), y = c(83.567321777, 80.567321777, 
78.567321777, 57.567321777, 69.567321777, 73.567321777, 80.567321777, 
84.567321777, 52.567321777, 70.567321777, 67.567321777, 70.567321777, 
85.567321777, 85.567321777, 84.567321777, 81.567321777), Species = "RandArcBo4"), .Names = c("x", 
"y", "Species")), RandArcBo5 = structure(list(x = c(-162.5, 79.5, 
179.5, 166.5, 81.5, 115.5, 155.5, 84.5, 163.5, 166.5, 178.5, 
-119.5, -157.5, 128.5, 118.5, 164.5), y = c(77.567321777, 82.567321777, 
74.567321777, 55.567321777, 81.567321777, 81.567321777, 52.567321777, 
75.567321777, 51.567321777, 73.567321777, 61.567321777, 75.567321777, 
77.567321777, 81.567321777, 84.567321777, 55.567321777), Species = "RandArcBo5"), .Names = c("x", 
"y", "Species")), RandArcBo6 = structure(list(x = c(-148.5, 123.5, 
-130.5, 164.5, -129.5, 168.5, -106.5, 144.5, 166.5, -127.5, -135.5, 
96.5, -95.5, 76.5, -99.5, -144.5), y = c(71.567321777, 78.567321777, 
85.567321777, 70.567321777, 76.567321777, 79.567321777, 85.567321777, 
58.567321777, 56.567321777, 78.567321777, 74.567321777, 76.567321777, 
86.567321777, 73.567321777, 82.567321777, 75.567321777), Species = "RandArcBo6"), .Names = c("x", 
"y", "Species"))), .Names = c("RandArcBo1", "RandArcBo2", "RandArcBo3", 
"RandArcBo4", "RandArcBo5", "RandArcBo6"))

【问题讨论】:

  • 我担心您会感到困惑,因为(就像在您的上一个问题中一样)您似乎将对象称为数据帧,实际上并不是数据帧。您从一个列表开始,它看起来像一个嵌套列表。你确定你真的需要使用 data.table 包吗?为什么不能使用as.data.frame 简单地将这些转换为数据帧?
  • dput(head(your_data_object)) 拜托 - 目前还不清楚你想要什么或拥有什么
  • 绝对是一个数据框。我很抱歉不清楚,我不想发布整个脚本和生成的数据框,因为它们有点大。我已经编辑了我的帖子,以便为您提供完整的图片。
  • 没有。你误会了。 dfList 中的对象不是数据框。
  • 它们是列表。使用str 确定实际是什么对象。你误用了函数c。它用于连接向量。向量。我会尽量重写。

标签: r loops dataframe rows data.table


【解决方案1】:

这是您当前问题的解决方案,但我建议您重新考虑您的初始数据结构:

 # Convert every element of dfList to a data.table, then stack the resulting
 # tables into a single data.table with rbindlist().
 rbindlist(lapply(dfList, as.data.table))

【讨论】:

    【解决方案2】:

    这是一个解开这个问题的尝试。但是你真的把事情搞得太复杂了。

    # read.csv() already returns a data frame, so no as.data.frame() call is
    # needed; keep the nPoints name because the loop below uses it.
    nPoints <- numbPoints

    # Loop through the files in the specified folder.
    # seq_along() is used instead of 1:length() so an empty fileList runs the
    # loop zero times instead of iterating over c(1, 0).
    for (a in seq_along(fileList)) {
      name <- substr(fileList[a], 3, 7)
      print(name)
      # Read the ASCII file as a raster and store it under the species name;
      # the raster serves as the mask for generating random points.
      assign(name, raster(fileList[a]))
      # Start/clear the list of data frames for this species.
      # If it's supposed to be a list, don't use c()!
      dfList <- list()
      # Ten iterations; each generates the number of random points listed for
      # this species in the csv file.
      for (i in 1:10) {
        nameRand <- paste0("Rand", name, i)
        # Again, don't use c()! Keep the result as a real data frame.
        dfList[[nameRand]] <- as.data.frame(
          randomPoints(get(name), nPoints$Number[nPoints$Species == name])
        )
      }
      # Tag each data frame with the name of its randomized run and move the
      # Species column to the front.
      tempList <- vector("list", length(dfList))
      for (j in seq_along(dfList)) {
        dfList[[j]]$Species <- names(dfList)[j]
        # No need to coerce it _again_ if it's already a data frame. Just
        # reorder the columns the easy way.
        # Store by index: append() would splice the data frame's columns into
        # tempList as separate elements, and rbind() would then bind column
        # vectors instead of stacking the data frames.
        tempList[[j]] <- dfList[[j]][, c(3, 1, 2)]
      }
      # Stack all runs and store the result as "<name>RandPoints".
      assign(paste0(name, "RandPoints"), do.call(rbind, tempList))
    }
    

    更一般地说,您的大部分麻烦都来自您对 assign 和 get 等函数的使用。这些函数在 R 中几乎不应该使用,也确实没有必要。例如,脚本的第一部分基本上可以用 4 行来完成:

    # Loop over the file list, calling raster() on each file.
    rasterList <- lapply(fileList, raster)
    # Assign all the names at once (characters 3-7 of each file path).
    names(rasterList) <- substr(fileList, 3, 7)

    # Look up each raster's point count by species name instead of relying on
    # the csv rows being in the same order as the files.
    nPerRaster <- nPoints$Number[match(names(rasterList), nPoints$Species)]
    # Fail early if a raster has no matching row in the csv table.
    stopifnot(!anyNA(nPerRaster))
    # Map() always returns a list; mapply() with its default SIMPLIFY = TRUE
    # can collapse equally sized results into a single matrix.
    # NOTE(review): randomPoints() presumably returns a matrix, hence the
    # as.data.frame() conversion -- confirm against the dismo docs.
    dfList <- Map(
      function(mask, n) as.data.frame(randomPoints(mask, n)),
      rasterList,
      nPerRaster
    )
    names(dfList) <- paste0("Rand", names(rasterList))
    

    此时,我将简单地将数据帧重新组合成一个:

    # Row count of every run, kept so the Species labels can be repeated to
    # match. vapply() guarantees an integer vector even when dfList is empty.
    ns <- vapply(dfList, nrow, integer(1))
    # do.call() takes the function first and the list of arguments second.
    # as.data.frame() ensures the result has columns that can be added to with
    # `$`, even if the elements of dfList are matrices.
    out <- as.data.frame(do.call(rbind, dfList))
    

    然后通过以下方式创建 Species 列:

    # Label every row with its run name: each "Rand<name>" label is repeated as
    # many times as the matching entry of ns (the per-run row counts).
    out$Species <- rep(paste0("Rand",names(rasterList)),times = ns)
    

    【讨论】:

    • 啊,自学幼稚的危险。非常感谢您的帮助。
    • @hannah-o 没问题。我不能确定我的建议是否会在没有修改的情况下完全有效,因为我无法自己从头到尾运行您的代码。但希望它至少能给你一些想法。
    猜你喜欢
    • 2021-03-24
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2015-07-25
    • 2016-07-16
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多