【发布时间】:2014-10-21 19:23:34
【问题描述】:
我有一个包含大约 20 个变量的数据集。数据收集了三年(2012-2014),每年的每个观察都可以按Site和Plot分组。
我想找出不同年份之间每次观察的差异(即价值变化)。自从我在 2012 年开始收集数据以来,我想找出 2013 年和 2012 年以及 2014 年和 2013 年之间的区别(2014 - 2012 年,也欢迎)。
首先,我对plyr 和data.table 进行了一些尝试,试图找出一个变量的年份差异。这些都没有成功。我不确定如何在按Site 和Plot 分组时调用一个操作来减去Year。此外,我不知道在哪里存储它。
我已经包含了一个只有 3 个变量的示例数据集。我还包含了一个示例所需的输出 (output),但是,格式并不重要。我对价值观更感兴趣。
如果ddply 或data.table 看起来是不错的选择,我会很感激一些建议。如果它们似乎是这项工作的错误工具,我也会很感激这些信息。虽然此处未包含,但完整日期 (YYYY-mm-dd) 可以包含在数据集中。 Site 中的所有 Plots 都是在给定年份的同一天和同一个月观察到的。
require(plyr)
require(data.table)
# df data.frame
df <- structure(list(Site = c(1, 1, 2, 2, 2, 3.2, 3.2, 3.2, 3.2, 1,
1, 2, 2, 2, 3.2, 3.2, 3.2, 3.2, 1, 1, 2, 2, 2, 3.2, 3.2, 3.2,
3.2), Plot = c(1L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 1L,
2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 4L),
Year = c(2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L), V1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L), VH1 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 10L, 10L, NA, 10L, 10L,
10L, 10L, 10L, 10L), V2 = c(3L, 3L, 3L, 3L, 3L, NA, 3L, 3L,
3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L)), .Names = c("Site", "Plot", "Year",
"V1", "VH1", "V2"), row.names = c(NA, -27L), class = "data.frame")
# as data.table
dt <- as.data.table(df)
# ddply attempt for just one variable: V1
ans.d <- ddply(df, .(Year, Site, Plot), transform, V1.1213=c(NA,diff(V1)))
# data.table attempt for just one variable: V1
KEY <- c("Year", "Site", "Plot")
setkeyv(dt, KEY)
ans.dt <- dt[,diff:=c(NA,diff(V1)),by=KEY]
ans.dt1 <- dt[, c('V1.1','V1.1213') := list(V1[1], V1 - V1[1]),by = KEY]
# data table attempt with diff key
KEY2 <- c("Site", "Plot")
setkeyv(dt, KEY2)
ans.dt.k2 <- dt[,diff:=c(NA,diff(V1)),by=Year]
# example solution
# where V1.1213 is [(V1, Year = 2013) - (V1, Year = 2012)], etc.
output <- structure(list(Site = c(1, 1, 2, 2, 2, 3.2, 3.2, 3.2, 3.2, 1,
1, 2, 2, 2, 3.2, 3.2, 3.2, 3.2, 1, 1, 2, 2, 2, 3.2, 3.2, 3.2,
3.2), Plot = c(1L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 1L,
2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 1L, 2L, 3L, 1L, 2L, 3L, 4L),
Year = c(2012L, 2012L, 2012L, 2012L, 2012L, 2012L, 2012L,
2012L, 2012L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L, 2013L,
2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L, 2014L,
2014L, 2014L), V1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L), VH1 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 10L, 10L, NA, 10L, 10L,
10L, 10L, 10L, 10L), V2 = c(3L, 3L, 3L, 3L, 3L, NA, 3L, 3L,
3L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L), V1.1213 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L,
NA, NA, NA, NA, NA, NA, NA, NA, NA), V1.1314 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), VH1.1213 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, NA, NA, NA, NA, NA, NA, NA, NA, NA), VH1.1314 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 6L, 6L, NA, 6L, 6L, 6L, 6L, 6L, 6L), V2.1213 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, -3L, -3L, -3L, -3L, -3L,
NA, -3L, -3L, -3L, NA, NA, NA, NA, NA, NA, NA, NA, NA), V2.1314 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L)), .Names = c("Site",
"Plot", "Year", "V1", "VH1", "V2", "V1.1213", "V1.1314", "VH1.1213",
"VH1.1314", "V2.1213", "V2.1314"), class = "data.frame", row.names = c(NA,
-27L))
【问题讨论】:
标签: r date dataframe data.table plyr