也许您会对基于 tidyverse 的替代方案感兴趣:
library(tidyverse)
# Reshape `df` so that each (Country, Year) pair becomes one row and each
# value of `Variables` becomes its own column.
# gather()/spread() are superseded in tidyr; pivot_longer()/pivot_wider() are
# the maintained equivalents.
df %>%
  # Step 1: stack every column except the two id columns into Year/val pairs.
  pivot_longer(
    cols = -c(Variables, Country),
    names_to = "Year",
    values_to = "val"
  ) %>%
  # Step 2: spread the `Variables` values out into columns.
  # names_sort = TRUE orders the new columns alphabetically
  # (Happiness, Power), which is the column order spread() produced.
  pivot_wider(
    names_from = Variables,
    values_from = val,
    names_sort = TRUE
  ) %>%
  # pivot_wider() returns a tibble; convert back so the result is a plain
  # data.frame, as it was with gather()/spread().
  as.data.frame()
# Country Year Happiness Power
#1 France 2000 1872 1213
#2 France 2001 2345 1234
#3 UK 2000 2234 1726
#4 UK 2001 9082 6433
或者使用 reshape2::melt 和 reshape2::dcast:
# Same reshape with reshape2: melt() stacks the year columns into a long
# table (year labels go into a column named "Year"), then dcast() spreads
# the `Variables` values back out into one column each.
reshape2::dcast(
  data = reshape2::melt(
    data = df,
    id.vars = c("Country", "Variables"),
    variable.name = "Year"
  ),
  formula = Country + Year ~ Variables
)
# Country Year Happiness Power
#1 France 2000 1872 1213
#2 France 2001 2345 1234
#3 UK 2000 2234 1726
#4 UK 2001 9082 6433
或者(同样地)使用 data.table::melt 和 data.table::dcast:
# Same reshape with data.table.
# `df` is a plain data.frame, and data.table's melt()/dcast() only provide
# methods for data.tables: depending on the installed data.table version, a
# data.frame input is forwarded to reshape2 (with a deprecation warning in
# later releases) or rejected if reshape2 is not installed. Converting first
# guarantees the data.table implementation is the one that runs.
# The result is a data.table holding the same values as the output below.
data.table::dcast(
  data.table::melt(
    data.table::as.data.table(df),
    id.vars = c("Country", "Variables"),
    variable.name = "Year"
  ),
  Country + Year ~ Variables
)
# Country Year Happiness Power
#1 France 2000 1872 1213
#2 France 2001 2345 1234
#3 UK 2000 2234 1726
#4 UK 2001 9082 6433
就性能/运行时间而言,我认为 data.table 或 tidyr 的解决方案效率最高。您可以在更大的样本数据上运行 microbenchmark 来验证这一点。
样本数据
# Sample data in wide format: one row per (Variables, Country) pair and one
# column per year.
# The header line has one field fewer than the data lines, so read.table()
# uses the first field of each data line (1..4) as row names.
# check.names = FALSE keeps the year columns named "2000" and "2001";
# without it read.table() would rename them to "X2000"/"X2001" and they
# would have to be patched afterwards by position.
df <- read.table(text =
" Variables Country 2000 2001
1 Power France 1213 1234
2 Happiness France 1872 2345
3 Power UK 1726 6433
4 Happiness UK 2234 9082", header = TRUE, check.names = FALSE)
基准分析
以下是四种方法的 microbenchmark 基准测试结果,基于一个(稍大的)78×22 样本数据集。
# Build the larger benchmark data set: 26 countries (A-Z) x 3 variables
# = 78 rows, with 2 id columns + 20 year columns (2000-2019) = 22 columns.
# The seed is fixed so the sampled values are reproducible.
set.seed(2017)
df <- setNames(
  data.frame(
    Variables = rep(c("Power", "Happiness", "something_else"), times = 26),
    Country = rep(LETTERS, each = 3),
    # 1560 distinct integers drawn from 1..10000, laid out as 78 x 20.
    matrix(sample(10000, 20 * 26 * 3), nrow = 26 * 3)
  ),
  # Name the id columns and label the 20 value columns with their years.
  c("Variables", "Country", 2000:2019)
)
library(microbenchmark)
library(tidyr)
# Time four wide -> long -> wide reshaping approaches on the same `df`.
# Every expression starts from the plain data.frame `df`, so each timing
# covers the full round trip for that package.
res <- microbenchmark(
  reshape2 = {
    reshape2::dcast(
      reshape2::melt(df, id.vars = c("Country", "Variables"), variable.name = "Year"),
      Country + Year ~ Variables)
  },
  # gather()/spread() are superseded by pivot_longer()/pivot_wider() in
  # current tidyr; they are kept here so the tidyr timing recorded below
  # still refers to the code that produced it.
  tidyr = {
    df %>%
      gather(Year, val, -Variables, -Country) %>%
      spread(Variables, val)
  },
  # data.table's melt()/dcast() only have methods for data.tables. Passing
  # the data.frame directly gets forwarded to reshape2 on older data.table
  # versions, so this entry would just re-measure reshape2. Converting inside
  # the timed expression makes the data.table code path run; the conversion
  # cost is part of this entry's timing.
  datatable = {
    data.table::dcast(
      data.table::melt(
        data.table::as.data.table(df),
        id.vars = c("Country", "Variables"), variable.name = "Year"),
      Country + Year ~ Variables)
  },
  # No id.vars are passed here, so reshape::melt() has to infer them; the
  # melted year column is referred to as `variable` in the cast formula.
  reshape = {
    reshape::cast(reshape::melt(df), Country + variable ~ Variables)
  }
)
# Print the timing summary.
res
#Unit: milliseconds
# expr min lq mean median uq max neval
# reshape2 3.088740 3.449686 4.313044 3.919372 5.112560 7.856902 100
# tidyr 4.482361 4.982017 6.215872 5.771133 6.931964 28.293377 100
# datatable 3.179035 3.511542 4.861192 4.040188 5.123103 46.010810 100
# reshape 27.371094 30.226222 32.425667 32.504644 34.118499 41.286803 100
# Plot the distribution of the benchmark timings for each approach.
library(ggplot2)
autoplot(object = res)