【发布时间】:2017-07-12 09:18:22
【问题描述】:
我有一个包含钻石若干特征的数据框:
structure(list(carat = c("Above average", "Above average", "Below average",
"Above average", "Below average", "Very Good", "Very Good", "Very Good",
"Below average", "Very Good", "Below average", "Above average",
"Above average", "Above average", "Above average", "Above average",
"Above average", "Below average", "Below average", "Very Good",
"Below average", "Very Good", "Very Good", "Very Good", "Very Good",
"Very Good", "Above average", "Very Good", "Very Good", "Very Good",
"Very Good", "Very Good", "Very Good", "Very Good", "Very Good",
"Below average", "Below average", "Below average", "Very Good",
"Above average", "Above average", "Above average", "Below average",
"Below average", "Below average", "Above average", "Very Good",
"Below average", "Very Good", "Very Good", "Very Good", "Above average",
"Above average", "Above average", "Above average", "Above average",
"Above average", "Very Good", "Very Good", "Below average", "Above average",
"Above average", "Above average", "Above average", "Above average",
"Above average", "Above average", "Very Good", "Above average",
"Above average", "Very Good", "Very Good", "Above average", "Above average",
"Below average", "Very Good", "Very Good", "Very Good", "Very Good",
"Very Good", "Very Good", "Very Good", "Above average", "Above average",
"Below average", "Above average", "Above average", "Above average",
"Above average", "Above average", "Above average", "Below average",
"Above average", "Very Good", "Very Good", "Below average", "Below average",
"Below average", "Very Good", "Above average"), color = structure(c(2L,
2L, 2L, 6L, 7L, 7L, 6L, 5L, 2L, 5L, 7L, 7L, 3L, 7L, 2L, 2L, 6L,
7L, 7L, 7L, 6L, 2L, 5L, 7L, 7L, 4L, 6L, 7L, 1L, 3L, 3L, 3L, 2L,
2L, 1L, 3L, 2L, 5L, 1L, 6L, 6L, 7L, 1L, 1L, 5L, 3L, 5L, 5L, 2L,
5L, 3L, 4L, 6L, 2L, 1L, 6L, 7L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 6L,
4L, 6L, 4L, 4L, 2L, 1L, 5L, 5L, 5L, 5L, 3L, 2L, 1L, 1L, 2L, 2L,
1L, 2L, 6L, 2L, 4L, 5L, 5L, 5L, 6L, 2L, 2L, 4L, 2L, 4L, 2L, 3L,
3L, 2L, 5L), .Label = c("1", "2", "3", "4", "5", "6", "7"), class = "factor"),
clarity = structure(c(2L, 3L, 5L, 4L, 2L, 6L, 7L, 3L, 4L,
5L, 3L, 5L, 3L, 2L, 2L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 5L, 3L,
3L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 4L,
2L, 2L, 3L, 4L, 5L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 5L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 3L, 5L, 3L, 3L, 3L, 2L, 6L, 7L, 3L, 3L,
7L, 7L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 2L,
7L, 7L, 7L, 7L, 6L, 3L, 3L, 2L, 4L, 4L, 2L, 4L, 5L, 2L, 3L,
3L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
price = c(481, 481, 492, 558, 568, 579, 579, 590, 590, 601,
610, 621, 642, 660, 671, 671, 700, 729, 729, 729, 729, 740,
750, 750, 750, 761, 772, 793, 793, 793, 951, 951, 951, 951,
951, 951, 951, 951, 952, 952, 952, 952, 952, 952, 952, 952,
952, 952, 953, 953, 953, 953, 953, 953, 953, 954, 954, 954,
954, 954, 958, 958, 958, 958, 958, 959, 959, 959, 959, 959,
959, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960, 960,
960, 960, 960, 960, 960, 960, 960, 960, 1, 1, 1, 2, 2, 2,
2, 2, 3, 3), cut_new = structure(c(1L, 1L, 2L, 1L, 2L, 3L,
3L, 3L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L, 2L,
3L, 3L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L,
2L, 2L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 3L, 2L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 3L, 2L,
2L, 2L, 3L, 1L), .Label = c("Above average", "Below average",
"Very Good"), class = "factor")), .Names = c("carat", "color",
"clarity", "price", "cut_new"), row.names = c(NA, 100L), class = "data.frame")
对于 color 和 cut_new 的每个组合,我计算了价格的平均值:
structure(c(484.29290617849, 417.560131795717, 463.579787234043,
514.823308270677, 534.805128205128, 574.193661971831, 605.398230088496,
470.268456375839, 482.971830985916, 541.042253521127, 542.464788732394,
504.777777777778, 461.622857142857, 469.469465648855, 485.88986784141,
540.725490196078, 494.36, 640.603448275862), .Dim = c(6L, 3L), .Dimnames = list(
c("1", "2", "3", "4", "5", "6"), c("Above average", "Below average",
"Very Good")))
现在我想创建一个新的数据框,它包含原有的钻石信息,并额外增加一列平均价格(按 color 和 cut_new 的组合计算),这样就可以在每颗钻石的价格旁边看到对应的平均价格。
我曾尝试使用 merge 函数,但出现了错误:
results <- merge (diamonds_2, mean, by.x = "cut_new", by.y= "color")
有什么建议吗?
谢谢
【问题讨论】:
-
从输入示例中看不出您期望得到什么样的结果。您能展示几行预期的输出吗?
-
@akrun,感谢您指出这一点。我已经添加了结果应该是什么样子的截图,希望它能阐明我的目标