【发布时间】:2019-01-31 22:01:00
【问题描述】:
假设下面是我的数据集
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.7 2.5 5.0 2.0 virginica
7.7 3.0 6.1 2.3 virginica
6.7 3.3 5.7 2.1 virginica
4.8 3.0 1.4 0.1 setosa
5.5 4.2 1.4 0.2 setosa
4.9 3.6 1.4 0.1 setosa
6.3 3.3 4.7 1.6 versicolor
5.6 2.9 3.6 1.3 versicolor
5.9 3.0 4.2 1.5 versicolor
# Sample data: nine flowers, three per species, in the order
# virginica, setosa, versicolor. Species is a factor whose levels are
# sorted alphabetically (setosa, versicolor, virginica).
df <- data.frame(
  Sepal.Length = c(5.7, 7.7, 6.7, 4.8, 5.5, 4.9, 6.3, 5.6, 5.9),
  Sepal.Width = c(2.5, 3, 3.3, 3, 4.2, 3.6, 3.3, 2.9, 3),
  Petal.Length = c(5, 6.1, 5.7, 1.4, 1.4, 1.4, 4.7, 3.6, 4.2),
  Petal.Width = c(2, 2.3, 2.1, 0.1, 0.2, 0.1, 1.6, 1.3, 1.5),
  Species = factor(
    rep(c("virginica", "setosa", "versicolor"), each = 3L),
    levels = c("setosa", "versicolor", "virginica")
  )
)
我的目标是
-
从 Species == "virginica" 的第一行的 Sepal.Length、Sepal.Width、Petal.Length、Petal.Width 值中,
分别减去 "setosa" 每一行的对应值,我的做法如下
# Element-wise differences between the first "virginica" row and each of the
# three "setosa" rows, using only the four measurement columns (1:4).
# Each statement is on its own line; the values printed in the original post
# are kept as comments below the corresponding echo.
Virginia1_vs_Setosa1a <- df[1:4][df$Species == "virginica", ][1, ] -
  df[1:4][df$Species == "setosa", ][1, ]
Virginia1_vs_Setosa1a
# 0.9 -0.5 3.6 1.9

Virginia1_vs_Setosa2a <- df[1:4][df$Species == "virginica", ][1, ] -
  df[1:4][df$Species == "setosa", ][2, ]
Virginia1_vs_Setosa2a
# 0.2 -1.7 3.6 1.8

Virginia1_vs_Setosa3a <- df[1:4][df$Species == "virginica", ][1, ] -
  df[1:4][df$Species == "setosa", ][3, ]
Virginia1_vs_Setosa3a
# 0.8 -1.1 3.6 1.9
取每个元素的乘积
# Product of the four differences for each (virginica row 1, setosa row k)
# pair. Every `[j]` below is a one-cell data frame, so as.numeric() is used to
# unwrap the final product into a plain number. The hand-computed checks from
# the original post are kept as comments.
Virginia1_vs_Setosa1 <- as.numeric(
  Virginia1_vs_Setosa1a[1] * Virginia1_vs_Setosa1a[2] *
    Virginia1_vs_Setosa1a[3] * Virginia1_vs_Setosa1a[4]
)
# 0.9 * -0.5 * 3.6 * 1.9 = -3.078

Virginia1_vs_Setosa2 <- as.numeric(
  Virginia1_vs_Setosa2a[1] * Virginia1_vs_Setosa2a[2] *
    Virginia1_vs_Setosa2a[3] * Virginia1_vs_Setosa2a[4]
)
# 0.2 * -1.7 * 3.6 * 1.8 = -2.2032

Virginia1_vs_Setosa3 <- as.numeric(
  Virginia1_vs_Setosa3a[1] * Virginia1_vs_Setosa3a[2] *
    Virginia1_vs_Setosa3a[3] * Virginia1_vs_Setosa3a[4]
)
# 0.8 * -1.1 * 3.6 * 1.9 = -6.0192
对 virginica 的第 2 行与 setosa 的每一行做同样的计算。
# Second "virginica" row minus each "setosa" row (measurement columns 1:4).
# The differences live in temporaries that are removed at the end.
Virginia2_vs_Setosa1a <- df[df$Species == "virginica", 1:4][2, ] -
  df[df$Species == "setosa", 1:4][1, ]
Virginia2_vs_Setosa2a <- df[df$Species == "virginica", 1:4][2, ] -
  df[df$Species == "setosa", 1:4][2, ]
Virginia2_vs_Setosa3a <- df[df$Species == "virginica", 1:4][2, ] -
  df[df$Species == "setosa", 1:4][3, ]

# Multiply the four differences of each pair, left to right, into one number.
Virginia2_vs_Setosa1 <- Reduce(`*`, Virginia2_vs_Setosa1a)
Virginia2_vs_Setosa2 <- Reduce(`*`, Virginia2_vs_Setosa2a)
Virginia2_vs_Setosa3 <- Reduce(`*`, Virginia2_vs_Setosa3a)

# Drop the intermediate difference rows; only the products are needed later.
rm(
  list = c(
    "Virginia2_vs_Setosa1a",
    "Virginia2_vs_Setosa2a",
    "Virginia2_vs_Setosa3a"
  )
)
对 virginica 的第 3 行与 setosa 的每一行做同样的计算
# Third "virginica" row minus each "setosa" row (measurement columns 1:4).
# The differences live in temporaries that are removed at the end.
Virginia3_vs_Setosa1a <- df[df$Species == "virginica", 1:4][3, ] -
  df[df$Species == "setosa", 1:4][1, ]
Virginia3_vs_Setosa2a <- df[df$Species == "virginica", 1:4][3, ] -
  df[df$Species == "setosa", 1:4][2, ]
Virginia3_vs_Setosa3a <- df[df$Species == "virginica", 1:4][3, ] -
  df[df$Species == "setosa", 1:4][3, ]

# Multiply the four differences of each pair, left to right, into one number.
Virginia3_vs_Setosa1 <- Reduce(`*`, Virginia3_vs_Setosa1a)
Virginia3_vs_Setosa2 <- Reduce(`*`, Virginia3_vs_Setosa2a)
Virginia3_vs_Setosa3 <- Reduce(`*`, Virginia3_vs_Setosa3a)

# Drop the intermediate difference rows; only the products are needed later.
rm(
  list = c(
    "Virginia3_vs_Setosa1a",
    "Virginia3_vs_Setosa2a",
    "Virginia3_vs_Setosa3a"
  )
)
最后创建一个像下面这样的 3*3 矩阵
# Assemble the nine products into a 3 x 3 matrix, one column per virginica
# row and one row per setosa row. deparse.level = 0 keeps the result free of
# dimnames. Left as a bare expression so the matrix is printed.
cbind(
  c(Virginia1_vs_Setosa1, Virginia1_vs_Setosa2, Virginia1_vs_Setosa3),
  c(Virginia2_vs_Setosa1, Virginia2_vs_Setosa2, Virginia2_vs_Setosa3),
  c(Virginia3_vs_Setosa1, Virginia3_vs_Setosa2, Virginia3_vs_Setosa3),
  deparse.level = 0
)
[,1] [,2] [,3]
[1,] -3.0780 0.0000 4.9020
[2,] -2.2032 -26.0568 -8.8236
[3,] -6.0192 -17.3712 -4.6440
如您所见,我的解决方案非常笨拙且效率低下。如果有人能向我展示实现相同结果的有效方法,我将非常感激。
【问题讨论】: