【发布时间】:2016-06-22 02:06:12
【问题描述】:
我正在尝试对一个 1064 行、9 列的数据框(data frame)运行 k-means 聚类(使用 NbClust),但出现以下错误:
Error in NbClust(df, min.nc = 2, max.nc = 15, method = "kmeans") :
The TSS matrix is indefinite. There must be too many missing values. The index cannot be calculated.
但是,数据中并没有缺失值:
> dim(df)
[1] 1064 9
> sum(is.na(df))
[1] 0
有人知道问题可能出在哪里,以及该如何解决吗?
> head(df)
hr_830 hr_930 hr_1030 hr_1130 hr_160 hr_180 hr_190 hr_200 hr_0
1 2 2 2 2 2 2 2 2 2
2 2 2 2 2 2 2 2 2 3
3 2 2 2 2 2 2 2 2 3
4 2 2 2 2 2 2 2 2 2
5 2 2 2 2 2 2 2 2 2
6 2 2 2 2 2 2 2 2 4
下面是输入数据的示例:
> dput(input)
structure(list(hr_830 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L,
2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), hr_930 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), hr_1030 = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), hr_1130 = c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), hr_160 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), hr_180 = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 4L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), hr_190 = c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), hr_200 = c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 4L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L), hr_0 = c(2L, 3L, 3L, 2L, 2L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L
)), .Names = c("hr_830", "hr_930", "hr_1030", "hr_1130", "hr_160",
"hr_180", "hr_190", "hr_200", "hr_0"), row.names = c(NA, 25L), class = "data.frame")
【问题讨论】:
标签: r cluster-analysis k-means