【发布时间】:2023-04-03 20:35:02
【问题描述】:
我要实现 SCUT 算法来平衡我的多类数据集,我得到了这个错误
1 apply(T, 2, max) 中的错误:dim(X) 必须具有正长度
library(EMCluster, quietly = TRUE)
library("lattice", lib.loc="C:/Program Files/R/R-3.4.2/library")
library(grid)
library(DMwR)
CoronaryEvent <-data$CoronaryEvent
class <- unique(CoronaryEvent)
for(i in 2:length(unique(CoronaryEvent))-1){
for(j in (i+1):length(unique(CoronaryEvent))){
print(paste(i,j,sep=","))
print(paste(class[i],class[j],sep=","))
coronaryEvent equal to class i and class j
class_i <- factor(as.factor(class[i]))
class_j <- factor(as.factor(class[j]))
a<-data[match(as.character(data$CoronaryEvent), class_i, nomatch = FALSE), ]
a$CoronaryEvent <-as.factor(class_i)
b<-data[match(as.character(data$CoronaryEvent), class_j, nomatch = FALSE), ]
b$CoronaryEvent <-as.factor(class_j)
D <- rbind(a,b)
str(D)
print(nrow(D))
#oversampling binary dataset (minority class till reach 36 observations)
m<-36
if((nrow(a)<m)|(nrow(b)<m)){
n<-nrow(a)
perc = as.integer((m/n)*100)
print(perc)
newData <- SMOTE(CoronaryEvent ~ ., D, perc.over = perc,perc.under = 50)
print("oversampling")
str(newData)
}
#undersampling majority class till 32 observations using EM algorithm
else if((nrow(a)>m)|(nrow(b)>m)){
print("undersampling")
}
}
}
下图包含SCUT算法
【问题讨论】:
标签: r