【发布时间】:2018-01-09 15:33:19
【问题描述】:
分类项目
我开发手部检测器已经有一段时间了,其目的是检测图片/视频/网络摄像头画面中的手势。我用 Python 2.7 实现了这个项目,并使用了 OpenCV 和 sklearn。
我终于达到了手势检测器使用以下技术检测手的程度:
- 将 RGB 颜色转换为 HSV 颜色,并手动调整 HSV 参数(色调、饱和度、明度)。(我这样做是因为我读到 RGB 颜色空间不适合为肤色建模。)
- 进行膨胀、腐蚀和中值模糊滤波,以获得肤色的阈值化区域。
- 使用 Haar 级联(Haar-Cascade)从图片中检测人脸并将其移除,因为我只关心手部。
- 去除面部区域后,手部区域是包含肤色的最大区域。在该区域周围绘制轮廓。
- 我还想去掉手腕部分,所以我用 cv2.pointPolygonTest 找出手掌内部到轮廓距离最大的点(maxdistance)作为手掌中心,在手掌内侧画一个圆,并以此确定矩形的下边界(即切掉手腕)。
- 现在识别器从图片中检测到手
从图片中进行手部检测效果很好,但是当我尝试识别正确的手势类别时会出现问题。我已经通过使用 SVM、KNN、RandomForest 等算法训练图片来实现分类,这是我不想改变的技术。我有 6 种不同的手部姿势(6 类),训练集大小约为 100 张图片/类,测试集大小约为 10 张图片/类。训练和测试图片使用与上述相同的技术制作,并将它们转换为灰度 .bmp 图片。之后,我将图片调整为相同大小,并制作了 .pkl 数据集,如 MNIST 模型。然后我训练了具有以下特征的模型:
- 使用 HOG 特征改进了结果。
- 使用 sklearns StandardScaler 预处理来提高性能。
- 使用了许多 sklearn 的算法,如 SVM、KNN、RandomForest、MLP、NaiveBayes、决策树……来获得最佳模型。
在训练模型后,我得到了非常好的模型结果(最好的模型预测在 0.95-0.97 左右),混淆矩阵看起来也不错,所以我认为模型已经正确学习了。
问题:分类器分类但大多数时候是错误的。首先,我认为我应该增加数据集的大小,但后来我注意到有人设法识别只有 1 张图片/类的手势,所以现在我认为我做错了什么。我的模型也应该有效,因为同样的技术适用于 MNIST 手写数字,而且我的模型几乎正确分类了每个数字。问题也可能在 HOG 参数中,HOG 对我来说不是很熟悉。此外,我的数据集图片是在手部姿势周围没有空间的情况下绘制的,可以实现结果。如果有人知道我失败的地方,我将非常感激。
编辑 1:由于云存储服务无法使用,我把 detectHand 和 generateClassifiers 两个文件直接贴在这里。你需要拍一张同时包含你的脸和手的测试照片,并调整 HSV 参数,以得到阈值化后的手部区域。
detectHand.py
import cv2
import numpy as np
import argparse as ap
from sklearn.externals import joblib
from skimage.feature import hog
def callback(x):
    """Do nothing; placeholder handler handed to ``cv2.createTrackbar``.

    The script polls the slider values with ``cv2.getTrackbarPos`` instead of
    reacting to change events, so the new position ``x`` is ignored.
    """
    return None
# Parse the command line: the classifier dump and the input image are both
# mandatory.  ``required`` must be the boolean True, not the string "True".
parser = ap.ArgumentParser()
parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required=True)
parser.add_argument("-i", "--image", help="Path to Image", required=True)
args = vars(parser.parse_args())

# Load the classifier and the preprocessing object that were dumped together
# as a (clf, pp) tuple.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load classifier files that come from a trusted source.
clf, pp = joblib.load(args["classiferPath"])

face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # Fail early with a clear message instead of a cv2 error at detection time.
    raise IOError("Could not load haarcascade_frontalface_default.xml")

cv2.namedWindow('HSV')

# Create the trackbars used to tune the HSV skin threshold; adjust them until
# only the hand remains in the thresholded image.
for trackbar_name, initial_value in (('MinH', 0), ('MaxH', 25),
                                     ('MinS', 86), ('MaxS', 180),
                                     ('MinV', 131), ('MaxV', 255)):
    cv2.createTrackbar(trackbar_name, 'HSV', initial_value, 255, callback)
# Read and resize the image once: the picture never changes between
# iterations, only the trackbar thresholds do.
im = cv2.imread(args["image"])
if im is None:
    # cv2.imread signals an unreadable file by returning None.
    raise IOError("Could not read image: %s" % args["image"])
im = cv2.resize(im, (960, 540))

# Loop-invariant preprocessing: blurred HSV version used for thresholding.
hsv = cv2.cvtColor(cv2.blur(im, (3, 3)), cv2.COLOR_BGR2HSV)

# Kernel matrices for the morphological transformations.
kernel_square = np.ones((11, 11), np.uint8)
kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# Detect the faces once; they are blacked out of every thresholded mask below.
gray = cv2.equalizeHist(cv2.cvtColor(im, cv2.COLOR_BGR2GRAY))
faces = face_cascade.detectMultiScale(gray, 1.3, 3, minSize=(20, 20),
                                      flags=cv2.CASCADE_SCALE_IMAGE)

while True:
    # Threshold the HSV image with the current position of the six trackbars.
    lower = np.array([cv2.getTrackbarPos('MinH', 'HSV'),
                      cv2.getTrackbarPos('MinS', 'HSV'),
                      cv2.getTrackbarPos('MinV', 'HSV')])
    upper = np.array([cv2.getTrackbarPos('MaxH', 'HSV'),
                      cv2.getTrackbarPos('MaxS', 'HSV'),
                      cv2.getTrackbarPos('MaxV', 'HSV')])
    mask2 = cv2.inRange(hsv, lower, upper)

    # Morphological clean-up of the mask: dilation grows the skin-coloured
    # area, erosion shrinks it again, and the median blur smooths the result.
    dilation = cv2.dilate(mask2, kernel_ellipse, iterations=1)
    erosion = cv2.erode(dilation, kernel_square, iterations=1)
    filtered = cv2.medianBlur(erosion, 5)
    ret, thresh = cv2.threshold(filtered, 127, 255, 0)

    # Remove the faces from the mask.  The far corner is (x + w, y + h):
    # width goes with x and height with y.
    for (x, y, w, h) in faces:
        cv2.rectangle(thresh, (int(x), int(y)), (int(x + w), int(y + h)),
                      (0, 0, 0), cv2.FILLED)

    # findContours returns 3 values on OpenCV 3 and 2 values on other
    # versions; the contour list is always the second-to-last element.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    im1 = im.copy()
    if len(contours) > 0:
        # Largest-area contour; stays at index 0 when no area exceeds 100.
        max_area = 100
        ci = 0
        for i, cnt in enumerate(contours):
            area = cv2.contourArea(cnt)
            if area > max_area:
                max_area = area
                ci = i
        cnts = contours[ci]
        x1, y1, w1, h1 = cv2.boundingRect(cnts)

        # Centre of palm: the point of the scanned sub-rectangle lying deepest
        # inside the contour (largest signed distance to its border).
        maxdistance = 0
        pt = (0, 0)
        for index_y in range(int(y1 + 0.25 * h1), int(y1 + 0.8 * h1)):
            for index_x in range(int(x1 + 0.3 * w1), int(x1 + 0.9 * w1)):
                distance = cv2.pointPolygonTest(cnts, (index_x, index_y), True)
                if distance > maxdistance:
                    maxdistance = distance
                    pt = (index_x, index_y)

        # The bottom of the palm circle is the lower crop limit (cuts the wrist).
        radius = int(maxdistance)
        bottom = pt[1] + radius
        cv2.circle(im1, pt, radius, (255, 0, 0), 2)
        cv2.rectangle(im1, (x1, y1), (x1 + w1, bottom), (0, 0, 255), 3)
        cropped_image = thresh[y1:bottom, x1:x1 + w1]

        # When no interior point was found the crop is empty and cannot be
        # resized; skip classification for this iteration instead of crashing.
        if cropped_image.size > 0:
            roi = cv2.resize(cropped_image, (100, 150), interpolation=cv2.INTER_AREA)
            # Calculate the HOG features, scale them with the saved
            # preprocessing object and classify.
            roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=False)
            roi_hog_fd = pp.transform(np.array([roi_hog_fd], 'float64'))
            nbr = clf.predict(roi_hog_fd)
            cv2.putText(im1, str(nbr[0]), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)
            cv2.imshow('Hand', cropped_image)
            cv2.imshow('roi', roi)

    cv2.imshow('Output', im1)
    # Mask to the low byte so the ESC check also works where waitKey sets
    # extra high bits.
    c = cv2.waitKey(5) & 0xFF
    if c == 27:
        break
cv2.destroyAllWindows()
generateClassifiers.py
#!/usr/bin/python
# Import the modules
from sklearn.externals import joblib
import pickle
from skimage.feature import hog
from sklearn import preprocessing
import numpy as np
from collections import Counter
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
import cv2
def ModelRandomQuessing(hog_features, labels, pp):
    """Fit a ``DummyClassifier`` baseline and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("RandomQuessing", fitted_classifier)``.
    """
    baseline = DummyClassifier()
    baseline.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((baseline, pp), "model1randomquessing.pkl", compress=3)
    return "RandomQuessing", baseline
def ModelLinearSVM(hog_features, labels, pp):
    """Fit an ``SGDClassifier`` (labelled "LinearSVM") and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("LinearSVM", fitted_classifier)``.
    """
    sgd = SGDClassifier(n_jobs=-1)
    sgd.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((sgd, pp), "model2linearsvm.pkl", compress=3)
    return "LinearSVM", sgd
def ModelKNN(hog_features, labels, pp):
    """Fit a ``KNeighborsClassifier`` with default neighbours and save it.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("KNearestNeighbors", fitted_classifier)``.
    """
    knn = KNeighborsClassifier(n_jobs=-1)
    knn.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((knn, pp), "model3knn.pkl", compress=3)
    return "KNearestNeighbors", knn
def ModelSVM(hog_features, labels, pp):
    """Fit an RBF-kernel ``SVC`` and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("SupportVectorMachine", fitted_classifier)``.
    """
    svm = SVC(kernel="rbf")
    svm.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((svm, pp), "model4svm.pkl", compress=3)
    return "SupportVectorMachine", svm
def ModelDecisionTree(hog_features, labels, pp):
    """Fit a default ``DecisionTreeClassifier`` and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("DecisionTree", fitted_classifier)``.
    """
    tree = DecisionTreeClassifier()
    tree.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((tree, pp), "model5decisiontree.pkl", compress=3)
    return "DecisionTree", tree
def ModelRandomForest(hog_features, labels, pp):
    """Fit a default ``RandomForestClassifier`` and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("RandomForest", fitted_classifier)``.
    """
    forest = RandomForestClassifier()
    forest.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((forest, pp), "model6randomforest.pkl", compress=3)
    return "RandomForest", forest
def ModelAdaboost(hog_features, labels, pp):
    """Fit a default ``AdaBoostClassifier`` and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("Adaboost", fitted_classifier)``.
    """
    booster = AdaBoostClassifier()
    booster.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((booster, pp), "model7adaboost.pkl", compress=3)
    return "Adaboost", booster
def ModelGaussianNB(hog_features, labels, pp):
    """Fit a ``GaussianNB`` classifier and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("GaussianNaiveBayes", fitted_classifier)``.
    """
    naive_bayes = GaussianNB()
    naive_bayes.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((naive_bayes, pp), "model8gaussiannb.pkl", compress=3)
    return "GaussianNaiveBayes", naive_bayes
def ModelLDA(hog_features, labels, pp):
    """Fit a ``LinearDiscriminantAnalysis`` classifier and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("LinearDiscriminantAnalysis", fitted_classifier)``.
    """
    lda = LinearDiscriminantAnalysis()
    lda.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((lda, pp), "model9lda.pkl", compress=3)
    return "LinearDiscriminantAnalysis", lda
def ModelQDA(hog_features, labels, pp):
    """Fit a ``QuadraticDiscriminantAnalysis`` classifier and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("QuadraticDiscriminantAnalysis", fitted_classifier)``.
    """
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((qda, pp), "model10qda.pkl", compress=3)
    return "QuadraticDiscriminantAnalysis", qda
def ModelLogisticRegression(hog_features, labels, pp):
    """Fit a ``LogisticRegression`` classifier and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("LogisticRegression", fitted_classifier)``.
    """
    logreg = LogisticRegression(n_jobs=-1)
    logreg.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((logreg, pp), "model11logisticregression.pkl", compress=3)
    return "LogisticRegression", logreg
def ModelMLP(hog_features, labels, pp):
    """Fit an ``MLPClassifier`` with two 200-unit hidden layers and save it.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("MultilayerPerceptron", fitted_classifier)``.
    """
    mlp = MLPClassifier(
        activation='relu',
        hidden_layer_sizes=(200, 200),
        solver='lbfgs',
        alpha=10,
        verbose=True,
    )
    mlp.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((mlp, pp), "model12mlp.pkl", compress=3)
    return "MultilayerPerceptron", mlp
def ModelBestKNN(hog_features, labels, pp):
    """Fit a distance-weighted 4-neighbour ``KNeighborsClassifier`` and save it.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("BestKNearestNeighbors", fitted_classifier)``.
    """
    tuned_knn = KNeighborsClassifier(
        n_jobs=-1,
        weights='distance',
        n_neighbors=4,
    )
    tuned_knn.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((tuned_knn, pp), "model13bestknn.pkl", compress=3)
    return "BestKNearestNeighbors", tuned_knn
def ModelBestSVM(hog_features, labels, pp):
    """Fit an RBF ``SVC`` with C=10 and balanced class weights, and save it.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("BestSupportVectorMachine", fitted_classifier)``.
    """
    tuned_svm = SVC(
        kernel='rbf',
        cache_size=2000,
        C=10.0,
        gamma='auto',
        class_weight='balanced',
    )
    tuned_svm.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((tuned_svm, pp), "model14bestsvm.pkl", compress=3)
    return "BestSupportVectorMachine", tuned_svm
def ModelBestRandomForest(hog_features, labels, pp):
    """Fit a 500-tree ``RandomForestClassifier`` and save it to disk.

    Args:
        hog_features: training features passed to ``fit``.
        labels: training labels passed to ``fit``.
        pp: preprocessing object dumped together with the classifier.

    Returns:
        The tuple ``("BestRandomForest", fitted_classifier)``.
    """
    tuned_forest = RandomForestClassifier(
        n_jobs=-1,
        n_estimators=500,
        max_features='auto',
    )
    tuned_forest.fit(hog_features, labels)
    # Classifier and preprocessing object are stored as one (clf, pp) tuple.
    joblib.dump((tuned_forest, pp), "model15bestrf.pkl", compress=3)
    return "BestRandomForest", tuned_forest
def accuracy(modelclf, X_test, Y_test):
    """Print the classification report and confusion matrix of a fitted model.

    Args:
        modelclf: ``(model_name, classifier)`` pair as returned by the
            ``Model*`` functions.
        X_test: features handed to the classifier's ``predict``.
        Y_test: true labels the predictions are compared against.
    """
    name, fitted = modelclf
    predictions = fitted.predict(X_test)

    report = classification_report(Y_test, predictions)
    print("Classification report for classifier %s:\n%s\n" % (name, report))

    matrix = confusion_matrix(Y_test, predictions)
    print("Confusion matrix:\n%s" % matrix)
if __name__ == '__main__':
    # Load the dataset.
    # NOTE: pickle.load can execute arbitrary code from the file; only use a
    # handdetection.pkl that comes from a trusted source.
    with open('handdetection.pkl', 'rb') as f:
        data = pickle.load(f)

    # Extract the samples and labels, holding out 10% for testing.
    X = data[0]
    Y = data[1]
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)

    # Extract the HOG features of the training samples.  hog_image keeps the
    # visualisation of the last training sample; it is displayed at the end.
    list_X_train = []
    for trainsample in X_train:
        fd, hog_image = hog(trainsample.reshape((150, 100)), orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=True)
        list_X_train.append(fd)
    X_train = np.array(list_X_train, 'float64')

    # Normalize the features.  The scaler is fitted on the training set only
    # and is the object dumped next to every classifier.
    pp = preprocessing.StandardScaler().fit(X_train)
    X_train = pp.transform(X_train)

    # Same HOG extraction for the test set.
    list_X_test = []
    for testsample in X_test:
        fd = hog(testsample.reshape((150, 100)), orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=False)
        list_X_test.append(fd)
    X_test = np.array(list_X_test, 'float64')
    # Scale the test set with the scaler fitted on the training data.  Fitting
    # a second scaler on the test set would evaluate the models on features
    # scaled differently from the ones the saved (clf, pp) pair produces.
    X_test = pp.transform(X_test)

    print("Count of digits in dataset", Counter(Y_train))

    # Train, save and evaluate the selected models; uncomment a line to
    # include that model in the run.
    #accuracy(ModelRandomQuessing(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelLinearSVM(X_train, Y_train, pp),X_test,Y_test)
    accuracy(ModelKNN(X_train, Y_train, pp), X_test, Y_test)
    accuracy(ModelSVM(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelDecisionTree(X_train, Y_train, pp),X_test,Y_test)
    accuracy(ModelRandomForest(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelAdaboost(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelGaussianNB(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelLDA(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelQDA(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelLogisticRegression(X_train, Y_train, pp),X_test,Y_test)
    accuracy(ModelMLP(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelBestKNN(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelBestSVM(X_train, Y_train, pp),X_test,Y_test)
    #accuracy(ModelBestRandomForest(X_train, Y_train, pp),X_test,Y_test)

    # Show the HOG visualisation of the last training sample until ESC is
    # pressed.
    while True:
        cv2.imshow('hog', hog_image)
        # Mask to the low byte so the ESC check also works where waitKey sets
        # extra high bits.
        c = cv2.waitKey(5) & 0xFF
        if c == 27:
            break
    cv2.destroyAllWindows()
我再次尝试用 Dropbox 分享人脸 Haar 级联文件和 handdetection.pkl 数据集。请把所有这些文件放在同一个文件夹中。
Haar 级联:https://www.dropbox.com/s/zdc096drhbr1sx3/haarcascade_frontalface_default.xml?dl=0 数据集:https://www.dropbox.com/s/pieywxg8rl8rsw4/handdetection.pkl?dl=0
【问题讨论】:
标签: algorithm opencv artificial-intelligence svm