【发布时间】:2021-12-30 23:29:07
【问题描述】:
过去几天我一直在尝试编写自己的 k-mean 算法,但遇到了障碍。当我试图找到一个簇中点的平均位置来移动质心时,我会得到一个零除法错误(注意:这不会在 k = 2 时发生,只有在 k = 3 时才会发生,但总是当 k >= 4) 时发生。我试图通过确保每个质心从数据集中的一个点开始来解决这个问题,这样它在其集群中总是至少有一个点,但它没有工作。我也重新排列了计数器等,但同样,它没有用。我已经没有想法了,不知道为什么这个错误仍然会发生。我很确定问题来自这些功能之一(编辑:添加了所有代码和完整的错误消息):
import random
import math
import matplotlib.pyplot as plt
class Kmeans:
def __init__(self, K, dataset, centroids, sorting):
self.K = K
self.dataset = dataset
self.centroids = centroids
self.sorting = sorting
def initializeCentroids(self):
usedPoints = [random.choice(data_set)]
self.centroids = []
for q in range(self.K):
pointSelected = False
while not pointSelected:
m = random.choice(data_set)
print(m)
print(usedPoints)
distance = math.sqrt(abs(((m[0] - usedPoints[len(usedPoints) - 1][0]) ** 2) + (m[1] - usedPoints[len(usedPoints) - 1][1]) ** 2))
if usedPoints.count(m) == 0 and distance > 50:
self.centroids.append(list(m))
usedPoints.append(m)
pointSelected = True
return self.centroids
def calcDistance(self):
self.sorting = []
for w in self.dataset:
distances = []
counter = -1
for centr in self.centroids:
counter += 1
distances.append(math.sqrt(abs((((w[0] - centr[0]) ** 2) + (w[1] - centr[1]) ** 2))))
for x in range(len(distances)):
if len(distances) > 1:
print(distances)
if distances[0] > distances[1]:
distances.pop(0)
else:
distances.pop(1)
counter -= 1
print(counter)
self.sorting.insert(0, [w, counter, distances[0]])
return self.sorting
# not done
def find_ME(self):
counter2 = 0
for r in self.centroids:
for t in self.sorting:
nums = []
if t[1] == counter2:
nums.append(t[2])
population = len(nums)
error = sum(nums) / population
def reassignCentroids(self):
counter3 = 0
for r in self.centroids:
positionsX = []
positionsY = []
for t in self.sorting:
if t[1] == counter3:
positionsX.append(t[0][0])
positionsY.append(t[0][1])
population = len(positionsY)
print(population)
print(self.sorting)
r[0] = sum(positionsX) / population
r[1] = sum(positionsY) / population
counter3 += 1
return self.centroids
def checkSimilar(self, prevList):
list1 = []
list2 = []
for u in prevList:
list1.append(u[1])
for i in self.sorting:
list2.append(i[1])
print(i)
if list2 == list1:
return True
else:
return False
k = 3
data_set = [(1, 1), (1, 2), (1, 3), (2, 3), (50, 52), (48, 50), (47, 60), (112, 90), (120, 100), (108, 130), (102, 121), (43, 51), (0, 1)]
attempt = Kmeans(k, data_set, [], [])
attempt.initializeCentroids()
xvals = []
yvals = []
sortCompare = []
maxIterations = 100000
# plots
for p in data_set:
xvals.append(p[0])
yvals.append(p[1])
running = True
zeroError = True
while running:
attempt.calcDistance()
sortCompare = attempt.sorting
print(sortCompare, "thisss")
attempt.reassignCentroids()
attempt.calcDistance()
attempt.reassignCentroids()
boolVal = attempt.checkSimilar(sortCompare)
if boolVal or maxIterations <= 0:
xs = []
ys = []
for y in attempt.centroids:
xs.append(y[0])
ys.append(y[1])
plt.scatter(xs, ys)
running = False
else:
sortCompare = []
maxIterations -= 1
print(attempt.sorting)
print(attempt.centroids)
plt.scatter(xvals, yvals)
plt.show()
完全错误:回溯(最后一次调用): 文件“C:/Users/Jack Cramer/PycharmProjects/kmeans/main.py”,第 117 行,在 尝试.reassignCentroids() 文件“C:/Users/Jack Cramer/PycharmProjects/kmeans/main.py”,第 73 行,在 reassignCentroids r[0] = sum(positionsX) / 人口 ZeroDivisionError: 除以零
如果您知道为什么会发生这种情况,请告诉我,感谢您的建议。
【问题讨论】:
-
您能否给出完整的代码(即包装此代码的类,以及一些重现零除错误的驱动程序代码)?
-
您应该包含完整的错误回溯并提供minimal reproducible example
-
@BrokenBenchmark 添加了所有代码,感谢您的帮助
-
@ThierryLathuille 我添加了完整的错误消息和其余代码
-
正如消息所说,
population此时为 0。检查您的数据。
标签: python python-3.x runtime-error k-means