【发布时间】:2021-05-14 20:42:30
【问题描述】:
我不明白为什么我的 btree 有时会产生乱序的遍历,但只有当插入的数据是随机生成的。当数据按顺序插入时,数据有序的断言总是通过。
我怀疑找到一个有问题的插入点是我的逻辑。但我无法弄清楚它有什么问题,因为它有时会起作用。我想我需要找到最深的节点,即 >= 插入键。目前,我觉得它可能会跳过最深的节点。但我不确定。我需要其他人来看看代码。
请注意以下两个断言。一种是按顺序插入。第二个是随机插入。断言错误之上是遍历顺序。部分顺序让我感到困惑。
import bisect
import random
class BTree():
def __init__(self, leaf, M, key, value, parent=None):
self.leaf = leaf
self.children = []
self.M = M
self.key = key
self.value = value
self.parent = parent
def walk(self):
for child in self.children:
if child.leaf:
yield child
yield from child.walk()
def __repr__(self):
return "{}:{}".format(self.key, self.value)
def __str__(self):
return "{}:{}".format(self.key, self.value)
def insert(self, key, value, height=1, parent=None):
next_children = self.children
leaf = self
found = False
parents = [self]
child = None
while found == False:
next_children_changed = False
for child in next_children:
if key >= child.key:
print("Inspecting {} <= {} ".format(child.key, key))
next_children = child.children
# found one stage before the end
if len(next_children) == 0:
found = True
else:
parents.append(child)
leaf = child
next_children_changed = True
if not next_children_changed:
found = True
print("Trying to insert {} at Found insertion leaf {}".format(key, leaf))
walk(leaf)
if len(leaf.children) < leaf.M:
leaf.insert_non_full(key, value, parents[-1])
else:
# we need to split
current = leaf
last_current = current
inserted = False
new_root = None
while current != None:
original_parent = current.parent
if len(current.children) >= current.M:
new_left, new_right, separation_value = current.split()
new_left.parent = original_parent
new_right.parent = original_parent
if original_parent == None:
new_root = BTree(False, self.M, 0, None)
parent = new_root
new_root.children.append(new_left)
new_root.children.append(new_right)
new_root.key = new_left.key
new_root.value = new_left.value
else:
parent = original_parent
original_parent.children.remove(current)
original_parent.children.append(new_left)
original_parent.children.append(new_right)
original_parent.sort()
new_left.parent = parent
new_right.parent = parent
assert new_right.key > new_left.key
last_current = current
current = original_parent
if new_root != None:
# split went to root
print("Split went to root")
return new_root.insert(key, value)
else:
self.insert(key, value)
return self
return self
def split(self):
new_self = BTree(True, self.M, 0, None)
new_self.key = self.key
new_self.value = self.value
new_left = BTree(False, self.M, 0, None)
new_sibling = BTree(False, self.M, 0, None)
midpoint = int((len(self.children)+1)/2)
left_children = []
if self.leaf:
left_children = [new_self]
left_children = left_children + self.children[0:midpoint]
right_children = self.children[midpoint:]
for child in left_children:
child.parent = new_left
for child in right_children:
child.parent = new_sibling
new_sibling.key = right_children[0].key
new_sibling.value = right_children[0].value
new_left.children = left_children
new_sibling.children = right_children
new_left.leaf = False
new_left.key = left_children[0].key
new_left.value = left_children[0].value
return new_left, new_sibling, self.children[midpoint].key
def insert_after_split(self, key, value, parent):
height = height + 1
insertion_point, index = self.find_location_for_key(key)
if insertion_point == None:
self.insert_non_full(key, value, parent)
else:
split = insertion_point.insert(key, value, parent=self)
return split
return self
def insert_non_full(self, key, value, parent):
values = [child.key for child in self.children]
new_pos = bisect.bisect(values, key)
self.children.insert(new_pos, BTree(True, self.M, key, value, parent))
return self
def sort(self):
self.children.sort(key=lambda x: x.key)
def find_location_for_key(self, key):
index = None
for child in self.children:
if cmp(key, child.key) >= 0:
index = child, self.children.index(child)
if index:
return index
else:
return None, -1
def search(self, greater_than_equal, less_than):
for child in self.children:
if child.key >= greater_than_equal and child.key < less_than:
if child.leaf:
yield child
yield from child.search(greater_than_equal, less_than)
else:
yield from child.search(greater_than_equal, less_than)
def delete(self, key):
deletion_point, index = self.find_location_for_key(key)
if deletion_point:
if deletion_point.key == key:
self.children.remove(deletion_point)
return True
else:
return deletion_point.delete(key)
else:
return False
def walk(item, spaces=0):
print("{}{}={} {} {}".format(" " * spaces, item.key, item.value, "leaf" if item.leaf else "", item.parent))
for child in item.children:
walk(child, spaces + 1)
root = BTree(False, 3, 0, None)\
.insert(1, "1")\
.insert(2, "2")\
.insert(3, "3")\
.insert(4, "4")
for i in range(5, 100):
root = root.insert(i, str(i))
walk(root)
print(root.children)
def keysonly(items):
for item in items:
yield item.key
assert sorted(list(keysonly(root.walk()))) == list(keysonly(root.walk()))
root = BTree(False, 3, 0, None)
seen = {}
for i in range(1, 100):
num1 = random.randint(0, 100)
if num1 not in seen:
seen[num1] = True
root = root.insert(num1, str(num1))
walk(root)
for item in root.walk():
print(item.key, item.value)
assert sorted(list(keysonly(root.walk()))) == list(keysonly(root.walk()))
【问题讨论】:
-
请缩小问题范围。可能没有多少人有时间通读大约 8 页代码来找到调试器可以找到的问题。
-
看看pdb,Python 调试器 - 这可以用作
python3 -m pdb myscript.py;使用b <lineno>设置断点,n、c、s分别用于 next、continue 和 step .. 和?或help用于内部文档
标签: python data-structures b-tree