【问题标题】:Why does my b tree sometimes not return in order for randomly inserted items?为什么我的 b 树有时不按顺序返回随机插入的项目?
【发布时间】:2021-05-14 20:42:30
【问题描述】:

我不明白为什么我的 btree 有时会产生乱序的遍历结果,而且只有在插入的数据是随机生成时才会出现这种情况。当数据按顺序插入时,"数据有序"的断言总是能通过。

我怀疑问题出在我查找插入点的逻辑上,但我找不出它到底错在哪里,因为它有时又能正常工作。我想我需要找到最深的、>= 插入键的节点。目前,我觉得它可能会跳过最深的节点,但我不确定。我需要其他人来看看代码。

请注意下面的两个断言:第一个针对按顺序插入的数据,第二个针对随机插入的数据。断言错误上方打印的是遍历顺序,其中"部分有序"的结果让我感到困惑。

import bisect
import random

class BTree():
    """Node of a B-tree-like structure whose children are kept in a list ordered by key.

    Nodes created by ``insert_non_full`` are flagged ``leaf=True`` and carry an
    inserted key/value pair. The two halves produced by ``split`` and every new
    root are flagged ``leaf=False`` and copy the key/value of their first child.
    """
    def __init__(self, leaf, M, key, value, parent=None):
        self.leaf = leaf  # True when this node carries an inserted key/value pair
        self.children = []  # child nodes
        self.M = M  # child count at which insert() splits a node instead of adding to it
        self.key = key
        self.value = value
        self.parent = parent  # parent node; None for a root
    
   
    def walk(self):
        """Yield every leaf-flagged node below this node, depth first, in child-list order."""
        for child in self.children:
            
            if child.leaf:
                yield child
            yield from child.walk()
        
    def __repr__(self):
        return "{}:{}".format(self.key, self.value)

    def __str__(self):
        return "{}:{}".format(self.key, self.value)
        
    def insert(self, key, value, height=1, parent=None):
        """Insert ``key``/``value`` and return the node to use as the root afterwards.

        The returned node differs from ``self`` when the split reached the root.
        ``height`` and ``parent`` are accepted but never read in this method.
        """
        next_children = self.children
        leaf = self
        found = False
        parents = [self]
        child = None
        while found == False:
            next_children_changed = False
            # NOTE(review): there is no `break` after a match, so every child of this
            # level whose key is <= `key` is visited in list order and the LAST such
            # child decides `next_children`/`leaf`. `found` is also set as soon as any
            # matching child is childless, even if another matching child in the same
            # pass has children. This search is the suspected cause of the ordering bug.
            for child in next_children:
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key, key))
                    next_children = child.children
                    
                    
                    # found one stage before the end
                    if len(next_children) == 0:
                        found = True
    
                    else:
                        parents.append(child)
                        leaf = child
                        next_children_changed = True
                    
                    
                        
                        
            if not next_children_changed:
                found = True
                
            
        
        print("Trying to insert {} at Found insertion leaf {}".format(key, leaf))
        # Bare name: this resolves to a module-level `walk`, not to `self.walk`.
        walk(leaf)
        
        
        if len(leaf.children) < leaf.M:

            leaf.insert_non_full(key, value, parents[-1])
            
        else:
            # we need to split
            current = leaf
            last_current = current  # never read

            inserted = False  # never read
            new_root = None
            
            
            # Walk from the insertion node up to the root, splitting every node that
            # already holds M or more children.
            while current != None:
                
                
                original_parent = current.parent
                if len(current.children) >= current.M:
                    
                    
                    
                    
                    new_left, new_right, separation_value = current.split()
                    new_left.parent = original_parent
                    new_right.parent = original_parent

                   

                    if original_parent == None:
                        
                        # The root itself was split: a fresh root adopts both halves.
                        new_root = BTree(False, self.M, 0, None)
                        
                        parent = new_root
                        

                        new_root.children.append(new_left)
                        
                        new_root.children.append(new_right)
                        
                        new_root.key = new_left.key
                        new_root.value = new_left.value
                        

                    else:
                        
                        parent = original_parent
                        
                            
                        
                        
                        # Replace the split node by its two halves and restore key order.
                        original_parent.children.remove(current)
                        original_parent.children.append(new_left)
                        
                        original_parent.children.append(new_right)
                        original_parent.sort()
                   
                        
                

                    new_left.parent = parent
                    new_right.parent = parent
                    
                    assert new_right.key > new_left.key
                        
                    

                



                last_current = current
                current = original_parent

                
                
            
            # Splitting only makes room; the key is inserted by searching again from the root.
            if new_root != None:
                # split went to root
                print("Split went to root")
                
                return new_root.insert(key, value)
            else:
                
                # The result of this recursive call is discarded; `self` is returned below.
                self.insert(key, value)
            return self
            
            
        
        return self
            
        
            

    def split(self):
        """Distribute this node's children over two new non-leaf nodes.

        Returns ``(new_left, new_sibling, key)`` where ``key`` is the key of the
        first child that went to ``new_sibling``. The ``parent`` of both new
        nodes is left as None.
        """

        
        # Leaf copy of this node; only used when this node is itself leaf-flagged.
        new_self = BTree(True, self.M, 0, None)
        new_self.key = self.key
        new_self.value = self.value
        new_left = BTree(False, self.M, 0, None)
        new_sibling = BTree(False, self.M, 0, None)
        midpoint = int((len(self.children)+1)/2)

                
        left_children = []
        if self.leaf:
            left_children = [new_self]

        left_children = left_children + self.children[0:midpoint]
        right_children = self.children[midpoint:]
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling

        # Each half takes the key/value of its first child.
        new_sibling.key = right_children[0].key
        new_sibling.value = right_children[0].value

        new_left.children = left_children
        new_sibling.children = right_children

        new_left.leaf = False

        new_left.key = left_children[0].key
        new_left.value = left_children[0].value

        return new_left, new_sibling, self.children[midpoint].key


                
        
    def insert_after_split(self, key, value, parent):
        # NOTE(review): `height` is read here before it is bound anywhere in this
        # scope, so calling this method raises UnboundLocalError. Nothing in this
        # class calls it.
        height = height + 1
        
        
        insertion_point, index = self.find_location_for_key(key)

        if insertion_point == None:
            self.insert_non_full(key, value, parent)
        else:
            split = insertion_point.insert(key, value, parent=self)

            return split
        
        return self
    
    def insert_non_full(self, key, value, parent):
        """Add a new leaf for ``key``/``value`` to this node's children, keeping key order.

        ``parent`` is stored as the new leaf's ``parent`` attribute. Returns ``self``.
        """
        values = [child.key for child in self.children]
        new_pos = bisect.bisect(values, key)
        self.children.insert(new_pos, BTree(True, self.M, key, value, parent))
        return self

    def sort(self):
        """Sort the children of this node by key, in place."""
        self.children.sort(key=lambda x: x.key)
    
    def find_location_for_key(self, key):
        """Return ``(child, index)`` for the last child whose key is <= ``key``, else ``(None, -1)``."""
        index = None
        for child in self.children:
            # NOTE(review): `cmp` is not a builtin in Python 3 (required here because the
            # class uses `yield from`), so this line raises NameError unless `cmp` is
            # defined elsewhere; `delete` depends on this method.
            if cmp(key, child.key) >= 0:
                  index = child, self.children.index(child)
        if index:
            return index
        else:
            return None, -1
    
    def search(self, greater_than_equal, less_than):
        """Yield leaf nodes reached through children whose key lies in ``[greater_than_equal, less_than)``.

        A child whose own key is outside the range is not descended into.
        """
        
            
        for child in self.children:
            if child.key >= greater_than_equal and child.key < less_than:
                if child.leaf:
                    yield child
                    yield from child.search(greater_than_equal, less_than)
                else:
                    yield from child.search(greater_than_equal, less_than)
    
    def delete(self, key):
        """Remove the child located for ``key`` if its key matches, else recurse into it.

        Returns True when a child was removed and False when no child qualifies.
        The match does not check the ``leaf`` flag of the removed child.
        """
        deletion_point, index = self.find_location_for_key(key)
        if deletion_point:
            if deletion_point.key == key:
                self.children.remove(deletion_point)
                return True
            else:
                return deletion_point.delete(key)
        else:
            return False
        

    

def walk(item, spaces=0):
    """Print the subtree rooted at ``item``, one node per line.

    Each line is indented by ``spaces`` blanks and shows the node's key,
    value, a ``leaf`` marker for leaf-flagged nodes, and its parent. Children
    are printed recursively with one more blank of indentation.
    """
    indent = " " * spaces
    key, value = item.key, item.value
    marker = "leaf" if item.leaf else ""
    print("{}{}={} {} {}".format(indent, key, value, marker, item.parent))

    child_indent = spaces + 1
    for child in item.children:
        walk(child, child_indent)

# Build a tree from the keys 1..99 inserted in ascending order.
root = BTree(False, 3, 0, None)
for i in range(1, 100):
    # insert() returns the node to use as the root from now on.
    root = root.insert(i, str(i))

# Dump the whole structure, then the root's direct children.
walk(root)
print(root.children)

def keysonly(items):
    """Lazily yield the ``key`` attribute of every element of ``items``."""
    yield from (entry.key for entry in items)

# Sequential inserts: the leaves must already come back sorted by key.
in_order_keys = list(keysonly(root.walk()))
assert sorted(in_order_keys) == in_order_keys

# Repeat the check with distinct random keys inserted in arbitrary order.
root = BTree(False, 3, 0, None)
seen = {}

for i in range(1, 100):
    num1 = random.randint(0, 100)
    if num1 in seen:
        continue  # every key is inserted at most once
    seen[num1] = True
    root = root.insert(num1, str(num1))

walk(root)

for item in root.walk():
    print(item.key, item.value)

random_order_keys = list(keysonly(root.walk()))
assert sorted(random_order_keys) == random_order_keys

【问题讨论】:

  • 请缩小问题范围。可能没有多少人有时间通读大约 8 页代码来找到调试器可以找到的问题。
  • 看看 pdb(Python 调试器)——可以这样使用:python3 -m pdb myscript.py;使用 b <lineno> 设置断点,n、c、s 分别表示 next、continue 和 step,? 或 help 可查看内置文档

标签: python data-structures b-tree


【解决方案1】:

我重写了查找插入位置的逻辑,现在它应该总能得到正确的位置。我反向遍历子节点,并在遇到第一个键小于或等于插入键的子节点时中断循环。

插入时,我总是插入到 last_child 中,这样会填充已有的节点,而不是创建一个新的子节点。

下面是改动的部分,差别非常细微。

def insert(self, key, value, height=1, parent=None):
        # Excerpt: only the search for the insertion point is shown here.
        next_children = self.children
        leaf = self
        found = False
        parents = [self]
        child = None
        last_child = self  # node visited immediately before `leaf` during the descent
        while found == False:
            next_children_changed = False
            # Scan right-to-left and stop at the first child whose key is <= `key`.
            for child in reversed(next_children):
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key, key))
                    next_children = child.children
                    
                    last_child = leaf
                    parents.append(child)
                    leaf = child
                    next_children_changed = True
                    break
                    
                    
                        
                        
            # No child matched on this level: the descent is over.
            if not next_children_changed:
                found = True
                
        
        # Insert into the node visited just before the deepest match.
        leaf = last_child

完整的工作代码:

import bisect
import random

class BTree():
    """Node of a B-tree-like ordered container.

    Inserted entries live in nodes flagged ``leaf=True``. Nodes produced by
    ``split`` (and new roots) are flagged ``leaf=False`` and copy the key and
    value of their first child; ``insert`` uses those keys to route new keys.

    ``insert`` returns the root of the tree, which changes whenever the root is
    split, so callers must keep the returned node: ``root = root.insert(k, v)``.
    Keys are expected to be unique; ``insert`` asserts that the two halves of a
    split start with different keys.
    """

    def __init__(self, leaf, M, key, value, parent=None):
        """Create a node.

        Args:
            leaf: True when the node carries an inserted key/value pair.
            M: child count at which a node is split instead of receiving
                another child.
            key: key of this node.
            value: value stored alongside ``key``.
            parent: parent node, or None for a root.
        """
        self.leaf = leaf
        self.children = []
        self.M = M
        self.key = key
        self.value = value
        self.parent = parent

    def walk(self):
        """Yield every leaf-flagged node below this node, depth first, in child order."""
        for child in self.children:
            if child.leaf:
                yield child
            yield from child.walk()

    def __repr__(self):
        return "{}:{}".format(self.key, self.value)

    def __str__(self):
        return "{}:{}".format(self.key, self.value)

    def insert(self, key, value, height=1, parent=None):
        """Insert ``key``/``value`` and return the root of the tree.

        Args:
            key: key to insert; must be comparable with the stored keys.
            value: value stored with ``key``.
            height: unused; kept for backward compatibility.
            parent: unused; kept for backward compatibility.

        Returns:
            The node to use as the root from now on (a new node when the
            split reached the root, otherwise ``self``).
        """
        leaf = self
        last_child = self
        next_children = self.children
        descended = True
        while descended:
            descended = False
            # Scan right-to-left and stop at the first child whose key is
            # <= key: that is the rightmost subtree that may contain it.
            for child in reversed(next_children):
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key, key))
                    next_children = child.children
                    last_child = leaf
                    leaf = child
                    descended = True
                    break

        # The descent normally ends on a leaf entry; the new entry belongs next
        # to it, i.e. inside the node visited just before it (last_child). If it
        # ended on a non-leaf node instead (possible once delete() has removed
        # that node's first entries), the entry goes into that node itself.
        if leaf.leaf:
            leaf = last_child

        print("Trying to insert {} at Found insertion leaf {}".format(key, leaf))

        if len(leaf.children) < leaf.M:
            # The receiving node is the new leaf's parent.
            leaf.insert_non_full(key, value, leaf)
            return self

        # The target is full: split every full node on the way up to the root,
        # then search again from the (possibly new) root.
        new_root = None
        current = leaf
        while current is not None:
            original_parent = current.parent
            if len(current.children) >= current.M:
                new_left, new_right, _ = current.split()

                if original_parent is None:
                    # The root itself was split: a fresh root adopts both halves.
                    new_root = BTree(False, self.M, 0, None)
                    parent = new_root
                    new_root.children.append(new_left)
                    new_root.children.append(new_right)
                    new_root.key = new_left.key
                    new_root.value = new_left.value
                else:
                    # Replace the split node by its halves and restore key order.
                    parent = original_parent
                    original_parent.children.remove(current)
                    original_parent.children.append(new_left)
                    original_parent.children.append(new_right)
                    original_parent.sort()

                new_left.parent = parent
                new_right.parent = parent

                assert new_right.key > new_left.key

            current = original_parent

        if new_root is not None:
            # split went to root
            print("Split went to root")
            return new_root.insert(key, value)
        return self.insert(key, value)

    def split(self):
        """Distribute this node's children over two new non-leaf nodes.

        Returns:
            ``(new_left, new_sibling, key)`` where ``key`` is the key of the
            first child handed to ``new_sibling``. The ``parent`` of both new
            nodes is left as None for the caller to set.

        Raises:
            IndexError: if no child is left over for the right half.
        """
        new_left = BTree(False, self.M, 0, None)
        new_sibling = BTree(False, self.M, 0, None)
        midpoint = (len(self.children) + 1) // 2

        left_children = []
        if self.leaf:
            # A leaf-flagged node keeps its own entry as first child of the left half.
            left_children = [BTree(True, self.M, self.key, self.value)]

        left_children = left_children + self.children[0:midpoint]
        right_children = self.children[midpoint:]
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling

        # Each half takes the key/value of its first child.
        new_sibling.key = right_children[0].key
        new_sibling.value = right_children[0].value
        new_left.key = left_children[0].key
        new_left.value = left_children[0].value

        new_left.children = left_children
        new_sibling.children = right_children

        return new_left, new_sibling, right_children[0].key

    def insert_after_split(self, key, value, parent):
        """Insert below the child located for ``key``, or directly into this node.

        Not used by ``insert``; kept for backward compatibility.

        Returns:
            ``self`` when the entry was added directly to this node, otherwise
            the result of ``insert`` on the located child.
        """
        insertion_point, _ = self.find_location_for_key(key)
        if insertion_point is None:
            self.insert_non_full(key, value, parent)
            return self
        return insertion_point.insert(key, value, parent=self)

    def insert_non_full(self, key, value, parent):
        """Add a new leaf for ``key``/``value`` to this node, keeping key order.

        Args:
            key: key of the new leaf.
            value: value of the new leaf.
            parent: node recorded as the new leaf's ``parent``.

        Returns:
            ``self``.
        """
        keys = [child.key for child in self.children]
        position = bisect.bisect(keys, key)
        self.children.insert(position, BTree(True, self.M, key, value, parent))
        return self

    def sort(self):
        """Sort the children of this node by key, in place."""
        self.children.sort(key=lambda x: x.key)

    def find_location_for_key(self, key):
        """Locate the last child whose key is <= ``key``.

        Returns:
            ``(child, index)`` for that child, or ``(None, -1)`` when every
            child has a larger key or there are no children.
        """
        located = None
        for position, child in enumerate(self.children):
            if key >= child.key:
                located = (child, position)
        if located is None:
            return None, -1
        return located

    def search(self, greater_than_equal, less_than):
        """Yield the leaves whose key lies in ``[greater_than_equal, less_than)``.

        A subtree is skipped only when its node key is already >= ``less_than``;
        a node key below the lower bound does not rule out matches further down.
        """
        for child in self.children:
            if child.key >= less_than:
                continue
            if child.leaf and child.key >= greater_than_equal:
                yield child
            yield from child.search(greater_than_equal, less_than)

    def delete(self, key):
        """Remove the leaf holding ``key``.

        Only leaf-flagged nodes are removed; a non-leaf node whose copied key
        equals ``key`` is descended into instead of being dropped with its
        whole subtree.

        Returns:
            True when a leaf was removed, False when ``key`` was not found.
        """
        deletion_point, _ = self.find_location_for_key(key)
        if deletion_point is None:
            return False
        if deletion_point.leaf and deletion_point.key == key:
            self.children.remove(deletion_point)
            return True
        return deletion_point.delete(key)
        

    

def walk(item, spaces=0):
    """Print ``item`` and all of its descendants as an indented outline.

    A node at depth *d* below the starting node is prefixed with
    ``spaces + d`` blanks, followed by ``key=value``, the word ``leaf`` for
    leaf-flagged nodes, and the node's parent.
    """
    prefix = " " * spaces
    node_key = item.key
    node_value = item.value
    tag = "leaf" if item.leaf else ""
    print("{}{}={} {} {}".format(prefix, node_key, node_value, tag, item.parent))

    for descendant in item.children:
        walk(descendant, spaces + 1)

# Fill a fresh tree with the keys 1..99 in ascending order; insert() returns
# the node to use as the root afterwards, so the name is rebound each time.
root = BTree(False, 3, 0, None)
for i in range(1, 100):
    root = root.insert(i, str(i))

# Print the resulting structure followed by the root's direct children.
walk(root)
print(root.children)

def keysonly(items):
    """Generate ``item.key`` for each item of ``items``, in iteration order."""
    yield from (node.key for node in items)

# After the ascending inserts the traversal must already be in key order.
sequential_keys = list(keysonly(root.walk()))
assert sorted(sequential_keys) == sequential_keys

# Same check for distinct random keys inserted in arbitrary order.
root = BTree(False, 3, 0, None)
seen = {}

for i in range(1, 100):
    num1 = random.randint(0, 100)
    if num1 in seen:
        continue  # skip keys that were already inserted
    seen[num1] = True
    root = root.insert(num1, str(num1))

walk(root)

for item in root.walk():
    print(item.key, item.value)

shuffled_keys = list(keysonly(root.walk()))
assert sorted(shuffled_keys) == shuffled_keys

【讨论】:

    猜你喜欢
    • 2022-06-11
    • 2012-01-18
    • 2011-06-03
    • 2019-07-04
    • 2017-11-26
    • 2013-06-06
    • 2011-01-27
    • 1970-01-01
    相关资源
    最近更新 更多