上一篇文章我們介紹了 B-Tree,接下來要介紹它的兄弟 - B+Tree,承襲 B-Tree 的特性,B+Tree 一樣是自平衡樹,搜尋的複雜度一樣也是可以穩定在 O(log n),原則也都一樣,唯一不同的是 B+Tree 的葉節點會有全部的索引鍵,可想而知,這會多使用一些空間,但是換來的是,在做範圍搜尋的時候可以掃瞄葉子節點就好。
建立 B+Tree 的原則跟 B-Tree 一樣,但是由於所有的索引鍵會保留在葉子節點,所以樹的長相就不一樣,大概像這樣子:
實作 - 新增索引鍵
直接進入實作的環節,與 B-Tree 相同的部分我就不多做說明了,不清楚的部分可以參考我 B-Tree 的文章,我們一樣用舊金山大學做的 B+Tree Visualization 網頁來輔助我們實作 B+Tree,B+Tree 節點類別內的屬性與 B-Tree 差不多,只不過多了兩個屬性 Previous
與 Next
。
public class BPlusTree
{
// 與 B-Tree 相同
}
internal class BPlusTreeNode
{
private readonly int maxKeyLength;
private readonly int minKeyLength;
public BPlusTreeNode(int degree, BPlusTreeNode parent)
{
this.maxKeyLength = degree;
this.minKeyLength = (int)Math.Ceiling(degree / 2d) - 1;
this.Keys = new int?[degree];
this.Parent = parent;
this.Children = new BPlusTreeNode[degree + 1];
}
public int?[] Keys { get; }
public int KeyCount { get; private set; }
public BPlusTreeNode[] Children { get; }
public BPlusTreeNode Parent { get; private set; }
public BPlusTreeNode Previous { get; private set; }
public BPlusTreeNode Next { get; private set; }
public bool IsRoot => this.Parent == null;
public bool IsLeaf => this.Children[0] == null;
}
與之前 B-Tree 文章內的範例一樣,我們建立一個 4 階的 B+Tree,索引鍵值分別是 48,62,26,53,35,94,55,79,23,78,8,72,50,67,74,38,84,69,17,96
,當新增 53
時,節點的索引鍵滿了一樣得分割
,不一樣的是中間索引鍵的位置算法是 Math.Floor(索引鍵數量 / 2)
,而且分割之後,會將所選中的中間索引鍵留在葉節點。
internal class BPlusTreeNode
{
// ...
public void Add(int key)
{
this.Insert(key);
this.SplitIfNecessary();
}
private static void SetChild(BPlusTreeNode node, int childIndex, BPlusTreeNode child)
{
if (child != null)
{
child.Parent = node;
}
node.Children[childIndex] = child;
}
private int Insert(int key)
{
var index = 0;
for (; index < this.KeyCount; index++)
{
var comparison = key.CompareTo(this.Keys[index]);
if (comparison == 0) return -1;
if (comparison < 0) break;
}
if (this.KeyCount > 0 && index < this.KeyCount)
{
for (var i = this.KeyCount; i > index; i--)
{
this.Keys[i] = this.Keys[i - 1];
this.Children[i + 1] = this.Children[i];
}
this.Children[index + 1] = this.Children[index];
this.Children[index] = default;
}
this.Keys[index] = key;
this.KeyCount++;
return index;
}
private void SplitIfNecessary()
{
if (this.KeyCount < this.maxKeyLength) return;
var medianIndex = this.KeyCount / 2;
var index = 0;
var leftNode = new BPlusTreeNode(this.maxKeyLength, this);
SetChild(leftNode, 0, this.Children[index]);
for (var i = index; index < medianIndex; index++)
{
leftNode.Keys[i] = this.Keys[index];
leftNode.KeyCount++;
SetChild(leftNode, i + 1, this.Children[index + 1]);
i++;
}
index++;
var rightNode = new BPlusTreeNode(this.maxKeyLength, this);
SetChild(rightNode, 0, this.Children[index]);
for (var i = 0; index < this.KeyCount; index++)
{
rightNode.Keys[i] = this.Keys[index];
rightNode.KeyCount++;
SetChild(rightNode, i + 1, this.Children[index + 1]);
i++;
}
var medianKey = this.Keys[medianIndex];
if (this.IsLeaf)
{
rightNode.Insert(medianKey.Value);
rightNode.Previous = leftNode;
leftNode.Next = rightNode;
if (this.Previous != null)
{
leftNode.Previous = this.Previous;
this.Previous.Next = leftNode;
this.Previous = null;
}
if (this.Next != null)
{
rightNode.Next = this.Next;
this.Next.Previous = rightNode;
this.Next = null;
}
}
if (this.IsRoot)
{
Array.Clear(this.Keys, 0, this.Keys.Length);
Array.Clear(this.Children, 0, this.Children.Length);
this.Keys[0] = medianKey;
this.Children[0] = leftNode;
this.Children[1] = rightNode;
this.KeyCount = 1;
}
else
{
var keyIndexInParent = this.Parent.Insert(medianKey.Value);
SetChild(this.Parent, keyIndexInParent, leftNode);
SetChild(this.Parent, keyIndexInParent + 1, rightNode);
this.Parent.SplitIfNecessary();
}
}
}
新增索引鍵一樣也是滿了就分割,直到將所有索引鍵新增完畢為止。
實作 - 刪除索引鍵
B+Tree 的刪除索引鍵原則上跟 B-Tree 一樣,不同於 B-Tree 的是,B+Tree 一律從葉節點開始刪起,因為所有的索引鍵都存在於葉節點,所以葉節點一定找得到要刪除的索引鍵,如果索引鍵也是非葉節點內的索引鍵,則在升冪排序的情況下,要往右邊找索引鍵來替補內節點空出來的位置。
假定我要刪除 62
這個索引鍵,則葉節點的 62 被刪除後,67
替補上原先 62 在非葉節點的位置。
當索引鍵的個數被刪到小於可允許的最小數量(公式同 B-Tree),處理的邏輯跟 B-Tree 一樣,先找索引鍵個數大於最小數量的左右鄰居來替補,否則進行合併,如果索引鍵也是非葉節點內的索引鍵,則鄰居要去替補空出來的位置。
假定我要刪除 84
,84 被刪除後,從左鄰居抓 78
來替補。
刪除到左右鄰居也無法維持索引鍵最小數量時,則進行合併,假定我要刪除 94
,94 被刪除後,與左鄰居進行合併成一個新節點。
internal class BPlusTreeNode
{
// ...
public void RemoveAt(int index)
{
if (this.IsLeaf)
{
this.DeleteAt(index);
this.MergeIfNecessary();
}
else
{
this.Keys[index] = default;
var rightLeaf = FindRightLeaf();
rightLeaf.DeleteAt(0);
rightLeaf.MergeIfNecessary(this, index);
}
BPlusTreeNode FindRightLeaf()
{
var leaf = this.Children[index + 1];
while (!leaf.IsLeaf)
{
leaf = leaf.Children[0];
}
return leaf;
}
}
private void MergeIfNecessary(BPlusTreeNode internalNode = null, int internalIndex = -1)
{
if (this.IsRoot) return;
if (this.KeyCount >= this.minKeyLength)
{
SetInternalNode(this.Keys[0]);
return;
}
var childIndexInParent = this.FindChildIndexInParent();
var leftSibling = childIndexInParent > 0 ? this.Parent.Children[childIndexInParent - 1] : default;
var rightSibling = childIndexInParent < this.maxKeyLength ? this.Parent.Children[childIndexInParent + 1] : default;
if (leftSibling != null && leftSibling.KeyCount > this.minKeyLength)
{
if (this.KeyCount == 0)
{
this.Children[1] = this.Children[0];
}
if (internalNode == null)
{
var parentKeyIndex = childIndexInParent - 1;
if (this.IsLeaf)
{
this.Insert(leftSibling.Keys[leftSibling.KeyCount - 1].Value);
}
else
{
this.Insert(this.Parent.Keys[parentKeyIndex].Value);
}
this.Parent.Keys[parentKeyIndex] = leftSibling.Keys[leftSibling.KeyCount - 1];
}
else
{
this.Insert(leftSibling.Keys[leftSibling.KeyCount - 1].Value);
SetInternalNode(this.Keys[0]);
}
SetChild(this, 0, leftSibling.Children[leftSibling.KeyCount]);
leftSibling.Children[leftSibling.KeyCount] = default;
leftSibling.DeleteAt(leftSibling.KeyCount - 1);
}
else if (rightSibling != null && rightSibling.KeyCount > this.minKeyLength)
{
var parentKeyIndex = childIndexInParent;
if (internalNode == null)
{
this.Insert(this.Parent.Keys[parentKeyIndex].Value);
if (this.IsLeaf)
{
this.Parent.Keys[parentKeyIndex] = rightSibling.Keys[1];
}
else
{
this.Parent.Keys[parentKeyIndex] = rightSibling.Keys[0];
}
}
else
{
this.Insert(rightSibling.Keys[0].Value);
this.Parent.Keys[parentKeyIndex] = rightSibling.Keys[1];
SetInternalNode(this.Keys[0]);
}
SetChild(this, this.KeyCount, rightSibling.Children[0]);
for (var i = 0; i < rightSibling.KeyCount; i++)
{
rightSibling.Children[i] = rightSibling.Children[i + 1];
}
rightSibling.Children[rightSibling.KeyCount] = rightSibling.Children[rightSibling.KeyCount + 1];
rightSibling.DeleteAt(0);
}
else
{
var parentKeyIndex = leftSibling != null ? childIndexInParent - 1 : childIndexInParent;
var mergedNode = leftSibling ?? this;
var manureNode = mergedNode == this ? rightSibling : this;
if (this.Parent.Keys[parentKeyIndex] != default && this.Parent.Keys[parentKeyIndex] != manureNode.Keys[0])
{
mergedNode.Keys[mergedNode.KeyCount] = this.Parent.Keys[parentKeyIndex];
mergedNode.KeyCount++;
SetChild(mergedNode, mergedNode.KeyCount, manureNode.Children[0]);
}
for (var i = 0; i < manureNode.KeyCount; i++)
{
mergedNode.Keys[mergedNode.KeyCount] = manureNode.Keys[i];
mergedNode.KeyCount++;
SetChild(mergedNode, mergedNode.KeyCount, manureNode.Children[i + 1]);
}
mergedNode.Next = manureNode.Next;
if (mergedNode.Next != null)
{
mergedNode.Next.Previous = mergedNode;
}
SetInternalNode(mergedNode.Keys[0]);
for (var i = parentKeyIndex; i < this.Parent.KeyCount; i++)
{
this.Parent.Children[i] = this.Parent.Children[i + 1];
}
this.Parent.Children[this.Parent.KeyCount] = default;
this.Parent.Children[parentKeyIndex] = mergedNode;
this.Parent.DeleteAt(parentKeyIndex);
if (this.Parent.IsRoot && this.Parent.KeyCount == 0)
{
Array.Clear(this.Parent.Keys, 0, this.Parent.Keys.Length);
Array.Clear(this.Parent.Children, 0, this.Parent.Children.Length);
for (var i = 0; i < mergedNode.KeyCount; i++)
{
this.Parent.Keys[i] = mergedNode.Keys[i];
SetChild(this.Parent, i, mergedNode.Children[i]);
}
SetChild(this.Parent, mergedNode.KeyCount, mergedNode.Children[mergedNode.KeyCount]);
this.Parent.KeyCount = mergedNode.KeyCount;
}
else
{
this.Parent.MergeIfNecessary();
}
}
void SetInternalNode(int? key)
{
if (internalNode == null) return;
internalNode.Keys[internalIndex] = key;
}
}
private static void SetChild(BPlusTreeNode node, int childIndex, BPlusTreeNode child)
{
if (child != null)
{
child.Parent = node;
}
node.Children[childIndex] = child;
}
private int FindChildIndexInParent()
{
for (var i = 0; i < this.Parent.Children.Length; i++)
{
if (this.Parent.Children[i] == this) return i;
}
return -1;
}
private void DeleteAt(int index)
{
this.KeyCount--;
for (var i = index; i < this.KeyCount; i++)
{
this.Keys[i] = this.Keys[i + 1];
}
this.Keys[this.KeyCount] = default;
}
}
範圍搜尋
B+Tree 在範圍搜尋方面比 B-Tree 有優勢,只要掃瞄葉節點就可以將所有的索引鍵巡覽一遍,底下我隨機取 10,000 個 1~99,999 的不重複整數拿來當索引鍵,同時也隨機取 100 個不重複整數拿來當搜尋對象,而搜尋的條件是「找出大於搜尋對象
」的索引鍵,以此情境來與 C# 的 List<T>.Where()
來比比看誰快?
第一次比試,下面是跑 Benchmark 的結果。
從結果看起來 B+Tree 的範圍搜尋還不如 List<T>.Where(),那大費周章建棵 B+Tree 做啥? 其實不然,我們在使用 B+Tree 的時候,要去控制我們樹的高度
,高度也不是愈小愈好,剛剛第一次比試的 B+Tree 是 3 階,10,000 個索引鍵,建出來樹的高度是 9,高度我們可以用 Math.Ceiling(log(總索引鍵數量) ÷ log(階值))
來算個大概,於是我開始逐步提升階值來降低樹的高度,大概降到高度 7 的時候,B+Tree 的搜尋效能就趕上了 List<T>.Where()。
當高度降低到 2
、3
的時候,效能是最好的。
所以說,要使用 B+Tree 來輔助我們做索引鍵搜尋,首先需要了解索引鍵的總數量是多少?在樹的高度限制在 2 或 3 之下,反推階值應該設定多少?隨著索引鍵的數量增加,導致樹的高度增加,樹就需要重建。
沒有最厲害的工具,只有最適當的工具,我們收集各式各樣的工具的同時,也動手把工具拿起來使看看,了解其特性、限制、優缺點,才不會用錯了,以上用 C# 來實作 B+Tree 就分享給大家,希望對大家能夠有一點幫助。
< Source Code >