B-樹
一棵m階的B-樹,或為空樹,或為滿足下列特性的m叉樹:
(1) 樹中每個結點至多有m棵子樹(即至多有m-1個關鍵字)。
(2) 若根節點不是葉子結點,至少有兩棵子樹。
(3) 除根結點之外的所有非終端結點至少有⌈m/2⌉棵子樹(⌈ ⌉表示向上取整)。
(4) 所有葉子結點都在同一層上,即B樹是所有結點的平衡因子均等于0的多路查找樹。
(5) 所有非終端結點中包含下列信息數據(n,A0,K1,A1,K2,A2,...,Kn,An),其中n為關鍵字個數,Ki(1≤i≤n)為關鍵字且Ki<Ki+1,Ai(0≤i≤n)為指向子樹根結點的指針。
/* Capacity of the keys[] and child[] arrays inside every BTNode. */
#define MAXM 10
/* Type of the keys stored in the tree. */
typedef int KeyType;
/* Order of the tree: split() is triggered once a node holds more than m - 1 keys. */
const int m = 5;
/* Lower key-count bound used by the deletion code: a node holding fewer than `min` keys is rebalanced. */
int min = 2;
/**
 * One node of the B-tree: key storage, links to parent and children, and a
 * table of function pointers through which the node's operations are invoked.
 */
typedef struct BTNode
{
/* Number of keys currently stored in keys[]. */
int keynum;
/* Key slots; initBTNode() fills every slot with -1 and removeKey() resets a vacated slot to -1. */
KeyType keys[MAXM];
/* Parent node; initBTNode() sets it to NULL. */
struct BTNode *parent;
/* Child pointers; getChildrenCount() counts entries up to the first NULL. */
struct BTNode *child[MAXM];
/* Inserts key k into node. */
void (*insertKey)(struct BTNode *node,KeyType k);
/* Removes key k from node. */
void (*removeKey)(struct BTNode *node,KeyType k);
/* Returns the number of children of node. */
int (*childrenCount)(struct BTNode *node);
/* Appends child to node's child list and returns node, so calls can be chained. */
struct BTNode * (* addChild)(struct BTNode *node, struct BTNode *child);
/* Inserts child at position pos of node's child list and returns node. */
struct BTNode * (* insertChild)(struct BTNode *node, struct BTNode *child, int pos);
/* Removes child from node's child list; the return value reports whether it was found. */
bool (*removeChild)(struct BTNode *node, struct BTNode *child);
/* Returns the position of `node` among mySelf's children, or -1. */
int (*indexOf)(struct BTNode *mySelf, struct BTNode *node);
}BTNode;
/* Forward declarations of the node helpers defined further down in this file. */
/* Key helpers. */
void insertKey(BTNode *node,KeyType k);
void removeKey(BTNode *node,KeyType k);
/* Child-list helpers. */
int getChildrenCount(BTNode *node);
BTNode* addChild(BTNode *node, BTNode *child);
BTNode * insertChild(BTNode *node, BTNode *child, int pos);
bool removeChild(BTNode *node, BTNode *child);
int indexOf(BTNode *mySelf,BTNode *node);
/* Key lookup inside a single node. */
int binarySearch(BTNode *root,KeyType key);
/**
 * Put a freshly allocated node into the empty state.
 *
 * Sets keynum to 0, fills every key slot with -1, clears the parent and all
 * child pointers, and binds every function-pointer member to the matching
 * file-level function.
 *
 * @param node node to initialise; must not be NULL
 */
void initBTNode(BTNode *node)
{
    node->keynum = 0;
    node->parent = NULL;
    for (int i = 0; i < MAXM; i++) {
        node->keys[i] = -1;
        node->child[i] = NULL;
    }

    /* Bind every method pointer: the node comes straight from malloc(), so
     * any member left out here would hold an indeterminate value. */
    node->insertKey = insertKey;
    node->removeKey = removeKey;
    node->childrenCount = getChildrenCount;
    node->addChild = addChild;
    node->insertChild = insertChild;
    node->removeChild = removeChild;
    node->indexOf = indexOf;
}
/**
 * Insert key k into node->keys, keeping the keys in ascending order.
 *
 * The insertion slot is the first position whose key is not smaller than k,
 * so a key equal to an existing one is placed in front of it. Nothing is
 * inserted when all MAXM key slots are already in use.
 *
 * @param node node that receives the key
 * @param k    key to insert
 */
void insertKey(BTNode *node, KeyType k)
{
    if (node->keynum >= MAXM) {
        return; /* no free slot: writing would run past keys[] */
    }

    /* Lower-bound binary search over [0, keynum). It also terminates for an
     * empty node and for a key that is already present. */
    int low = 0;
    int high = node->keynum;
    while (low < high) {
        int mid = low + (high - low) / 2;
        if (node->keys[mid] < k) {
            low = mid + 1;
        } else {
            high = mid;
        }
    }

    /* Open a gap at the insertion slot. */
    for (int i = node->keynum; i > low; i--) {
        node->keys[i] = node->keys[i - 1];
    }
    node->keys[low] = k;
    node->keynum++; /* counted on every path, including the first key */
}
/**
 * Remove key k from node->keys and close the gap it leaves.
 *
 * The node is left untouched when k is not stored in it.
 *
 * @param node node to remove the key from
 * @param k    key to remove
 */
void removeKey(BTNode *node, KeyType k)
{
    int keyIndex = binarySearch(node, k);

    /* Accept the index only if it really addresses k; any other value means
     * the key is absent and must not be used as an array index. */
    if (keyIndex < 0 || keyIndex >= node->keynum || node->keys[keyIndex] != k) {
        return;
    }

    /* Stop one short of the end so keys[j + 1] stays inside the used range. */
    for (int j = keyIndex; j < node->keynum - 1; j++) {
        node->keys[j] = node->keys[j + 1];
    }
    node->keys[node->keynum - 1] = -1; /* -1 marks an unused slot */
    node->keynum--;
}
/**
 * Count the children of a node.
 *
 * Children occupy the leading slots of node->child; the count is the index
 * of the first NULL slot, or MAXM when no slot is NULL.
 *
 * @param node node whose children are counted
 * @return number of leading non-NULL child pointers
 */
int getChildrenCount(BTNode *node)
{
    int count = 0;
    while (count < MAXM && node->child[count] != NULL) {
        count++;
    }
    return count;
}
/**
 * Append child after the last existing child of node.
 *
 * The child is stored in the first NULL slot of node->child. When all MAXM
 * slots are occupied the child is not stored. The child's parent pointer is
 * not modified here.
 *
 * @param node  node that receives the child
 * @param child child to append
 * @return node, so that calls can be chained
 */
BTNode* addChild(BTNode *node, BTNode *child)
{
    int slot = 0;
    while (slot < MAXM && node->child[slot] != NULL) {
        slot++;
    }
    if (slot < MAXM) { /* slot == MAXM would write one past child[] */
        node->child[slot] = child;
    }
    return node;
}
/**
 * Insert child at position pos of node's child list.
 *
 * Children at pos and beyond move one slot towards the end. The number of
 * children to move is taken from the child list itself rather than from
 * keynum, so the last child is preserved whether or not the matching key has
 * been inserted yet. The call is ignored when pos lies outside
 * [0, child count] or when the child list is full. The child's parent
 * pointer is not modified here.
 *
 * @param node  node that receives the child
 * @param child child to insert
 * @param pos   target index in node->child
 * @return node, so that calls can be chained
 */
BTNode* insertChild(BTNode *node, BTNode *child, int pos)
{
    int count = 0;
    while (count < MAXM && node->child[count] != NULL) {
        count++;
    }
    if (pos < 0 || pos > count || count >= MAXM) {
        return node;
    }

    for (int i = count; i > pos; i--) {
        node->child[i] = node->child[i - 1];
    }
    node->child[pos] = child;
    return node;
}
/**
 * Remove child from node's child list and close the gap.
 *
 * The search covers every stored child (up to the first NULL slot) instead
 * of keynum + 1 slots, so the last child is still found when the caller has
 * already removed a key from the node.
 *
 * @param node  node whose child list is modified
 * @param child child to remove
 * @return true if child was found and removed; false otherwise (including
 *         when child is NULL)
 */
bool removeChild(BTNode *node, BTNode *child)
{
    if (child == NULL) {
        return false;
    }

    int count = 0;
    while (count < MAXM && node->child[count] != NULL) {
        count++;
    }

    int at = 0;
    while (at < count && node->child[at] != child) {
        at++;
    }
    if (at == count) {
        return false;
    }

    for (int j = at; j < count - 1; j++) {
        node->child[j] = node->child[j + 1];
    }
    node->child[count - 1] = NULL;
    return true;
}
/**
 * Find the position of a node among the children of mySelf.
 *
 * @param mySelf node whose child list is searched
 * @param node   child to look for
 * @return index of node in mySelf->child, or -1 if it is not a child
 */
int indexOf(BTNode *mySelf, BTNode *node)
{
    const int total = mySelf->childrenCount(mySelf);
    for (int pos = 0; pos < total; pos++) {
        if (mySelf->child[pos] == node) {
            return pos;
        }
    }
    return -1;
}
1. 查找
/**
 * Binary-search the keys of a single node.
 *
 * @param root node whose keys[0 .. keynum-1] are searched (ascending order)
 * @param key  key to look for
 * @return the index of key (>= 0) when it is stored in the node; otherwise
 *         -(insertion point) - 1, which is always negative. For a miss,
 *         -result - 1 is the slot where key would be inserted, which is also
 *         the index of the child subtree that could contain it. Encoding the
 *         miss this way keeps "hit at index 0" and "miss before index 0"
 *         distinguishable.
 */
int binarySearch(BTNode *root, KeyType key)
{
    int left = 0;
    int right = root->keynum - 1;

    while (left <= right) {
        int mid = left + (right - left) / 2;
        if (key < root->keys[mid]) {
            right = mid - 1;
        } else if (key > root->keys[mid]) {
            left = mid + 1;
        } else {
            return mid;
        }
    }
    /* left is now the first index whose key is greater than `key`. */
    return -(left + 1);
}
/**
 * Find the node that stores key, starting at root and walking down.
 *
 * In every visited node the first slot whose key is not smaller than `key`
 * is located. If that slot holds the key, the node is returned; otherwise
 * the search continues in the child with the same index. The slot is
 * computed here directly, so a key stored at index 0 is recognised as a hit.
 *
 * @param root root of the (sub)tree to search; may be NULL
 * @param key  key to look for
 * @return the node containing key, or NULL if the key is not in the tree
 */
BTNode* searchNode(BTNode *root, KeyType key)
{
    BTNode *cur = root;

    while (cur != NULL) {
        /* A node holds at most MAXM keys, so a linear scan is adequate. */
        int slot = 0;
        while (slot < cur->keynum && key > cur->keys[slot]) {
            slot++;
        }
        if (slot < cur->keynum && cur->keys[slot] == key) {
            return cur;
        }
        /* In a leaf the child pointer is NULL, which ends the loop. */
        cur = (slot < MAXM) ? cur->child[slot] : NULL;
    }
    return NULL;
}
2. 插入
對于新元素的插入,都是發生在葉子節點上的。所有的插入操作都是從根節點開始,搜索這棵樹,并找到該元素應該被插入的節點。將新元素插入到該節點需要如下步驟:
- 如果該節點上的元素數未滿,則將新元素插入到該節點,并保持節點中元素的順序。
- 如果該節點上的元素已滿,則需要將該節點平均地分裂成兩個節點:
  - 從該節點中的元素和新元素中選出一個中位數
  - 小于中位數的元素放到左邊節點,大于中位數的元素放到右邊節點,中位數做為分隔值。
  - 分隔值被插入到父節點中,這可能會導致父節點的分裂,分裂父節點時又可能會使它的父節點分裂,以此類推。如果分裂一直上升到根節點,那么就創建一個新的根節點(此時樹的高度增加一層),它有一個分隔值和兩個子節點。(這就是根節點并不像內部節點一樣有最少子節點數量限制的原因)
下圖是一個5階B樹,我們通過順序插入1到17,來觀察節點的分裂過程。
void insertNodeKey(BTNode *node,KeyType key);

/**
 * Insert key into the tree whose root pointer is stored in *root.
 *
 * An empty tree is represented by *root == NULL; in that case a new node is
 * allocated, initialised, given the key and stored in *root. Otherwise the
 * insertion is delegated to insertNodeKey().
 *
 * @param root address of the tree's root pointer; the call is ignored when
 *             it is NULL
 * @param key  key to insert
 *
 * If the first node cannot be allocated the tree is left empty.
 */
void insert(BTNode **root, KeyType key)
{
    if (root == NULL) {
        return;
    }
    if (*root != NULL) {
        insertNodeKey(*root, key);
        return;
    }

    BTNode *node = malloc(sizeof *node);
    if (node == NULL) {
        return; /* out of memory: nothing was changed */
    }
    initBTNode(node);
    node->insertKey(node, key);
    *root = node;
}
/**
 * Copy the keys fromNode->keys[begin .. end-1] to the front of node->keys
 * and set node->keynum to end - begin.
 *
 * @param node     destination node; its leading key slots are overwritten
 * @param fromNode source node
 * @param begin    first source index (inclusive)
 * @param end      last source index (exclusive)
 */
void addKeys(BTNode *node, BTNode *fromNode, int begin, int end)
{
    int dst = 0;
    for (int src = begin; src < end; src++, dst++) {
        node->keys[dst] = fromNode->keys[src];
    }
    node->keynum = end - begin;
}
/**
 * Copy the child pointers fromNode->child[begin .. end-1] to the front of
 * node->child and make node the parent of each copied child.
 *
 * Every child in the source range is dereferenced, so none may be NULL.
 *
 * @param node     destination node; its leading child slots are overwritten
 * @param fromNode source node
 * @param begin    first source index (inclusive)
 * @param end      last source index (exclusive)
 */
void addChildren(BTNode *node, BTNode *fromNode, int begin, int end)
{
    int src = begin;
    while (src < end) {
        BTNode *moved = fromNode->child[src];
        node->child[src - begin] = moved;
        moved->parent = node;
        src++;
    }
}
/**
 * Split an over-full node around its median key.
 *
 * The keys left of the median go to a new left node, the keys right of it to
 * a new right node, and the median becomes the separator between the two.
 * A node with n keys and n + 1 children hands children [0, mid] to the left
 * node and children [mid + 1, n] to the right node.
 *
 *  - When node is the root (no parent) it is reused in place as the new root
 *    holding only the separator and the two halves, so a root pointer held
 *    by the caller stays valid.
 *  - Otherwise the separator and the two halves replace node inside its
 *    parent, node is released with free(), and the parent is split in turn
 *    if it now holds more than m - 1 keys.
 *
 * If memory for the two halves cannot be allocated, or node is not linked
 * under its parent, the tree is left unchanged.
 *
 * @param node node to split; must not be used afterwards unless it was the
 *             root
 */
void split(BTNode *node)
{
    const int total = node->keynum;
    if (total < 1) {
        return;
    }
    const int mid = total / 2;
    const KeyType sepKey = node->keys[mid]; /* separator value */

    /* Resolve the position inside the parent before anything is modified. */
    BTNode *parent = node->parent;
    int pos = 0;
    if (parent != NULL) {
        pos = parent->indexOf(parent, node);
        /* The parent needs room for one more key and one more child. */
        if (pos < 0 || parent->keynum + 2 > MAXM) {
            return;
        }
    }

    BTNode *leftNode = malloc(sizeof *leftNode);
    BTNode *rightNode = malloc(sizeof *rightNode);
    if (leftNode == NULL || rightNode == NULL) {
        free(leftNode);
        free(rightNode);
        return;
    }
    initBTNode(leftNode);
    initBTNode(rightNode);

    /* Distribute the keys around the separator. */
    addKeys(leftNode, node, 0, mid);
    addKeys(rightNode, node, mid + 1, total);

    /* Distribute the children: one more child than keys on each side. */
    const int childCount = node->childrenCount(node);
    if (childCount > 0) {
        const int leftEnd = childCount < mid + 1 ? childCount : mid + 1;
        addChildren(leftNode, node, 0, leftEnd);
        addChildren(rightNode, node, mid + 1, childCount);
    }

    if (parent == NULL) {
        /* The current node is the root: turn it into the new root. */
        for (int i = 0; i < MAXM; i++) {
            node->keys[i] = -1;
            node->child[i] = NULL;
        }
        node->keys[0] = sepKey;
        node->keynum = 1;
        node->child[0] = leftNode;
        node->child[1] = rightNode;
        leftNode->parent = node;
        rightNode->parent = node;
        return;
    }

    /* Open one key slot at pos and one child slot at pos + 1 in the parent. */
    for (int i = parent->keynum; i > pos; i--) {
        parent->keys[i] = parent->keys[i - 1];
        parent->child[i + 1] = parent->child[i];
    }
    parent->keys[pos] = sepKey;
    parent->child[pos] = leftNode; /* takes the slot node occupied */
    parent->child[pos + 1] = rightNode;
    parent->keynum++;
    leftNode->parent = parent;
    rightNode->parent = parent;

    free(node); /* fully replaced by the two halves */

    if (parent->keynum > m - 1) {
        split(parent);
    }
}
/**
 * Insert key into the subtree rooted at node.
 *
 * The tree is descended until a leaf is reached. The key is inserted there,
 * and the leaf is split when it then holds more than m - 1 keys. A key that
 * is already stored in a visited node is not inserted again.
 *
 * @param node root of the subtree to insert into
 * @param key  key to insert
 */
void insertNodeKey(BTNode *node, KeyType key)
{
    while (node != NULL) {
        /* First slot whose key is not smaller than `key`. */
        int slot = 0;
        while (slot < node->keynum && key > node->keys[slot]) {
            slot++;
        }
        if (slot < node->keynum && node->keys[slot] == key) {
            return; /* already present */
        }

        /* The current node is a leaf: childrenCount must be CALLED here;
         * comparing the function pointer itself with 0 is never true. */
        if (node->childrenCount(node) == 0) {
            node->insertKey(node, key);
            if (node->keynum > m - 1) {
                /* The leaf overflowed: split it. */
                split(node);
            }
            return;
        }

        /* Internal node: continue in the child that covers the key. */
        node = (slot < MAXM) ? node->child[slot] : NULL;
    }
}
3. 刪除
B樹的刪除就復雜了許多,可分為下面幾種情況:
- 刪除葉子結點的元素:
(1) 搜索要刪除的元素
(2) 如果它在葉子節點上,直接將其刪除
(3) 如果刪除后產生了下溢出(鍵數小于最小值 ⌈m/2⌉-1),則向其兄弟節點借元素。如果兄弟結點夠借(鍵數大于最小值),即將其父節點元素下移至當前節點,將兄弟節點中元素上移至父節點(若是左節點,上移最大元素;若是右節點,上移最小元素)
(4) 若兄弟節點也達到下限,即不夠借,則合并兄弟節點與分割鍵(父結點分割鍵下移到當前結點刪除的位置,然后和兄弟結點合并)。
下圖是一個5階B樹,我們通過刪除15、14、17、5四個鍵,來觀察刪除過程(基本涵蓋所有情況)。
- 刪除內部結點:
內部節點中元素為其左右子節點的分割值,需要從左子樹中找最大的key或右子樹中找最小的key來代替被刪除的key元素。從而轉化為刪除葉子結點上的元素
下面以3階的B-樹刪除內部結點為例進行演示:
/* Forward declarations of the deletion helpers defined further down in this file. */
/* Rebalances `node` after a key was removed from it. */
void afterDeleteHandle(BTNode **root,BTNode *node,int deleteIndex);
/* Merges `node` with `brotherNode` around the parent key at parentKeyIndex. */
void merge(BTNode **root,BTNode *node, BTNode *brotherNode, int parentKeyIndex);
/* Borrows a key from the right sibling through the parent. */
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode);
/* Borrows a key from the left sibling through the parent. */
void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode);
/**
 * Find the node holding the largest key of the subtree node->child[index].
 *
 * Starting at node->child[index], the last child is followed repeatedly
 * until a node without children is reached.
 *
 * @param node  node whose child subtree is searched
 * @param index index of the child subtree in node->child
 * @return the right-most leaf of that subtree, or NULL when
 *         node->child[index] is NULL
 */
BTNode* findLeftChildMaxKeyNode(BTNode *node, int index)
{
    BTNode *cur = node->child[index];

    while (cur != NULL) {
        int count = cur->childrenCount(cur);
        if (count == 0) {
            break; /* leaf reached; child[count - 1] would be child[-1] */
        }
        cur = cur->child[count - 1];
    }
    return cur;
}
/**
 * Find the node holding the smallest key of the subtree
 * node->child[index + 1].
 *
 * Starting at node->child[index + 1], the first child is followed until a
 * node whose first child is NULL is reached.
 *
 * @param node  node whose child subtree is searched
 * @param index index of the key whose right subtree is searched
 * @return the left-most node of that subtree, or NULL when
 *         node->child[index + 1] is NULL
 */
BTNode* findRightChildMinKeyNode(BTNode *node, int index)
{
    BTNode *cur = node->child[index + 1];
    if (cur == NULL) {
        return NULL;
    }
    while (cur->child[0] != NULL) {
        cur = cur->child[0];
    }
    return cur;
}
/**
 * Delete key from the tree whose root pointer is stored in *root.
 *
 *  - A key stored in a leaf is removed from that leaf.
 *  - A key stored in an internal node is overwritten with its in-order
 *    predecessor (the largest key of the left subtree), and the predecessor
 *    is removed from its leaf. The internal node therefore keeps its key
 *    count and child count in step.
 *
 * In both cases only a leaf loses a key. If that leaf is the root it may
 * hold any number of keys; when it becomes empty it is freed and *root is
 * set to NULL. Any other leaf that drops below `min` keys is rebalanced by
 * afterDeleteHandle().
 *
 * @param root address of the tree's root pointer
 * @param key  key to delete; the call is a no-op if it is not in the tree
 */
void delete(BTNode **root, KeyType key)
{
    if (root == NULL || *root == NULL) {
        return;
    }
    BTNode *node = searchNode(*root, key);
    if (node == NULL) {
        return;
    }

    /* Use the index only if it really addresses the key. */
    int keyIndex = binarySearch(node, key);
    if (keyIndex < 0 || keyIndex >= node->keynum || node->keys[keyIndex] != key) {
        return;
    }

    BTNode *leaf = node;
    if (node->childrenCount(node) > 0) {
        /* Internal node: walk to the right-most leaf of the left subtree. */
        leaf = node->child[keyIndex];
        while (leaf != NULL && leaf->childrenCount(leaf) > 0) {
            leaf = leaf->child[leaf->childrenCount(leaf) - 1];
        }
        if (leaf == NULL || leaf->keynum == 0) {
            return; /* malformed subtree: leave the tree untouched */
        }
        KeyType predecessor = leaf->keys[leaf->keynum - 1];
        node->keys[keyIndex] = predecessor;
        leaf->removeKey(leaf, predecessor);
    } else {
        leaf->removeKey(leaf, key);
    }

    if (leaf->parent == NULL) {
        /* The leaf is the root: no minimum applies. */
        if (leaf->keynum == 0) {
            free(leaf);
            *root = NULL;
        }
        return;
    }
    if (leaf->keynum < min) {
        /* -1: no key slot to refill, only sibling rebalancing is needed. */
        afterDeleteHandle(root, leaf, -1);
    }
}
/**
 * Restore the B-tree invariants after node lost a key.
 *
 * Two situations are handled:
 *
 *  1. node is an internal node and deleteIndex >= 0: the key that used to
 *     sit at deleteIndex is replaced by the largest key of the left subtree
 *     (or, failing that, the smallest key of the right subtree). The donor
 *     leaf is then rebalanced if it dropped below `min` keys, and the
 *     function returns, because node itself has its key count back.
 *  2. Otherwise node is short of keys: it borrows a key from the sibling
 *     with more keys (rotation) when that sibling has more than `min` keys,
 *     or is merged with a sibling when neither can lend.
 *
 * The root (a node without parent) is allowed to hold fewer than `min` keys
 * and is left as it is.
 *
 * @param root        address of the tree's root pointer
 * @param node        node that lost a key
 * @param deleteIndex index the removed key had in node, or a negative value
 *                    when only sibling rebalancing is wanted
 */
void afterDeleteHandle(BTNode **root, BTNode *node, int deleteIndex)
{
    if (node == NULL) {
        return;
    }

    /* Internal node: turn the deletion into a deletion from a leaf, taking
     * either the largest key of the left subtree or the smallest key of the
     * right subtree. */
    if (deleteIndex >= 0 && node->childrenCount(node) > 0) {
        int donorIndex;
        BTNode *donor = findLeftChildMaxKeyNode(node, deleteIndex);
        if (donor != NULL && donor->keynum > 0) {
            donorIndex = donor->keynum - 1;
        } else {
            donor = findRightChildMinKeyNode(node, deleteIndex);
            if (donor == NULL || donor->keynum == 0) {
                return; /* nothing to borrow from */
            }
            donorIndex = 0;
        }

        KeyType borrowed = donor->keys[donorIndex];
        node->insertKey(node, borrowed);
        donor->removeKey(donor, borrowed);
        if (donor->keynum < min) {
            afterDeleteHandle(root, donor, -1);
        }
        /* node may have been merged away by the call above; do not touch
         * it again. */
        return;
    }

    /* Leaf (or pure rebalancing request): look at the siblings. */
    BTNode *parentNode = node->parent;
    if (parentNode == NULL) {
        return; /* the root has no siblings and no minimum */
    }

    /* Position of the current node inside its parent. */
    int nodeIndex = parentNode->indexOf(parentNode, node);
    if (nodeIndex < 0) {
        return;
    }
    int siblingTotal = parentNode->childrenCount(parentNode);

    /* Left sibling. */
    BTNode *leftNode = nodeIndex > 0 ? parentNode->child[nodeIndex - 1] : NULL;
    /* Right sibling. */
    BTNode *rightNode = nodeIndex + 1 < siblingTotal ? parentNode->child[nodeIndex + 1] : NULL;

    int leftKeyCount = leftNode == NULL ? 0 : leftNode->keynum;
    int rightKeyCount = rightNode == NULL ? 0 : rightNode->keynum;
    int maxCount = leftKeyCount > rightKeyCount ? leftKeyCount : rightKeyCount;

    /* Neither sibling can spare a key: merge. */
    if (maxCount <= min) {
        if (leftNode != NULL) {
            /* Merge with the left sibling. */
            merge(root, node, leftNode, nodeIndex - 1);
        } else if (rightNode != NULL) {
            /* Merge with the right sibling. */
            merge(root, node, rightNode, nodeIndex);
        }
        return;
    }

    /* Borrow from the sibling that holds more keys. */
    if (leftNode != NULL && maxCount == leftKeyCount) {
        /* Borrow from the left sibling -> right rotation. */
        rightRotate(node, nodeIndex, leftNode);
    } else {
        /* Borrow from the right sibling -> left rotation. */
        leftRotate(node, nodeIndex, rightNode);
    }
}
/**
 * Merge node with its sibling brotherNode around the parent key at
 * parentKeyIndex.
 *
 * The two nodes must be the parent's children at parentKeyIndex and
 * parentKeyIndex + 1 (in either order). The left one absorbs, in order, the
 * separator key from the parent and then all keys and children of the right
 * one. The separator and the right child pointer are removed from the
 * parent, and the right node is released with free().
 *
 * If the parent is the root and ends up without keys, the merged node
 * becomes the new root (*root is updated, its parent is cleared) and the old
 * root is freed. Otherwise a non-root parent that dropped below `min` keys
 * is rebalanced through afterDeleteHandle().
 *
 * Nothing is changed when the arguments do not describe two adjacent
 * children or when the merged node would not fit into MAXM slots.
 *
 * @param root           address of the tree's root pointer
 * @param node           node that is short of keys
 * @param brotherNode    adjacent sibling to merge with
 * @param parentKeyIndex index of the separator key in the parent
 */
void merge(BTNode **root, BTNode *node, BTNode *brotherNode, int parentKeyIndex)
{
    if (node == NULL || brotherNode == NULL) {
        return;
    }
    BTNode *parentNode = node->parent;
    if (parentNode == NULL ||
        parentKeyIndex < 0 || parentKeyIndex >= parentNode->keynum ||
        parentKeyIndex + 1 >= MAXM) {
        return;
    }

    /* Work out which of the two nodes is the left one. */
    BTNode *leftNode = parentNode->child[parentKeyIndex];
    BTNode *rightNode = parentNode->child[parentKeyIndex + 1];
    int ordered = (leftNode == brotherNode && rightNode == node);
    int swapped = (leftNode == node && rightNode == brotherNode);
    if (!ordered && !swapped) {
        return;
    }

    int leftChildren = leftNode->childrenCount(leftNode);
    int rightChildren = rightNode->childrenCount(rightNode);
    if (leftNode->keynum + 1 + rightNode->keynum > MAXM - 1 ||
        leftChildren + rightChildren > MAXM) {
        return;
    }

    /* Keys: left keys, then the separator borrowed from the parent, then
     * the right keys. Appending keeps them in ascending order. */
    leftNode->keys[leftNode->keynum++] = parentNode->keys[parentKeyIndex];
    for (int i = 0; i < rightNode->keynum; i++) {
        leftNode->keys[leftNode->keynum++] = rightNode->keys[i];
    }

    /* Children of the right node follow the children of the left node. */
    for (int i = 0; i < rightChildren; i++) {
        BTNode *moved = rightNode->child[i];
        leftNode->child[leftChildren + i] = moved;
        moved->parent = leftNode;
    }

    /* Drop the separator and the right child pointer from the parent. */
    for (int i = parentKeyIndex; i < parentNode->keynum - 1; i++) {
        parentNode->keys[i] = parentNode->keys[i + 1];
        parentNode->child[i + 1] = parentNode->child[i + 2];
    }
    parentNode->keys[parentNode->keynum - 1] = -1;
    parentNode->child[parentNode->keynum] = NULL;
    parentNode->keynum--;

    free(rightNode);

    /* The root lost its last key: the merged node is the new root. */
    if (root != NULL && parentNode == *root && parentNode->keynum == 0) {
        leftNode->parent = NULL;
        *root = leftNode;
        free(parentNode);
        return;
    }

    /* After the merge the parent may itself be short of keys. The root is
     * exempt from the minimum, so only non-root parents are rebalanced. */
    if (parentNode->parent != NULL && parentNode->keynum < min) {
        afterDeleteHandle(root, parentNode, -1);
    }
}
/**
 * Left rotation: borrow one key from the right sibling through the parent.
 *
 * (1) The separator parent->keys[nodeIndex] moves down and becomes the last
 *     key of node.
 * (2) The smallest key of the right sibling moves up and takes the
 *     separator's place in the parent.
 * (3) If the nodes are internal, the first child of the right sibling
 *     becomes the last child of node and its parent pointer is updated.
 *
 * Nothing is changed when node has no parent, the sibling has no key to
 * give, nodeIndex does not address a separator, or node has no free slot.
 *
 * @param node      node that is short of keys
 * @param nodeIndex index of node among its parent's children; this is also
 *                  the index of the separator between node and rightNode
 * @param rightNode right sibling of node
 */
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode)
{
    BTNode *parentNode = node->parent;
    if (parentNode == NULL || rightNode == NULL || rightNode->keynum == 0) {
        return;
    }
    if (nodeIndex < 0 || nodeIndex >= parentNode->keynum) {
        return;
    }

    /* Child count of node before it gains a key. */
    int nodeChildren = node->childrenCount(node);
    if (node->keynum >= MAXM - 1 || nodeChildren >= MAXM) {
        return;
    }

    /* (1) The separator is larger than every key in node: append it. */
    node->keys[node->keynum++] = parentNode->keys[nodeIndex];

    /* (2) Smallest key of the right sibling replaces the separator. */
    parentNode->keys[nodeIndex] = rightNode->keys[0];
    for (int i = 0; i < rightNode->keynum - 1; i++) {
        rightNode->keys[i] = rightNode->keys[i + 1];
    }
    rightNode->keys[rightNode->keynum - 1] = -1;
    rightNode->keynum--;

    /* (3) Move the matching subtree; leaves have none. */
    BTNode *moved = rightNode->child[0];
    if (moved != NULL) {
        int rightChildren = rightNode->childrenCount(rightNode);
        for (int i = 0; i < rightChildren - 1; i++) {
            rightNode->child[i] = rightNode->child[i + 1];
        }
        rightNode->child[rightChildren - 1] = NULL;

        node->child[nodeChildren] = moved;
        moved->parent = node;
    }
}
/**
 * Right rotation: borrow one key from the left sibling through the parent.
 *
 * (1) The separator parent->keys[nodeIndex - 1] moves down and becomes the
 *     first key of node.
 * (2) The largest key of the left sibling moves up and takes the
 *     separator's place in the parent.
 * (3) If the nodes are internal, the last child of the left sibling becomes
 *     the first child of node and its parent pointer is updated.
 *
 * Nothing is changed when node has no parent, the sibling has no key to
 * give, nodeIndex does not address a separator, or node has no free slot.
 *
 * @param node      node that is short of keys
 * @param nodeIndex index of node among its parent's children; the separator
 *                  between leftNode and node is parent->keys[nodeIndex - 1]
 * @param leftNode  left sibling of node
 */
void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode)
{
    BTNode *parentNode = node->parent;
    if (parentNode == NULL || leftNode == NULL || leftNode->keynum == 0) {
        return;
    }
    if (nodeIndex < 1 || nodeIndex - 1 >= parentNode->keynum) {
        return;
    }

    /* Child count of node before it gains a key. */
    int nodeChildren = node->childrenCount(node);
    if (node->keynum >= MAXM - 1 || nodeChildren >= MAXM) {
        return;
    }

    /* (1) The separator is smaller than every key in node: put it first. */
    for (int i = node->keynum; i > 0; i--) {
        node->keys[i] = node->keys[i - 1];
    }
    node->keys[0] = parentNode->keys[nodeIndex - 1];
    node->keynum++;

    /* (2) Largest key of the left sibling replaces the separator. */
    parentNode->keys[nodeIndex - 1] = leftNode->keys[leftNode->keynum - 1];
    leftNode->keys[leftNode->keynum - 1] = -1;
    leftNode->keynum--;

    /* (3) Move the matching subtree; a leaf has no children, so the child
     * count is checked before indexing with count - 1. */
    int leftChildren = leftNode->childrenCount(leftNode);
    if (leftChildren > 0) {
        BTNode *moved = leftNode->child[leftChildren - 1];
        leftNode->child[leftChildren - 1] = NULL;

        for (int i = nodeChildren; i > 0; i--) {
            node->child[i] = node->child[i - 1];
        }
        node->child[0] = moved;
        moved->parent = node;
    }
}