-
Notifications
You must be signed in to change notification settings - Fork 238
/
Copy pathrax.c
2312 lines (2153 loc) · 107 KB
/
rax.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Rax -- A radix tree implementation.
*
* Version 1.2 -- 7 February 2019
*
* Copyright (c) 2017-2019, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <errno.h>
#include <math.h>
#include "rax.h"
#ifndef RAX_MALLOC_INCLUDE
#define RAX_MALLOC_INCLUDE "rax_malloc.h"
#endif
#include RAX_MALLOC_INCLUDE
/* This is a special pointer that is guaranteed to never have the same value
* of a radix tree node. It's used in order to report "not found" error without
* requiring the function to have multiple return values. */
void *raxNotFound = (void*)"rax-not-found-pointer";
/* -------------------------------- Debugging ------------------------------ */
void raxDebugShowNode(const char *msg, raxNode *n);
/* Turn debugging messages on/off by compiling with RAX_DEBUG_MSG macro on.
* When RAX_DEBUG_MSG is defined by default Rax operations will emit a lot
* of debugging info to the standard output, however you can still turn
* debugging on/off in order to enable it only when you suspect there is an
* operation causing a bug using the function raxSetDebugMsg(). */
/* 开启 rax debug
#define RAX_DEBUG_MSG
*/
#ifdef RAX_DEBUG_MSG
#define debugf(...) \
if (raxDebugMsg) { \
printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \
printf(__VA_ARGS__); \
fflush(stdout); \
}
#define debugnode(msg,n) raxDebugShowNode(msg,n)
#else
#define debugf(...)
#define debugnode(msg,n)
#endif
/* By default log debug info if RAX_DEBUG_MSG is defined. */
static int raxDebugMsg = 1;
/* When debug messages are enabled, turn them on/off dynamically. By
* default they are enabled. Set the state to 0 to disable, and 1 to
* re-enable. */
void raxSetDebugMsg(int onoff) {
raxDebugMsg = onoff;
}
/* ------------------------- raxStack functions --------------------------
* The raxStack is a simple stack of pointers that is capable of switching
* from using a stack-allocated array to dynamic heap once a given number of
* items are reached. It is used in order to retain the list of parent nodes
* while walking the radix tree in order to implement certain operations that
* need to navigate the tree upward.
* ------------------------------------------------------------------------- */
/* Initialize the stack. */
static inline void raxStackInit(raxStack *ts) {
ts->stack = ts->static_items;
ts->items = 0;
ts->maxitems = RAX_STACK_STATIC_ITEMS;
ts->oom = 0;
}
/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
static inline int raxStackPush(raxStack *ts, void *ptr) {
if (ts->items == ts->maxitems) {
if (ts->stack == ts->static_items) {
ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2);
if (ts->stack == NULL) {
ts->stack = ts->static_items;
ts->oom = 1;
errno = ENOMEM;
return 0;
}
memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
} else {
void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
if (newalloc == NULL) {
ts->oom = 1;
errno = ENOMEM;
return 0;
}
ts->stack = newalloc;
}
ts->maxitems *= 2;
}
ts->stack[ts->items] = ptr;
ts->items++;
return 1;
}
/* Pop an item from the stack, the function returns NULL if there are no
* items to pop. */
static inline void *raxStackPop(raxStack *ts) {
if (ts->items == 0) return NULL;
ts->items--;
return ts->stack[ts->items];
}
/* Return the stack item at the top of the stack without actually consuming
* it. */
static inline void *raxStackPeek(raxStack *ts) {
if (ts->items == 0) return NULL;
return ts->stack[ts->items-1];
}
/* Free the stack in case we used heap allocation. */
static inline void raxStackFree(raxStack *ts) {
if (ts->stack != ts->static_items) rax_free(ts->stack);
}
/* ----------------------------------------------------------------------------
* Radix tree implementation
* --------------------------------------------------------------------------*/
/* Return the padding needed in the characters section of a node having size
* 'nodesize'. The padding is needed to store the child pointers to aligned
* addresses. Note that we add 4 to the node size because the node has a four
* bytes header. */
/* 计算指针对齐最少需要填充的字节数量,对齐时需要将 raxNode 4 bytes 的 header 考虑在内。
* 设输入值(入参)为 n,输出值(返回值)为 x,x 为满足 (n+x+4) % size(void*) == 0 条件的最小值
* 32 位系统表现: (4 - (n+4)%4) & 3
* 64 位系统表现: (8 - (n+4)%8) & 7
* 这个填充恰好使包含 data 数组在内的节点长度为 4 或者 8 的倍数 */
#define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1))
/* Return the pointer to the last child pointer in a node. For the compressed
* nodes this is the only child pointer. */
/* 返回最后一个子节点指针的地址
* 直接以 raxNode 外第一个字节为起点,如果它是一个 key,左移两个指针长度,如果它不是一个 key,左移一个指针长度 */
#define raxNodeLastChildPtr(n) ((raxNode**) ( \
((char*)(n)) + \
raxNodeCurrentLength(n) - \
sizeof(raxNode*) - \
(((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \
))
/* Return the pointer to the first child pointer. */
/* 返回第一个子节点指针的地址
* 以 data 数组为起点,右移 size(子节点的数量或者压缩序列的长度) 个字节,再跨过填充的字节数量 */
#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
(n)->data + \
(n)->size + \
raxPadding((n)->size)))
/* Return the current total size of the node. Note that the second line
* computes the padding after the string of characters, needed in order to
* save pointers to aligned addresses. */
/* 返回包含 raxNode header 和 柔性数组 raxNode->data 在内的总长度
* 因为 raxPadding 填充的原因,这个长度一定是 4(32位) 或者 8(64位) 的倍数 */
#define raxNodeCurrentLength(n) ( \
sizeof(raxNode)+(n)->size+ \
raxPadding((n)->size)+ \
((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
(((n)->iskey && !(n)->isnull)*sizeof(void*)) \
)
/* Allocate a new non compressed node with the specified number of children.
* If datafield is true, the allocation is made large enough to hold the
* associated data pointer.
* Returns the new node pointer. On out of memory NULL is returned. */
/* 创建一个新的,指定孩子数量的非压缩节点,并根据 datafield 的值决定是否预留一个 value 指针的空间 */
raxNode *raxNewNode(size_t children, int datafield) {
/* 数据形式 [raxNode HDR, data(children, padding, children pointers) ]*/
size_t nodesize = sizeof(raxNode)+children+raxPadding(children)+
sizeof(raxNode*)*children;
/* 是否要预留一个 value 指针的空间 */
if (datafield) nodesize += sizeof(void*);
raxNode *node = rax_malloc(nodesize);
if (node == NULL) return NULL;
node->iskey = 0;
node->isnull = 0;
node->iscompr = 0;
node->size = children;
return node;
}
/* Allocate a new rax and return its pointer. On out of memory the function
* returns NULL. */
/* 创建一颗空的 rax 树,这颗树只有一个无孩子的根节点 */
rax *raxNew(void) {
rax *rax = rax_malloc(sizeof(*rax));
if (rax == NULL) return NULL;
rax->numele = 0;
rax->numnodes = 1;
rax->head = raxNewNode(0,0);
if (rax->head == NULL) {
rax_free(rax);
return NULL;
} else {
return rax;
}
}
/* realloc the node to make room for auxiliary data in order
* to store an item in that node. On out of memory NULL is returned. */
/* 重新分配内存,增加一个存放 value 指针的空间,返回新分配的内存地址 */
raxNode *raxReallocForData(raxNode *n, void *data) {
if (data == NULL) return n; /* No reallocation needed, setting isnull=1 */
size_t curlen = raxNodeCurrentLength(n);
return rax_realloc(n,curlen+sizeof(void*));
}
/* Set the node auxiliary data to the specified pointer. */
/* 将节点 n 标记为 key,如果数据指针 data 不为 NULL,将其值拷贝到 n->data 字段的最后一个指针大小空位 */
void raxSetData(raxNode *n, void *data) {
n->iskey = 1;
if (data != NULL) {
n->isnull = 0;
void **ndata = (void**)
((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
memcpy(ndata,&data,sizeof(data));
} else {
n->isnull = 1;
}
}
/* Get the node auxiliary data. */
/* 返回指定 rax 节点 n 中 value 指针,需要调用方保证节点 n 是一个 key */
void *raxGetData(raxNode *n) {
if (n->isnull) return NULL;
void **ndata =(void**)((char*)n+raxNodeCurrentLength(n)-sizeof(void*));
void *data;
memcpy(&data,ndata,sizeof(data));
return data;
}
/* Add a new child to the node 'n' representing the character 'c' and return
* its new pointer, as well as the child pointer by reference. Additionally
* '***parentlink' is populated with the raxNode pointer-to-pointer of where
* the new child was stored, which is useful for the caller to replace the
* child pointer if it gets reallocated.
*
* On success the new parent node pointer is returned (it may change because
* of the realloc, so the caller should discard 'n' and use the new value).
* On out of memory NULL is returned, and the old node is still valid. */
/* 非压缩节点 n 增加一个表示字符 'c' 的子节点。
* 方法返回值为 n 的新地址,新增的子节点地址通过二级指针 childptr 写回调用发起方。
* 参数 parentlink 的作用:
* 生成的新节点可能在 raxAddChild() 完成之后的操作里重新分配内存(比如扩容来增加一个存放 value 指针的空间),
* 如果新节点地址因此被改变,那么它父节点里的相应指针的值也应该跟着改变,三级指针 parentlink 的作用就是将这个指针的地址写回调用发起方,
* 以便发生上述情况时,可以将新地址的值直接写入 *parentlink 指向的位置。*/
raxNode *raxAddChild(raxNode *n, unsigned char c, raxNode **childptr, raxNode ***parentlink) {
assert(n->iscompr == 0);
/* 暂时将孩子数目加 1,来计算节点 n 扩容后的大小 */
size_t curlen = raxNodeCurrentLength(n);
n->size++;
size_t newlen = raxNodeCurrentLength(n);
n->size--; /* For now restore the original size. We'll update it only on
success at the end. */
/* 生成一个无孩子,无 value 指针的新节点,它将以字符 c 的形式作为节点 n 的子节点 */
/* Alloc the new child we will link to 'n'. */
raxNode *child = raxNewNode(0,0);
if (child == NULL) return NULL;
/* 节点 n 扩容 */
/* Make space in the original node. */
raxNode *newn = rax_realloc(n,newlen);
if (newn == NULL) {
rax_free(child);
return NULL;
}
n = newn;
/* After the reallocation, we have up to 8/16 (depending on the system
* pointer size, and the required node padding) bytes at the end, that is,
* the additional char in the 'data' section, plus one pointer to the new
* child, plus the padding needed in order to store addresses into aligned
* locations.
*
* So if we start with the following node, having "abde" edges.
*
* Note:
* - We assume 4 bytes pointer for simplicity.
* - Each space below corresponds to one byte
*
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|
*
* After the reallocation we need: 1 byte for the new edge character
* plus 4 bytes for a new child pointer (assuming 32 bit machine).
* However after adding 1 byte to the edge char, the header + the edge
* characters are no longer aligned, so we also need 3 bytes of padding.
* In total the reallocation will add 1+4+3 bytes = 8 bytes:
*
* (Blank bytes are represented by ".")
*
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr]|AUXP|[....][....]
*
* Let's find where to insert the new child in order to make sure
* it is inserted in-place lexicographically. Assuming we are adding
* a child "c" in our case pos will be = 2 after the end of the following
* loop. */
/* 由于指针需要内存对齐,所以 raxNode->data 字段的 字符区域和指针区域之前可能有一定数量的填充,填充的大小见宏 raxPadding 的计算逻辑。
* 以 32位系统为例,
* 1. 当存在填充时,新字符可以直接占用一个字节的填充空间,额外再需要分配 4 bytes 的空间来存放新节点指针。这时重新分配内存增加的空间为 4 bytes;
* 2. 若此时填充大小为 0,新增的一个字符需要额外 3 bytes 的填充来保证后面的指针时内存对齐的,此时就增加的空间最多,为 1+3+4 = 8 bytes
* 同理可计算 64 位系统增加的内存最多为 16 bytes。
* 新节点的字符需要按照字典序存储在父节点 n 的指定位置,如何确定这个位置,需要对 raxNode->data 字段的内存结构有一定了解 */
/* 确定字符和指针应该存放在第几个位置,
* 下面会分别对 value 指针、孩子指针、填充、字符向后移动,以便空出新字符和新指针的地址 */
int pos;
for (pos = 0; pos < n->size; pos++) {
if (n->data[pos] > c) break;
}
/* Now, if present, move auxiliary data pointer at the end
* so that we can mess with the other data without overwriting it.
* We will obtain something like that:
*
* [HDR*][abde][Aptr][Bptr][Dptr][Eptr][....][....]|AUXP|
*/
/* 如果当前节点是一个 key 且 value 有值,直接将 value 指针复制到最后面 */
unsigned char *src, *dst;
if (n->iskey && !n->isnull) {
src = ((unsigned char*)n+curlen-sizeof(void*));
dst = ((unsigned char*)n+newlen-sizeof(void*));
memmove(dst,src,sizeof(void*));
}
/* Compute the "shift", that is, how many bytes we need to move the
* pointers section forward because of the addition of the new child
* byte in the string section. Note that if we had no padding, that
* would be always "1", since we are adding a single byte in the string
* section of the node (where now there is "abde" basically).
*
* However we have padding, so it could be zero, or up to 8.
*
* Another way to think at the shift is, how many bytes we need to
* move child pointers forward *other than* the obvious sizeof(void*)
* needed for the additional pointer itself. */
/* shift 的值就是增加的填充大小,32位系统可能为 0 或者 4 bytes,64位系统可能为 0 或者 8 bytes */
size_t shift = newlen - curlen - sizeof(void*);
/* We said we are adding a node with edge 'c'. The insertion
* point is between 'b' and 'd', so the 'pos' variable value is
* the index of the first child pointer that we need to move forward
* to make space for our new pointer.
*
* To start, move all the child pointers after the insertion point
* of shift+sizeof(pointer) bytes on the right, to obtain:
*
* [HDR*][abde][Aptr][Bptr][....][....][Dptr][Eptr]|AUXP|
*/
/* 将比新节点的字典序大的所有子节点指针向后移动 shift + sizeof(raxNode*) 个字节,其实就是 newlen - curlen 个字节 */
src = n->data+n->size+
raxPadding(n->size)+
sizeof(raxNode*)*pos;
memmove(src+shift+sizeof(raxNode*),src,sizeof(raxNode*)*(n->size-pos));
/* Move the pointers to the left of the insertion position as well. Often
* we don't need to do anything if there was already some padding to use. In
* that case the final destination of the pointers will be the same, however
* in our example there was no pre-existing padding, so we added one byte
* plus there bytes of padding. After the next memmove() things will look
* like that:
*
* [HDR*][abde][....][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
*/
/* 如果本次扩容增加了填充空间,将比新节点的字典序小的所有子节点指针向后移动 shift 个字节 */
if (shift) {
src = (unsigned char*) raxNodeFirstChildPtr(n);
memmove(src+shift,src,sizeof(raxNode*)*pos);
}
/* Now make the space for the additional char in the data section,
* but also move the pointers before the insertion point to the right
* by shift bytes, in order to obtain the following:
*
* [HDR*][ab.d][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
*/
/* 当前已经确保字符区域后面已经空出了足够的安全空间,将 c 后面的所有字符后移 1 byte */
src = n->data+pos;
memmove(src+1,src,n->size-pos);
/* We can now set the character and its child node pointer to get:
*
* [HDR*][abcd][e...][Aptr][Bptr][....][Dptr][Eptr]|AUXP|
* [HDR*][abcd][e...][Aptr][Bptr][Cptr][Dptr][Eptr]|AUXP|
*/
/* 目前可以安全的将新字符和新节点地址写入腾挪空出的位置 */
n->data[pos] = c;
n->size++;
/* 对于指针区域,后面的指针向后移动了 shift + sizeof(raxNode*) 个字节,前面的指针向后移动了 shift 个字节,中间正好空出了一个存放指针的空间
* 把新节点的地址拷贝到这个空间 */
src = (unsigned char*) raxNodeFirstChildPtr(n);
raxNode **childfield = (raxNode**)(src+sizeof(raxNode*)*pos);
memcpy(childfield,&child,sizeof(child));
/* 将新节点的地址和对应父节点中的指针地址写回方法调用方 */
*childptr = child;
*parentlink = childfield;
return n;
}
/* Turn the node 'n', that must be a node without any children, into a
* compressed node representing a set of nodes linked one after the other
* and having exactly one child each. The node can be a key or not: this
* property and the associated value if any will be preserved.
*
* The function also returns a child node, since the last node of the
* compressed chain cannot be part of the chain: it has zero children while
* we can only compress inner nodes with exactly one child each. */
/* 将字符序列 s 以压缩节点的形式放入节点 n 中,方法返回节点 n 的新地址。
* 方法要求节点 n 一定没有任何子节点(即 n 为叶子节点),字符序列 s 放入 n 后,需要重新创建一个叶子节点作为 n 的子节点,
* 来表示以 s 为后缀的字符序列,二级指针 child 用来记录新创建的叶子节点地址 */
raxNode *raxCompressNode(raxNode *n, unsigned char *s, size_t len, raxNode **child) {
assert(n->size == 0 && n->iscompr == 0);
void *data = NULL; /* Initialized only to avoid warnings. */
size_t newsize;
debugf("Compress node: %.*s\n", (int)len,s);
/* Allocate the child to link to this node. */
*child = raxNewNode(0,0);
if (*child == NULL) return NULL;
/* Make space in the parent node. */
newsize = sizeof(raxNode)+len+raxPadding(len)+sizeof(raxNode*);
if (n->iskey) {
data = raxGetData(n); /* To restore it later. */
if (!n->isnull) newsize += sizeof(void*);
}
raxNode *newn = rax_realloc(n,newsize);
if (newn == NULL) {
rax_free(*child);
return NULL;
}
n = newn;
n->iscompr = 1;
n->size = len;
memcpy(n->data,s,len);
if (n->iskey) raxSetData(n,data);
raxNode **childfield = raxNodeLastChildPtr(n);
memcpy(childfield,child,sizeof(*child));
return n;
}
/* Low level function that walks the tree looking for the string
* 's' of 'len' bytes. The function returns the number of characters
* of the key that was possible to process: if the returned integer
* is the same as 'len', then it means that the node corresponding to the
* string was found (however it may not be a key in case the node->iskey is
* zero or if simply we stopped in the middle of a compressed node, so that
* 'splitpos' is non zero).
*
* Otherwise if the returned integer is not the same as 'len', there was an
* early stop during the tree walk because of a character mismatch.
*
* The node where the search ended (because the full string was processed
* or because there was an early stop) is returned by reference as
* '*stopnode' if the passed pointer is not NULL. This node link in the
* parent's node is returned as '*plink' if not NULL. Finally, if the
* search stopped in a compressed node, '*splitpos' returns the index
* inside the compressed node where the search ended. This is useful to
* know where to split the node for insertion.
*
* Note that when we stop in the middle of a compressed node with
* a perfect match, this function will return a length equal to the
* 'len' argument (all the key matched), and will return a *splitpos which is
* always positive (that will represent the index of the character immediately
* *after* the last match in the current compressed node).
*
* When instead we stop at a compressed node and *splitpos is zero, it
* means that the current node represents the key (that is, none of the
* compressed node characters are needed to represent the key, just all
* its parents nodes). */
/* 在 rax 树中查找字符序列 s 的底层方法,在增删查改中均有调用
* 方法如入参:
* rax: 待查找的 rax 树
* s/len: 待查找的字符序列(地址以及长度)
* stopnode: 查找过程中的终止节点,可能查找到这个节点时,待查找的字符序列匹配完成;也可能是第一个无法与待查找的序列匹配的节点
* plink: 三级指针,在父节点中有一个指向 stopnode 节点的指针,这个三级指针用于记录该指针的地址返回给方法调用方
* splitpos: 如果查找过程在一个压缩节点终止,这个二级指针用于记录该节点中压缩字符串的匹配位置,用于压缩节点后续的切割或插入。
* ts: 辅助栈,如果 ts != NULL,记录查找字符序列的路径。
*
* 方法返回序列 s 成功匹配的长度,设 i 为返回值
* 1. 当 i == len 时,表示树中存在一个同 s 一样的序列,但该序列可能为 key,可能不是 key,也可能停在压缩节点中间的某个元素以至于根本不可能表现为 key
* a. 终止节点 stopnode 是非压缩节点,根据节点的 iskey 字段判断序列是否为 key 即可
* b. 终止节点 stopnode 是压缩节点
* 如果 splitnode != 0,表示匹配了压缩节点至少一个但小于 stopnode->size 个字符,按压缩节点的特点,这个序列不可能是 key;
* 如果 splitnode == 0,根据节点的 iskey 字段判断序列是否为 key 即可
* 2. 当 i != len 时,表示未查找到完整的序列 s。
*
* 在 redis rax 树的实现中,某个节点的字符不是存在本节点中,而是存在父节点中。
* 如果换成边的概念理解:一个父节点有多个子节点,那么就对应多个边,每条边包括一个字符和一个节点指针,这些内容全部存放在父节点。
* 子节点的数据块中并不存在它本身的字符内容。详细的解释和为什么这么做,见 raxNode 数据结构的注释。
*
* 如果我们有这样一颗树,要查找的序列 s = "win",那么 stopnode 节点为 `["window"]`,splitpos = 3,树中存在序列 s,但不是 key。
* ["window"] ""
* \
* [] "window"
*
* 如果将树中的序列 "win" 修改为一个 key,那么需要将树转变为如下:
* ("win") ""
* \
* ["dow"] "win"
* \
* [] "window"
* 再次查找序列 s = "win",它会停在压缩节点 `["dow"]`,此时 splitpos = 0,树中存在序列 s,且这个序列是一个 key。
*/
static inline size_t raxLowWalk(rax *rax, unsigned char *s, size_t len, raxNode **stopnode, raxNode ***plink, int *splitpos, raxStack *ts) {
raxNode *h = rax->head;
raxNode **parentlink = &rax->head;
/* i 表示当前正在匹配的字符索引,其值等价于已经匹配成功的长度,当 i == len 时,表示字符串 s 已经全部匹配完成
* j 在压缩节点中表示第几个字符,在非压缩节点中表示第几个孩子 */
size_t i = 0; /* Position in the string. */
size_t j = 0; /* Position in the node children (or bytes if compressed).*/
while(h->size && i < len) {
debugnode("Lookup current node",h);
unsigned char *v = h->data;
if (h->iscompr) {
/* 压缩节点由连续的单孩子节点序列压缩而来,它表示的是一段序列;每匹配成功一个压缩字符,代表 rax 树的逻辑深度 +1
* j++, i++,继续比较下一对字符 */
for (j = 0; j < h->size && i < len; j++, i++) {
if (v[j] != s[i]) break;
}
/* 压缩节点未遍历完只有两种原因
* 1. 压缩字符串太长,提前触发 i == len,虽然找到了目标序列,但 stopnode 是压缩节点且 splitpos(j) != 0
* 2. 因 v[j] != s[i] 提前退出 for 循环,字符串 s 失配,此时 i != len,树中不存在目标序列。
* 无论是匹配完成(1) 还是已经失配(2),都没有再遍历的必要了,所以退出这个 while 循环
* 由于没走 if/else 后面的逻辑,它们的 stopnode 都是当前节点 */
if (j != h->size) break;
/* 若 j == h->size:
* 如果它的 child-ptr 指向的是一个非压缩节点(理论上肯定满足这个条件,如果是压缩节点,为什么不合并进当前节点呢),
* 那么它是一个 key
* */
} else {
/* */
/* Even when h->size is large, linear scan provides good
* performances compared to other approaches that are in theory
* more sounding, like performing a binary search. */
for (j = 0; j < h->size; j++) {
/* 如果节点 h 的第 j 个孩子等于字符 s[i],则退出 for 循环,准备去树的下一层找 s[i+1] */
if (v[j] == s[i]) break;
}
/* 当前节点所有孩子中,没有一个等于 s[i],查找失败,退出 while 循环,stopnode 是当前节点 */
if (j == h->size) break;
/* 准备匹配序列 s 的下一个字符,在下面的代码中,游标 h 会挪到表示字符 v[j] 的子节点中 */
i++;
}
/* 如果 raxStack ts 传了值,以栈的形式记录查找过程中经过的每个 raxNode 节点,在节点 h 入栈之前,栈顶元素总是 h 的父节点 */
if (ts) raxStackPush(ts,h); /* Save stack of parent nodes. */
/* h 的第一个子节点指针地址,也可以认为 children 是一个指针数组 */
raxNode **children = raxNodeFirstChildPtr(h);
/* 后面用 j 的值作为指针数组的索引,如果 h 是压缩节点,那么它只有一个子节点,这个子节点指针的索引为 0
* 补充以下,如果从 `if (h->iscompr)` 这个条件分支到达了这个位置,那么 j 必定等于 h-size */
if (h->iscompr) j = 0; /* Compressed node only child is at index 0. */
/* 跳转到第 j 个子节点中, 即 rax 树的下一层 */
memcpy(&h,children+j,sizeof(h));
/* 随时更新节点 h 在父节点中,对应子指针的地址 */
parentlink = children+j;
/* 复位 j,刚刚我们跳到了子节点中,在 i == len 的情况下,
* 如果新节点不是压缩节点,splitpos 对外没有意义,也不会做赋值动作,j 不复位也无所谓
* 如果新节点是压缩节点,splitpos 此时应该为 0,j 才有必要复位
* 所以英文注释的 'non compressed' 应该是 'compressed',被改错了 */
j = 0; /* If the new node is non compressed and we do not
iterate again (since i == len) set the split
position to 0 to signal this node represents
the searched key. */
}
debugnode("Lookup stop node is",h);
if (stopnode) *stopnode = h;
/* 将 stopnode 在父节点中 对应指针的地址 写回方法调用方,以便 stopnode 因为扩容导致地址变动的时候,可以直接修改这个指针的值 */
if (plink) *plink = parentlink;
if (splitpos && h->iscompr) *splitpos = j;
/* 返回字符串 s 已经被成功匹配的长度 */
return i;
}
/* Insert the element 's' of size 'len', setting as auxiliary data
* the pointer 'data'. If the element is already present, the associated
* data is updated (only if 'overwrite' is set to 1), and 0 is returned,
* otherwise the element is inserted and 1 is returned. On out of memory the
* function returns 0 as well but sets errno to ENOMEM, otherwise errno will
* be set to 0.
*/
/* 向 rax 树中插入字符序列 s,并将其设置为 key。
* 该方法是两个插入方法的底层实现,最开始使用 raxLowWalk() 查找一次序列 s,根据几个返回值可以归类为下面几类情况
*
* 1. rax 中存在完整的序列 s,且这个序列就是从 根节点 到 stopnode 所有边组成的字符序列,等价于 (!stopnode->iscompr || splitpos == 0)
* 此时不需要插入任何字符,也不需要分割任何节点。记录 kv 后方法结束。
* 2. rax 中不存在完整的序列 s,stopnode 是压缩节点,splitpos != 0;
* 失配的位置在压缩节点中间,用 ALGO 1 的逻辑将压缩节点切割,切割后符合条件 4,继续执行 4 的逻辑
* 3. rax 中存在完整的序列 s,stopnode 是压缩节点,且 splitpos != 0。
* 用 ALGO 2 的逻辑将压缩字符串从 splitpos 位置切开,分为一对父子节点。将新的子节点标记为等于序列 s 的 key,记录 kv 后方法结束。
*
* 4. 不符合分支 1,3,或者经过分支 2 处理后的插入操作会进入这个最终步骤。
* 此时所在的节点一定是非压缩节点,假设我们还剩 "xyz" 没匹配上,那么当前节点中需要补充一个表示字符 'x' 的孩子,
* 孩子的指针指向新创建的压缩节点,压缩字符串为 "xyz",最后再创建一个叶子节点,作为压缩节点的孩子。
* 叶子节点的字符序列就是 s,记录 kv 后方法结束。
*
* 方法如参:overwrite 表示是否覆盖原来的 value。
*/
int raxGenericInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old, int overwrite) {
size_t i;
/* 切割位置,raxLowWalk()方法的 splitpos */
int j = 0; /* Split position. If raxLowWalk() stops in a compressed
node, the index 'j' represents the char we stopped within the
compressed node, that is, the position where to split the
node for insertion. */
/* parentlink 是 stopnode h 在它的父节点中,对应指针的地址(引用),若节点 h 的地址改变,直接用这个引用更新指针的值 */
raxNode *h, **parentlink;
debugf("### Insert %.*s with value %p\n", (int)len, s, data);
i = raxLowWalk(rax,s,len,&h,&parentlink,&j,NULL);
/* If i == len we walked following the whole string. If we are not
* in the middle of a compressed node, the string is either already
* inserted or this middle node is currently not a key, but can represent
* our key. We have just to reallocate the node and make space for the
* data pointer. */
/* 分支 1,rax 中存在完整的序列 s,且这个序列就是从 根节点 到 stopnode 所有边组成的字符序列
* i == len 表示 rax 中存在完整的序列 s
* stopnode (节点h) 不是压缩节点 或者 是压缩节点,但 splitpos == 0,表示节点不需要切割。
* 此时稍加处理便可以结束方法。*/
if (i == len && (!h->iscompr || j == 0 /* not in the middle if j is 0 */)) {
debugf("### Insert: node representing key exists\n");
/* Make space for the value pointer if needed. */
/* 如果之前没有 value 指针,并且这次是覆盖插入,那么重新分配内存,以便增加一个存储 value 指针的空间 */
if (!h->iskey || (h->isnull && overwrite)) {
h = raxReallocForData(h,data);
if (h) memcpy(parentlink,&h,sizeof(h));
}
if (h == NULL) {
errno = ENOMEM;
return 0;
}
/* Update the existing key if there is already one. */
/* 如果之前已经是存在 kv 对,则记录旧 value,并且在覆盖插入的场景下,写入新 value */
if (h->iskey) {
if (old) *old = raxGetData(h);
if (overwrite) raxSetData(h,data);
errno = 0;
return 0; /* Element already exists. */
}
/* Otherwise set the node as a key. Note that raxSetData()
* will set h->iskey. */
/* 如果当前表示的字符序列之前不是一个 key,则只写入新 value,并且将 rax 树的 key 计数器加一 */
raxSetData(h,data);
rax->numele++;
return 1; /* Element inserted. */
}
/* 以下是分支 2 和分支 3 */
/* If the node we stopped at is a compressed node, we need to
* split it before to continue.
*
* Splitting a compressed node have a few possible cases.
* Imagine that the node 'h' we are currently at is a compressed
* node containing the string "ANNIBALE" (it means that it represents
* nodes A -> N -> N -> I -> B -> A -> L -> E with the only child
* pointer of this node pointing at the 'E' node, because remember that
* we have characters at the edges of the graph, not inside the nodes
* themselves.
* 因为 rax 实现中,字符存储在父节点而不是子节点中,所有上文中的 'E' node 指压缩节点 `"SCO"`
*
* In order to show a real case imagine our node to also point to
* another compressed node, that finally points at the node without
* children, representing 'O':
* 假设当前压缩节点 h 的压缩序列为 "ANNIBALE",h 的子节点是另一个压缩节点 'E' node,其压缩序列为 "SCO",
* 最后需要提醒的是,'O' node 是叶子节点,没有任何子节点。
*
* "ANNIBALE" -> "SCO" -> []
*
* When inserting we may face the following cases. Note that all the cases
* require the insertion of a non compressed node with exactly two
* children, except for the last case which just requires splitting a
* compressed node.
*
* 方法描述部分讨论的 分支2 和 分支3,都需要对压缩节点的压缩序列进行分割,
* 分割后的序列最多分成三份,按顺序装填进 压缩节点、非压缩节点、压缩节点中
* 我们把前面的压缩节点叫 trimmed,后面的叫 postfix,中间的非压缩节点叫 split node
*
* 分支2 因为缺少完整的序列 s
* 需要将原压缩节点 h 在索引 j 位置的字符 h->data[j] 单独拆出来,和匹配失败的字符 s[i] 一起放在非压缩节点 split 中。
* 对应下面的 case 1~4
* 分支3 情况下,已经存在完整的序列 s
* 只需要将原压缩节点的 [0,j-1] 拆到新压缩节点 trimmed 中,[j,h->size) 拆到新压缩节点 postfix 中。
* 对应下面的 case 5
*
* 1) Inserting "ANNIENTARE"
*
* |B| -> "ALE" -> "SCO" -> []
* "ANNI" -> |-|
* |E| -> (... continue algo ...) "NTARE" -> []
*
* "ANNI" 为 trimmed 节点,"ALE" 为 postfix 节点,"ANNIBALE" 中的字符 'B' 被单独拆出来和 s[i]('E') 组合为一个非压缩节点 split。
* 待插入序列 "ANNIENTARE" 的剩余的字符 "NTARE" 将进入分支 4 后处理。
*
* 2) Inserting "ANNIBALI"
*
* |E| -> "SCO" -> []
* "ANNIBAL" -> |-|
* |I| -> (... continue algo ...) []
*
* h->data[j] 已经是原压缩节点的最后一个字符,postfixlen = 0,此时只拆成了 trimmed 和 split 两部分,
* 后面的压缩节点 postfix 不存在,仍然需要进入分支 4 添加一个叶子节点
*
* 3) Inserting "AGO" (Like case 1, but set iscompr = 0 into original node)
*
* |N| -> "NIBALE" -> "SCO" -> []
* |A| -> |-|
* |G| -> (... continue algo ...) |O| -> []
*
* 同 case 1,但 trimmedlen = 1,此时可以将 trimmed 标记为非压缩节点
*
* 4) Inserting "CIAO"
*
* |A| -> "NNIBALE" -> "SCO" -> []
* |-|
* |C| -> (... continue algo ...) "IAO" -> []
*
* raxLowWalk() 返回值 j = 0,trimmedlen = 0,只存在中间的非压缩节点 split 和后面的压缩节点 postfix
* 如果原节点 `["ANNIBALE"]` 本来就是一个 key(假设为 ""),那么拆出去的字符 'A' 与字符 s[i]('C') 组成的节点仍应该被标记为 key
* 树形图如下:
* [A C] ""
* / \
* ("NNIBALE") "A" ("IAO") "C"
* / \
* ["SCO"] "ANNIBALE" [] "CIAO"
* /
* [] "ANNIBALESCO"
*
* 5) Inserting "ANNI"
*
* "ANNI" -> "BALE" -> "SCO" -> []
*
* rax 树中存在完整的序列 "ANNI",仅需要将后面的 "BALE" 拆出去,分为前面的 trimmed 和 后面的 postfix,没有中间的非压缩节点 split,
* 树形图如下:
* ("ANNI") ""
* \
* ["BALE"] "ANNI"
* \
* ["SCO"] "ANNIBALE"
* \
* [] "ANNIBALESCO"
*
* The final algorithm for insertion covering all the above cases is as
* follows.
*
* ============================= ALGO 1 =============================
*
* 算法 1 针对 case 1-4,即出现字符不匹配的位置,是在某个压缩节点中间
*
* 索引 splitpos 从 0 开始,在算法 1 的针对的情况下下,它的值表示压缩序列索引为 splitpos 位置的字符发生不匹配。
* 比如含有序列 "ANNIBALE" 的压缩节点,若我们插入 "ANNIENTARE",那么在索引为 4 的位置出现不匹配,splitpos 的值为 4。
*
* 一个压缩节点总会被切割为 3 部分,trimmed, split, postfix,按上面这个情况,分为 "ANNI", "B", "ALE",
* trimmedLen=4, splitLen=1, postfixLen=3,在 case 1-4,前后两段可能存在一段长度为 0,中间 splitLen 总是为 1
* 下面是处理上述 case 的主要步骤
*
* 1. 保存当前压缩节点的 NEXT 指针(压缩节点唯一的子节点)
* 2. 在未匹配位置创建切割节点 `split node`,存储压缩节点中未匹配的那个字符 'B',这个字符只作为 split 节点的孩子之一,
* split 节点的另一个孩子是序列 s 中未匹配的那个字符 'E',这个孩子将在步骤 6 处理。
* 3. splitpos 是否为 0 (第一部分 trimmed 长度是否为 0)
* a. splitpos == 0,用 split 节点接替 trimed 节点的位置,作为原压缩节点的父节点的新子节点,顶替原压缩节点。
* b. splitpos != 0,第一部分 trimmed 节点,作为原压缩节点的父节点的新子节点,顶替原压缩节点;并设置 split 为 trimmed 的子节点。
* 如果 trimmedlen == 1,将其标记为非压缩节点 (对应上文中的 case 3)
* 4. 第三部分 postfix 长度是否为 0
* a. postfixlen != 0,NEXT 节点设置为 postfix 的子节点。如果 postfixlen == 0,将其标记为非压缩节点
* b. postfixlen == 0,原压缩节点切割为两部分,公共前缀 trimmed 和 split node,将 NEXT 节点作为 postfix。
* 5. 将 postfix 设置为 split 索引为 0 的子节点 (这是 split 的第一个孩子,之后序列 s 中那个未匹配字符将作为第二个孩子放在合适的位置)
* 6. 此时 h's parent -> trimmed -> split -> postfix -> NEXT 已经串起来了,将当前节点设置为 split 节点,继续处理序列 s 中剩下
* 未匹配的字符。
*
* For the above cases 1 to 4, that is, all cases where we stopped in
* the middle of a compressed node for a character mismatch, do:
*
* Let $SPLITPOS be the zero-based index at which, in the
* compressed node array of characters, we found the mismatching
* character. For example if the node contains "ANNIBALE" and we add
* "ANNIENTARE" the $SPLITPOS is 4, that is, the index at which the
* mismatching character is found.
*
* 1. Save the current compressed node $NEXT pointer (the pointer to the
* child element, that is always present in compressed nodes).
*
* 2. Create "split node" having as child the non common letter
* at the compressed node. The other non common letter (at the key)
* will be added later as we continue the normal insertion algorithm
* at step "6".
*
* 3a. IF $SPLITPOS == 0:
* Replace the old node with the split node, by copying the auxiliary
* data if any. Fix parent's reference. Free old node eventually
* (we still need its data for the next steps of the algorithm).
*
* 3b. IF $SPLITPOS != 0:
* Trim the compressed node (reallocating it as well) in order to
* contain $splitpos characters. Change child pointer in order to link
* to the split node. If new compressed node len is just 1, set
* iscompr to 0 (layout is the same). Fix parent's reference.
*
* 4a. IF the postfix len (the length of the remaining string of the
* original compressed node after the split character) is non zero,
* create a "postfix node". If the postfix node has just one character
* set iscompr to 0, otherwise iscompr to 1. Set the postfix node
* child pointer to $NEXT.
*
* 4b. IF the postfix len is zero, just use $NEXT as postfix pointer.
*
* 5. Set child[0] of split node to postfix node.
*
* 6. Set the split node as the current node, set current index at child[1]
* and continue insertion algorithm as usually.
*
* ============================= ALGO 2 =============================
* 算法 2 针对 case 5,即在树中找到了字符序列 s,但匹配终点恰好在压缩节点中间;
* 这种情况采用分支 3 的逻辑处理,只分成 trimmed 和 postfix 两个部分,做好 key 的标记和 value 信息的处理后,直接退出方法。
*
* For case 5, that is, if we stopped in the middle of a compressed
* node but no mismatch was found, do:
*
* Let $SPLITPOS be the zero-based index at which, in the
* compressed node array of characters, we stopped iterating because
* there were no more keys character to match. So in the example of
* the node "ANNIBALE", adding the string "ANNI", the $SPLITPOS is 4.
*
* 1. Save the current compressed node $NEXT pointer (the pointer to the
* child element, that is always present in compressed nodes).
*
* 2. Create a "postfix node" containing all the characters from $SPLITPOS
* to the end. Use $NEXT as the postfix node child pointer.
* If the postfix node length is 1, set iscompr to 0.
* Set the node as a key with the associated value of the new
* inserted key.
*
* 3. Trim the current node to contain the first $SPLITPOS characters.
* As usually if the new node length is just 1, set iscompr to 0.
* Take the iskey / associated value as it was in the original node.
* Fix the parent's reference.
*
* 4. Set the postfix node as the only child pointer of the trimmed
* node created at step 1.
*/
/* ------------------------- ALGORITHM 1 --------------------------- */
if (h->iscompr && i != len) {
/* 分支2: 在压缩节点中间失配 */
debugf("ALGO 1: Stopped at compressed node %.*s (%p)\n",
h->size, h->data, (void*)h);
debugf("Still to insert: %.*s\n", (int)(len-i), s+i);
debugf("Splitting at %d: '%c'\n", j, ((char*)h->data)[j]);
debugf("Other (key) letter is '%c'\n", s[i]);
/* 1: Save next pointer. */
/* 1: 保存压缩节点的孩子指针 */
raxNode **childfield = raxNodeLastChildPtr(h);
raxNode *next;
memcpy(&next,childfield,sizeof(next));
debugf("Next is %p\n", (void*)next);
debugf("iskey %d\n", h->iskey);
if (h->iskey) {
debugf("key value is %p\n", raxGetData(h));
}
/* Set the length of the additional nodes we will need. */
size_t trimmedlen = j;
size_t postfixlen = h->size - j - 1;
/* 如果 j == 0 且原压缩节点之前表示一个 key,后面新创建的 splitnode 也应该是一个表示 key 的节点
* 在创建 split 节点时,split_node_is_key 用于判断是否需要预留 value 指针的存储空间 */
int split_node_is_key = !trimmedlen && h->iskey && !h->isnull;
size_t nodesize;
/* 2: Create the split node. Also allocate the other nodes we'll need
* ASAP, so that it will be simpler to handle OOM. */
/* 2: 创建中间的 split 节点,同时视情况将第一部分的 trimmed 节点和第三部分的 postfix 节点创建出来
* split_node_is_key 表示是否需要预留 value 指针 */
raxNode *splitnode = raxNewNode(1, split_node_is_key);
raxNode *trimmed = NULL;
raxNode *postfix = NULL;
/* 处理第一部分 trimmed */
if (trimmedlen) {
nodesize = sizeof(raxNode)+trimmedlen+raxPadding(trimmedlen)+
sizeof(raxNode*);
if (h->iskey && !h->isnull) nodesize += sizeof(void*);
trimmed = rax_malloc(nodesize);
}
/* 处理第三部分,postfix */
if (postfixlen) {
nodesize = sizeof(raxNode)+postfixlen+raxPadding(postfixlen)+
sizeof(raxNode*);
postfix = rax_malloc(nodesize);
}
/* OOM? Abort now that the tree is untouched. */
if (splitnode == NULL ||
(trimmedlen && trimmed == NULL) ||
(postfixlen && postfix == NULL))
{
rax_free(splitnode);
rax_free(trimmed);
rax_free(postfix);
errno = ENOMEM;
return 0;
}
/* split 节点存储中间分割出来的那个字符 */
splitnode->data[0] = h->data[j];
if (j == 0) {
/* 3a: Replace the old node with the split node. */
/* 3a: trimmedlen == 0,
* 把 split 节点当 trimmed 操作,原压缩节点是个 key 的情况下,将原节点的 value 指针拷贝到新节点 split */
if (h->iskey) {
void *ndata = raxGetData(h);
raxSetData(splitnode,ndata);
}
/* 用 split 节点替代原节点 h,parentlink 的值是节点 h 在其父节点中 指针的地址,修改这个指针使其指向 split */
memcpy(parentlink,&splitnode,sizeof(splitnode));
} else {
/* 3b: Trim the compressed node. */
/* 3b: trimedlen != 0,用 trimmed 节点替代原节点 h,继承 h 的内容;split 作为 trimmed 的子节点 */
trimmed->size = j;
memcpy(trimmed->data,h->data,j);
trimmed->iscompr = j > 1 ? 1 : 0;
trimmed->iskey = h->iskey;
trimmed->isnull = h->isnull;
if (h->iskey && !h->isnull) {
void *ndata = raxGetData(h);
raxSetData(trimmed,ndata);
}
/* 将 split 节点作为 trimmed 节点的子节点,trimmed 节点作为原 h 父节点的子节点 */
raxNode **cp = raxNodeLastChildPtr(trimmed);
memcpy(cp,&splitnode,sizeof(splitnode));
memcpy(parentlink,&trimmed,sizeof(trimmed));
parentlink = cp; /* Set parentlink to splitnode parent. */
rax->numnodes++;
}
/* 4: Create the postfix node: what remains of the original
* compressed node after the split. */
/* 4: 处理 postfix,将最开始保存的 NEXT 指针作为 postfix 节点的孩子指针 */
if (postfixlen) {
/* 4a: create a postfix node. */
/* 4a: postfixlen != 0,处理分割的第三部分,将原 h 节点剩下的序列 [j+1, h->size) 保存进 postfix */
postfix->iskey = 0;
postfix->isnull = 0;
postfix->size = postfixlen;
postfix->iscompr = postfixlen > 1;
memcpy(postfix->data,h->data+j+1,postfixlen);
/* 如果第三部分割下来的长度大于 0, 将最开始保存的 NEXT 指针作为它的孩子指针,(NEXT 本来是原 h 节点的孩子) */
raxNode **cp = raxNodeLastChildPtr(postfix);
memcpy(cp,&next,sizeof(next));
rax->numnodes++;
} else {
/* 4b: just use next as postfix node. */
/* 4b: 第三部分的长度为 0,将原 h 节点的孩子作为 postfix */
postfix = next;
}
/* 5: Set splitnode first child as the postfix node. */