Jinlong's Blog

Redis源代码分析之有序集合

引言

本文对Redis中的有序集合部分源代码进行分析(基于Redis3.2.0源码)
skiplist参考资料(A Skip List Cookbook):
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.17.524&rep=rep1&type=pdf

源码分析

Redis中有序集合的底层实现是skiplist和ziplist,在server.h中,相关的两个有序集合如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
//在有序集合的实现中用来表示score的范围
typedef struct {
double min, max;
int minex, maxex; /* are min or max exclusive? */
} zrangespec;
//在有序集合的实现中用来表示键的范围
typedef struct {
robj *min, *max;
int minex, maxex;
} zlexrangespec;
//跳表节点
typedef struct zskiplistNode {
robj *obj; //键对象
double score; //分值
struct zskiplistNode *backward; //上一个节点
struct zskiplistLevel {
struct zskiplistNode *forward; //下一个节点
unsigned int span; //与下一个节点之间的跨度
} level[]; //指向下一个节点的节点列表
} zskiplistNode;
//跳表
typedef struct zskiplist {
struct zskiplistNode *header, *tail;
unsigned long length;
int level;
} zskiplist;
//有序集合
typedef struct zset {
dict *dict;
zskiplist *zsl;
} zset;

t_zset.c是对redis有序集合的具体实现,代码以及注释如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
//创建skiplist节点
zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel));
zn->score = score;
zn->obj = obj;
return zn;
}
//创建skiplist
zskiplist *zslCreate(void) {
int j;
zskiplist *zsl;
zsl = zmalloc(sizeof(*zsl));
zsl->level = 1;
zsl->length = 0;
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
zsl->header->level[j].forward = NULL;
zsl->header->level[j].span = 0;
}
zsl->header->backward = NULL;
zsl->tail = NULL;
return zsl;
}
//释放skiplist节点
void zslFreeNode(zskiplistNode *node) {
decrRefCount(node->obj);
zfree(node);
}
//释放skiplist
void zslFree(zskiplist *zsl) {
zskiplistNode *node = zsl->header->level[0].forward, *next;
zfree(zsl->header);
while(node) {
next = node->level[0].forward;
zslFreeNode(node);
node = next;
}
zfree(zsl);
}
//随机生成一个level
int zslRandomLevel(void) {
int level = 1;
while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
level += 1;
return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}
//在skiplist中插入一个键值对
zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned int rank[ZSKIPLIST_MAXLEVEL];
int i, level;
serverAssert(!isnan(score));
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* store rank that is crossed to reach the insert position */
rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,obj) < 0))) {
rank[i] += x->level[i].span;
x = x->level[i].forward;
}
update[i] = x;
}
/* we assume the key is not already inside, since we allow duplicated
* scores, and the re-insertion of score and redis object should never
* happen since the caller of zslInsert() should test in the hash table
* if the element is already inside or not. */
level = zslRandomLevel();
if (level > zsl->level) {
for (i = zsl->level; i < level; i++) {
rank[i] = 0;
update[i] = zsl->header;
update[i]->level[i].span = zsl->length;
}
zsl->level = level;
}
x = zslCreateNode(level,score,obj);
for (i = 0; i < level; i++) {
x->level[i].forward = update[i]->level[i].forward;
update[i]->level[i].forward = x;
/* update span covered by update[i] as x is inserted here */
x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
update[i]->level[i].span = (rank[0] - rank[i]) + 1;
}
/* increment span for untouched levels */
for (i = level; i < zsl->level; i++) {
update[i]->level[i].span++;
}
x->backward = (update[0] == zsl->header) ? NULL : update[0];
if (x->level[0].forward)
x->level[0].forward->backward = x;
else
zsl->tail = x;
zsl->length++;
return x;
}
//删除skiplist节点辅助函数,给定update数组和需要删除的节点x,以及相应的skiplist zsl
void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
int i;
for (i = 0; i < zsl->level; i++) {
if (update[i]->level[i].forward == x) {
update[i]->level[i].span += x->level[i].span - 1;
update[i]->level[i].forward = x->level[i].forward;
} else {
update[i]->level[i].span -= 1;
}
}
if (x->level[0].forward) {
x->level[0].forward->backward = x->backward;
} else {
zsl->tail = x->backward;
}
while(zsl->level > 1 && zsl->header->level[zsl->level-1].forward == NULL)
zsl->level--;
zsl->length--;
}
//删除指定的键值对,要求score和obj对应
int zslDelete(zskiplist *zsl, double score, robj *obj) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,obj) < 0)))
x = x->level[i].forward;
update[i] = x;
}
/* We may have multiple elements with the same score, what we need
* is to find the element with both the right score and object. */
x = x->level[0].forward;
if (x && score == x->score && equalStringObjects(x->obj,obj)) {
zslDeleteNode(zsl, x, update);
zslFreeNode(x);
return 1;
}
return 0; /* not found */
}
//value是否大于最小值
static int zslValueGteMin(double value, zrangespec *spec) {
return spec->minex ? (value > spec->min) : (value >= spec->min);
}
//value是否小于最大值
int zslValueLteMax(double value, zrangespec *spec) {
return spec->maxex ? (value < spec->max) : (value <= spec->max);
}
//该skiplist的score是否在范围range之内
int zslIsInRange(zskiplist *zsl, zrangespec *range) {
zskiplistNode *x;
/* Test for ranges that will always be empty. */
if (range->min > range->max ||
(range->min == range->max && (range->minex || range->maxex)))
return 0;
x = zsl->tail;
if (x == NULL || !zslValueGteMin(x->score,range))
return 0;
x = zsl->header->level[0].forward;
if (x == NULL || !zslValueLteMax(x->score,range))
return 0;
return 1;
}
//找出在指定score范围range之内的第一个元素并返回
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
zskiplistNode *x;
int i;
/* If everything is out of range, return early. */
if (!zslIsInRange(zsl,range)) return NULL;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *OUT* of range. */
while (x->level[i].forward &&
!zslValueGteMin(x->level[i].forward->score,range))
x = x->level[i].forward;
}
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
serverAssert(x != NULL);
/* Check if score <= max. */
if (!zslValueLteMax(x->score,range)) return NULL;
return x;
}
//找出在指定score范围range之内的最后一个元素并返回
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
zskiplistNode *x;
int i;
/* If everything is out of range, return early. */
if (!zslIsInRange(zsl,range)) return NULL;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *IN* range. */
while (x->level[i].forward &&
zslValueLteMax(x->level[i].forward->score,range))
x = x->level[i].forward;
}
/* This is an inner range, so this node cannot be NULL. */
serverAssert(x != NULL);
/* Check if score >= min. */
if (!zslValueGteMin(x->score,range)) return NULL;
return x;
}
//删除指定score范围内range内的所有节点
unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned long removed = 0;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (range->minex ?
x->level[i].forward->score <= range->min :
x->level[i].forward->score < range->min))
x = x->level[i].forward;
update[i] = x;
}
/* Current node is the last with score < or <= min. */
x = x->level[0].forward;
/* Delete nodes while in range. */
while (x &&
(range->maxex ? x->score < range->max : x->score <= range->max))
{
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
dictDelete(dict,x->obj);
zslFreeNode(x);
removed++;
x = next;
}
return removed;
}
//删除键范围range之间的所有节点
unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *dict) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned long removed = 0;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
!zslLexValueGteMin(x->level[i].forward->obj,range))
x = x->level[i].forward;
update[i] = x;
}
/* Current node is the last with score < or <= min. */
x = x->level[0].forward;
/* Delete nodes while in range. */
while (x && zslLexValueLteMax(x->obj,range)) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
dictDelete(dict,x->obj);
zslFreeNode(x);
removed++;
x = next;
}
return removed;
}
//删除第start个节点到第end个节点之间的节点
unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
unsigned long traversed = 0, removed = 0;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (traversed + x->level[i].span) < start) {
traversed += x->level[i].span;
x = x->level[i].forward;
}
update[i] = x;
}
traversed++;
x = x->level[0].forward;
while (x && traversed <= end) {
zskiplistNode *next = x->level[0].forward;
zslDeleteNode(zsl,x,update);
dictDelete(dict,x->obj);
zslFreeNode(x);
removed++;
traversed++;
x = next;
}
return removed;
}
//给定键值对,获取它的rank值
unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
zskiplistNode *x;
unsigned long rank = 0;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward &&
(x->level[i].forward->score < score ||
(x->level[i].forward->score == score &&
compareStringObjects(x->level[i].forward->obj,o) <= 0))) {
rank += x->level[i].span;
x = x->level[i].forward;
}
/* x might be equal to zsl->header, so test if obj is non-NULL */
if (x->obj && equalStringObjects(x->obj,o)) {
return rank;
}
}
return 0;
}
//获取第rank个节点
zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
zskiplistNode *x;
unsigned long traversed = 0;
int i;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
while (x->level[i].forward && (traversed + x->level[i].span) <= rank)
{
traversed += x->level[i].span;
x = x->level[i].forward;
}
if (traversed == rank) {
return x;
}
}
return NULL;
}
//把命令行中指定的字符串形式的范围转换到浮点数形式的范围中,下面的注释中有说明
static int zslParseRange(robj *min, robj *max, zrangespec *spec) {
char *eptr;
spec->minex = spec->maxex = 0;
/* Parse the min-max interval. If one of the values is prefixed
* by the "(" character, it's considered "open". For instance
* ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
* ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
if (min->encoding == OBJ_ENCODING_INT) {
spec->min = (long)min->ptr;
} else {
if (((char*)min->ptr)[0] == '(') {
spec->min = strtod((char*)min->ptr+1,&eptr);
if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
spec->minex = 1;
} else {
spec->min = strtod((char*)min->ptr,&eptr);
if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR;
}
}
if (max->encoding == OBJ_ENCODING_INT) {
spec->max = (long)max->ptr;
} else {
if (((char*)max->ptr)[0] == '(') {
spec->max = strtod((char*)max->ptr+1,&eptr);
if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
spec->maxex = 1;
} else {
spec->max = strtod((char*)max->ptr,&eptr);
if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR;
}
}
return C_OK;
}
//下面的几个带有Lex字样的函数是把命令行的各种代表数据的字符串转化成真正的数据,实际上是转化和包装函数
/* Parse max or min argument of ZRANGEBYLEX.
* (foo means foo (open interval)
* [foo means foo (closed interval)
* - means the min string possible
* + means the max string possible
*
* If the string is valid the *dest pointer is set to the redis object
* that will be used for the comparision, and ex will be set to 0 or 1
* respectively if the item is exclusive or inclusive. C_OK will be
* returned.
*
* If the string is not a valid range C_ERR is returned, and the value
* of *dest and *ex is undefined. */
int zslParseLexRangeItem(robj *item, robj **dest, int *ex) {
char *c = item->ptr;
switch(c[0]) {
case '+':
if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.maxstring;
incrRefCount(shared.maxstring);
return C_OK;
case '-':
if (c[1] != '\0') return C_ERR;
*ex = 0;
*dest = shared.minstring;
incrRefCount(shared.minstring);
return C_OK;
case '(':
*ex = 1;
*dest = createStringObject(c+1,sdslen(c)-1);
return C_OK;
case '[':
*ex = 0;
*dest = createStringObject(c+1,sdslen(c)-1);
return C_OK;
default:
return C_ERR;
}
}
/* Populate the rangespec according to the objects min and max.
*
* Return C_OK on success. On error C_ERR is returned.
* When OK is returned the structure must be freed with zslFreeLexRange(),
* otherwise no release is needed. */
static int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) {
/* The range can't be valid if objects are integer encoded.
* Every item must start with ( or [. */
if (min->encoding == OBJ_ENCODING_INT ||
max->encoding == OBJ_ENCODING_INT) return C_ERR;
spec->min = spec->max = NULL;
if (zslParseLexRangeItem(min, &spec->min, &spec->minex) == C_ERR ||
zslParseLexRangeItem(max, &spec->max, &spec->maxex) == C_ERR) {
if (spec->min) decrRefCount(spec->min);
if (spec->max) decrRefCount(spec->max);
return C_ERR;
} else {
return C_OK;
}
}
/* Free a lex range structure, must be called only after zelParseLexRange()
* populated the structure with success (C_OK returned). */
void zslFreeLexRange(zlexrangespec *spec) {
decrRefCount(spec->min);
decrRefCount(spec->max);
}
/* This is just a wrapper to compareStringObjects() that is able to
* handle shared.minstring and shared.maxstring as the equivalent of
* -inf and +inf for strings */
int compareStringObjectsForLexRange(robj *a, robj *b) {
if (a == b) return 0; /* This makes sure that we handle inf,inf and
-inf,-inf ASAP. One special case less. */
if (a == shared.minstring || b == shared.maxstring) return -1;
if (a == shared.maxstring || b == shared.minstring) return 1;
return compareStringObjects(a,b);
}
static int zslLexValueGteMin(robj *value, zlexrangespec *spec) {
return spec->minex ?
(compareStringObjectsForLexRange(value,spec->min) > 0) :
(compareStringObjectsForLexRange(value,spec->min) >= 0);
}
static int zslLexValueLteMax(robj *value, zlexrangespec *spec) {
return spec->maxex ?
(compareStringObjectsForLexRange(value,spec->max) < 0) :
(compareStringObjectsForLexRange(value,spec->max) <= 0);
}
/* Returns if there is a part of the zset is in the lex range. */
int zslIsInLexRange(zskiplist *zsl, zlexrangespec *range) {
zskiplistNode *x;
/* Test for ranges that will always be empty. */
if (compareStringObjectsForLexRange(range->min,range->max) > 1 ||
(compareStringObjects(range->min,range->max) == 0 &&
(range->minex || range->maxex)))
return 0;
x = zsl->tail;
if (x == NULL || !zslLexValueGteMin(x->obj,range))
return 0;
x = zsl->header->level[0].forward;
if (x == NULL || !zslLexValueLteMax(x->obj,range))
return 0;
return 1;
}
/* Find the first node that is contained in the specified lex range.
* Returns NULL when no element is contained in the range. */
zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) {
zskiplistNode *x;
int i;
/* If everything is out of range, return early. */
if (!zslIsInLexRange(zsl,range)) return NULL;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *OUT* of range. */
while (x->level[i].forward &&
!zslLexValueGteMin(x->level[i].forward->obj,range))
x = x->level[i].forward;
}
/* This is an inner range, so the next node cannot be NULL. */
x = x->level[0].forward;
serverAssert(x != NULL);
/* Check if score <= max. */
if (!zslLexValueLteMax(x->obj,range)) return NULL;
return x;
}
/* Find the last node that is contained in the specified range.
* Returns NULL when no element is contained in the range. */
zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
zskiplistNode *x;
int i;
/* If everything is out of range, return early. */
if (!zslIsInLexRange(zsl,range)) return NULL;
x = zsl->header;
for (i = zsl->level-1; i >= 0; i--) {
/* Go forward while *IN* range. */
while (x->level[i].forward &&
zslLexValueLteMax(x->level[i].forward->obj,range))
x = x->level[i].forward;
}
/* This is an inner range, so this node cannot be NULL. */
serverAssert(x != NULL);
/* Check if score >= min. */
if (!zslLexValueGteMin(x->obj,range)) return NULL;
return x;
}
//再以下的代码是ziplist的api和有序集合的命令实现,以后再作讨论
/* 此处省略ziplist的api和有序集合的命令实现的代码 */

总结

Redis中的跳表实现跟一般教科书上的跳表实现基本相同,有一点不同的是Redis在skiplist的节点中加入了backward属性,来记录上一个节点的地址,由此跳表的节点与节点间便变成了一个双向链表,这样做在方便节点的插入删除操作的同时,又帮助实现了诸如ZREVRANGE这样的指令。

能力有限,难免有分析不周全或分析错误的地方,如发现望指出,大家一起学习交流。