博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
redis源码分析1------dict的实现
阅读量:7085 次
发布时间:2019-06-28

本文共 7082 字,大约阅读时间需要 23 分钟。

1. 总体结构

redis的dict就是hash表,使用链式结构来解决key值冲突,典型的数据结构

结构体的定义如下:

typedef struct dictEntry {    void *key;    union {        void *val;        uint64_t u64;        int64_t s64;        double d;    } v;    struct dictEntry *next;} dictEntry;typedef struct dictType {    uint64_t (*hashFunction)(const void *key);    void *(*keyDup)(void *privdata, const void *key);    void *(*valDup)(void *privdata, const void *obj);    int (*keyCompare)(void *privdata, const void *key1, const void *key2);    void (*keyDestructor)(void *privdata, void *key);    void (*valDestructor)(void *privdata, void *obj);} dictType;/* This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing, for the old to the new table. */typedef struct dictht {    dictEntry **table;  //hash桶是一个指针数组,里面存放的是hash entry的指针类型,只需要(8字节*size)个连续内存不需要大量的连续内存    unsigned long size;  //这个是hash桶的大小    unsigned long sizemask;  //hash桶大小-1, **用hash**/sizemask来计算桶下标    unsigned long used; //当前这个dict一共放了多少个kv键值对} dictht;//一旦used/size >=dict_force_resize_ratio(默认值是5),就会触发rehash,可以理解为一个hash桶后面平均挂载的冲突队列个数为5的时候,就会触发rehashtypedef struct dict {    dictType *type;    void *privdata;    dictht ht[2];    long rehashidx; /* rehashing not in progress if rehashidx == -1 */    unsigned long iterators; /* number of iterators currently running */} dict;

如下图所示:

2. API接口分析

2.1 创建

API接口函数:

  • dictAdd(dict d, void key, void *val)

在d中增加一个k-v对,实现代码如下:

/* Add an element to the target hash table */int dictAdd(dict *d, void *key, void *val){    dictEntry *entry = dictAddRaw(d,key,NULL);//调用了内部函数    if (!entry) return DICT_ERR;    dictSetVal(d, entry, val);    return DICT_OK;}dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing){    long index;    dictEntry *entry;    dictht *ht;    if (dictIsRehashing(d)) _dictRehashStep(d); //如果正在rehash进行中,则每次操作都尝试进行一次rehash操作    /* Get the index of the new element, or -1 if     * the element already exists. 获取到hash桶的入口index*/    if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)        return NULL;    /* Allocate the memory and store the new entry.     * Insert the element in top, with the assumption that in a database     * system it is more likely that recently added entries are accessed     * more frequently.     (译文:申请内存来存储一个新的entry结构,插入元素到头部,     这里的实现和一般的hash链式解决冲突的实现有点小不同,基于这样的假定:在数据库系统中,最近增加的entries越有可能被访问。      这里是把新插入的entry放到了链表头上,可以看上面的英文解释*/    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];    entry = zmalloc(sizeof(*entry));    entry->next = ht->table[index];    ht->table[index] = entry;    ht->used++;    /* Set the hash entry fields.*/    dictSetKey(d, entry, key);    return entry;}/* Returns the index of a free slot that can be populated with * a hash entry for the given 'key'. * If the key already exists, -1 is returned * and the optional output parameter may be filled. * * Note that if we are in the process of rehashing the hash table, the * index is always returned in the context of the second (new) hash table.   这个原版注释写的很清楚,如果正在rehashing的时候,index返回的是new的hashtable*/static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing){    unsigned long idx, table;    dictEntry *he;    if (existing) *existing = NULL;    /* Expand the hash table if needed ,判断hash桶是否需要扩大,这个地方是redis比较牛逼的地方,      hash桶是动态扩大的,默认初始的时候只有4,然后每次乘2的方式进行扩展,如果扩展了,就需要进行rehash*/    if (_dictExpandIfNeeded(d) == DICT_ERR)        return -1;    /*获取索引的时候,如果正在rehash,需要两个hashtable都进行查询*/    for (table = 0; table <= 1; table++) {        /*这个idx就是hash桶的下标*/        idx = hash & d->ht[table].sizemask;        /* Search if this slot does not already contain the given key */        he = d->ht[table].table[idx];        while(he) {        /*这里是必须遍历下冲突队列,保证key没有出现过*/            if (key==he->key || dictCompareKeys(d, key, he->key)) {                if (existing) *existing = he;                return -1;            }            he = he->next;        }        /*如果不在rehash的话,其实就没有必要再做查找的操作了,直接返回就好了*/        if (!dictIsRehashing(d)) break;    }    return idx;}
  • dictEntry dictFind(dict d, const void *key)
    根据key在d中寻找值,这个逻辑和add差不多,代码很简单,这里就不做解释了
dictEntry *dictFind(dict *d, const void *key){    dictEntry *he;    uint64_t h, idx, table;    if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */    if (dictIsRehashing(d)) _dictRehashStep(d);  //和增加的时候逻辑一样,如果正在rehashing,则进行一步rehash    h = dictHashKey(d, key);    for (table = 0; table <= 1; table++) {        idx = h & d->ht[table].sizemask;        he = d->ht[table].table[idx];        while(he) {            if (key==he->key || dictCompareKeys(d, key, he->key))                return he;            he = he->next;        }        if (!dictIsRehashing(d)) return NULL;    }    return NULL;}

3. rehash过程

redis对于dict支持两种rehash的方式:按照时间,或者按照操作进行rehash。每次都调整一个key值桶内所有的冲突链表到新的hash表中。
rehash 代码如下:

static void _dictRehashStep(dict *d) {    if (d->iterators == 0) dictRehash(d,1);}/* Performs N steps of incremental rehashing. Returns 1 if there are still * keys to move from the old to the new hash table, otherwise 0 is returned. * * Note that a rehashing step consists in moving a bucket (that may have more * than one key as we use chaining) from the old to the new hash table, however * since part of the hash table may be composed of empty spaces, it is not * guaranteed that this function will rehash even a single bucket, since it * will visit at max N*10 empty buckets in total, otherwise the amount of * work it does would be unbound and the function may block for a long time. */int dictRehash(dict *d, int n) {    int empty_visits = n*10; /* Max number of empty buckets to visit. */    if (!dictIsRehashing(d)) return 0;    while(n-- && d->ht[0].used != 0) {        dictEntry *de, *nextde;        /* Note that rehashidx can't overflow as we are sure there are more         * elements because ht[0].used != 0 */        assert(d->ht[0].size > (unsigned long)d->rehashidx);        while(d->ht[0].table[d->rehashidx] == NULL) {            d->rehashidx++;            if (--empty_visits == 0) return 1; //redis为了保证性能,扫描空桶,最多也是有一定的限制        }        de = d->ht[0].table[d->rehashidx];        /* Move all the keys in this bucket from the old to the new hash HT ,这个循环就是开始把这个rehashidx下标的hashtable迁移到新的下标下面,注意,这里需要重新计算key值,重新插入*/        while(de) {            uint64_t h;            nextde = de->next;            /* Get the index in the new hash table */            h = dictHashKey(d, de->key) & d->ht[1].sizemask;//重新计算key值,重新插入            de->next = d->ht[1].table[h];            d->ht[1].table[h] = de;            d->ht[0].used--;            d->ht[1].used++;            de = nextde;        }        d->ht[0].table[d->rehashidx] = NULL;        d->rehashidx++;    }    /* Check if we already rehashed the whole table...,一次操作完了,可能这个hashtable已经迁移完毕,返回0,否则返回1 */    if (d->ht[0].used == 0) {        zfree(d->ht[0].table);        d->ht[0] = d->ht[1]; //现在的0变成1        _dictReset(&d->ht[1]);  //现在的1被reset掉        d->rehashidx = -1;        return 0;    }    /* More to rehash... */    return 1;}

转载于:https://www.cnblogs.com/lh-ty/p/9972907.html

你可能感兴趣的文章
(原創) 如何重新動態配置記憶體空間? (C/C++) (C)
查看>>
(原創) 如何用管線(Pipeline)實作無號數乘加運算? (IC Design) (Verilog)
查看>>
Scrum 之 product Backlog
查看>>
英语:真正有效的英语学习心得,把英语当母语学习!(转载)
查看>>
TI公司CC系列的各种芯片的区别 CC2430 CC1100
查看>>
【转贴】游戏引擎大全
查看>>
刷机后实现:无线路由+usb脱机bt+远程管理!
查看>>
通过COM来获取CookieContainer,简单又好用
查看>>
#敏捷个人# 实践团报名
查看>>
LINQ之路11:LINQ Operators之过滤(Filtering)
查看>>
.Net Cancellable Task - APM异步超时机制扩展
查看>>
为UITableView的列设置间隔颜色
查看>>
ZOJ 3329 One Person Game (概率DP & 期望)
查看>>
搞平衡,我们公司跟国企也没有啥区别
查看>>
Easyui的datagrid的行编辑器Editor中添加事件(修改某个单元格带出其他单元格的值)...
查看>>
Appfabric caching 使用半年总结
查看>>
iphone iPhone拍照/摄像软件开发实例
查看>>
20个代码生成框架
查看>>
工作两个周的一点总结
查看>>
[思维导图]Free Ur Mind-推荐使用FreeMind工具
查看>>