memcached 源码阅读之 字符串 hash 与 搜集的一些 字符串 hash

作者: | 更新日期:

阅读 memcached 源码的时候,发现 memcached 中有两种字符串 hash 的实现,于是研究了一下;在理解这些 hash 的过程中,顺便搜集了一些互联网上的字符串 hash 方法。

本文首发于公众号:天空的代码世界,微信号:tiankonguse

cover

memcached 源码阅读之 hash table文章的最后我说了,要研究一下 memcached 的 字符串 hash 方法的。
现在就开始记录下研究的结果。

jenkins 的位置在 jenkins_hash.c .

Little-Endian就是低位字节排放在内存的低地址端,高位字节排放在内存的高地址端。
Big-Endian就是高位字节排放在内存的低地址端,低位字节排放在内存的高地址端。
举一个例子,比如数字0x12 34 56 78在内存中的表示形式为:

  1. 1)大端模式:
  2. 低地址 -----------------> 高地址
  3. 0x12 | 0x34 | 0x56 | 0x78
  4. 2)小端模式:
  5. 低地址 ------------------> 高地址
  6. 0x78 | 0x56 | 0x34 | 0x12
  /*
   * Derive HASH_LITTLE_ENDIAN / HASH_BIG_ENDIAN from ENDIAN_BIG and
   * ENDIAN_LITTLE (both expected to be defined elsewhere).  ENDIAN_BIG is
   * checked first; if neither flag equals 1, both macros are defined as 0.
   */
  #if ENDIAN_BIG == 1
  #  define HASH_LITTLE_ENDIAN 0
  #  define HASH_BIG_ENDIAN    1
  #elif ENDIAN_LITTLE == 1
  #  define HASH_LITTLE_ENDIAN 1
  #  define HASH_BIG_ENDIAN    0
  #else
  #  define HASH_LITTLE_ENDIAN 0
  #  define HASH_BIG_ENDIAN    0
  #endif

看到的第一个是 rot 宏。
这个宏的作用是循环左移若干位。

  /*
   * rot(word, bits): for an unsigned 32-bit `word`, rotate left by `bits`
   * (0 < bits < 32).  The two shifted parts occupy disjoint bit positions,
   * so combining them with ^ gives the same result as |.  Both arguments
   * are evaluated twice.
   */
  #define rot(word, bits) (((word) << (bits)) ^ ((word) >> (32 - (bits))))

接下来是 mix 宏:它对 (a,b,c) 做可逆的混合(mix),并不是加密。
This is reversible, so any information in (a,b,c) before mix() is still in (a,b,c) after mix().

  /*
   * mix(x,y,z): stir three 32-bit state words together, in place.
   *
   * Six rounds are applied.  Each round subtracts one word from another,
   * XORs a rotated copy of the subtracted word into the result, and then
   * adds the remaining word into the subtracted one.  The rotation amounts
   * are 4, 6, 8, 16, 19 and 4.
   *
   * Expands to a brace-enclosed block and uses rot().  Every argument is
   * assigned to and evaluated several times, so pass plain lvalues only.
   */
  #define mix(x,y,z) \
  { \
    x -= z; \
    x ^= rot(z, 4); \
    z += y; \
    \
    y -= x; \
    y ^= rot(x, 6); \
    x += z; \
    \
    z -= y; \
    z ^= rot(y, 8); \
    y += x; \
    \
    x -= z; \
    x ^= rot(z, 16); \
    z += y; \
    \
    y -= x; \
    y ^= rot(x, 19); \
    x += z; \
    \
    z -= y; \
    z ^= rot(y, 4); \
    y += x; \
  }

final mixing of 3 32-bit values (a,b,c) into c
将 a,b,c 合并到 c中。

  /*
   * final(x,y,z): last mixing pass over three 32-bit state words; callers
   * take the resulting z as the hash value.
   *
   * Seven steps are applied.  Each step XORs one word into another and then
   * subtracts a rotated copy of that same source word.  The rotation
   * amounts are 14, 11, 25, 16, 4, 14 and 24.
   *
   * Expands to a brace-enclosed block and uses rot().  Every argument is
   * assigned to and evaluated several times, so pass plain lvalues only.
   */
  #define final(x,y,z) \
  { \
    z ^= y; \
    z -= rot(y, 14); \
    \
    x ^= z; \
    x -= rot(z, 11); \
    \
    y ^= x; \
    y -= rot(x, 25); \
    \
    z ^= y; \
    z -= rot(y, 16); \
    \
    x ^= z; \
    x -= rot(z, 4); \
    \
    y ^= x; \
    y -= rot(x, 14); \
    \
    z ^= y; \
    z -= rot(y, 24); \
  }

源代码中区分了大端和小端,并且还根据 key 的地址 & 0x3、& 0x1 的结果再分情况处理:这是在判断 key 的地址是否按 4 字节 / 2 字节对齐,从而决定一次读取 32 位、16 位,还是逐字节读取。这里只看逐字节读取的版本。

  /*
   * jenkins_hash -- hash `length` bytes starting at `key` into 32 bits,
   * reading the key one byte at a time.
   *
   * Within each group of four bytes the first byte lands in the most
   * significant position (<< 24) and the fourth in the least significant.
   * Full 12-byte groups are folded into (a,b,c) with mix() while more than
   * 12 bytes remain; the last 1..12 bytes are added by the fall-through
   * switch and finished with final().
   *
   * key:    bytes to hash; not dereferenced when length is 0.
   * length: number of bytes available at key.
   * Returns c.  For length == 0 this is the untouched initial value
   * 0xdeadbeef, because final() is skipped in that case.
   *
   * The key is read as uint8_t on purpose: going through plain `char`
   * sign-extends bytes >= 0x80 on platforms where char is signed, which
   * would add 0xFFFFFFxx-style values into the state.
   *
   * Requires the mix() and final() macros.
   */
  uint32_t jenkins_hash(const void *key, size_t length)
  {
      const uint8_t *k = (const uint8_t *)key;
      uint32_t a, b, c;

      a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

      /* All but the last block: 12 bytes per round. */
      while (length > 12) {
          a += ((uint32_t)k[0]) << 24;
          a += ((uint32_t)k[1]) << 16;
          a += ((uint32_t)k[2]) << 8;
          a += ((uint32_t)k[3]);
          b += ((uint32_t)k[4]) << 24;
          b += ((uint32_t)k[5]) << 16;
          b += ((uint32_t)k[6]) << 8;
          b += ((uint32_t)k[7]);
          c += ((uint32_t)k[8]) << 24;
          c += ((uint32_t)k[9]) << 16;
          c += ((uint32_t)k[10]) << 8;
          c += ((uint32_t)k[11]);
          mix(a,b,c);
          length -= 12;
          k += 12;
      }

      /* Last block (0..12 bytes): every case falls through to the next. */
      switch (length) {
      case 12:
          c += k[11];
          /* fallthrough */
      case 11:
          c += ((uint32_t)k[10]) << 8;
          /* fallthrough */
      case 10:
          c += ((uint32_t)k[9]) << 16;
          /* fallthrough */
      case 9:
          c += ((uint32_t)k[8]) << 24;
          /* fallthrough */
      case 8:
          b += k[7];
          /* fallthrough */
      case 7:
          b += ((uint32_t)k[6]) << 8;
          /* fallthrough */
      case 6:
          b += ((uint32_t)k[5]) << 16;
          /* fallthrough */
      case 5:
          b += ((uint32_t)k[4]) << 24;
          /* fallthrough */
      case 4:
          a += k[3];
          /* fallthrough */
      case 3:
          a += ((uint32_t)k[2]) << 8;
          /* fallthrough */
      case 2:
          a += ((uint32_t)k[1]) << 16;
          /* fallthrough */
      case 1:
          a += ((uint32_t)k[0]) << 24;
          break;
      case 0:
          /* Nothing left to add: return without the final mixing pass. */
          return c;
      }

      final(a,b,c);
      return c;
  }

看完这个代码,我们可以把它缩短一下(注意:下面按 32 位整体读取的写法,只有在大端机器上才与上面逐字节读取的版本结果一致)。

  /*
   * Load the four bytes at p as a uint32_t in host byte order.  The bytes
   * are copied one at a time, so p needs no particular alignment and no
   * uint32_t pointer is ever formed from a byte pointer.
   */
  static uint32_t jenkins_load32(const uint8_t *p)
  {
      uint32_t word;
      unsigned char *dst = (unsigned char *)&word;

      dst[0] = p[0];
      dst[1] = p[1];
      dst[2] = p[2];
      dst[3] = p[3];
      return word;
  }

  /*
   * jenkins_hash -- shortened variant that consumes the key in whole
   * 32-bit words where it can.
   *
   * Complete words are loaded in host byte order, while the 1..3 leftover
   * bytes of a partial word are placed most-significant-first (<< 24,
   * << 16, << 8).  The two agree only on a big-endian host.
   *
   * key:    bytes to hash; not dereferenced when length is 0.
   * length: number of bytes available at key.
   * Returns c.  When length is a multiple of 12 (including 0) the value is
   * returned right after the main loop, without the final() pass.
   *
   * Bytes are read as uint8_t so values >= 0x80 are not sign-extended, and
   * word loads go through jenkins_load32() instead of dereferencing a cast
   * pointer.
   *
   * Requires the mix() and final() macros.
   */
  uint32_t jenkins_hash(const void *key, size_t length)
  {
      const uint8_t *k = (const uint8_t *)key;
      uint32_t a, b, c;

      a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;

      /* Whole 12-byte groups: three host-order words per round. */
      while (length >= 12) {
          a += jenkins_load32(k);
          b += jenkins_load32(k + 4);
          c += jenkins_load32(k + 8);
          mix(a,b,c);
          length -= 12;
          k += 12;
      }

      if (length == 0) {
          return c;
      }

      /* 1..11 trailing bytes: partial word first, then the full words. */
      switch (length) {
      case 11:
          c += ((uint32_t)k[10]) << 8;
          /* fallthrough */
      case 10:
          c += ((uint32_t)k[9]) << 16;
          /* fallthrough */
      case 9:
          c += ((uint32_t)k[8]) << 24;
          /* fallthrough */
      case 8:
          b += jenkins_load32(k + 4);
          a += jenkins_load32(k);
          break;
      case 7:
          b += ((uint32_t)k[6]) << 8;
          /* fallthrough */
      case 6:
          b += ((uint32_t)k[5]) << 16;
          /* fallthrough */
      case 5:
          b += ((uint32_t)k[4]) << 24;
          /* fallthrough */
      case 4:
          a += jenkins_load32(k);
          break;
      case 3:
          a += ((uint32_t)k[2]) << 8;
          /* fallthrough */
      case 2:
          a += ((uint32_t)k[1]) << 16;
          /* fallthrough */
      case 1:
          a += ((uint32_t)k[0]) << 24;
          break;
      }

      final(a,b,c);
      return c;
  }

murmur3 hash 的位置在 murmur3_hash.c .

  /* Ask the compiler to always inline the small helpers below
   * (GCC/Clang attribute syntax). */
  #define FORCE_INLINE inline __attribute__((always_inline))

  /* Rotate x left by r bits; r must be in 1..31. */
  static inline uint32_t ROTL32(uint32_t x, int8_t r)
  {
      return (x << r) | (x >> (32 - r));
  }

  /*
   * Return the 32-bit block number i counted from byte pointer p, in host
   * byte order.  i may be negative: the caller passes a pointer just past
   * the last whole block and indexes backwards from it.  No bounds check
   * is performed.
   *
   * The four bytes are copied individually, so p needs no alignment and
   * the key is never accessed through a uint32_t pointer.
   */
  static FORCE_INLINE uint32_t getblock32(const uint8_t *p, int i)
  {
      const uint8_t *src = p + i * 4;
      uint32_t word;
      unsigned char *dst = (unsigned char *)&word;

      dst[0] = src[0];
      dst[1] = src[1];
      dst[2] = src[2];
      dst[3] = src[3];
      return word;
  }

  /* Final scrambling of the 32-bit state: alternating xor-shift and
   * multiply steps. */
  static FORCE_INLINE uint32_t fmix32(uint32_t h)
  {
      h ^= h >> 16;
      h *= 0x85ebca6b;
      h ^= h >> 13;
      h *= 0xc2b2ae35;
      h ^= h >> 16;
      return h;
  }

  /*
   * MurmurHash3_x86_32 -- 32-bit MurmurHash3 of `length` bytes at `key`,
   * with the seed fixed at 0.
   *
   * key:    bytes to hash.
   * length: number of bytes at key.
   * Returns the 32-bit hash; an empty key hashes to 0.
   *
   * Whole 4-byte blocks are read in host byte order; the 1..3 trailing
   * bytes are combined least-significant-first.
   */
  uint32_t MurmurHash3_x86_32(const void *key, size_t length)
  {
      const uint8_t *data = (const uint8_t *)key;
      const int nblocks = (int)(length / 4);
      const uint32_t c1 = 0xcc9e2d51;
      const uint32_t c2 = 0x1b873593;
      uint32_t h1 = 0;

      /* Body: walk the whole blocks from first to last using negative
       * indices relative to the end of the block area. */
      const uint8_t *blocks_end = data + nblocks * 4;
      for (int i = -nblocks; i; i++) {
          uint32_t k1 = getblock32(blocks_end, i);

          k1 *= c1;
          k1 = ROTL32(k1, 15);
          k1 *= c2;

          h1 ^= k1;
          h1 = ROTL32(h1, 13);
          h1 = h1 * 5 + 0xe6546b64;
      }

      /* Tail: the 0..3 bytes after the last whole block. */
      const uint8_t *tail = blocks_end;
      uint32_t k1 = 0;
      switch (length & 3) {
      case 3:
          k1 ^= tail[2] << 16;
          /* fallthrough */
      case 2:
          k1 ^= tail[1] << 8;
          /* fallthrough */
      case 1:
          k1 ^= tail[0];
          k1 *= c1;
          k1 = ROTL32(k1, 15);
          k1 *= c2;
          h1 ^= k1;
      }

      /* Finalization: fold in the length, then scramble. */
      h1 ^= (uint32_t)length;
      return fmix32(h1);
  }
  1. ub4 additive(char *key, ub4 len, ub4 prime){
  2. ub4 hash, i;
  3. for (hash=len, i=0; i<len; ++i)
  4. hash += key[i];
  5. return (hash % prime);
  6. }
  1. ub4 rotating(char *key, ub4 len, ub4 prime){
  2. ub4 hash, i;
  3. for (hash=len, i=0; i<len; ++i)
  4. hash = (hash<<4)^(hash>>28)^key[i];
  5. return (hash % prime);
  6. }
  1. ub4 one_at_a_time(char *key, ub4 len){
  2. ub4 hash, i;
  3. for (hash=0, i=0; i<len; ++i){
  4. hash += key[i];
  5. hash += (hash << 10);
  6. hash ^= (hash >> 6);
  7. }
  8. hash += (hash << 3);
  9. hash ^= (hash >> 11);
  10. hash += (hash << 15);
  11. return (hash & mask);
  12. }
  1. ub4 bernstein(ub1 *key, ub4 len, ub4 level){
  2. ub4 hash = level;
  3. ub4 i;
  4. for (i=0; i<len; ++i) hash = 33*hash + key[i];
  5. return hash;
  6. }
  1. u4 goulburn( const unsigned char *cp, size_t len, uint32_t last_value){
  2. register u4 h = last_value;
  3. int u;
  4. for( u=0; u<len; ++u ) {
  5. h += g_table0[ cp[u] ];
  6. h ^= (h << 3) ^ (h >> 29);
  7. h += g_table1[ h >> 25 ];
  8. h ^= (h << 14) ^ (h >> 18);
  9. h += 1783936964UL;
  10. }
  11. return h;
  12. }

/*
 * MurmurHash1 -- 32-bit hash of len bytes at key, starting from seed.
 *
 * key:  bytes to hash.
 * len:  number of bytes at key; values below 1 hash only seed and len.
 * seed: initial state.
 * Returns the 32-bit hash.
 *
 * Whole 4-byte groups are combined in host byte order; the 1..3 trailing
 * bytes are added least-significant-first.  Each group is copied byte by
 * byte into a local word, so key needs no alignment and is never accessed
 * through an unsigned int pointer (the original cast also dropped const).
 */
uint32_t MurmurHash1(const void *key, int len, uint32_t seed)
{
    const unsigned int m = 0xc6a4a793;
    const int r = 16;
    unsigned int h = seed ^ (len * m);
    const unsigned char *data = (const unsigned char *)key;

    /* Body: four bytes per round. */
    while (len >= 4) {
        unsigned int k = 0;
        unsigned char *dst = (unsigned char *)&k;

        dst[0] = data[0];
        dst[1] = data[1];
        dst[2] = data[2];
        dst[3] = data[3];

        h += k;
        h *= m;
        h ^= h >> 16;

        data += 4;
        len -= 4;
    }

    /* Tail: 1..3 leftover bytes, each case falling into the next. */
    switch (len) {
    case 3:
        h += data[2] << 16;
        /* fallthrough */
    case 2:
        h += data[1] << 8;
        /* fallthrough */
    case 1:
        h += data[0];
        h *= m;
        h ^= h >> r;
    }

    /* Final scramble. */
    h *= m;
    h ^= h >> 10;
    h *= m;
    h ^= h >> 17;

    return h;
}
  1. //This preinitializes tab[] to an arbitrary permutation of 0 .. 255.
  2. char pearson(char *key, ub4 len, char tab[256]){
  3. char hash;
  4. ub4 i;
  5. for (hash=len, i=0; i<len; ++i)
  6. hash=tab[hash^key[i]];
  7. return (hash);
  8. }
  1. ub4 crc(char *key, ub4 len, ub4 mask, ub4 tab[256]){
  2. ub4 hash, i;
  3. for (hash=len, i=0; i<len; ++i)
  4. hash = (hash >> 8) ^ tab[(hash & 0xff) ^ key[i]];
  5. return (hash & mask);
  6. }
  1. //The size of tab[] is the maximum number of input bits.
  2. //Values in tab[] are chosen at random.
  3. ub4 universal(char *key, ub4 len, ub4 mask, ub4 tab[MAXBITS]){
  4. ub4 hash, i;
  5. for (hash=len, i=0; i<(len<<3); i+=8){
  6. register char k = key[i>>3];
  7. if (k&0x01) hash ^= tab[i+0];
  8. if (k&0x02) hash ^= tab[i+1];
  9. if (k&0x04) hash ^= tab[i+2];
  10. if (k&0x08) hash ^= tab[i+3];
  11. if (k&0x10) hash ^= tab[i+4];
  12. if (k&0x20) hash ^= tab[i+5];
  13. if (k&0x40) hash ^= tab[i+6];
  14. if (k&0x80) hash ^= tab[i+7];
  15. }
  16. return (hash & mask);
  17. }
  1. ub4 zobrist( char *key, ub4 len, ub4 mask, ub4 tab[MAXBYTES][256]){
  2. ub4 hash, i;
  3. for (hash=len, i=0; i<len; ++i)
  4. hash ^= tab[i][key[i]];
  5. return (hash & mask);
  6. }

本文首发于公众号:天空的代码世界,微信号:tiankonguse
如果你想留言,可以在微信里面关注公众号进行留言。

关注公众号,接收最新消息

tiankonguse +
穿越