Chinaunix首页 | 论坛 | 博客
  • 博客访问: 8139300
  • 博文数量: 594
  • 博客积分: 13065
  • 博客等级: 上将
  • 技术积分: 10324
  • 用 户 组: 普通用户
  • 注册时间: 2008-03-26 16:44
个人简介

推荐: blog.csdn.net/aquester https://github.com/eyjian https://www.cnblogs.com/aquester http://blog.chinaunix.net/uid/20682147.html

文章分类

全部博文(594)

分类: 服务器与存储

2018-03-29 20:07:33

结论:
尚待确认是否为Redis的BUG。可以排除的是内存淘汰:进程实际占用的内存(used_memory约1.31M)远小于配置的最大内存(maxmemory为1G),所以不会是因为内存不够而需要淘汰keys。

CPU占用100%的redis-server进程在集群中的角色:
slave

解决办法:
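临时办法:使用gdb附加到该进程,在dictGetRandomKey()中将d.ht[0].used的值改为0(set variable d.ht[0].used=0),使dictSize(d)为0、dictGetRandomKey()返回NULL,从而让dbRandomKey()退出死循环。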

问题原因:
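dbRandomKey()的循环只有两个出口:dictGetRandomKey()返回NULL,或者取到一个未过期的key。
运行日志显示DB 0中只有1个key,且该key带有过期时间(1 keys (1 volatile)),因此d.ht[0].used为1,
dictGetRandomKey()过程中,无法走到分支“if (dictSize(d) == 0) return NULL;”,每次返回的都是这同一个key;
调用栈显示dbRandomKey()反复对该key调用expireIfNeeded()后继续循环,说明该key已过期却一直没有从字典中删除
(推测:本进程是slave,过期key要等master同步删除,slave自身不删除,待对照expireIfNeeded()源码确认),
导致函数dbRandomKey()进入死循环。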

版本:
Redis server v=3.2.0 sha=00000000:0 malloc=jemalloc-4.0.3 bits=64 build=9894db3ef433c070

现象1:CPU百分百
PID   USER  PR NI VIRT  RES  SHR  S %CPU  %MEM TIME+   COMMAND                                                                                                                
25636 redis 20 0  38492 4096 1360 R 100.0 0.0  2578:10 redis-server

现象2:大量CLOSE_WAIT状态连接:
tcp     2417      0 1.49.26.98:11382      1.49.26.98:37268      CLOSE_WAIT  -                   
tcp     2521      0 1.49.26.98:11382      1.49.26.98:35141      CLOSE_WAIT  -                   
tcp     2521      0 1.49.26.98:11382      1.49.26.98:57181      CLOSE_WAIT  -

进程状态:
redis 25636 30.0 0.0 38492  4096 ? Rsl 3月23 2579:55 /data/redis/bin/redis-server *:1382 [cluster]

最大内存配置(1G):
maxmemory 1073741824

运行日志:
25636:S 28 Mar 00:21:24.526 - 1 clients connected (0 slaves), 1312384 bytes in use
25636:S 28 Mar 00:21:29.531 - DB 0: 1 keys (1 volatile) in 8 slots HT.
25636:S 28 Mar 00:21:29.531 - 1 clients connected (0 slaves), 1312384 bytes in use
25636:S 28 Mar 00:21:32.585 - Accepted 1.118.14.7:58132

调用栈:
#0  dictGenHashFunction (key=, len=5) at dict.c:123
#1  0x00000000004232e6 in dictFind (d=0x7f71c2a17240, key=key@entry=0x7f71c2a15001) at dict.c:499
#2  0x000000000043a00a in dbRandomKey (db=0x7f71c2a24800) at db.c:176
#3  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#4  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#5  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#6  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#7  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#8  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#9  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

#0  0x00007f71c2fbc3a2 in random () from /lib64/libc.so.6
#1  0x0000000000423745 in dictGetRandomKey (d=0x7f71c2a171e0) at dict.c:646
#2  0x0000000000439fc0 in dbRandomKey (db=0x7f71c2a24800) at db.c:171
#3  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#4  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#5  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#6  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#7  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#8  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#9  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079


#0  0x00007f71c30e17e4 in __memcmp_sse4_1 () from /lib64/libc.so.6
#1  0x0000000000424219 in dictSdsKeyCompare (privdata=, key1=, key2=) at server.c:445
#2  0x000000000042331d in dictFind (d=0x7f71c2a17240, key=0x7f71c2a27e73) at dict.c:504
#3  0x0000000000439494 in getExpire (db=0x7f71c2a24800, key=0x7f71c2a27e60) at db.c:824
#4  0x0000000000439c4f in expireIfNeeded (db=0x7f71c2a24800, key=0x7f71c2a27e60) at db.c:858
#5  0x000000000043a01a in dbRandomKey (db=0x7f71c2a24800) at db.c:177
#6  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#7  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#8  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#9  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#10 0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#11 0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#12 0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

#0  dictGetRandomKey (d=) at dict.c:663
#1  0x0000000000439fc0 in dbRandomKey (db=0x7f71c2a24800) at db.c:171
#2  0x000000000043a0a2 in randomkeyCommand (c=0x7f71c2aae1c0) at db.c:355
#3  0x0000000000426b95 in call (c=c@entry=0x7f71c2aae1c0, flags=flags@entry=15) at server.c:2221
#4  0x0000000000429ba7 in processCommand (c=0x7f71c2aae1c0) at server.c:2500
#5  0x0000000000436515 in processInputBuffer (c=0x7f71c2aae1c0) at networking.c:1296
#6  0x0000000000421338 in aeProcessEvents (eventLoop=eventLoop@entry=0x7f71c2a2e050, flags=flags@entry=3) at ae.c:412
#7  0x00000000004215eb in aeMain (eventLoop=0x7f71c2a2e050) at ae.c:455
#8  0x000000000041e5df in main (argc=2, argv=0x7ffef34b2418) at server.c:4079

猜测(最初的怀疑,已被内存数据排除,见上文“结论”):
达到最大内存,进入淘汰keys逻辑,但没有keys符合淘汰,从而死循环。

相关代码:

  1. /* Return a random key from the currently selected database.
   *
   * Replies to client 'c' with the key obtained from dbRandomKey(c->db) as a
   * bulk reply, or with shared.nullbulk when dbRandomKey() returns NULL. */
  2. void randomkeyCommand(client *c) {
  3.     robj *key;

  4.     if ((key = dbRandomKey(c->db)) == NULL) {
  5.         addReply(c,shared.nullbulk); /* dbRandomKey() had no key to return */
  6.         return;
  7.     }

  8.     addReplyBulk(c,key);
  9.     decrRefCount(key); /* release the object that dbRandomKey() created for us */
  10. }

  11. /* Return a random key, in form of a Redis object.
  12.  * If there are no keys, NULL is returned.
  13.  *
  14.  * The function makes sure to return keys not already expired. */
  /* NOTE(review): the loop below has exactly two exits: dictGetRandomKey()
   * returning NULL, or a key for which expireIfNeeded() returns 0 (or that
   * has no entry in db->expires). If every key in db->dict is volatile and
   * expireIfNeeded() keeps returning non-zero without the key ever leaving
   * db->dict, neither exit is taken and the function spins forever.
   * Presumably this is what happens on a slave, which reports the key as
   * expired but leaves the deletion to the master -- confirm against
   * expireIfNeeded() in db.c. */
  15. robj *dbRandomKey(redisDb *db) {
  16.     dictEntry *de;

  17.     while(1) { // This loop never terminating is the cause of the 100% CPU usage
  18.         sds key;
  19.         robj *keyobj;

  20.         de = dictGetRandomKey(db->dict);
  21.         if (de == NULL) return NULL; /* exit 1: the dict reports zero entries */

  22.         key = dictGetKey(de);
  23.         keyobj = createStringObject(key,sdslen(key));
  24.         if (dictFind(db->expires,key)) { /* only keys with an expire set are checked */
  25.             if (expireIfNeeded(db,keyobj)) {
  26.                 decrRefCount(keyobj); /* discard the candidate before retrying */
  27.                 continue; /* search for another key. This expired. */
  28.             }
  29.         }
  30.         return keyobj; /* exit 2: caller owns this object and must decrRefCount() it */
  31.     }
  32. }

  /* Excerpt of call(): executes the command attached to client 'c' by invoking
   * c->cmd->proc(c). The rest of the function body was elided by the article
   * author (the line of full-width dots at item 55), so 'flags',
   * 'client_old_flags' and 'duration' have no visible use in this listing. */
  33. void call(client *c, int flags) {
  34.     long long dirty, start, duration;
  35.     int client_old_flags = c->flags;

  36.     /* Sent the command to clients in MONITOR mode, only if the commands are
  37.      * not generated from reading an AOF. */
  38.     if (listLength(server.monitors) &&
  39.         !server.loading &&
  40.         !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
  41.     {
  42.         replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
  43.     }

  44.     /* Initialization: clear the flags that must be set by the command on
  45.      * demand, and initialize the array for additional commands propagation. */
  46.     c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
  47.     redisOpArrayInit(&server.also_propagate);

  48.     /* Call the command. */
  49.     dirty = server.dirty; /* snapshot, so the change made by the command can be computed */
  50.     start = ustime();
  51.     c->cmd->proc(c); /* the command implementation; randomkeyCommand() in the captured stack traces */
  52.     duration = ustime()-start; /* time spent inside the command implementation */
  53.     dirty = server.dirty-dirty; /* how much server.dirty changed while the command ran */
  54.     if (dirty < 0) dirty = 0; /* clamp a negative delta to zero */
  55.     。。。。。。
  56. }

  57. /* With multiplexing we need to take per-client state.
  58.  * Clients are taken in a linked list.
   *
   * Excerpt: every field except 'cmd' and 'lastcmd' was elided by the article
   * author (the lines of full-width dots). */
  59. typedef struct client {
  60.     。。。。。。
  61.     struct redisCommand *cmd, *lastcmd; /* cmd: command whose proc call() invokes; lastcmd: last command executed. */
  62.     。。。。。。
  63. };

  /* Function type of a command implementation: receives the client that issued
   * the command (randomkeyCommand() has this signature). */
  64. typedef void redisCommandProc(client *c);
  /* Function type used to determine the key arguments of a command line (see the
   * comment on 'getkeys_proc' below). */
  65. typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
  /* Descriptor of one command; reached from a client through client->cmd. */
  66. struct redisCommand {
  67.     char *name;
  68.     redisCommandProc *proc; /* implementation, invoked by call() as c->cmd->proc(c) */
  69.     int arity; /* NOTE(review): presumably the expected argument count -- confirm */
  70.     char *sflags; /* Flags as string representation, one char per flag. */
  71.     int flags; /* The actual flags, obtained from the 'sflags' field. */
  72.     /* Use a function to determine keys arguments in a command line.
  73.      * Used for Redis Cluster redirect. */
  74.     redisGetKeysProc *getkeys_proc;
  75.     /* What keys should be loaded in background when calling this command? */
  76.     int firstkey; /* The first argument that's a key (0 = no keys) */
  77.     int lastkey; /* The last argument that's a key */
  78.     int keystep; /* The step between first and last key */
  79.     long long microseconds, calls; /* NOTE(review): presumably per-command statistics (total time, call count) -- confirm */
  80. };

  81. /* This is our hash table structure. Every dictionary has two of this as we
  82.  * implement incremental rehashing, for the old to the new table. */
  83. typedef struct dictht {
  84.     dictEntry **table; /* bucket array; each bucket is a chain walked through dictEntry->next */
  85.     unsigned long size; /* number of buckets in 'table' (8 in the gdb dump quoted in dictGetRandomKey) */
  86.     unsigned long sizemask; /* ANDed with a value to obtain a bucket index (7 for size 8 in that dump) */
  87.     unsigned long used; /* entry count; dictSize() is the sum of 'used' over both tables */
  88. } dictht;

  /* A dictionary: two hash tables plus the state of the incremental rehash
   * that moves entries from ht[0] to ht[1]. */
  89. typedef struct dict {
  90.     dictType *type; /* NOTE(review): type descriptor (dbDictType in the quoted gdb dump); its fields are not shown here */
  91.     void *privdata; /* NOTE(review): opaque pointer, NULL in the quoted gdb dump; purpose not visible here */
  92.     dictht ht[2]; /* ht[0]: old table, ht[1]: new table used while rehashing */
  93.     long rehashidx; /* rehashing not in progress if rehashidx == -1 */
  94.     int iterators; /* number of iterators currently running */
  95. } dict;

  96. /* Return a random entry from the hash table. Useful to
  97.  * implement randomized algorithms
   *
   * Returns NULL only when dictSize(d) is 0. Otherwise it keeps drawing
   * random bucket indexes until it hits a non-empty bucket, then returns a
   * randomly chosen entry of that bucket's chain. */
  98. dictEntry *dictGetRandomKey(dict *d)
  99. {
  100.     dictEntry *he, *orighe;
  101.     unsigned int h;
  102.     int listlen, listele;

      // Annotation by the article author (not part of the Redis source): gdb
      // session on the spinning process. It shows one entry in ht[0], no
      // rehash in progress, and the manual patch of ht[0].used to 0 that
      // makes the dictSize() check below return NULL.
  103.     // (gdb) p *d
  104.     // $1 = {type = 0x71d940 <dbDictType>, privdata = 0x0, ht = {{table = 0x7f71c2a1e480, size = 8, sizemask = 7, used = 1}, {table = 0x0, size = 0, sizemask = 0, used = 0}}, rehashidx = -1, iterators = 0}
  105.     //
  106.     // (gdb) p d.ht[0]
  107.     // $3 = {table = 0x7f71c2a1e480, size = 8, sizemask = 7, used = 1}
  108.     // (gdb) p d.ht[1]
  109.     // $4 = {table = 0x0, size = 0, sizemask = 0, used = 0}
  110.     //
  111.     // (gdb) set variable d.ht[0].used=0
  112.     // (gdb) p d.ht[0].used
  113.     // $7 = 0

  114.     // #define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
  115.     if (dictSize(d) == 0) return NULL; /* the only path that returns NULL */
  116.     if (dictIsRehashing(d)) _dictRehashStep(d); /* advance the rehash before sampling */
  117.     if (dictIsRehashing(d)) {
  118.         do {
  119.             /* We are sure there are no elements in indexes from 0
  120.              * to rehashidx-1 */
  121.             h = d->rehashidx + (random() % (d->ht[0].size +
  122.                                             d->ht[1].size -
  123.                                             d->rehashidx));
  124.             he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
  125.                                       d->ht[0].table[h]; /* indexes past ht[0] map into ht[1] */
  126.         } while(he == NULL); /* retry until a non-empty bucket is drawn */
  127.     } else {
  128.         do {
  129.             h = random() & d->ht[0].sizemask; /* random bucket index within ht[0] */
  130.             he = d->ht[0].table[h];
  131.         } while(he == NULL); /* retry until a non-empty bucket is drawn */
  132.     }

  133.     /* Now we found a non empty bucket, but it is a linked
  134.      * list and we need to get a random element from the list.
  135.      * The only sane way to do so is counting the elements and
  136.      * select a random index. */
  137.     listlen = 0;
  138.     orighe = he; /* remember the chain head for the second walk */
  139.     while(he) {
  140.         he = he->next;
  141.         listlen++;
  142.     }
  143.     listele = random() % listlen; /* listlen >= 1 here: the bucket is non-empty */
  144.     he = orighe;
  145.     while(listele--) he = he->next; /* advance to the chosen position in the chain */
  146.     return he;
  147. }

  148. /* This function performs just a step of rehashing, and only if there are
  149.  * no safe iterators bound to our hash table. When we have iterators in the
  150.  * middle of a rehashing we can't mess with the two hash tables otherwise
  151.  * some element can be missed or duplicated.
  152.  *
  153.  * This function is called by common lookup or update operations in the
  154.  * dictionary so that the hash table automatically migrates from H1 to H2
  155.  * while it is actively used. */
  156. static void _dictRehashStep(dict *d) {
  157.     if (d->iterators == 0) dictRehash(d,1); /* does nothing while d->iterators is non-zero; 1 is the step count */
  158. }

进程内存(问题解决,退出死循环后才能看到,但结果和ps看到一致):
# Memory
used_memory:1375320
used_memory_human:1.31M
used_memory_rss:4321280
used_memory_rss_human:4.12M
used_memory_peak:2468448
used_memory_peak_human:2.35M
total_system_memory:33453797376
total_system_memory_human:31.16G
used_memory_lua:34816
used_memory_lua_human:34.00K
maxmemory:1073741824
maxmemory_human:1.00G
maxmemory_policy:allkeys-lru
mem_fragmentation_ratio:3.14
mem_allocator:jemalloc-4.0.3


阅读(15599) | 评论(1) | 转发(0) |
给主人留下些什么吧!~~

aquester2019-03-21 18:55:17

确认为BUG,4.0.11已修复