Installed 7.0-rc3 ./redis-server master.conf ./redis-server master-slave.conf ./redis-sentinel sentinel0.conf ./redis-sentinel sentinel1.conf ./redis-sentinel sentinel2.conf
redis-server is working fine. master.conf:
port 6379
requirepass "12345"
masterauth "12345"
loglevel notice
#logfile "Logs/redis_log.txt"
replica-serve-stale-data yes
replica-read-only yes
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-disable-tcp-nodelay no
master-slave.conf:
port 6380
requirepass "12345"
masterauth "12345"
loglevel notice
#logfile "Logs/redis_log.txt"
replica-serve-stale-data yes
replica-read-only yes
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-disable-tcp-nodelay no
replicaof 127.0.0.1 6379
sentinel0.conf is:
port 26380 loglevel notice
logfile "Logs/redis_sentinel_log.txt"
requirepass "12345" masterauth "12345"
sentinel monitor redis-gsf 127.0.0.1 6379 2 sentinel down-after-milliseconds redis-gsf 6001 sentinel failover-timeout redis-gsf 60000 sentinel config-epoch redis-gsf 1 sentinel auth-pass redis-gsf "12345"
sentinel1.conf
port 26379 loglevel notice
logfile "Logs/redis_sentinel_log.txt"
requirepass "12345" masterauth "12345" sentinel monitor redis-gsf 127.0.0.1 6379 2 sentinel down-after-milliseconds redis-gsf 5000 sentinel failover-timeout redis-gsf 60000 sentinel config-epoch redis-gsf 1 sentinel auth-pass redis-gsf 12345
sentinel2.conf
port 26378 loglevel debug
logfile "Logs/redis_sentinel_log_2.txt"
requirepass "12345" masterauth "12345" sentinel monitor redis-gsf 127.0.0.1 6379 2 sentinel down-after-milliseconds redis-gsf 5000 sentinel failover-timeout redis-gsf 60000 sentinel config-epoch redis-gsf 1 sentinel auth-pass redis-gsf 12345
./redis-sentinel sentinel0.conf
crashes and reports the following information:
[root@linux-server bin]# ./redis-sentinel slave0.conf
2674:X 17 Apr 2022 06:10:18.253 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
2674:X 17 Apr 2022 06:10:18.253 # Redis version=6.9.242, bits=64, commit=0000000 0, modified=0, pid=2674, just started
2674:X 17 Apr 2022 06:10:18.253 # Configuration loaded
2674:X 17 Apr 2022 06:10:18.254 * Increased maximum number of open files to 1003 2 (it was originally set to 1024).
2674:X 17 Apr 2022 06:10:18.254 * monotonic clock: POSIX clock_gettime
_._
_.-``__ ''-._
_.-`` `. `_. ''-._ Redis 6.9.242 (00000000/0) 64 bit
.-`` .-```. ```\/ _.,_ ''-._
( ' , .-` | `, ) Running in sentinel mode
|`-._`-...-` __...-.``-._|'` _.-'| Port: 26380
| `-._ `._ / _.-' | PID: 2674
`-._ `-._ `-./ _.-' _.-'
|`-._`-._ `-.__.-' _.-'_.-'|
| `-._`-._ _.-'_.-' | https://redis.io
`-._ `-._`-.__.-'_.-' _.-'
|`-._`-._ `-.__.-' _.-'_.-'|
| `-._`-._ _.-'_.-' |
`-._ `-._`-.__.-'_.-' _.-'
`-._ `-.__.-' _.-'
`-._ _.-'
`-.__.-'
2674:X 17 Apr 2022 06:10:18.256 # WARNING: The TCP backlog setting of 511 cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of 1 28.
2674:X 17 Apr 2022 06:10:18.264 * Sentinel new configuration saved on disk
2674:X 17 Apr 2022 06:10:18.264 # Sentinel ID is 27f343f1e62132da4465b82b932070a d4ecaad05
2674:X 17 Apr 2022 06:10:18.264 # +monitor master redis-gsf 127.0.0.1 6379 quoru m 2
2674:X 17 Apr 2022 06:10:18.265 * +slave slave 127.0.0.1:6380 127.0.0.1 6380 @ r edis-gsf 127.0.0.1 6379
*** Error in `./redis-sentinel *:26380 [sentinel]': munmap_chunk(): invalid poin ter: 0x00000000014e3010 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x7f474)[0x7f0dca300474]
./redis-sentinel *:26380 [sentinel](rewriteConfig+0x59)[0x48e4c9]
./redis-sentinel *:26380 [sentinel](sentinelFlushConfig+0x24)[0x4b6fe4]
./redis-sentinel *:26380 [sentinel](sentinelRefreshInstanceInfo+0xd49)[0x4bbc99]
./redis-sentinel *:26380 [sentinel](redisProcessCallbacks+0xe7)[0x4fd587]
./redis-sentinel *:26380 [sentinel](redisAsyncHandleRead+0x3e)[0x4fde0e]
./redis-sentinel *:26380 [sentinel](aeProcessEvents+0x235)[0x439fd5]
./redis-sentinel *:26380 [sentinel](aeMain+0x1d)[0x43a31d]
./redis-sentinel *:26380 [sentinel](main+0x406)[0x4366c6]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f0dca2a3555]
./redis-sentinel *:26380 [sentinel][0x436a63]
=== REDIS BUG REPORT START: Cut & paste starting from here ===
2674:X 17 Apr 2022 06:10:18.266 # Redis 6.9.242 crashed by signal: 6, si_code: - 6
2674:X 17 Apr 2022 06:10:18.266 # Crashed running the instruction at: 0x7f0dca2b 7387
------ STACK TRACE ------
EIP:
/lib64/libc.so.6(gsignal+0x37)[0x7f0dca2b7387]
Backtrace:
/lib64/libpthread.so.0(+0xf630)[0x7f0dca65e630]
/lib64/libc.so.6(gsignal+0x37)[0x7f0dca2b7387]
/lib64/libc.so.6(abort+0x148)[0x7f0dca2b8a78]
/lib64/libc.so.6(+0x78f67)[0x7f0dca2f9f67]
/lib64/libc.so.6(+0x7f474)[0x7f0dca300474]
./redis-sentinel *:26380 [sentinel](rewriteConfig+0x59)[0x48e4c9]
./redis-sentinel *:26380 [sentinel](sentinelFlushConfig+0x24)[0x4b6fe4]
./redis-sentinel *:26380 [sentinel](sentinelRefreshInstanceInfo+0xd49)[0x4bbc99]
./redis-sentinel *:26380 [sentinel](redisProcessCallbacks+0xe7)[0x4fd587]
./redis-sentinel *:26380 [sentinel](redisAsyncHandleRead+0x3e)[0x4fde0e]
./redis-sentinel *:26380 [sentinel](aeProcessEvents+0x235)[0x439fd5]
./redis-sentinel *:26380 [sentinel](aeMain+0x1d)[0x43a31d]
./redis-sentinel *:26380 [sentinel](main+0x406)[0x4366c6]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f0dca2a3555]
./redis-sentinel *:26380 [sentinel][0x436a63]
------ REGISTERS ------
2674:X 17 Apr 2022 06:10:18.267 #
RAX:0000000000000000 RBX:0000000000000000
RCX:ffffffffffffffff RDX:0000000000000006
RDI:0000000000000a72 RSI:0000000000000a72
RBP:00007ffdd50536b0 RSP:00007ffdd5052c78
R8 :0000000000000002 R9 :000000000000000d
R10:0000000000000008 R11:0000000000000206
R12:000000000000000b R13:00007ffdd50531d0
R14:000000000000006c R15:000000000000000a
RIP:00007f0dca2b7387 EFL:0000000000000206
CSGSFS:0000000000000033
------ INFO OUTPUT ------
# Server
redis_version:6.9.242
redis_git_sha1:00000000
redis_git_dirty:0
redis_build_id:c2cc38bd25d6eff7
redis_mode:sentinel
os:Linux 3.10.0-1062.12.1.el7.x86_64 x86_64
arch_bits:64
multiplexing_api:epoll
atomicvar_api:atomic-builtin
gcc_version:4.8.5
process_id:2674
process_supervised:no
run_id:0aeff53e6e5919cf3977eebbbf03204a6186c939
tcp_port:26380
server_time_usec:1650175818264709
uptime_in_seconds:0
uptime_in_days:0
hz:10
configured_hz:10
lru_clock:6008650
executable:/usr/local/redis/bin/./redis-sentinel
config_file:/usr/local/redis/bin/slave0.conf
io_threads_active:0
# Clients
connected_clients:0
cluster_connections:0
maxclients:10000
client_recent_max_input_buffer:0
client_recent_max_output_buffer:0
blocked_clients:0
tracking_clients:0
clients_in_timeout_table:0
# Memory
used_memory:18446743925829184936
used_memory_human:18446743925829184936B
used_memory_rss:3031040
used_memory_rss_human:2.89M
used_memory_peak:18446743925829184936
used_memory_peak_human:18446743925829184936B
used_memory_peak_perc:2061199231418368.00%
used_memory_overhead:895136
used_memory_startup:894920
used_memory_dataset:18446743925828289800
used_memory_dataset_perc:100.00%
allocator_allocated:894872
allocator_active:2993152
allocator_resident:2993152
total_system_memory:8370454528
total_system_memory_human:7.80G
used_memory_lua:37888
used_memory_vm_eval:37888
used_memory_lua_human:37.00K
used_memory_scripts_eval:0
number_of_cached_scripts:0
number_of_functions:0
number_of_libraries:0
used_memory_vm_functions:37888
used_memory_vm_total:75776
used_memory_vm_total_human:74.00K
used_memory_functions:216
used_memory_scripts:216
used_memory_scripts_human:216B
maxmemory:0
maxmemory_human:0B
maxmemory_policy:noeviction
allocator_frag_ratio:3.34
allocator_frag_bytes:2098280
allocator_rss_ratio:1.00
allocator_rss_bytes:0
rss_overhead_ratio:1.01
rss_overhead_bytes:37888
mem_fragmentation_ratio:3.39
mem_fragmentation_bytes:2136168
mem_not_counted_for_evict:0
mem_replication_backlog:0
mem_total_replication_buffers:0
mem_clients_slaves:0
mem_clients_normal:0
mem_cluster_links:0
mem_aof_buffer:0
mem_allocator:libc
active_defrag_running:0
lazyfree_pending_objects:0
lazyfreed_objects:0
2674:X 17 Apr 2022 06:10:18.361 # dump of function (hexdump of 183 bytes):
648b0c25d4020000648b3425d002000085f67534b8ba0000000f0589c164890425d002000089c648 63d74863f64863f9b8ea0000000f05483d00f0ffff771ef3c30f1f800000000085c97fdb89c8f7d8 81e1ffffff7f0f44c689c1ebca488b159c0a3900f7d86489024883c8ffc3669085ff780cf7dfe985 0200000f1f440000488b05790a390064c70016000000b8ffffffffc3662e0f1f8400000000006690 900f1f440000662e0f1f84000000000048c7c00f000000
=== REDIS BUG REPORT END. Make sure to include from START to END. ===
Please report the crash by opening an issue on github:
http://github.com/redis/redis/issues
Suspect RAM error? Use redis-server --test-memory to verify it.
Aborted
Comment From: zkteco-home
On Windows, I compiled it via MSYS; when I ran it, I got the same issue:
I have 2 redis and 3 sentinels. Use it in 2 servers: master+ master-slave and slave + 2 slave-sentinels in separate services. Use Redis 7.0 rc-3 When master down, sentinels vote to elect a leader, and a leader was elected by voting, but no failover was performed and "failover-abort-not-elected" was reported. Thus, slave never come master. When master server went down, sentinel does not set slave to master. Version 3.0.504 for Windows with same config works as expected. Master.config:
The new configuration was saved and changed like this:
Comment From: oranagra
@zkteco-home i don't understand the stack trace, and how come SIGABRT happens from rewriteConfig. and what's the relation here to munmap_chunk (being part of the crash, or some error that happens before it and possibly lead to it).
i see in the second post you mention Windows (which isn't supported by this project), but i understand the first post was about Linux? anything special about this platform?
do you happen to know if this issue is new to 7.0 RC3, and didn't exist in previous versions?
Comment From: moticless
I also see that the value of used_memory has wrapped around, which indicates incorrect management of malloc/free.
# Memory
used_memory:18446743925829184936
@zkteco-home, regarding your linux machine reproduction, did you modify 7.0-rc3 code in any way, or have special configuration to the machine?
Comment From: zkteco-home
Hi, thanks in advance. 1. Linux is installed on my laptop; I have no special configuration. 2. I compiled it via MSYS2; in fact it is the same as Redis on Linux, so it should work fine. redis-server is OK; only redis-sentinel crashes. 3. I provided the config files; you can test it, and it is easy to reproduce: ./redis-server master.conf ./redis-server master-slave.conf ./redis-sentinel sentinel0.conf
When I run redis-sentinel, it crashes, while redis-server works fine. If it were a problem with my Linux system, I guess redis-server would not work either, right?
Comment From: zkteco-home
I checked the code. When starting the sentinel with sentinel0.conf, the first config rewrite is OK, but after the second rewrite the sentinel config file is wrong. I don't know which code corrupts the memory, but the requirepass and masterauth values are wrong.
Comment From: zkteco-home
I installed Redis 5.0.14 and 6.2.6 and tested them; they work fine on the same Linux system (CentOS 7).
Comment From: moticless
3.i provide config file,you can test it,it is easily reproduce, ./redis-server master.conf ./redis-server master-slave.conf ./redis-sentinel sentinel0.conf
You didn't supply the configuration files. Please describe also the steps to reproduce. Thank you
Comment From: zkteco-home
3.i provide config file,you can test it,it is easily reproduce, ./redis-server master.conf ./redis-server master-slave.conf ./redis-sentinel sentinel0.conf
You didn't supply the configuration files. Please describe also the steps to reproduce. Thank you
I provided the config information at the beginning; please check it. For example, master.conf:
port 6379 requirepass "12345" masterauth "12345" loglevel notice
logfile "Logs/redis_log.txt"
replica-serve-stale-data yes replica-read-only yes repl-diskless-sync no repl-diskless-sync-delay 5 repl-disable-tcp-nodelay no
you can copy it to file
Comment From: zkteco-home
Some further information: I tested the code from 21 March, and it is OK.
I guess a change to config.c on 22 March may have introduced the problem.
Comment From: oranagra
@zkteco-home please post readable info (preferably no screenshots unless absolutely needed). Also, considering you can reproduce it and found a commit that's unaffected, maybe you can narrow it down to the offending commit?
Comment From: moticless
@zkteco-home, I managed to reproduce it. Intend to investigate later this evening. Thanks.
Comment From: hwware
I investigated the issue, the problem seems to be introduced in https://github.com/redis/redis/commit/bda9d74dad15fbc99a84a4f86c5a3cfc9252548f.
After this commit there is some error in the sentinel config files as shown below
Comment From: moticless
I think i found the root cause and created PR:
Following https://github.com/redis/redis/commit/bda9d74dad15fbc99a84a4f86c5a3cfc9252548f, in the function sdsConfigRewrite(), when it is reached for a non-module configuration, *config->data.sds.config is referenced (via val) and then gets freed at the end of the function as if it were a temporary variable. Later accesses to this parameter can cause various issues, such as configuration file corruption, crashes, and so on.
@zkteco-home, can you check it please as well.
@hwware i put you as reviewer, if you don't mind.
Thank you
Comment From: zkteco-home
@moticless sentinel crash solved,but still no working for sentinel,my expectation is : When master down, sentinels vote to elect a leader, and a leader was elected by voting, but no failover was performed and "failover-abort-not-elected" was reported. Thus, slave never come master.
When master server went down, sentinel does not set slave to master.
@oranagra
Comment From: moticless
@moticless sentinel crash solved,but still no working for sentinel,my expectation is : When master down, sentinels vote to elect a leader, and a leader was elected by voting, but no failover was performed and "failover-abort-not-elected" was reported. Thus, slave never come master.
When master server went down, sentinel does not set slave to master.
@oranagra
@zkteco-home, like @oranagra mentioned above, this project is not supported for windows platform. Our CI does run the flow you are describing without any problem on linux machines. If you managed to reproduce the issue you are having on linux platform, please feel free to share the issue.
Comment From: zkteco-home
@moticless sentinel crash solved,but still no working for sentinel,my expectation is : When master down, sentinels vote to elect a leader, and a leader was elected by voting, but no failover was performed and "failover-abort-not-elected" was reported. Thus, slave never come master.
When master server went down, sentinel does not set slave to master.
@oranagra
@zkteco-home, like @oranagra mentioned above, this project is not supported for windows platform. Our CI does run the flow you are describing without any problem on linux machines. If you managed to reproduce the issue you are having on linux platform, please feel free to share the issue.
@moticless It is not because of the Windows platform. I tested the code from 21 March and it works fine; after that it is broken. It is OK if you don't want to change it; I am just giving you feedback.
Comment From: oranagra
@zkteco-home what he meant is that as far as we could tell it is working, so if you can reproduce a problem, please instruct us how to reproduce.
Comment From: oranagra
@zkteco-home can you please explain why you closed the issue? is the other problem resolved?
Comment From: zkteco-home
I used the old config.c and it is OK. I will continue to compare to find out what happened.
1200:X 19 Apr 2022 18:25:49.078 # +new-epoch 2 1200:X 19 Apr 2022 18:25:49.092 * Sentinel new configuration saved on disk 1200:X 19 Apr 2022 18:25:49.092 # +vote-for-leader 281dfcb8000f740a6e675c8f4c920a419c53b7e5 2 1200:X 19 Apr 2022 18:25:50.016 # +sdown master redis-gsf 127.0.0.1 6379 1200:X 19 Apr 2022 18:25:50.074 # +odown master redis-gsf 127.0.0.1 6379 #quorum 3/2 1200:X 19 Apr 2022 18:25:50.074 # Next failover delay: I will not start a failover before Tue Apr 19 18:27:49 2022 1200:X 19 Apr 2022 18:25:50.136 # +config-update-from sentinel 281dfcb8000f740a6e675c8f4c920a419c53b7e5 127.0.0.1 26379 @ redis-gsf 127.0.0.1 6379 1200:X 19 Apr 2022 18:25:50.136 # +switch-master redis-gsf 127.0.0.1 6379 127.0.0.1 6380 1200:X 19 Apr 2022 18:25:50.142 * +slave slave 127.0.0.1:6379 127.0.0.1 6379 @ redis-gsf 127.0.0.1 6380 1200:X 19 Apr 2022 18:25:50.152 * Sentinel new configuration saved on disk 1200:X 19 Apr 2022 18:25:56.208 # +sdown slave 127.0.0.1:6379 127.0.0.1 6379 @ redis-gsf 127.0.0.1 6380 1200:X 19 Apr 2022 18:31:54.561 * +sentinel-address-switch master redis-gsf 127.0.0.1 6380 ip 0.0.0.0 port 26378 for 805bc445bc371b874ca71200ab761f287e55ce05 1200:X 19 Apr 2022 18:31:54.568 * Sentinel new configuration saved on disk 1200:X 19 Apr 2022 18:31:54.569 # -sdown slave 127.0.0.1:6379 127.0.0.1 6379 @ redis-gsf 127.0.0.1 6380 1200:X 19 Apr 2022 18:31:54.917 * +sentinel-address-switch master redis-gsf 127.0.0.1 6380 ip 127.0.0.1 port 26378 for 805bc445bc371b874ca71200ab761f287e55ce05
Comment From: zkteco-home
@zkteco-home can you please explain why you closed the issue? is the other problem resolved?
Sorry about my earlier test. I just tested the sentinel again and it works fine. Thank you all.
Comment From: moticless
@zkteco-home, thank you for catching this error in the RC.