brpc
brpc copied to clipboard
父进程启动bthread后,fork出子进程后子进程踩内存
Describe the bug (描述bug) 1、父进程启动 bthread;2、fork 出子进程;3、子进程的 Sampler 踩了内存。
To Reproduce (复现方法) 编译并执行下面的程序,使用 valgrind 运行即可观察到该问题。
#include <sys/stat.h>
#include <sys/unistd.h>
#include <brpc/server.h>
#include <bthread/bthread.h>
#include <iostream>
#include <thread>
#include <gflags/gflags.h>
using namespace std;
void child_proc();
void parent_proc();
/// Entry point handed to bthread_start_background() in main(). It only prints
/// a marker line; per that message, its purpose is to get a bthread worker
/// started.
///
/// @return Always nullptr. The function is declared to return void*, so it
///         must return a value: flowing off the end of a non-void function
///         is undefined behaviour.
void *empty_th(void *) {
    std::cout << "do nothing but start bthread worker\n";
    return nullptr;
}
/// Reproduction driver: runs one bthread to completion, then fork()s. The
/// child process continues in child_proc() and the parent in parent_proc().
///
/// @return 0 on the normal path (not reached in practice, because both
///         child_proc() and parent_proc() loop forever); 1 when the bthread
///         cannot be started or fork() fails.
int main(int argc, char *argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // preload: start bthread
    bthread_t tid;
    const int rc = bthread_start_background(&tid, nullptr, empty_th, nullptr);
    if (rc != 0) {
        // Joining an uninitialised tid would be meaningless, so stop here.
        std::cerr << "cannot start bthread, rc=" << rc << "\n";
        return 1;
    }
    bthread_join(tid, nullptr);

    // Flush before fork(): any text still sitting in the stdout buffer would
    // be copied into the child and end up printed by both processes.
    std::cout << "start to fork\n" << std::flush;

    const pid_t ret = fork();
    if (ret == 0) {
        child_proc();
    } else if (ret > 0) {
        parent_proc();
    } else {
        // Report explicitly instead of assert(): an assert disappears when
        // NDEBUG is defined and the failure would go unnoticed.
        std::cerr << "cannot fork\n";
        return 1;
    }
    return 0;
}
/// Body of the forked child. Prints its pid, then launches 40 detached
/// threads; each one fills a 40960-byte buffer on its own stack with the
/// byte 0x7f and then sleeps forever. The calling thread sleeps forever as
/// well, so this function never returns.
///
/// The 0x7f fill is the same pattern that appears in the corrupted object
/// in the gdb dump of this report (0x7f7f7f7f7f7f7f7f).
void child_proc() {
    std::cout << "start child, pid: " << getpid() << "\n";

    constexpr int kThreadCount = 40;
    int spawned = 0;
    while (spawned < kThreadCount) {
        std::thread([] {
            char scratch[40960];
            memset(scratch, 0x7f, sizeof(scratch));
            for (;;) {
                sleep(3);
            }
        }).detach();
        ++spawned;
    }

    // Keep the process alive.
    for (;;) {
        sleep(3);
    }
}
/// Body of the parent after fork(): prints its pid and then idles forever in
/// 3-second sleeps. Never returns.
void parent_proc() {
    std::cout << "start parent, pid: " << getpid() << "\n";
    for (;;) {
        sleep(3);
    }
}
/*
* g++ -std=c++11 ./test.cc -lpthread -lbrpc -lgflags
* valgrind ./a.out
*/
(gdb) bt
#0 0x00007ff167ae948c in get_value (this=0x7ff1636628d0, this=0x7ff1636628d0) at /home/brpc-master/src/bvar/passive_status.h:140
#1 bvar::detail::ReducerSampler<bvar::PassiveStatus<unsigned long>, unsigned long, bvar::detail::AddTo<unsigned long>, bvar::detail::MinusFrom<unsigned long> >::take_sample (this=0x7ff154008c90) at /home/brpc-master/src/bvar/detail/sampler.h:137
#2 0x00007ff167af5b11 in bvar::detail::SamplerCollector::run (this=this@entry=0xcf9a60) at /home/brpc-master/src/bvar/detail/sampler.cpp:173
#3 0x00007ff167af67f0 in bvar::detail::SamplerCollector::sampling_thread (arg=0xcf9a60) at /home/brpc-master/src/bvar/detail/sampler.cpp:113
#4 0x00007ff168da2864 in start_thread (arg=0x7ff142ffd700) at pthread_create.c:477
#5 0x00007ff166b23c1f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) f 0
#0 0x00007ff167ae948c in get_value (this=0x7ff1636628d0, this=0x7ff1636628d0) at /home/brpc-master/src/bvar/passive_status.h:140
140 return (_getfn ? _getfn(_arg) : Tp());
(gdb) up
#1 bvar::detail::ReducerSampler<bvar::PassiveStatus<unsigned long>, unsigned long, bvar::detail::AddTo<unsigned long>, bvar::detail::MinusFrom<unsigned long> >::take_sample (this=0x7ff154008c90) at /home/brpc-master/src/bvar/detail/sampler.h:137
137 latest.data = _reducer->get_value();
(gdb) p _reducer
$1 = (bvar::PassiveStatus<unsigned long> *) 0x7ff1636628d0
(gdb) p *_reducer
$2 = {<bvar::Variable> = {_vptr.Variable = 0x7f7f7f7f7f7f7f7f, _name = {static npos = 18446744073709551615,
_M_dataplus = {<std::allocator<char>> = {<__gnu_cxx::new_allocator<char>> = {<No data fields>}, <No data fields>},
_M_p = 0x7f7f7f7f7f7f7f7f <Address 0x7f7f7f7f7f7f7f7f out of bounds>}, _M_string_length = 9187201950435737471, {_M_local_buf = '\177' <repeats 16 times>,
_M_allocated_capacity = 9187201950435737471}}}, static ADDITIVE = true, _getfn = 0x7f7f7f7f7f7f7f7f, _arg = 0x7f7f7f7f7f7f7f7f,
_sampler = 0x7f7f7f7f7f7f7f7f, _series_sampler = 0x7f7f7f7f7f7f7f7f}
(gdb)
10:36:53 docker /home valgrind ./a.out
==13029== Memcheck, a memory error detector
==13029== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==13029== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==13029== Command: ./a.out
==13029==
do nothing but start bthread workerstart parent, pid: 13029
do nothing but start bthread workerstart child, pid: 13041
==13041== Thread 2 bvar_sampler:
==13041== Jump to the invalid address stated on the next line
==13041== at 0x7F7F7F7F7F7F7F7F: ???
==13041== by 0x5269B10: bvar::detail::SamplerCollector::run() (sampler.cpp:173)
==13041== by 0x526A7EF: bvar::detail::SamplerCollector::sampling_thread(void*) (sampler.cpp:113)
==13041== by 0x4E44863: start_thread (pthread_create.c:477)
==13041== by 0x7102C1E: clone (clone.S:95)
==13041== Address 0x7f7f7f7f7f7f7f7f is not stack'd, malloc'd or (recently) free'd
==13041==
Expected behavior (期望行为) 子进程能正常运作
Versions (各种版本) OS:CentOS Linux release 7.8.2003 (Core) Compiler: gcc (GCC) 7.5.0 brpc: master protobuf: protobuf-2.2.1
Additional context/screenshots (更多上下文/截图) 已经可以确认:timer thread 线程中有几个开在栈上的 bvar,在 fork 之后,因为该线程的栈空间被强制回收,这些 bvar 没有走析构流程,导致其 sampler 指向了无效地址。我构造的 demo 里开辟了大量的栈空间,可以稳定地将这块内存踩坏。