A mutex to protect the access to unregistered_contexts across threads
Added a `std::mutex` to make accesses to `unregistered_contexts` thread-safe.
Right now I run into segfaults and freezes when my netio client application unsubscribes from and then resubscribes to Felixcore.
It happens when I unsubscribe my sockets. A backtrace of the moment:
(gdb) bt
#0 0x00007f099825c2d3 in std::_Rb_tree_insert_and_rebalance(bool, std::_Rb_tree_node_base*, std::_Rb_tree_node_base*, std::_Rb_tree_node_base&) () from /cvmfs/sft.cern.ch/lcg/releases/gcc/11.2.0-8a51a/x86_64-centos7/lib64/libstdc++.so.6
#1 0x00007f09944c4cfb in std::_Rb_tree<netio::event_loop::context*, netio::event_loop::context*, std::_Identity<netio::event_loop::context*>, std::less<netio::event_loop::context*>, std::allocator<netio::event_loop::context*> >::_M_insert_<netio::event_loop::context* const&, std::_Rb_tree<netio::event_loop::context*, netio::event_loop::context*, std::_Identity<netio::event_loop::context*>, std::less<netio::event_loop::context*>, std::allocator<netio::event_loop::context*> >::_Alloc_node> (this=0x7f0994ddf208 <ConnectionHandler::getInstance()::instance+8>, __x=0x0, __p=0x3c84270,
__v=@0x7ffe4b114270: 0x3c900e0, __node_gen=...) at /opt/rh/devtoolset-10/root/usr/include/c++/10/bits/stl_tree.h:1818
#2 0x00007f09944c476d in std::_Rb_tree<netio::event_loop::context*, netio::event_loop::context*, std::_Identity<netio::event_loop::context*>, std::less<netio::event_loop::context*>, std::allocator<netio::event_loop::context*> >::_M_insert_unique<netio::event_loop::context* const&> (
this=0x7f0994ddf208 <ConnectionHandler::getInstance()::instance+8>, __v=@0x7ffe4b114270: 0x3c900e0) at /opt/rh/devtoolset-10/root/usr/include/c++/10/bits/stl_tree.h:2159
#3 0x00007f09944c42bd in std::set<netio::event_loop::context*, std::less<netio::event_loop::context*>, std::allocator<netio::event_loop::context*> >::insert (this=0x7f0994ddf208 <ConnectionHandler::getInstance()::instance+8>, __x=@0x7ffe4b114270: 0x3c900e0)
at /opt/rh/devtoolset-10/root/usr/include/c++/10/bits/stl_set.h:512
#4 0x00007f09944c2be7 in netio::event_loop::unregister_fd (this=0x7f0994ddf200 <ConnectionHandler::getInstance()::instance>, ctx=0x3c900e0) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/eventloop.cpp:226
#5 0x00007f09944be161 in netio::posix_send_socket::disconnect (this=0x3c900d0) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/posix.cpp:224
#6 0x00007f09944bde10 in netio::posix_send_socket::~posix_send_socket (this=0x3c900d0, __in_chrg=<optimized out>) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/posix.cpp:162
#7 0x00007f09944bde38 in netio::posix_send_socket::~posix_send_socket (this=0x3c900d0, __in_chrg=<optimized out>) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/posix.cpp:163
#8 0x00007f09944ca84f in netio::low_latency_send_socket::~low_latency_send_socket (this=0x7ffe4b114390, __in_chrg=<optimized out>) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/sockets.cpp:45
#9 0x00007f09944ce26d in netio::low_latency_subscribe_socket::unsubscribe (this=0x243fc40, tag=29, ep=...) at /home/itkfelixstrips/software/opcua_quasar/temp2/opc-server/build/QuasarModuleITkStripsDCS/ic-over-netio/external/src/netio4/src/sockets.cpp:1011
There, `posix_send_socket` ends up calling `unregistered_contexts.insert(ctx)` in `netio::event_loop::unregister_fd`.
At the same time, my main netio thread (the one that executes `event_loop::run_forever()`) calls `unregistered_contexts.clear()` in `netio::event_loop::wait_for_events`.
These two accesses are not synchronized, so they can occasionally run concurrently on the same `std::set` and corrupt it.
I surrounded the accesses to `unregistered_contexts` with a lock on a mutex:
{
std::unique_lock<std::mutex> lock(m_unregistered_contexts_mtx);
unregistered_contexts.clear();
}
With this change, disconnecting and reconnecting works fine.
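A point to note: `std::mutex` and `std::unique_lock` are only available in C++11 and later, so this change requires at least C++11. The pattern itself (constructing the `unique_lock` inside a block so that it is released at the closing brace) is ordinary scope-based locking and works in every standard that provides these types.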
modified: netio/netio.hpp
modified: src/eventloop.cpp