asterisk/main/lock.c

1318 lines
36 KiB
C
Raw Normal View History

/*
* Asterisk -- An open source telephony toolkit.
*
* Copyright (C) 1999 - 2010, Digium, Inc.
*
* Mark Spencer <markster@digium.com>
*
* See http://www.asterisk.org for more information about
* the Asterisk project. Please do not directly contact
* any of the maintainers of this project for assistance;
* the project provides a web site, mailing lists and IRC
* channels for your use.
*
* This program is free software, distributed under the terms of
* the GNU General Public License Version 2. See the LICENSE file
* at the top of the source tree.
*/
/*! \file
*
* \brief General Asterisk locking.
*/
/*** MODULEINFO
<support_level>core</support_level>
***/
#include "asterisk.h"
#ifdef HAVE_MTX_PROFILE
/*
 * Mutex profiling support, compiled only when HAVE_MTX_PROFILE is defined.
 * mtx_prof holds the value returned by ast_add_profile() and stays -1 until
 * the constructor below has run.
 */
static int mtx_prof = -1;
/*
 * Registers a profile entry named "mtx_lock_" followed by this file's name.
 * Declared with the constructor attribute so it is invoked automatically at
 * load time rather than being called explicitly.
 */
static void __attribute__((constructor)) __mtx_init(void)
{
mtx_prof = ast_add_profile("mtx_lock_" __FILE__, 0);
}
#endif
#include "asterisk/utils.h"
#include "asterisk/lock.h"
#include "asterisk/manager.h"
/* Allow direct use of pthread_mutex_* / pthread_cond_* */
#undef pthread_mutex_init
#undef pthread_mutex_destroy
#undef pthread_mutex_lock
#undef pthread_mutex_trylock
#undef pthread_mutex_t
#undef pthread_mutex_unlock
#undef pthread_cond_init
#undef pthread_cond_signal
#undef pthread_cond_broadcast
#undef pthread_cond_destroy
#undef pthread_cond_wait
#undef pthread_cond_timedwait
#if defined(DEBUG_THREADS) || defined(DETECT_DEADLOCKS)
/*
 * Report a lock problem. The arguments after canlog are a printf-style
 * format string and its values.
 * When canlog is non-zero the message is sent through ast_log() at
 * LOG_ERROR; otherwise it is written directly to stderr.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define log_mutex_error(canlog, ...) \
do { \
if (canlog) { \
ast_log(LOG_ERROR, __VA_ARGS__); \
} else { \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
#endif
#if defined(DEBUG_THREADS) && defined(HAVE_BKTR)
/*!
 * \brief Print every frame of a captured backtrace, one line per frame.
 *
 * \param bt Captured backtrace; its addresses and num_frames members are read.
 * \param canlog Non-zero to report through ast_log(), zero to write to stderr
 *        (see log_mutex_error()).
 *
 * The symbol array returned by backtrace_symbols() is a single allocation
 * that is released here with ast_std_free().
 */
static void __dump_backtrace(struct ast_bt *bt, int canlog)
{
	char **strings;
	ssize_t i;

	strings = backtrace_symbols(bt->addresses, bt->num_frames);
	if (!strings) {
		/* backtrace_symbols() returns NULL on failure; do not index into it. */
		log_mutex_error(canlog, "Unable to retrieve backtrace symbols\n");
		return;
	}

	for (i = 0; i < bt->num_frames; i++) {
		log_mutex_error(canlog, "%s\n", strings[i]);
	}

	ast_std_free(strings);
}
#endif /* defined(DEBUG_THREADS) && defined(HAVE_BKTR) */
/*
 * Change note (2014-12-12 23:49:36 +00:00)
 *
 * DEBUG_THREADS: Fix regression and lock tracking initialization problems.
 *
 * This patch started with David Lee's patch at
 * https://reviewboard.asterisk.org/r/2826/ and includes a regression fix
 * introduced by the ASTERISK-22455 patch.
 *
 * The initialization of a mutex's lock tracking structure was not protected
 * in a critical section. This is fine for any mutex that is explicitly
 * initialized, but a static mutex may have its lock tracking double
 * initialized if multiple threads attempt the first lock simultaneously.
 *
 * - Added a global mutex to properly serialize initialization of the lock
 *   tracking structure. The painful global lock can be mitigated by adding a
 *   double checked lock flag as discussed on the original review request.
 * - Defer lock tracking initialization until first use.
 * - Don't be "helpful" and initialize an uninitialized lock when
 *   DEBUG_THREADS is enabled. Debug code is not supposed to fix or change
 *   normal code behavior. We don't need a lock initialization race that
 *   would force a re-setup of lock tracking. Lock tracking already handles
 *   initialization on first use.
 * - Properly handle allocation failures of the lock tracking structure.
 * - No need to initialize tracking data in __ast_pthread_mutex_destroy()
 *   just to turn around and destroy it.
 *
 * The regression introduced by ASTERISK-22455 is the result of manipulating
 * a pthread_mutex_t struct outside of the pthread library code. The
 * pthread_mutex_t struct seems to have a global linked list pointer member
 * that can get changed by other threads. Therefore, saving and restoring the
 * contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas
 * Airmont for finding this obscure regression.
 *
 * - Don't overwrite the struct ast_lock_track.reentr_mutex member to restore
 *   tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The
 *   pthread_mutex_t struct must be treated as a read-only opaque variable.
 *
 * Miscellaneous other items fixed by this patch:
 * - Match ast_suspend_lock_info() with ast_restore_lock_info() in
 *   __ast_cond_timedwait().
 * - Made some uninitialized lock sanity checks return EINVAL and try a
 *   DO_THREAD_CRASH.
 * - Fix bad canlog initialization expressions.
 *
 * ASTERISK-24614 #close
 * Reported by: Thomas Airmont
 * Review: https://reviewboard.asterisk.org/r/4247/
 * Review: https://reviewboard.asterisk.org/r/2826/
 * ........
 * Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11
 * git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
 */
#ifdef DEBUG_THREADS
/* Global lock held by ast_get_reentrancy() while it creates a lock's tracking structure on first use. */
AST_MUTEX_DEFINE_STATIC(reentrancy_lock);
static inline struct ast_lock_track *ast_get_reentrancy(struct ast_lock_track **plt,
struct ast_lock_track_flags *flags, int no_setup)
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
{
pthread_mutexattr_t reentr_attr;
struct ast_lock_track *lt;
if (!flags->tracking || flags->setup) {
return *plt;
}
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
pthread_mutex_lock(&reentrancy_lock.mutex);
if (*plt) {
pthread_mutex_unlock(&reentrancy_lock.mutex);
return *plt;
}
if (no_setup) {
pthread_mutex_unlock(&reentrancy_lock.mutex);
return NULL;
}
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
lt = *plt = ast_std_calloc(1, sizeof(*lt));
if (!lt) {
fprintf(stderr, "%s: Failed to allocate lock tracking\n", __func__);
#if defined(DO_CRASH) || defined(THREAD_CRASH)
abort();
#else
pthread_mutex_unlock(&reentrancy_lock.mutex);
return NULL;
#endif
}
pthread_mutexattr_init(&reentr_attr);
pthread_mutexattr_settype(&reentr_attr, AST_MUTEX_KIND);
pthread_mutex_init(&lt->reentr_mutex, &reentr_attr);
pthread_mutexattr_destroy(&reentr_attr);
flags->setup = 1;
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
pthread_mutex_unlock(&reentrancy_lock.mutex);
return lt;
}
/*!
 * \brief Tear down a lock's tracking structure, if it has one.
 *
 * \param plt Address of the lock's tracking pointer. It is reset to NULL
 *        before the structure's mutex is destroyed and its memory released.
 *
 * Does nothing when no tracking structure is attached.
 */
static inline void delete_reentrancy_cs(struct ast_lock_track **plt)
{
	struct ast_lock_track *doomed = *plt;

	if (!doomed) {
		return;
	}

	/* Detach first so the stale pointer is no longer reachable through plt. */
	*plt = NULL;
	pthread_mutex_destroy(&doomed->reentr_mutex);
	ast_std_free(doomed);
}
#endif /* DEBUG_THREADS */
/*!
 * \brief Initialize an ast_mutex_t.
 *
 * \param tracking Stored in the lock's tracking flag (debug builds only).
 * \param filename Source file of the caller, used in diagnostics.
 * \param lineno Source line of the caller, used in diagnostics.
 * \param func Calling function name, used in diagnostics.
 * \param mutex_name Name of the mutex, used in diagnostics.
 * \param t The mutex to initialize.
 *
 * \return The result of pthread_mutex_init(), or EBUSY when the
 *         already-initialized check (compiled in only with DEBUG_THREADS,
 *         AST_MUTEX_INIT_W_CONSTRUCTORS and CAN_COMPARE_MUTEX_TO_INIT_VALUE)
 *         finds the mutex differs from PTHREAD_MUTEX_INITIALIZER.
 *
 * The tracking structure itself is not allocated here; the tracking pointer
 * is cleared and the setup flag reset.
 */
int __ast_pthread_mutex_init(int tracking, const char *filename, int lineno, const char *func,
	const char *mutex_name, ast_mutex_t *t)
{
	int res;
	pthread_mutexattr_t attr;

#if defined(DEBUG_THREADS) && defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && \
	defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	if ((t->mutex) != ((pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER)) {
		/* Use ast_log() only for tracked locks that are not being set up from logger.c. */
		int canlog = tracking && strcmp(filename, "logger.c");

		log_mutex_error(canlog, "%s line %d (%s): NOTICE: mutex '%s' is already initialized.\n",
			filename, lineno, func, mutex_name);
		DO_THREAD_CRASH;
		return EBUSY;
	}
#endif /* DEBUG_THREADS && AST_MUTEX_INIT_W_CONSTRUCTORS && CAN_COMPARE_MUTEX_TO_INIT_VALUE */

#if defined(DEBUG_THREADS) || defined(DETECT_DEADLOCKS)
	/* Start with no tracking data attached and the setup flag cleared. */
	t->track = NULL;
	t->flags.tracking = tracking;
	t->flags.setup = 0;
#endif /* DEBUG_THREADS || DETECT_DEADLOCKS */

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, AST_MUTEX_KIND);
	res = pthread_mutex_init(&t->mutex, &attr);
	pthread_mutexattr_destroy(&attr);

	return res;
}
int __ast_pthread_mutex_destroy(const char *filename, int lineno, const char *func,
const char *mutex_name, ast_mutex_t *t)
{
int res;
#ifdef DEBUG_THREADS
struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 1);
int canlog = t->flags.tracking && strcmp(filename, "logger.c");
#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
if ((t->mutex) == ((pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER)) {
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
/* Don't try to uninitialize an uninitialized mutex
* This may have no effect on linux
* but it always generates a core on *BSD when
* linked with libpthread.
* This is not an error condition if the mutex is created on the fly.
*/
log_mutex_error(canlog, "%s line %d (%s): NOTICE: mutex '%s' is uninitialized.\n",
filename, lineno, func, mutex_name);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
DO_THREAD_CRASH;
res = EINVAL;
goto lt_cleanup;
}
#endif
res = pthread_mutex_trylock(&t->mutex);
switch (res) {
case 0:
pthread_mutex_unlock(&t->mutex);
break;
case EINVAL:
log_mutex_error(canlog, "%s line %d (%s): Error: attempt to destroy invalid mutex '%s'.\n",
filename, lineno, func, mutex_name);
break;
case EBUSY:
log_mutex_error(canlog, "%s line %d (%s): Error: attempt to destroy locked mutex '%s'.\n",
filename, lineno, func, mutex_name);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
ast_reentrancy_lock(lt);
log_mutex_error(canlog, "%s line %d (%s): Error: '%s' was locked here.\n",
lt->file[ROFFSET], lt->lineno[ROFFSET], lt->func[ROFFSET], mutex_name);
#ifdef HAVE_BKTR
__dump_backtrace(&lt->backtrace[ROFFSET], canlog);
#endif
ast_reentrancy_unlock(lt);
}
break;
}
#endif /* DEBUG_THREADS */
res = pthread_mutex_destroy(&t->mutex);
#ifdef DEBUG_THREADS
if (res) {
log_mutex_error(canlog, "%s line %d (%s): Error destroying mutex %s: %s\n",
filename, lineno, func, mutex_name, strerror(res));
}
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
lt_cleanup:
#endif
if (lt) {
ast_reentrancy_lock(lt);
lt->file[0] = filename;
lt->lineno[0] = lineno;
lt->func[0] = func;
lt->reentrancy = 0;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasnt in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with it's "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[0] = 0;
#ifdef HAVE_BKTR
memset(&lt->backtrace[0], 0, sizeof(lt->backtrace[0]));
#endif
ast_reentrancy_unlock(lt);
delete_reentrancy_cs(&t->track);
}
#endif /* DEBUG_THREADS */
return res;
}
/*!
 * \brief Lock a mutex, optionally recording lock tracking data and
 *        reporting probable deadlocks.
 *
 * \param filename   Source file of the caller, used in diagnostics.
 * \param lineno     Source line of the caller, used in diagnostics.
 * \param func       Function name of the caller, used in diagnostics.
 * \param mutex_name Textual name of the mutex, used in diagnostics.
 * \param t          Mutex to lock.
 *
 * With DEBUG_THREADS the attempt is registered via ast_store_lock_info()
 * before locking; on success the caller's location is pushed onto the
 * tracking arrays, on failure the registration is removed again.
 *
 * With DETECT_DEADLOCKS the lock is acquired by polling
 * pthread_mutex_trylock() so that a long wait can be reported.
 *
 * \return 0 on success, otherwise the error code returned by the
 *         underlying pthread_mutex_lock()/pthread_mutex_trylock() call.
 */
int __ast_pthread_mutex_lock(const char *filename, int lineno, const char *func,
				const char* mutex_name, ast_mutex_t *t)
{
	int res;

#if defined(DETECT_DEADLOCKS) || defined(DEBUG_THREADS)
	/* log_mutex_error() uses ast_log() only when this is non-zero (tracking
	 * enabled and the caller is not logger.c); otherwise it writes to stderr. */
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");
#endif

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	struct ast_bt *bt = NULL;

	if (lt) {
#ifdef HAVE_BKTR
		struct ast_bt tmp;

		/* The implementation of backtrace() may have its own locks.
		 * Capture the backtrace outside of the reentrancy lock to
		 * avoid deadlocks. See ASTERISK-22455. */
		ast_bt_get_addresses(&tmp);

		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->backtrace[lt->reentrancy] = tmp;
			bt = &lt->backtrace[lt->reentrancy];
		}
		ast_reentrancy_unlock(lt);
#endif
		ast_store_lock_info(AST_MUTEX, filename, lineno, func, mutex_name, t, bt);
	}
#endif /* DEBUG_THREADS */

#if defined(DETECT_DEADLOCKS)
	{
		time_t seconds = time(NULL);
		time_t wait_time, reported_wait = 0;
		do {
#ifdef HAVE_MTX_PROFILE
			ast_mark(mtx_prof, 1);
#endif
			res = pthread_mutex_trylock(&t->mutex);
#ifdef HAVE_MTX_PROFILE
			ast_mark(mtx_prof, 0);
#endif
			if (res == EBUSY) {
				wait_time = time(NULL) - seconds;
				/* Report once per elapsed multiple of five seconds. */
				if (wait_time > reported_wait && (wait_time % 5) == 0) {
					log_mutex_error(canlog, "%s line %d (%s): Deadlock? waited %d sec for mutex '%s'?\n",
						filename, lineno, func, (int) wait_time, mutex_name);
#ifdef DEBUG_THREADS
					if (lt) {
						ast_reentrancy_lock(lt);
#ifdef HAVE_BKTR
						/* The slot at index 'reentrancy' only exists while the
						 * count is below AST_MAX_REENTRANCY; at the limit it
						 * would be one past the end of the array. */
						if (lt->reentrancy < AST_MAX_REENTRANCY) {
							__dump_backtrace(&lt->backtrace[lt->reentrancy], canlog);
						}
#endif
						log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
							lt->file[ROFFSET], lt->lineno[ROFFSET],
							lt->func[ROFFSET], mutex_name);
#ifdef HAVE_BKTR
						__dump_backtrace(&lt->backtrace[ROFFSET], canlog);
#endif
						ast_reentrancy_unlock(lt);
					}
#endif
					reported_wait = wait_time;
					if ((int) wait_time < 10) { /* Only emit an event when a deadlock starts, not every 5 seconds */
						/*** DOCUMENTATION
							<managerEvent language="en_US" name="DeadlockStart">
								<managerEventInstance class="EVENT_FLAG_SYSTEM">
									<synopsis>Raised when a probable deadlock has started.
									Delivery of this event is attempted but not guaranteed,
									and could fail for example if the manager itself is deadlocked.
									</synopsis>
									<syntax>
										<parameter name="Mutex">
											<para>The mutex involved in the deadlock.</para>
										</parameter>
									</syntax>
								</managerEventInstance>
							</managerEvent>
						***/
						manager_event(EVENT_FLAG_SYSTEM, "DeadlockStart",
							"Mutex: %s\r\n",
							mutex_name);
					}
				}
				/* Brief pause before polling the mutex again. */
				usleep(200);
			}
		} while (res == EBUSY);
	}
#else /* !DETECT_DEADLOCKS */
#ifdef HAVE_MTX_PROFILE
	/* Profile the uncontended attempt; fall back to a blocking lock
	 * only when the trylock did not succeed. */
	ast_mark(mtx_prof, 1);
	res = pthread_mutex_trylock(&t->mutex);
	ast_mark(mtx_prof, 0);
	if (res)
#endif
	res = pthread_mutex_lock(&t->mutex);
#endif /* !DETECT_DEADLOCKS */

#ifdef DEBUG_THREADS
	if (lt && !res) {
		/* Lock obtained: push the caller's location onto the tracking arrays. */
		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->file[lt->reentrancy] = filename;
			lt->lineno[lt->reentrancy] = lineno;
			lt->func[lt->reentrancy] = func;
			lt->thread_id[lt->reentrancy] = pthread_self();
			lt->reentrancy++;
		} else {
			log_mutex_error(canlog, "%s line %d (%s): '%s' really deep reentrancy!\n",
				filename, lineno, func, mutex_name);
		}
		ast_reentrancy_unlock(lt);
		ast_mark_lock_acquired(t);
	} else if (lt) {
		/* Lock failed: withdraw the registration made before the attempt. */
#ifdef HAVE_BKTR
		/* Read the count under the reentrancy lock so the index computed
		 * from it cannot be invalidated between the test and the use. */
		ast_reentrancy_lock(lt);
		bt = lt->reentrancy ? &lt->backtrace[lt->reentrancy - 1] : NULL;
		ast_reentrancy_unlock(lt);
#endif
		ast_remove_lock_info(t, bt);
	}
	if (res) {
		log_mutex_error(canlog, "%s line %d (%s): Error obtaining mutex: %s\n",
			filename, lineno, func, strerror(res));
		DO_THREAD_CRASH;
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_pthread_mutex_trylock(const char *filename, int lineno, const char *func,
const char* mutex_name, ast_mutex_t *t)
{
int res;
#ifdef DEBUG_THREADS
struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
int canlog = t->flags.tracking && strcmp(filename, "logger.c");
struct ast_bt *bt = NULL;
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
#ifdef HAVE_BKTR
struct ast_bt tmp;
/* The implementation of backtrace() may have its own locks.
* Capture the backtrace outside of the reentrancy lock to
* avoid deadlocks. See ASTERISK-22455. */
ast_bt_get_addresses(&tmp);
ast_reentrancy_lock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->backtrace[lt->reentrancy] = tmp;
bt = &lt->backtrace[lt->reentrancy];
}
ast_reentrancy_unlock(lt);
#endif
ast_store_lock_info(AST_MUTEX, filename, lineno, func, mutex_name, t, bt);
}
#endif /* DEBUG_THREADS */
res = pthread_mutex_trylock(&t->mutex);
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt && !res) {
ast_reentrancy_lock(lt);
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->file[lt->reentrancy] = filename;
lt->lineno[lt->reentrancy] = lineno;
lt->func[lt->reentrancy] = func;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasnt in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with it's "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[lt->reentrancy] = pthread_self();
lt->reentrancy++;
} else {
log_mutex_error(canlog, "%s line %d (%s): '%s' really deep reentrancy!\n",
filename, lineno, func, mutex_name);
}
ast_reentrancy_unlock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
ast_mark_lock_acquired(t);
} else if (lt) {
ast_mark_lock_failed(t);
}
#endif /* DEBUG_THREADS */
return res;
}
/*!
 * \brief Unlock a mutex, updating the lock tracking data when
 *        DEBUG_THREADS is enabled.
 *
 * \param filename   Caller's source file; used in diagnostics and to decide
 *                   where diagnostics are written (DEBUG_THREADS only).
 * \param lineno     Caller's source line; used in diagnostics (DEBUG_THREADS only).
 * \param func       Caller's function name; used in diagnostics (DEBUG_THREADS only).
 * \param mutex_name Textual name of the mutex; used in diagnostics (DEBUG_THREADS only).
 * \param t          The mutex to release.
 *
 * \return The result of pthread_mutex_unlock(), or EINVAL when the
 *         uninitialized-mutex sanity check is compiled in and trips.
 */
int __ast_pthread_mutex_unlock(const char *filename, int lineno, const char *func,
	const char *mutex_name, ast_mutex_t *t)
{
	int res;

#ifdef DEBUG_THREADS
	/*
	 * NOTE(review): ROFFSET is used below without arguments, so it
	 * presumably expands in terms of the local named 'lt' -- do not
	 * rename this variable without checking the macro definition.
	 */
	struct ast_lock_track *lt = NULL;
	/*
	 * log_mutex_error() goes through ast_log() only when canlog is
	 * non-zero; otherwise it prints to stderr.  That is the case when the
	 * lock is not tracked or when the caller is logger.c.
	 */
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");
	struct ast_bt *bt = NULL;

#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	/* Refuse to unlock a mutex that still holds the static initializer value. */
	if ((t->mutex) == ((pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER)) {
		log_mutex_error(canlog, "%s line %d (%s): Error: mutex '%s' is uninitialized.\n",
			filename, lineno, func, mutex_name);
		DO_THREAD_CRASH;
		return EINVAL;
	}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */

	lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	if (lt) {
		ast_reentrancy_lock(lt);

		/* Complain if the most recent recorded holder is a different thread. */
		if (lt->reentrancy && (lt->thread_id[ROFFSET] != pthread_self())) {
			log_mutex_error(canlog, "%s line %d (%s): attempted unlock mutex '%s' without owning it!\n",
				filename, lineno, func, mutex_name);
			log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
				lt->file[ROFFSET], lt->lineno[ROFFSET], lt->func[ROFFSET], mutex_name);
#ifdef HAVE_BKTR
			__dump_backtrace(&lt->backtrace[ROFFSET], canlog);
#endif
			DO_THREAD_CRASH;
		}

		/* Drop one level of recorded reentrancy; clamp at zero on underflow. */
		if (--lt->reentrancy < 0) {
			log_mutex_error(canlog, "%s line %d (%s): mutex '%s' freed more times than we've locked!\n",
				filename, lineno, func, mutex_name);
			lt->reentrancy = 0;
		}

		/* Clear the tracking slot that was just vacated. */
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->file[lt->reentrancy] = NULL;
			lt->lineno[lt->reentrancy] = 0;
			lt->func[lt->reentrancy] = NULL;
			lt->thread_id[lt->reentrancy] = 0;
		}

#ifdef HAVE_BKTR
		/* If the mutex is still held, pass along the backtrace of the remaining top entry. */
		if (lt->reentrancy) {
			bt = &lt->backtrace[lt->reentrancy - 1];
		}
#endif
		ast_reentrancy_unlock(lt);

		ast_remove_lock_info(t, bt);
	}
#endif /* DEBUG_THREADS */

	res = pthread_mutex_unlock(&t->mutex);

#ifdef DEBUG_THREADS
	if (res) {
		log_mutex_error(canlog, "%s line %d (%s): Error releasing mutex: %s\n",
			filename, lineno, func, strerror(res));
		DO_THREAD_CRASH;
	}
#endif /* DEBUG_THREADS */

	return res;
}
/*!
 * \brief Initialize a condition variable.
 *
 * Forwards directly to pthread_cond_init().  The filename, lineno, func
 * and cond_name arguments are accepted but not used by this function.
 *
 * \param cond      Condition variable to initialize.
 * \param cond_attr Attributes handed to pthread_cond_init() unchanged.
 *
 * \return Whatever pthread_cond_init() returns.
 */
int __ast_cond_init(const char *filename, int lineno, const char *func,
	const char *cond_name, ast_cond_t *cond, pthread_condattr_t *cond_attr)
{
	int res = pthread_cond_init(cond, cond_attr);

	return res;
}
/*!
 * \brief Signal a condition variable.
 *
 * Forwards directly to pthread_cond_signal().  The filename, lineno, func
 * and cond_name arguments are accepted but not used by this function.
 *
 * \param cond Condition variable to signal.
 *
 * \return Whatever pthread_cond_signal() returns.
 */
int __ast_cond_signal(const char *filename, int lineno, const char *func,
	const char *cond_name, ast_cond_t *cond)
{
	int res = pthread_cond_signal(cond);

	return res;
}
/*!
 * \brief Broadcast on a condition variable.
 *
 * Forwards directly to pthread_cond_broadcast().  The filename, lineno,
 * func and cond_name arguments are accepted but not used by this function.
 *
 * \param cond Condition variable to broadcast on.
 *
 * \return Whatever pthread_cond_broadcast() returns.
 */
int __ast_cond_broadcast(const char *filename, int lineno, const char *func,
	const char *cond_name, ast_cond_t *cond)
{
	int res = pthread_cond_broadcast(cond);

	return res;
}
/*!
 * \brief Destroy a condition variable.
 *
 * Forwards directly to pthread_cond_destroy().  The filename, lineno,
 * func and cond_name arguments are accepted but not used by this function.
 *
 * \param cond Condition variable to destroy.
 *
 * \return Whatever pthread_cond_destroy() returns.
 */
int __ast_cond_destroy(const char *filename, int lineno, const char *func,
	const char *cond_name, ast_cond_t *cond)
{
	int res = pthread_cond_destroy(cond);

	return res;
}
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
#ifdef DEBUG_THREADS
/*!
 * \brief Copy saved lock tracking data back into a tracking structure.
 *
 * The copy is performed while holding the reentrancy lock of \a lt.
 * Each tracked array (file, lineno, func, thread_id and, with HAVE_BKTR,
 * backtrace) and the reentrancy count are copied individually; the
 * reentr_mutex member is deliberately left untouched.
 *
 * \param lt       Destination tracking structure.
 * \param lt_saved Source of the saved tracking data.
 */
static void restore_lock_tracking(struct ast_lock_track *lt, struct ast_lock_track *lt_saved)
{
	ast_reentrancy_lock(lt);

	/*
	 * The following code must match the struct ast_lock_track
	 * definition with the explicit exception of the reentr_mutex
	 * member.
	 */
	memcpy(lt->file, lt_saved->file, sizeof(lt->file));
	memcpy(lt->lineno, lt_saved->lineno, sizeof(lt->lineno));
	lt->reentrancy = lt_saved->reentrancy;
	memcpy(lt->func, lt_saved->func, sizeof(lt->func));
	memcpy(lt->thread_id, lt_saved->thread_id, sizeof(lt->thread_id));
#ifdef HAVE_BKTR
	memcpy(lt->backtrace, lt_saved->backtrace, sizeof(lt->backtrace));
#endif

	ast_reentrancy_unlock(lt);
}
#endif /* DEBUG_THREADS */
/*!
 * \brief Wait on a condition variable, with lock-tracking checks under DEBUG_THREADS.
 *
 * \param filename   Source file of the caller (used in diagnostics; a caller
 *                   named "logger.c" is never reported through ast_log()).
 * \param lineno     Source line of the caller (diagnostics only).
 * \param func       Function name of the caller (diagnostics only).
 * \param cond_name  Textual name of the condition (diagnostics only).
 * \param mutex_name Textual name of the mutex (diagnostics only).
 * \param cond       Condition variable to wait on.
 * \param t          Mutex that is passed to pthread_cond_wait().
 *
 * \return The result of pthread_cond_wait(), or EINVAL when the mutex is
 *         detected as uninitialized (only checked when both
 *         AST_MUTEX_INIT_W_CONSTRUCTORS and CAN_COMPARE_MUTEX_TO_INIT_VALUE
 *         are defined in a DEBUG_THREADS build).
 *
 * \note Without DEBUG_THREADS this is a plain call to pthread_cond_wait().
 */
int __ast_cond_wait(const char *filename, int lineno, const char *func,
	const char *cond_name, const char *mutex_name,
	ast_cond_t *cond, ast_mutex_t *t)
{
	int res;

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = NULL;
	struct ast_lock_track lt_orig;
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");

#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	if ((t->mutex) == ((pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER)) {
		log_mutex_error(canlog, "%s line %d (%s): Error: mutex '%s' is uninitialized.\n",
			filename, lineno, func, mutex_name);
		DO_THREAD_CRASH;
		return EINVAL;
	}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */

	lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	if (lt) {
		ast_reentrancy_lock(lt);
		if (lt->reentrancy && (lt->thread_id[ROFFSET] != pthread_self())) {
			/* The tracking data says some other thread holds the mutex. */
			log_mutex_error(canlog, "%s line %d (%s): attempted wait using mutex '%s' without owning it!\n",
				filename, lineno, func, mutex_name);
			log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
				lt->file[ROFFSET], lt->lineno[ROFFSET], lt->func[ROFFSET], mutex_name);
#ifdef HAVE_BKTR
			__dump_backtrace(&lt->backtrace[ROFFSET], canlog);
#endif
			DO_THREAD_CRASH;
		} else if (lt->reentrancy <= 0) {
			log_mutex_error(canlog, "%s line %d (%s): attempted wait using an unlocked mutex '%s'\n",
				filename, lineno, func, mutex_name);
			DO_THREAD_CRASH;
		}

		/* Waiting on a condition completely suspends a recursive mutex,
		 * even if it's been recursively locked multiple times. Make a
		 * copy of the lock tracking, and reset reentrancy to zero */
		lt_orig = *lt;
		lt->reentrancy = 0;
		ast_reentrancy_unlock(lt);

		ast_suspend_lock_info(t);
	}
#endif /* DEBUG_THREADS */

	res = pthread_cond_wait(cond, &t->mutex);

#ifdef DEBUG_THREADS
	if (res) {
		/* Report which condition/mutex failed along with the error text. */
		log_mutex_error(canlog, "%s line %d (%s): Error waiting on condition '%s' with mutex '%s': %s\n",
			filename, lineno, func, cond_name, mutex_name, strerror(res));
		DO_THREAD_CRASH;
	} else if (lt) {
		/* Successful wake-up: put back the tracking snapshot taken
		 * before the wait and re-enable the suspended lock info. */
		restore_lock_tracking(lt, &lt_orig);
		ast_restore_lock_info(t);
	}
#endif /* DEBUG_THREADS */

	return res;
}
/*!
 * \brief Timed wait on a condition variable, with lock-tracking checks under DEBUG_THREADS.
 *
 * \param filename   Source file of the caller (used in diagnostics; a caller
 *                   named "logger.c" is never reported through ast_log()).
 * \param lineno     Source line of the caller (diagnostics only).
 * \param func       Function name of the caller (diagnostics only).
 * \param cond_name  Textual name of the condition (diagnostics only).
 * \param mutex_name Textual name of the mutex (diagnostics only).
 * \param cond       Condition variable to wait on.
 * \param t          Mutex that is passed to pthread_cond_timedwait().
 * \param abstime    Timeout handed unchanged to pthread_cond_timedwait().
 *
 * \return The result of pthread_cond_timedwait() (ETIMEDOUT is passed
 *         through and is not treated as an error), or EINVAL when the mutex
 *         is detected as uninitialized (only checked when both
 *         AST_MUTEX_INIT_W_CONSTRUCTORS and CAN_COMPARE_MUTEX_TO_INIT_VALUE
 *         are defined in a DEBUG_THREADS build).
 *
 * \note Without DEBUG_THREADS this is a plain call to pthread_cond_timedwait().
 */
int __ast_cond_timedwait(const char *filename, int lineno, const char *func,
	const char *cond_name, const char *mutex_name, ast_cond_t *cond,
	ast_mutex_t *t, const struct timespec *abstime)
{
	int res;

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = NULL;
	struct ast_lock_track lt_orig;
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");

#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	if ((t->mutex) == ((pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER)) {
		log_mutex_error(canlog, "%s line %d (%s): Error: mutex '%s' is uninitialized.\n",
			filename, lineno, func, mutex_name);
		DO_THREAD_CRASH;
		return EINVAL;
	}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */

	lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	if (lt) {
		ast_reentrancy_lock(lt);
		if (lt->reentrancy && (lt->thread_id[ROFFSET] != pthread_self())) {
			/* The tracking data says some other thread holds the mutex. */
			log_mutex_error(canlog, "%s line %d (%s): attempted wait using mutex '%s' without owning it!\n",
				filename, lineno, func, mutex_name);
			log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
				lt->file[ROFFSET], lt->lineno[ROFFSET], lt->func[ROFFSET], mutex_name);
#ifdef HAVE_BKTR
			__dump_backtrace(&lt->backtrace[ROFFSET], canlog);
#endif
			DO_THREAD_CRASH;
		} else if (lt->reentrancy <= 0) {
			log_mutex_error(canlog, "%s line %d (%s): attempted wait using an unlocked mutex '%s'\n",
				filename, lineno, func, mutex_name);
			DO_THREAD_CRASH;
		}

		/* Waiting on a condition completely suspends a recursive mutex,
		 * even if it's been recursively locked multiple times. Make a
		 * copy of the lock tracking, and reset reentrancy to zero */
		lt_orig = *lt;
		lt->reentrancy = 0;
		ast_reentrancy_unlock(lt);

		ast_suspend_lock_info(t);
	}
#endif /* DEBUG_THREADS */

	res = pthread_cond_timedwait(cond, &t->mutex, abstime);

#ifdef DEBUG_THREADS
	if (res && (res != ETIMEDOUT)) {
		/* Report which condition/mutex failed along with the error text. */
		log_mutex_error(canlog, "%s line %d (%s): Error waiting on condition '%s' with mutex '%s': %s\n",
			filename, lineno, func, cond_name, mutex_name, strerror(res));
		DO_THREAD_CRASH;
	} else if (lt) {
		/* Success or timeout: put back the tracking snapshot taken
		 * before the wait and re-enable the suspended lock info. */
		restore_lock_tracking(lt, &lt_orig);
		ast_restore_lock_info(t);
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_rwlock_init(int tracking, const char *filename, int lineno, \
const char *func, const char *rwlock_name, ast_rwlock_t *t)
{
int res;
pthread_rwlockattr_t attr;
#if defined(DEBUG_THREADS) && defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && \
defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
if (t->lock != ((pthread_rwlock_t) __AST_RWLOCK_INIT_VALUE)) {
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
int canlog = tracking && strcmp(filename, "logger.c");
log_mutex_error(canlog, "%s line %d (%s): Warning: rwlock '%s' is already initialized.\n",
filename, lineno, func, rwlock_name);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
DO_THREAD_CRASH;
return EBUSY;
}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */
#if defined(DEBUG_THREADS) || defined(DETECT_DEADLOCKS)
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
t->track = NULL;
t->flags.tracking = tracking;
t->flags.setup = 0;
#endif /* DEBUG_THREADS */
pthread_rwlockattr_init(&attr);
#ifdef HAVE_PTHREAD_RWLOCK_PREFER_WRITER_NP
pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NP);
#endif
res = pthread_rwlock_init(&t->lock, &attr);
pthread_rwlockattr_destroy(&attr);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
return res;
}
/*!
 * \brief Destroy a read/write lock and, in DEBUG_THREADS builds, its lock tracking data.
 *
 * \param filename    Source file of the caller; used in diagnostics and stored in tracking slot 0.
 * \param lineno      Source line of the caller; used in diagnostics and stored in tracking slot 0.
 * \param func        Function name of the caller; used in diagnostics and stored in tracking slot 0.
 * \param rwlock_name Name of the lock as printed in diagnostics.
 * \param t           The lock to destroy.
 *
 * \return The result of pthread_rwlock_destroy().
 * \retval EINVAL Only when DEBUG_THREADS, AST_MUTEX_INIT_W_CONSTRUCTORS and
 *         CAN_COMPARE_MUTEX_TO_INIT_VALUE are all defined: the lock still
 *         compares equal to __AST_RWLOCK_INIT_VALUE, so it is reported as
 *         uninitialized and pthread_rwlock_destroy() is not called.
 */
int __ast_rwlock_destroy(const char *filename, int lineno, const char *func, const char *rwlock_name, ast_rwlock_t *t)
{
	int res;

#ifdef DEBUG_THREADS
	/*
	 * NOTE(review): the last argument (1) differs from the 0 passed by
	 * __ast_rwlock_unlock(); presumably it suppresses creation of tracking
	 * data that would be deleted right away -- confirm against
	 * ast_get_reentrancy().
	 */
	struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 1);
	/* When zero, log_mutex_error() writes to stderr instead of calling ast_log(). */
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");

#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	if (t->lock == ((pthread_rwlock_t) __AST_RWLOCK_INIT_VALUE)) {
		log_mutex_error(canlog, "%s line %d (%s): Warning: rwlock '%s' is uninitialized.\n",
				filename, lineno, func, rwlock_name);
		DO_THREAD_CRASH;
		res = EINVAL;
		/* Skip the pthread destroy but still release any tracking data. */
		goto lt_cleanup;
	}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */
#endif /* DEBUG_THREADS */

	res = pthread_rwlock_destroy(&t->lock);

#ifdef DEBUG_THREADS
	if (res) {
		log_mutex_error(canlog, "%s line %d (%s): Error destroying rwlock %s: %s\n",
				filename, lineno, func, rwlock_name, strerror(res));
	}
#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
	/* The label exists only in builds that contain the goto above. */
lt_cleanup:
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */
	if (lt) {
		/* Record the destroy call site in slot 0 and clear the holder state. */
		ast_reentrancy_lock(lt);
		lt->file[0] = filename;
		lt->lineno[0] = lineno;
		lt->func[0] = func;
		lt->reentrancy = 0;
		lt->thread_id[0] = 0;
#ifdef HAVE_BKTR
		memset(&lt->backtrace[0], 0, sizeof(lt->backtrace[0]));
#endif
		ast_reentrancy_unlock(lt);
		delete_reentrancy_cs(&t->track);
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_rwlock_unlock(const char *filename, int line, const char *func, ast_rwlock_t *t, const char *name)
{
int res;
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
struct ast_lock_track *lt = NULL;
int canlog = t->flags.tracking && strcmp(filename, "logger.c");
struct ast_bt *bt = NULL;
int lock_found = 0;
#if defined(AST_MUTEX_INIT_W_CONSTRUCTORS) && defined(CAN_COMPARE_MUTEX_TO_INIT_VALUE)
if ((t->lock) == ((pthread_rwlock_t) __AST_RWLOCK_INIT_VALUE)) {
log_mutex_error(canlog, "%s line %d (%s): Warning: rwlock '%s' is uninitialized.\n",
filename, line, func, name);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
DO_THREAD_CRASH;
return EINVAL;
}
#endif /* AST_MUTEX_INIT_W_CONSTRUCTORS */
lt = ast_get_reentrancy(&t->track, &t->flags, 0);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
ast_reentrancy_lock(lt);
if (lt->reentrancy) {
int i;
pthread_t self = pthread_self();
for (i = lt->reentrancy - 1; i >= 0; --i) {
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
if (lt->thread_id[i] == self) {
lock_found = 1;
if (i != lt->reentrancy - 1) {
lt->file[i] = lt->file[lt->reentrancy - 1];
lt->lineno[i] = lt->lineno[lt->reentrancy - 1];
lt->func[i] = lt->func[lt->reentrancy - 1];
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[i] = lt->thread_id[lt->reentrancy - 1];
}
#ifdef HAVE_BKTR
bt = &lt->backtrace[i];
#endif
lt->file[lt->reentrancy - 1] = NULL;
lt->lineno[lt->reentrancy - 1] = 0;
lt->func[lt->reentrancy - 1] = NULL;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[lt->reentrancy - 1] = AST_PTHREADT_NULL;
break;
}
}
}
if (lock_found && --lt->reentrancy < 0) {
log_mutex_error(canlog, "%s line %d (%s): rwlock '%s' freed more times than we've locked!\n",
filename, line, func, name);
lt->reentrancy = 0;
}
ast_reentrancy_unlock(lt);
ast_remove_lock_info(t, bt);
}
#endif /* DEBUG_THREADS */
res = pthread_rwlock_unlock(&t->lock);
#ifdef DEBUG_THREADS
if (res) {
log_mutex_error(canlog, "%s line %d (%s): Error releasing rwlock: %s\n",
filename, line, func, strerror(res));
DO_THREAD_CRASH;
}
#endif /* DEBUG_THREADS */
return res;
}
/*!
 * \brief Acquire a read lock on an rwlock, with optional tracking and
 *        deadlock reporting.
 *
 * With DEBUG_THREADS the attempt is registered through ast_store_lock_info()
 * before locking; on success the caller's file/line/function/thread are
 * appended to the lock's reentrancy table, and on failure the registration
 * is removed again. With DETECT_DEADLOCKS the lock is taken by polling
 * pthread_rwlock_tryrdlock() and a diagnostic is emitted at every multiple
 * of five seconds spent waiting.
 *
 * \param filename Source file of the caller (used in diagnostics).
 * \param line     Source line of the caller.
 * \param func     Function name of the caller.
 * \param t        The rwlock to read-lock.
 * \param name     Textual name of the lock (used in diagnostics).
 *
 * \return The result of the underlying pthread read-lock call
 *         (0 on success, an errno-style value otherwise).
 */
int __ast_rwlock_rdlock(const char *filename, int line, const char *func,
	ast_rwlock_t *t, const char *name)
{
	int res;
#if defined(DEBUG_THREADS) || defined(DETECT_DEADLOCKS)
	/* log_mutex_error() uses ast_log() when this is non-zero and stderr
	 * otherwise; it is zero for untracked locks and for calls from logger.c. */
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");
#endif

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	struct ast_bt *bt = NULL;

	if (lt) {
#ifdef HAVE_BKTR
		struct ast_bt tmp;

		/* The implementation of backtrace() may have its own locks.
		 * Capture the backtrace outside of the reentrancy lock to
		 * avoid deadlocks. See ASTERISK-22455. */
		ast_bt_get_addresses(&tmp);

		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->backtrace[lt->reentrancy] = tmp;
			bt = &lt->backtrace[lt->reentrancy];
		}
		ast_reentrancy_unlock(lt);
#endif
		ast_store_lock_info(AST_RDLOCK, filename, line, func, name, t, bt);
	}
#endif /* DEBUG_THREADS */

#if defined(DETECT_DEADLOCKS)
	{
		time_t seconds = time(NULL);
		time_t wait_time, reported_wait = 0;

		do {
			res = pthread_rwlock_tryrdlock(&t->lock);
			if (res == EBUSY) {
				wait_time = time(NULL) - seconds;
				/* Report once per five-second boundary, not on every poll. */
				if (wait_time > reported_wait && (wait_time % 5) == 0) {
					log_mutex_error(canlog, "%s line %d (%s): Deadlock? waited %d sec for readlock '%s'?\n",
						filename, line, func, (int)wait_time, name);
#ifdef DEBUG_THREADS
					if (lt) {
						ast_reentrancy_lock(lt);
#ifdef HAVE_BKTR
						/* Slot [reentrancy] only exists while the table
						 * is not full; skip it rather than read past
						 * the end of the array. */
						if (lt->reentrancy < AST_MAX_REENTRANCY) {
							__dump_backtrace(&lt->backtrace[lt->reentrancy], canlog);
						}
#endif
						/* The unlock path drops the reentrancy count before
						 * releasing the pthread lock, so the count can be 0
						 * here even though the lock is busy. Only report a
						 * holder when one is actually recorded, otherwise
						 * index -1 would be read. */
						if (lt->reentrancy > 0) {
							log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
								lt->file[lt->reentrancy-1], lt->lineno[lt->reentrancy-1],
								lt->func[lt->reentrancy-1], name);
#ifdef HAVE_BKTR
							__dump_backtrace(&lt->backtrace[lt->reentrancy-1], canlog);
#endif
						}
						ast_reentrancy_unlock(lt);
					}
#endif
					reported_wait = wait_time;
				}
				usleep(200);
			}
		} while (res == EBUSY);
	}
#else /* !DETECT_DEADLOCKS */
	res = pthread_rwlock_rdlock(&t->lock);
#endif /* !DETECT_DEADLOCKS */

#ifdef DEBUG_THREADS
	if (!res && lt) {
		/* Lock obtained: record who holds it, if there is room left. */
		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->file[lt->reentrancy] = filename;
			lt->lineno[lt->reentrancy] = line;
			lt->func[lt->reentrancy] = func;
			lt->thread_id[lt->reentrancy] = pthread_self();
			lt->reentrancy++;
		}
		ast_reentrancy_unlock(lt);
		ast_mark_lock_acquired(t);
	} else if (lt) {
		/* Lock attempt failed: withdraw the registration made above. */
#ifdef HAVE_BKTR
		/* Read the count under the reentrancy lock so the test and the
		 * index computed from it cannot disagree. */
		ast_reentrancy_lock(lt);
		bt = lt->reentrancy ? &lt->backtrace[lt->reentrancy-1] : NULL;
		ast_reentrancy_unlock(lt);
#endif
		ast_remove_lock_info(t, bt);
	}
	if (res) {
		log_mutex_error(canlog, "%s line %d (%s): Error obtaining read lock: %s\n",
			filename, line, func, strerror(res));
		DO_THREAD_CRASH;
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_rwlock_wrlock(const char *filename, int line, const char *func, \
ast_rwlock_t *t, const char *name)
{
int res;
#if defined(DEBUG_THREADS) || defined(DETECT_DEADLOCKS)
int canlog = t->flags.tracking && strcmp(filename, "logger.c");
#endif
#ifdef DEBUG_THREADS
struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
struct ast_bt *bt = NULL;
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
#ifdef HAVE_BKTR
struct ast_bt tmp;
/* The implementation of backtrace() may have its own locks.
* Capture the backtrace outside of the reentrancy lock to
* avoid deadlocks. See ASTERISK-22455. */
ast_bt_get_addresses(&tmp);
ast_reentrancy_lock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->backtrace[lt->reentrancy] = tmp;
bt = &lt->backtrace[lt->reentrancy];
}
ast_reentrancy_unlock(lt);
#endif
ast_store_lock_info(AST_WRLOCK, filename, line, func, name, t, bt);
}
#endif /* DEBUG_THREADS */
#ifdef DETECT_DEADLOCKS
{
time_t seconds = time(NULL);
time_t wait_time, reported_wait = 0;
do {
res = pthread_rwlock_trywrlock(&t->lock);
if (res == EBUSY) {
wait_time = time(NULL) - seconds;
if (wait_time > reported_wait && (wait_time % 5) == 0) {
log_mutex_error(canlog, "%s line %d (%s): Deadlock? waited %d sec for writelock '%s'?\n",
filename, line, func, (int)wait_time, name);
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
ast_reentrancy_lock(lt);
#ifdef HAVE_BKTR
__dump_backtrace(&lt->backtrace[lt->reentrancy], canlog);
#endif
log_mutex_error(canlog, "%s line %d (%s): '%s' was locked here.\n",
lt->file[lt->reentrancy-1], lt->lineno[lt->reentrancy-1],
lt->func[lt->reentrancy-1], name);
#ifdef HAVE_BKTR
__dump_backtrace(&lt->backtrace[lt->reentrancy-1], canlog);
#endif
ast_reentrancy_unlock(lt);
}
#endif
reported_wait = wait_time;
}
usleep(200);
}
} while (res == EBUSY);
}
#else /* !DETECT_DEADLOCKS */
res = pthread_rwlock_wrlock(&t->lock);
#endif /* !DETECT_DEADLOCKS */
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (!res && lt) {
ast_reentrancy_lock(lt);
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->file[lt->reentrancy] = filename;
lt->lineno[lt->reentrancy] = line;
lt->func[lt->reentrancy] = func;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[lt->reentrancy] = pthread_self();
lt->reentrancy++;
}
ast_reentrancy_unlock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
ast_mark_lock_acquired(t);
} else if (lt) {
#ifdef HAVE_BKTR
if (lt->reentrancy) {
ast_reentrancy_lock(lt);
bt = &lt->backtrace[lt->reentrancy-1];
ast_reentrancy_unlock(lt);
} else {
bt = NULL;
}
#endif
ast_remove_lock_info(t, bt);
}
if (res) {
log_mutex_error(canlog, "%s line %d (%s): Error obtaining write lock: %s\n",
filename, line, func, strerror(res));
DO_THREAD_CRASH;
}
#endif /* DEBUG_THREADS */
return res;
}
/*!
 * \brief Acquire a read lock on an rwlock, giving up at an absolute deadline.
 *
 * \param filename Source file of the caller; used in error messages and to
 *        decide whether logging through ast_log() is allowed.
 * \param line Source line of the caller.
 * \param func Function name of the caller.
 * \param t The rwlock to read-lock.
 * \param name Name of the lock, passed on to the lock tracking info.
 * \param abs_timeout Absolute time after which the attempt is abandoned.
 *
 * \retval 0 the read lock was acquired.
 * \retval non-zero the error code of the underlying pthread call. When
 *         pthread_rwlock_timedrdlock() is not used, this is the result of
 *         the last pthread_rwlock_tryrdlock() attempt.
 */
int __ast_rwlock_timedrdlock(const char *filename, int line, const char *func, ast_rwlock_t *t, const char *name,
	const struct timespec *abs_timeout)
{
	int res;

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	/* Never log through ast_log() for locks taken by logger.c itself. */
	int canlog = t->flags.tracking && strcmp(filename, "logger.c");
	struct ast_bt *bt = NULL;

	if (lt) {
#ifdef HAVE_BKTR
		struct ast_bt tmp;

		/* The implementation of backtrace() may have its own locks.
		 * Capture the backtrace outside of the reentrancy lock to
		 * avoid deadlocks. See ASTERISK-22455. */
		ast_bt_get_addresses(&tmp);

		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->backtrace[lt->reentrancy] = tmp;
			bt = &lt->backtrace[lt->reentrancy];
		}
		ast_reentrancy_unlock(lt);
#endif
		/* This is a read lock request, so it must be recorded as
		 * AST_RDLOCK (it was previously recorded as AST_WRLOCK). */
		ast_store_lock_info(AST_RDLOCK, filename, line, func, name, t, bt);
	}
#endif /* DEBUG_THREADS */

#ifdef HAVE_PTHREAD_RWLOCK_TIMEDWRLOCK
	res = pthread_rwlock_timedrdlock(&t->lock, abs_timeout);
#else
	/* No native timed lock: poll with tryrdlock until it succeeds or the
	 * current time passes abs_timeout. */
	do {
		struct timeval _now;

		for (;;) {
			if (!(res = pthread_rwlock_tryrdlock(&t->lock))) {
				break;
			}
			_now = ast_tvnow();
			if (_now.tv_sec > abs_timeout->tv_sec || (_now.tv_sec == abs_timeout->tv_sec && _now.tv_usec * 1000 > abs_timeout->tv_nsec)) {
				break;
			}
			usleep(1);
		}
	} while (0);
#endif

#ifdef DEBUG_THREADS
	if (!res && lt) {
		/* Lock obtained: push this acquisition onto the tracking stack. */
		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			lt->file[lt->reentrancy] = filename;
			lt->lineno[lt->reentrancy] = line;
			lt->func[lt->reentrancy] = func;
			lt->thread_id[lt->reentrancy] = pthread_self();
			lt->reentrancy++;
		}
		ast_reentrancy_unlock(lt);
		ast_mark_lock_acquired(t);
	} else if (lt) {
		/* Lock not obtained: drop the pending lock info stored above. */
#ifdef HAVE_BKTR
		if (lt->reentrancy) {
			ast_reentrancy_lock(lt);
			bt = &lt->backtrace[lt->reentrancy-1];
			ast_reentrancy_unlock(lt);
		} else {
			bt = NULL;
		}
#endif
		ast_remove_lock_info(t, bt);
	}

	if (res) {
		log_mutex_error(canlog, "%s line %d (%s): Error obtaining read lock: %s\n",
				filename, line, func, strerror(res));
		DO_THREAD_CRASH;
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_rwlock_timedwrlock(const char *filename, int line, const char *func, ast_rwlock_t *t, const char *name,
const struct timespec *abs_timeout)
{
int res;
#ifdef DEBUG_THREADS
struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
int canlog = t->flags.tracking && strcmp(filename, "logger.c");
struct ast_bt *bt = NULL;
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
#ifdef HAVE_BKTR
struct ast_bt tmp;
/* The implementation of backtrace() may have its own locks.
* Capture the backtrace outside of the reentrancy lock to
* avoid deadlocks. See ASTERISK-22455. */
ast_bt_get_addresses(&tmp);
ast_reentrancy_lock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->backtrace[lt->reentrancy] = tmp;
bt = &lt->backtrace[lt->reentrancy];
}
ast_reentrancy_unlock(lt);
#endif
ast_store_lock_info(AST_WRLOCK, filename, line, func, name, t, bt);
}
#endif /* DEBUG_THREADS */
#ifdef HAVE_PTHREAD_RWLOCK_TIMEDWRLOCK
res = pthread_rwlock_timedwrlock(&t->lock, abs_timeout);
#else
do {
struct timeval _now;
for (;;) {
if (!(res = pthread_rwlock_trywrlock(&t->lock))) {
break;
}
_now = ast_tvnow();
if (_now.tv_sec > abs_timeout->tv_sec || (_now.tv_sec == abs_timeout->tv_sec && _now.tv_usec * 1000 > abs_timeout->tv_nsec)) {
break;
}
usleep(1);
}
} while (0);
#endif
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (!res && lt) {
ast_reentrancy_lock(lt);
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->file[lt->reentrancy] = filename;
lt->lineno[lt->reentrancy] = line;
lt->func[lt->reentrancy] = func;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[lt->reentrancy] = pthread_self();
lt->reentrancy++;
}
ast_reentrancy_unlock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
ast_mark_lock_acquired(t);
} else if (lt) {
#ifdef HAVE_BKTR
if (lt->reentrancy) {
ast_reentrancy_lock(lt);
bt = &lt->backtrace[lt->reentrancy-1];
ast_reentrancy_unlock(lt);
} else {
bt = NULL;
}
#endif
ast_remove_lock_info(t, bt);
}
if (res) {
log_mutex_error(canlog, "%s line %d (%s): Error obtaining read lock: %s\n",
filename, line, func, strerror(res));
DO_THREAD_CRASH;
}
#endif /* DEBUG_THREADS */
return res;
}
/*!
 * \brief Attempt to take the read side of an rwlock without blocking.
 *
 * When DEBUG_THREADS is defined the attempt is registered with the lock
 * tracking code before the lock is tried, and afterwards is marked as
 * either acquired or failed. Without DEBUG_THREADS this is a plain call
 * to pthread_rwlock_tryrdlock().
 *
 * \param filename Caller's source file; recorded in the tracking data on success.
 * \param line Caller's line number; recorded in the tracking data on success.
 * \param func Caller's function name; recorded in the tracking data on success.
 * \param t The rwlock to try.
 * \param name Lock name handed to ast_store_lock_info().
 *
 * \return The value returned by pthread_rwlock_tryrdlock(); zero is treated
 *         as a successful acquisition.
 */
int __ast_rwlock_tryrdlock(const char *filename, int line, const char *func, ast_rwlock_t *t, const char *name)
{
	int res;

#ifdef DEBUG_THREADS
	struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
	struct ast_bt *bt = NULL;

	if (lt) {
#ifdef HAVE_BKTR
		struct ast_bt captured;

		/* backtrace() may take locks of its own, so collect the
		 * addresses before the reentrancy lock is held; holding it
		 * while capturing risks a deadlock. See ASTERISK-22455. */
		ast_bt_get_addresses(&captured);

		ast_reentrancy_lock(lt);
		if (lt->reentrancy < AST_MAX_REENTRANCY) {
			/* Park the captured trace in the slot for this nesting level. */
			bt = &lt->backtrace[lt->reentrancy];
			*bt = captured;
		}
		ast_reentrancy_unlock(lt);
#endif
		/* Register the pending attempt; bt stays NULL when no slot was free
		 * or backtraces are not compiled in. */
		ast_store_lock_info(AST_RDLOCK, filename, line, func, name, t, bt);
	}
#endif /* DEBUG_THREADS */

	res = pthread_rwlock_tryrdlock(&t->lock);

#ifdef DEBUG_THREADS
	if (lt) {
		if (res) {
			/* The try-lock did not succeed: flag the pending entry as failed. */
			ast_mark_lock_failed(t);
		} else {
			/* Lock obtained: remember who took it and where, if there is
			 * still room in the per-lock history. */
			ast_reentrancy_lock(lt);
			if (lt->reentrancy < AST_MAX_REENTRANCY) {
				lt->file[lt->reentrancy] = filename;
				lt->lineno[lt->reentrancy] = line;
				lt->func[lt->reentrancy] = func;
				lt->thread_id[lt->reentrancy] = pthread_self();
				lt->reentrancy++;
			}
			ast_reentrancy_unlock(lt);
			ast_mark_lock_acquired(t);
		}
	}
#endif /* DEBUG_THREADS */

	return res;
}
int __ast_rwlock_trywrlock(const char *filename, int line, const char *func, ast_rwlock_t *t, const char *name)
{
int res;
#ifdef DEBUG_THREADS
struct ast_lock_track *lt = ast_get_reentrancy(&t->track, &t->flags, 0);
struct ast_bt *bt = NULL;
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt) {
#ifdef HAVE_BKTR
struct ast_bt tmp;
/* The implementation of backtrace() may have its own locks.
* Capture the backtrace outside of the reentrancy lock to
* avoid deadlocks. See ASTERISK-22455. */
ast_bt_get_addresses(&tmp);
ast_reentrancy_lock(lt);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->backtrace[lt->reentrancy] = tmp;
bt = &lt->backtrace[lt->reentrancy];
}
ast_reentrancy_unlock(lt);
#endif
ast_store_lock_info(AST_WRLOCK, filename, line, func, name, t, bt);
}
#endif /* DEBUG_THREADS */
res = pthread_rwlock_trywrlock(&t->lock);
#ifdef DEBUG_THREADS
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
if (!res && lt) {
ast_reentrancy_lock(lt);
if (lt->reentrancy < AST_MAX_REENTRANCY) {
lt->file[lt->reentrancy] = filename;
lt->lineno[lt->reentrancy] = line;
lt->func[lt->reentrancy] = func;
chan_sip: Address runaway when realtime peers subscribe to mailboxes Users upgrading from asterisk 13.5 to a later version and who use realtime with peers that have mailboxes were experiencing runaway situations that manifested as a continuous stream of taskprocessor congestion errors, memory leaks and an unresponsive chan_sip. A related issue was that setting rtcachefriends=no NEVER worked in asterisk 13 (since the move to stasis). In 13.5 and earlier, when a peer tried to register, all of the stasis threads would block and chan_sip would again become unresponsive. After 13.5, the runaway would happen. There were a number of causes... * mwi_event_cb was (indirectly) calling build_peer even though calls to mwi_event_cb are often caused by build_peer. * In an effort to prevent chan_sip from being unloaded while messages were still in flight, destroy_mailboxes was calling stasis_unsubscribe_and_join but in some cases waited forever for the final message. * add_peer_mailboxes wasn't properly marking the existing mailboxes on a peer as "keep" so build_peer would always delete them all. * add_peer_mwi_subs was unsubscribing existing mailbox subscriptions then just creating them again. All of this was causing a flood of subscribes and unsubscribes on multiple threads all for the same peer and mailbox. Fixes... * add_peer_mailboxes now marks mailboxes correctly and build_peer only deletes the ones that really are no longer needed by the peer. * add_peer_mwi_subs now only adds subscriptions marked as "new" instead of unsubscribing and resubscribing everything. It also adds the peer object's address to the mailbox instead of its name to the subscription userdata so mwi_event_cb doesn't have to call build_peer. With these changes, with rtcachefriends=yes (the most common setting), there are no leaks, locks, loops or crashes at shutdown. rtcachefriends=no still causes leaks but at least it doesn't lock, loop or crash. 
Since making rtcachefriends=no work wasn't in scope for this issue, further work will have to be deferred to a separate patch. Side fixes... * The ast_lock_track structure had a member named "thread" which gdb doesn't like since it conflicts with its "thread" command. That member was renamed to "thread_id". ASTERISK-25468 #close Change-Id: I07519ef7f092629e1e844f855abd279d6475cdd0
2016-09-20 14:42:15 +00:00
lt->thread_id[lt->reentrancy] = pthread_self();
lt->reentrancy++;
}
ast_reentrancy_unlock(lt);
ast_mark_lock_acquired(t);
DEBUG_THREADS: Fix regression and lock tracking initialization problems. This patch started with David Lee's patch at https://reviewboard.asterisk.org/r/2826/ and includes a regression fix introduced by the ASTERISK-22455 patch. The initialization of a mutex's lock tracking structure was not protected in a critical section. This is fine for any mutex that is explicitly initialized, but a static mutex may have its lock tracking double initialized if multiple threads attempt the first lock simultaneously. * Added a global mutex to properly serialize initialization of the lock tracking structure. The painful global lock can be mitigated by adding a double checked lock flag as discussed on the original review request. * Defer lock tracking initialization until first use. * Don't be "helpful" and initialize an uninitialized lock when DEBUG_THREADS is enabled. Debug code is not supposed to fix or change normal code behavior. We don't need a lock initialization race that would force a re-setup of lock tracking. Lock tracking already handles initialization on first use. * Properly handle allocation failures of the lock tracking structure. * No need to initialize tracking data in __ast_pthread_mutex_destroy() just to turn around and destroy it. The regression introduced by ASTERISK-22455 is the result of manipulating a pthread_mutex_t struct outside of the pthread library code. The pthread_mutex_t struct seems to have a global linked list pointer member that can get changed by other threads. Therefore, saving and restoring the contents of a pthread_mutex_t struct is a bad thing. Thanks to Thomas Airmont for finding this obscure regression. * Don't overwrite the struct ast_lock_track.reentr_mutex member to restore tracking data in __ast_cond_wait() and __ast_cond_timedwait(). The pthread_mutex_t struct must be treated as a read-only opaque variable. 
Miscellaneous other items fixed by this patch: * Match ast_suspend_lock_info() with ast_restore_lock_info() in __ast_cond_timedwait(). * Made some uninitialized lock sanity checks return EINVAL and try a DO_THREAD_CRASH. * Fix bad canlog initialization expressions. ASTERISK-24614 #close Reported by: Thomas Airmont Review: https://reviewboard.asterisk.org/r/4247/ Review: https://reviewboard.asterisk.org/r/2826/ ........ Merged revisions 429539 from http://svn.asterisk.org/svn/asterisk/branches/11 git-svn-id: https://origsvn.digium.com/svn/asterisk/trunk@429541 65c4cc65-6c06-0410-ace0-fbb531ad65f3
2014-12-12 23:49:36 +00:00
} else if (lt) {
ast_mark_lock_failed(t);
}
#endif /* DEBUG_THREADS */
return res;
}