sched: fix and test a double deref on delete of an executing call back
sched: Avoid a double deref when AST_SCHED_DEL_UNREF is called on an executing call-back. This is done by adding a new variable 'rescheduled' to the struct sched which is set in ast_sched_runq and checked in ast_sched_del_nonrunning. ast_sched_del_nonrunning is a replacement for now deprecated ast_sched_del which returns a new possible value -2 if called on an executing call-back with rescheduled set. ast_sched_del is modified to call ast_sched_del_nonrunning to maintain existing code. AST_SCHED_DEL_UNREF is also updated to look for the -2 in which case it will not throw a warning or invoke refcall. test_sched: Add a new unit test sched_test_freebird that will check the reference count in the resolved scenario. ASTERISK-29698 Change-Id: Icfb16b3acbc29cf5b4cef74183f7531caaefe21d
This commit is contained in:
parent
93d090147f
commit
b79a571279
|
@ -72,20 +72,22 @@ extern "C" {
|
|||
/*!
|
||||
* \brief schedule task to get deleted and call unref function
|
||||
*
|
||||
* Only calls unref function if the delete succeeded.
|
||||
* Only calls the unref function if the task is actually deleted by
|
||||
* ast_sched_del_nonrunning. If a failure occurs or the task is
|
||||
* currently running and not rescheduled then refcall is not invoked.
|
||||
*
|
||||
* \sa AST_SCHED_DEL
|
||||
* \since 1.6.1
|
||||
*/
|
||||
#define AST_SCHED_DEL_UNREF(sched, id, refcall) \
|
||||
do { \
|
||||
int _count = 0, _id; \
|
||||
while ((_id = id) > -1 && ast_sched_del(sched, _id) && ++_count < 10) { \
|
||||
int _count = 0, _id, _ret = 0; \
|
||||
while ((_id = id) > -1 && (( _ret = ast_sched_del_nonrunning(sched, _id)) == -1) && ++_count < 10) { \
|
||||
usleep(1); \
|
||||
} \
|
||||
if (_count == 10) { \
|
||||
ast_log(LOG_WARNING, "Unable to cancel schedule ID %d. This is probably a bug (%s: %s, line %d).\n", _id, __FILE__, __PRETTY_FUNCTION__, __LINE__); \
|
||||
} else if (_id > -1) { \
|
||||
} else if (_id > -1 && _ret >-2) { \
|
||||
refcall; \
|
||||
id = -1; \
|
||||
} \
|
||||
|
@ -294,9 +296,29 @@ const void *ast_sched_find_data(struct ast_sched_context *con, int id);
|
|||
*
|
||||
* \retval -1 on failure
|
||||
* \retval 0 on success
|
||||
*
|
||||
* \deprecated in favor of ast_sched_del_nonrunning which checks if the event is running and rescheduled
|
||||
*
|
||||
*/
|
||||
int ast_sched_del(struct ast_sched_context *con, int id) attribute_warn_unused_result;
|
||||
|
||||
/*!
|
||||
* \brief Deletes a scheduled event with care against the event running
|
||||
*
|
||||
* Remove this event from being run. A procedure should not remove its own
|
||||
* event, but return 0 instead. In most cases, you should not call this
|
||||
* routine directly, but use the AST_SCHED_DEL() macro instead (especially if
|
||||
* you don't intend to do something different when it returns failure).
|
||||
*
|
||||
* \param con scheduling context to delete item from
|
||||
* \param id ID of the scheduled item to delete
|
||||
*
|
||||
* \retval -1 on failure
|
||||
* \retval -2 event was running but was deleted because it was not rescheduled
|
||||
* \retval 0 on success
|
||||
*/
|
||||
int ast_sched_del_nonrunning(struct ast_sched_context *con, int id) attribute_warn_unused_result;
|
||||
|
||||
/*!
|
||||
* \brief Determines number of seconds until the next outstanding event to take place
|
||||
*
|
||||
|
|
45
main/sched.c
45
main/sched.c
|
@ -98,6 +98,8 @@ struct sched {
|
|||
ast_cond_t cond;
|
||||
/*! Indication that a running task was deleted. */
|
||||
unsigned int deleted:1;
|
||||
/*! Indication that a running task was rescheduled. */
|
||||
unsigned int rescheduled:1;
|
||||
};
|
||||
|
||||
struct sched_thread {
|
||||
|
@ -606,11 +608,27 @@ const void *ast_sched_find_data(struct ast_sched_context *con, int id)
|
|||
* "id". It's nearly impossible that there
|
||||
* would be two or more in the list with that
|
||||
* id.
|
||||
* Deprecated in favor of ast_sched_del_nonrunning
|
||||
* which checks running event status.
|
||||
*/
|
||||
int ast_sched_del(struct ast_sched_context *con, int id)
|
||||
{
|
||||
return ast_sched_del_nonrunning(con, id) ? -1 : 0;
|
||||
}
|
||||
|
||||
/*! \brief
|
||||
* Delete the schedule entry with number "id".
|
||||
* If running, wait for the task to complete,
|
||||
* check to see if it is rescheduled then
|
||||
* schedule the release.
|
||||
* It's nearly impossible that there would be
|
||||
* two or more in the list with that id.
|
||||
*/
|
||||
int ast_sched_del_nonrunning(struct ast_sched_context *con, int id)
|
||||
{
|
||||
struct sched *s = NULL;
|
||||
int *last_id = ast_threadstorage_get(&last_del_id, sizeof(int));
|
||||
int res = 0;
|
||||
|
||||
DEBUG(ast_debug(1, "ast_sched_del(%d)\n", id));
|
||||
|
||||
|
@ -645,7 +663,17 @@ int ast_sched_del(struct ast_sched_context *con, int id)
|
|||
while (con->currently_executing && (id == con->currently_executing->sched_id->id)) {
|
||||
ast_cond_wait(&s->cond, &con->lock);
|
||||
}
|
||||
/* Do not sched_release() here because ast_sched_runq() will do it */
|
||||
/* This is not rescheduled so the caller of ast_sched_del_nonrunning needs to know
|
||||
* that it was still deleted
|
||||
*/
|
||||
if (!s->rescheduled) {
|
||||
res = -2;
|
||||
}
|
||||
/* ast_sched_runq knows we are waiting on this item and is passing responsibility for
|
||||
* its destruction to us
|
||||
*/
|
||||
sched_release(con, s);
|
||||
s = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -658,7 +686,10 @@ int ast_sched_del(struct ast_sched_context *con, int id)
|
|||
}
|
||||
ast_mutex_unlock(&con->lock);
|
||||
|
||||
if (!s && *last_id != id) {
|
||||
if(res == -2){
|
||||
return res;
|
||||
}
|
||||
else if (!s && *last_id != id) {
|
||||
ast_debug(1, "Attempted to delete nonexistent schedule entry %d!\n", id);
|
||||
/* Removing nonexistent schedule entry shouldn't trigger assert (it was enabled in DEV_MODE);
|
||||
* because in many places entries is deleted without having valid id. */
|
||||
|
@ -668,7 +699,7 @@ int ast_sched_del(struct ast_sched_context *con, int id)
|
|||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
void ast_sched_report(struct ast_sched_context *con, struct ast_str **buf, struct ast_cb_names *cbnames)
|
||||
|
@ -793,7 +824,13 @@ int ast_sched_runq(struct ast_sched_context *con)
|
|||
con->currently_executing = NULL;
|
||||
ast_cond_signal(¤t->cond);
|
||||
|
||||
if (res && !current->deleted) {
|
||||
if (current->deleted) {
|
||||
/*
|
||||
* Another thread is waiting on this scheduled item. That thread
|
||||
* will be responsible for it's destruction
|
||||
*/
|
||||
current->rescheduled = res ? 1 : 0;
|
||||
} else if (res) {
|
||||
/*
|
||||
* If they return non-zero, we should schedule them to be
|
||||
* run again.
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "asterisk/sched.h"
|
||||
#include "asterisk/test.h"
|
||||
#include "asterisk/cli.h"
|
||||
#include "asterisk/astobj2.h"
|
||||
|
||||
static int sched_cb(const void *data)
|
||||
{
|
||||
|
@ -336,6 +337,132 @@ return_cleanup:
|
|||
return CLI_SUCCESS;
|
||||
}
|
||||
|
||||
struct test_obj {
|
||||
ast_mutex_t lock;
|
||||
ast_cond_t cond;
|
||||
int scheduledCBstarted;
|
||||
int id;
|
||||
};
|
||||
|
||||
static void test_obj_cleanup(void *data)
|
||||
{
|
||||
struct test_obj *obj = data;
|
||||
ast_mutex_destroy(&obj->lock);
|
||||
ast_cond_destroy(&obj->cond);
|
||||
}
|
||||
|
||||
static int lockingcb(const void *data)
|
||||
{
|
||||
struct test_obj *obj = (struct test_obj *)data;
|
||||
struct timespec delay = {3,0};
|
||||
|
||||
ast_mutex_lock(&obj->lock);
|
||||
|
||||
obj->scheduledCBstarted = 1;
|
||||
ast_cond_signal(&obj->cond);
|
||||
|
||||
ast_mutex_unlock(&obj->lock);
|
||||
|
||||
ao2_ref(obj, -1);
|
||||
while (nanosleep(&delay, &delay));
|
||||
/* sleep to force this scheduled event to remain running long
|
||||
* enough for the scheduling thread to unlock and call
|
||||
* AST_SCHED_DEL_UNREF
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
AST_TEST_DEFINE(sched_test_freebird)
|
||||
{
|
||||
struct test_obj * obj;
|
||||
struct ast_sched_context * con;
|
||||
enum ast_test_result_state res = AST_TEST_FAIL;
|
||||
int refs;
|
||||
|
||||
switch (cmd) {
|
||||
case TEST_INIT:
|
||||
info->name = "sched_test_freebird";
|
||||
info->category = "/main/sched/";
|
||||
info->summary = "Test deadlock avoidance and double-unref";
|
||||
info->description =
|
||||
"This tests a call to AST_SCHED_DEL_UNREF on a running event.";
|
||||
return AST_TEST_NOT_RUN;
|
||||
case TEST_EXECUTE:
|
||||
res = AST_TEST_PASS;
|
||||
break;
|
||||
}
|
||||
|
||||
obj = ao2_alloc(sizeof(struct test_obj), test_obj_cleanup);
|
||||
if (!obj) {
|
||||
ast_test_status_update(test,
|
||||
"ao2_alloc() did not return an object\n");
|
||||
return AST_TEST_FAIL;
|
||||
}
|
||||
|
||||
obj->scheduledCBstarted = 0;
|
||||
|
||||
con = ast_sched_context_create();
|
||||
if (!con) {
|
||||
ast_test_status_update(test,
|
||||
"ast_sched_context_create() did not return a context\n");
|
||||
ao2_cleanup(obj);
|
||||
return AST_TEST_FAIL;
|
||||
}
|
||||
|
||||
if (ast_sched_start_thread(con)) {
|
||||
ast_test_status_update(test, "Failed to start test thread\n");
|
||||
ao2_cleanup(obj);
|
||||
ast_sched_context_destroy(con);
|
||||
return AST_TEST_FAIL;
|
||||
}
|
||||
|
||||
/* This double reference is to ensure that the object isn't destroyed prematurely
|
||||
* in a case where it is unreffed an additional time.
|
||||
*/
|
||||
ao2_ref(obj, +2);
|
||||
if ((obj->id = ast_sched_add(con, 0, lockingcb, obj)) == -1) {
|
||||
ast_test_status_update(test, "Failed to add scheduler entry\n");
|
||||
ao2_ref(obj, -3);
|
||||
ast_sched_context_destroy(con);
|
||||
return AST_TEST_FAIL;
|
||||
}
|
||||
|
||||
ast_mutex_lock(&obj->lock);
|
||||
while(obj->scheduledCBstarted == 0) {
|
||||
/* Wait for the scheduled thread to indicate that it has started so we can
|
||||
* then call the AST_SCHED_DEL_UNREF macro
|
||||
*/
|
||||
ast_cond_wait(&obj->cond,&obj->lock);
|
||||
}
|
||||
ast_mutex_unlock(&obj->lock);
|
||||
|
||||
ast_test_status_update(test, "Received signal, calling Scedule and UNREF\n");
|
||||
ast_test_status_update(test, "ID: %d\n", obj->id);
|
||||
AST_SCHED_DEL_UNREF(con, obj->id, ao2_ref(obj, -1));
|
||||
|
||||
refs = ao2_ref(obj, 0);
|
||||
|
||||
switch(refs){
|
||||
case 2:
|
||||
ast_test_status_update(test, "Correct number of references '2'\n");
|
||||
break;
|
||||
default:
|
||||
ast_test_status_update(test, "Incorrect number of references '%d'\n",
|
||||
refs);
|
||||
res = AST_TEST_FAIL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Based on success or failure, the refcount could change
|
||||
*/
|
||||
while(ao2_ref(obj, -1) > 1);
|
||||
|
||||
ast_sched_context_destroy(con);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static struct ast_cli_entry cli_sched[] = {
|
||||
AST_CLI_DEFINE(handle_cli_sched_bench, "Benchmark ast_sched add/del performance"),
|
||||
};
|
||||
|
@ -343,6 +470,7 @@ static struct ast_cli_entry cli_sched[] = {
|
|||
static int unload_module(void)
|
||||
{
|
||||
AST_TEST_UNREGISTER(sched_test_order);
|
||||
AST_TEST_UNREGISTER(sched_test_freebird);
|
||||
ast_cli_unregister_multiple(cli_sched, ARRAY_LEN(cli_sched));
|
||||
return 0;
|
||||
}
|
||||
|
@ -350,6 +478,7 @@ static int unload_module(void)
|
|||
static int load_module(void)
|
||||
{
|
||||
AST_TEST_REGISTER(sched_test_order);
|
||||
AST_TEST_REGISTER(sched_test_freebird);
|
||||
ast_cli_register_multiple(cli_sched, ARRAY_LEN(cli_sched));
|
||||
return AST_MODULE_LOAD_SUCCESS;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue