res_pjsip_refer: Refactor progress locking and serialization

Although refer_progress_notify() always runs in the progress
serializer, the pjproject evsub module itself can cause the
subscription to be destroyed which then triggers
refer_progress_on_evsub_state() to clean it up.  In this case,
it's possible that refer_progress_notify() could get the
subscription pulled out from under it while it's trying to use
it.

At one point we tried to have refer_progress_on_evsub_state()
push the cleanup to the serializer and wait for its return before
returning to pjproject but since pjproject calls its state
callbacks with the dialog locked, this required us to unlock the
dialog while waiting for the serialized cleanup, then lock it
again before returning to pjproject. There were also still some
cases where other callers of refer_progress_notify() weren't
using the serializer and crashes were resulting.

Although all callers of refer_progress_notify() now use the
progress serializer, we decided to simplify the locking so we
didn't have to unlock and relock the dialog in
refer_progress_on_evsub_state().

Now, refer_progress_notify() holds the dialog lock for its
duration and since pjproject also holds the dialog lock while
calling refer_progress_on_evsub_state() (which does the cleanup),
there should be no more chances for the subscription to be
cleaned up while still being used to send NOTIFYs.

To be extra safe, we also now increment the session count on
the dialog when we create a progress object and decrement
the count when the progress is destroyed.

ASTERISK-29313

Change-Id: I97a8bb01771a3c85345649b8124507f7622a8480
This commit is contained in:
George Joseph 2021-02-19 12:25:13 -07:00
parent be0a61bc3d
commit 15afabdf8e
1 changed files with 61 additions and 59 deletions

View File

@ -108,34 +108,49 @@ static struct refer_progress_notification *refer_progress_notification_alloc(str
return notification;
}
/*! \brief Serialized callback for subscription notification */
/*! \brief Serialized callback for subscription notification
*
* Locking and serialization:
*
* Although refer_progress_notify() always runs in the progress serializer,
* the pjproject evsub module itself can cause the subscription to be
* destroyed which then triggers refer_progress_on_evsub_state() to clean
* it up. In this case, it's possible that refer_progress_notify() could
* get the subscription pulled out from under it while it's trying to use it.
*
* At one point we tried to have refer_progress_on_evsub_state() push the
* cleanup to the serializer and wait for its return before returning to
* pjproject but since pjproject calls its state callbacks with the dialog
* locked, this required us to unlock the dialog while waiting for the
* serialized cleanup, then lock it again before returning to pjproject.
* There were also still some cases where other callers of
* refer_progress_notify() weren't using the serializer and crashes were
* resulting.
*
* Although all callers of refer_progress_notify() now use the progress
* serializer, we decided to simplify the locking so we didn't have to
* unlock and relock the dialog in refer_progress_on_evsub_state().
*
* Now, refer_progress_notify() holds the dialog lock for all its work
* rather than just when calling pjsip_evsub_set_mod_data() to clear the
* module data. Since pjproject also holds the dialog lock while calling
* refer_progress_on_evsub_state(), there should be no more chances for
* the subscription to be cleaned up while still being used to send NOTIFYs.
*/
static int refer_progress_notify(void *data)
{
RAII_VAR(struct refer_progress_notification *, notification, data, ao2_cleanup);
pjsip_evsub *sub;
pjsip_tx_data *tdata;
pjsip_dlg_inc_lock(notification->progress->dlg);
/* If the subscription has already been terminated we can't send a notification */
if (!(sub = notification->progress->sub)) {
ast_debug(3, "Not sending NOTIFY of response '%d' and state '%u' on progress monitor '%p' as subscription has been terminated\n",
notification->response, notification->state, notification->progress);
return 0;
}
/* If the subscription is being terminated we want to actually remove the progress structure here to
* stop a deadlock from occurring - basically terminated changes the state which queues a synchronous task
* but we are already running a task... thus it would deadlock */
if (notification->state == PJSIP_EVSUB_STATE_TERMINATED) {
ast_debug(3, "Subscription '%p' is being terminated as a result of a NOTIFY, removing REFER progress structure early on progress monitor '%p'\n",
notification->progress->sub, notification->progress);
pjsip_dlg_inc_lock(notification->progress->dlg);
pjsip_evsub_set_mod_data(notification->progress->sub, refer_progress_module.id, NULL);
pjsip_dlg_dec_lock(notification->progress->dlg);
/* This is for dropping the reference on the subscription */
ao2_cleanup(notification->progress);
notification->progress->sub = NULL;
return 0;
}
/* Send a deferred initial 100 Trying SIP frag NOTIFY if we haven't already. */
@ -158,6 +173,8 @@ static int refer_progress_notify(void *data)
pjsip_xfer_send_request(sub, tdata);
}
pjsip_dlg_dec_lock(notification->progress->dlg);
return 0;
}
@ -293,50 +310,28 @@ static void refer_progress_framehook_destroy(void *data)
ao2_cleanup(progress);
}
/*! \brief Serialized callback for subscription termination */
static int refer_progress_terminate(void *data)
{
struct refer_progress *progress = data;
/* The subscription is no longer valid */
progress->sub = NULL;
return 0;
}
/*! \brief Callback for REFER subscription state changes */
/*!
* \brief Callback for REFER subscription state changes
* \see refer_progress_notify
*
* The documentation attached to refer_progress_notify has more
* information about the locking issues with cleaning up
* the subscription.
*
* \note pjproject holds the dialog lock while calling this function.
*/
static void refer_progress_on_evsub_state(pjsip_evsub *sub, pjsip_event *event)
{
struct refer_progress *progress = pjsip_evsub_get_mod_data(sub, refer_progress_module.id);
/* If being destroyed queue it up to the serializer */
/*
* If being destroyed, remove the progress object from the subscription
* and release the reference it had.
*/
if (progress && (pjsip_evsub_get_state(sub) == PJSIP_EVSUB_STATE_TERMINATED)) {
/* To prevent a deadlock race condition we unlock the dialog so other serialized tasks can execute */
ast_debug(3, "Subscription '%p' has been remotely terminated, waiting for other tasks to complete on progress monitor '%p'\n",
sub, progress);
/* It's possible that a task is waiting to remove us already, so bump the refcount of progress so it doesn't get destroyed */
ao2_ref(progress, +1);
pjsip_dlg_dec_lock(progress->dlg);
/*
* XXX We are always going to execute this inline rather than
* in the serializer because this function is a PJPROJECT
* callback and thus has to be a SIP servant thread.
*
* The likely remedy is to push most of this function into
* refer_progress_terminate() with ast_sip_push_task().
*/
ast_sip_push_task_wait_servant(progress->serializer, refer_progress_terminate, progress);
pjsip_dlg_inc_lock(progress->dlg);
ao2_ref(progress, -1);
ast_debug(3, "Subscription '%p' removed from progress monitor '%p'\n", sub, progress);
/* Since it was unlocked it is possible for this to have been removed already, so check again */
if (pjsip_evsub_get_mod_data(sub, refer_progress_module.id)) {
pjsip_evsub_set_mod_data(sub, refer_progress_module.id, NULL);
ao2_cleanup(progress);
}
pjsip_evsub_set_mod_data(progress->sub, refer_progress_module.id, NULL);
progress->sub = NULL;
ao2_cleanup(progress);
}
}
@ -354,6 +349,10 @@ static void refer_progress_destroy(void *obj)
progress->bridge_sub = stasis_unsubscribe(progress->bridge_sub);
}
if (progress->dlg) {
pjsip_dlg_dec_session(progress->dlg, &refer_progress_module);
}
ao2_cleanup(progress->transfer_data);
ast_free(progress->transferee);
@ -388,9 +387,6 @@ static int refer_progress_alloc(struct ast_sip_session *session, pjsip_rx_data *
(*progress)->framehook = -1;
/* To prevent a potential deadlock we need the dialog so we can lock/unlock */
(*progress)->dlg = session->inv_session->dlg;
/* Create name with seq number appended. */
ast_taskprocessor_build_name(tps_name, sizeof(tps_name), "pjsip/refer/%s",
ast_sorcery_object_get_id(session->endpoint));
@ -404,6 +400,12 @@ static int refer_progress_alloc(struct ast_sip_session *session, pjsip_rx_data *
goto error;
}
/* To prevent a potential deadlock we need the dialog so we can lock/unlock */
(*progress)->dlg = session->inv_session->dlg;
/* We also need to make sure it stays around until we're done with it */
pjsip_dlg_inc_session((*progress)->dlg, &refer_progress_module);
/* Associate the REFER progress structure with the subscription */
ao2_ref(*progress, +1);
pjsip_evsub_set_mod_data((*progress)->sub, refer_progress_module.id, *progress);