[PFCP] Session removal while waiting PFCP reply (#3040)

'node_timeout' and some other functions can remove a smf_sess_t
while that session is still waiting for a PFCP reply
and has an active PFCP xact.

In this case, xact->data points to the deleted session
and xact's timeout function (sess_5gc_timeout for example)
eventually refers to this already freed session.

This fix prevents duplicate deletes from occurring by checking to see
if the session context has already been deleted when the timeout occurs.

Additionally, it moves session deletions out of timer callbacks into
state machine by reselect_upf().

Due to the way 'ogs_timer_mgr_expire' calls timer callbacks,
one must not stop or expire timers from within a timer callback.

And now one must not remove sessions from within a timer callback.
This commit is contained in:
Sukchan Lee 2024-03-24 09:46:25 +09:00
parent a667525041
commit cf4ad1f248
12 changed files with 60 additions and 30 deletions

View File

@ -199,6 +199,8 @@ void ogs_timer_mgr_expire(ogs_timer_mgr_t *manager)
ogs_list_add(&list, &this->lnode);
}
/* You should not perform a delete on a timer using ogs_timer_delete()
* in a callback function this->cb(). */
ogs_list_for_each(&list, lnode) {
this = ogs_rb_entry(lnode, ogs_timer_t, lnode);
ogs_timer_stop(this);

View File

@ -870,6 +870,11 @@ ogs_pfcp_node_t *ogs_pfcp_node_new(ogs_sockaddr_t *sa_list)
return node;
}
ogs_pfcp_node_t *ogs_pfcp_node_cycle(ogs_pfcp_node_t *node)
{
return ogs_pool_cycle(&ogs_pfcp_node_pool, node);
}
void ogs_pfcp_node_free(ogs_pfcp_node_t *node)
{
ogs_assert(node);

View File

@ -393,6 +393,7 @@ ogs_pfcp_context_t *ogs_pfcp_self(void);
int ogs_pfcp_context_parse_config(const char *local, const char *remote);
ogs_pfcp_node_t *ogs_pfcp_node_new(ogs_sockaddr_t *sa_list);
ogs_pfcp_node_t *ogs_pfcp_node_cycle(ogs_pfcp_node_t *node);
void ogs_pfcp_node_free(ogs_pfcp_node_t *node);
ogs_pfcp_node_t *ogs_pfcp_node_add(

View File

@ -161,11 +161,6 @@ static ogs_pfcp_xact_t *ogs_pfcp_xact_remote_create(
return xact;
}
ogs_pfcp_xact_t *ogs_pfcp_xact_cycle(ogs_pfcp_xact_t *xact)
{
return ogs_pool_cycle(&pool, xact);
}
void ogs_pfcp_xact_delete_all(ogs_pfcp_node_t *node)
{
ogs_pfcp_xact_t *xact = NULL, *next_xact = NULL;

View File

@ -133,7 +133,6 @@ void ogs_pfcp_xact_final(void);
ogs_pfcp_xact_t *ogs_pfcp_xact_local_create(ogs_pfcp_node_t *node,
void (*cb)(ogs_pfcp_xact_t *xact, void *data), void *data);
ogs_pfcp_xact_t *ogs_pfcp_xact_cycle(ogs_pfcp_xact_t *xact);
void ogs_pfcp_xact_delete_all(ogs_pfcp_node_t *node);
int ogs_pfcp_xact_update_tx(ogs_pfcp_xact_t *xact,

View File

@ -356,10 +356,6 @@ void sgwc_pfcp_state_associated(ogs_fsm_t *s, sgwc_event_t *e)
}
break;
case SGWC_EVT_SXA_NO_HEARTBEAT:
/* 'node' context was removed in ogs_pfcp_xact_delete(xact)
* So, we should not use PFCP node here */
ogs_warn("No Heartbeat from SGW-U [%s]:%d",
OGS_ADDR(addr, buf), OGS_PORT(addr));
OGS_FSM_TRAN(s, sgwc_pfcp_state_will_associate);

View File

@ -318,10 +318,6 @@ void sgwu_pfcp_state_associated(ogs_fsm_t *s, sgwu_event_t *e)
}
break;
case SGWU_EVT_SXA_NO_HEARTBEAT:
/* 'node' context was removed in ogs_pfcp_xact_delete(xact)
* So, we should not use PFCP node here */
ogs_warn("No Heartbeat from SGW-C [%s]:%d",
OGS_ADDR(addr, buf), OGS_PORT(addr));
OGS_FSM_TRAN(s, sgwu_pfcp_state_will_associate);

View File

@ -222,8 +222,11 @@ static void sess_5gc_timeout(ogs_pfcp_xact_t *xact, void *data)
ogs_assert(xact);
ogs_assert(data);
sess = data;
ogs_assert(sess);
sess = smf_sess_cycle(data);
if (!sess) {
ogs_warn("Session has already been removed");
return;
}
smf_ue = sess->smf_ue;
ogs_assert(smf_ue);
@ -288,7 +291,21 @@ static void sess_5gc_timeout(ogs_pfcp_xact_t *xact, void *data)
ogs_free(strerror);
smf_sess_remove(sess);
/* We mustn't remove sess here. Removing a session may delete PFCP xact
timers and we must not delete any timers from within a timer
callback. Instead, we shall emit a new event to trigger session
removal from pfcp-sm state machine. */
e = smf_event_new(SMF_EVT_N4_TIMER);
ogs_assert(e);
e->sess = sess;
e->h.timer_id = SMF_TIMER_PFCP_NO_DELETION_RESPONSE;
e->pfcp_node = sess->pfcp_node;
rv = ogs_queue_push(ogs_app()->queue, e);
if (rv != OGS_OK) {
ogs_error("ogs_queue_push() failed:%d", (int)rv);
ogs_event_free(e);
}
break;
default:
ogs_error("Not implemented [type:%d]", type);

View File

@ -117,14 +117,21 @@ void smf_pfcp_state_will_associate(ogs_fsm_t *s, smf_event_t *e)
ogs_pfcp_cp_send_association_setup_request(node, node_timeout);
break;
case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE:
sess = e->sess;
sess = smf_sess_cycle(sess);
sess = smf_sess_cycle(e->sess);
if (!sess) {
ogs_warn("Session has already been removed");
break;
}
ogs_fsm_dispatch(&sess->sm, e);
break;
case SMF_TIMER_PFCP_NO_DELETION_RESPONSE:
sess = smf_sess_cycle(e->sess);
if (!sess) {
ogs_warn("Session has already been removed");
break;
}
SMF_SESS_CLEAR(sess);
break;
default:
ogs_error("Unknown timer[%s:%d]",
smf_timer_get_name(e->h.timer_id), e->h.timer_id);
@ -386,14 +393,21 @@ void smf_pfcp_state_associated(ogs_fsm_t *s, smf_event_t *e)
ogs_pfcp_send_heartbeat_request(node, node_timeout));
break;
case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE:
sess = e->sess;
sess = smf_sess_cycle(sess);
sess = smf_sess_cycle(e->sess);
if (!sess) {
ogs_warn("Session has already been removed");
break;
}
ogs_fsm_dispatch(&sess->sm, e);
break;
case SMF_TIMER_PFCP_NO_DELETION_RESPONSE:
sess = smf_sess_cycle(e->sess);
if (!sess) {
ogs_warn("Session has already been removed");
break;
}
SMF_SESS_CLEAR(sess);
break;
default:
ogs_error("Unknown timer[%s:%d]",
smf_timer_get_name(e->h.timer_id), e->h.timer_id);
@ -401,12 +415,19 @@ void smf_pfcp_state_associated(ogs_fsm_t *s, smf_event_t *e)
}
break;
case SMF_EVT_N4_NO_HEARTBEAT:
/* 'node' context was removed in ogs_pfcp_xact_delete(xact)
* So, we should not use PFCP node here */
ogs_warn("No Heartbeat from UPF [%s]:%d",
OGS_ADDR(addr, buf), OGS_PORT(addr));
/*
* reselect_upf() should not be executed on node_timeout
* because the timer cannot be deleted in the timer expiration function.
*
* Note that reselct_upf contains SMF_SESS_CLEAR.
*/
node = e->pfcp_node;
ogs_assert(node);
reselect_upf(node);
OGS_FSM_TRAN(s, smf_pfcp_state_will_associate);
break;
default:
@ -550,7 +571,6 @@ static void node_timeout(ogs_pfcp_xact_t *xact, void *data)
switch (type) {
case OGS_PFCP_HEARTBEAT_REQUEST_TYPE:
ogs_assert(data);
reselect_upf(data);
e = smf_event_new(SMF_EVT_N4_NO_HEARTBEAT);
e->pfcp_node = data;

View File

@ -42,6 +42,8 @@ const char *smf_timer_get_name(int timer_id)
return "SMF_TIMER_PFCP_NO_HEARTBEAT";
case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE:
return "SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE";
case SMF_TIMER_PFCP_NO_DELETION_RESPONSE:
return "SMF_TIMER_PFCP_NO_DELETION_RESPONSE";
default:
break;
}

View File

@ -33,6 +33,7 @@ typedef enum {
SMF_TIMER_PFCP_ASSOCIATION,
SMF_TIMER_PFCP_NO_HEARTBEAT,
SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE,
SMF_TIMER_PFCP_NO_DELETION_RESPONSE,
MAX_NUM_OF_SMF_TIMER,

View File

@ -322,10 +322,6 @@ void upf_pfcp_state_associated(ogs_fsm_t *s, upf_event_t *e)
}
break;
case UPF_EVT_N4_NO_HEARTBEAT:
/* 'node' context was removed in ogs_pfcp_xact_delete(xact)
* So, we should not use PFCP node here */
ogs_warn("No Heartbeat from SMF [%s]:%d",
OGS_ADDR(addr, buf), OGS_PORT(addr));
OGS_FSM_TRAN(s, upf_pfcp_state_will_associate);