From cf4ad1f2484ce8229aa0c468af77d7474736e4ec Mon Sep 17 00:00:00 2001 From: Sukchan Lee Date: Sun, 24 Mar 2024 09:46:25 +0900 Subject: [PATCH] [PFCP] Session removal while waiting PFCP reply (#3040) 'node_timeout' and some other functions can remove a smf_sess_t while that session is still waiting for a PFCP reply and has an active PFCP xact. In this case, xact->data points to the deleted session and xact's timeout function (sess_5gc_timeout for example) eventually refers to this already freed session. This fix prevents duplicate deletes from occurring by checking to see if the session context has already been deleted when the timeout occurs. Additionally, it moves session deletions out of timer callbacks into state machine by reselect_upf(). Due to the way 'ogs_timer_mgr_expire' calls timer callbacks, one must not stop or expire timers from within a timer callback. And now one must not remove sessions from within a timer callback. --- lib/core/ogs-timer.c | 2 ++ lib/pfcp/context.c | 5 +++++ lib/pfcp/context.h | 1 + lib/pfcp/xact.c | 5 ----- lib/pfcp/xact.h | 1 - src/sgwc/pfcp-sm.c | 4 ---- src/sgwu/pfcp-sm.c | 4 ---- src/smf/pfcp-path.c | 23 ++++++++++++++++++++--- src/smf/pfcp-sm.c | 38 +++++++++++++++++++++++++++++--------- src/smf/timer.c | 2 ++ src/smf/timer.h | 1 + src/upf/pfcp-sm.c | 4 ---- 12 files changed, 60 insertions(+), 30 deletions(-) diff --git a/lib/core/ogs-timer.c b/lib/core/ogs-timer.c index cc14a7675..c9dfc47e4 100644 --- a/lib/core/ogs-timer.c +++ b/lib/core/ogs-timer.c @@ -199,6 +199,8 @@ void ogs_timer_mgr_expire(ogs_timer_mgr_t *manager) ogs_list_add(&list, &this->lnode); } + /* You should not perform a delete on a timer using ogs_timer_delete() + * in a callback function this->cb(). */ ogs_list_for_each(&list, lnode) { this = ogs_rb_entry(lnode, ogs_timer_t, lnode); ogs_timer_stop(this); diff --git a/lib/pfcp/context.c b/lib/pfcp/context.c index 352e04df2..36860553d 100644 --- a/lib/pfcp/context.c +++ b/lib/pfcp/context.c @@ -870,6 +870,11 @@ ogs_pfcp_node_t *ogs_pfcp_node_new(ogs_sockaddr_t *sa_list) return node; } +ogs_pfcp_node_t *ogs_pfcp_node_cycle(ogs_pfcp_node_t *node) +{ + return ogs_pool_cycle(&ogs_pfcp_node_pool, node); +} + void ogs_pfcp_node_free(ogs_pfcp_node_t *node) { ogs_assert(node); diff --git a/lib/pfcp/context.h b/lib/pfcp/context.h index 679fc20ff..e77cd4869 100644 --- a/lib/pfcp/context.h +++ b/lib/pfcp/context.h @@ -393,6 +393,7 @@ ogs_pfcp_context_t *ogs_pfcp_self(void); int ogs_pfcp_context_parse_config(const char *local, const char *remote); ogs_pfcp_node_t *ogs_pfcp_node_new(ogs_sockaddr_t *sa_list); +ogs_pfcp_node_t *ogs_pfcp_node_cycle(ogs_pfcp_node_t *node); void ogs_pfcp_node_free(ogs_pfcp_node_t *node); ogs_pfcp_node_t *ogs_pfcp_node_add( diff --git a/lib/pfcp/xact.c b/lib/pfcp/xact.c index f469f962b..53a947df5 100644 --- a/lib/pfcp/xact.c +++ b/lib/pfcp/xact.c @@ -161,11 +161,6 @@ static ogs_pfcp_xact_t *ogs_pfcp_xact_remote_create( return xact; } -ogs_pfcp_xact_t *ogs_pfcp_xact_cycle(ogs_pfcp_xact_t *xact) -{ - return ogs_pool_cycle(&pool, xact); -} - void ogs_pfcp_xact_delete_all(ogs_pfcp_node_t *node) { ogs_pfcp_xact_t *xact = NULL, *next_xact = NULL; diff --git a/lib/pfcp/xact.h b/lib/pfcp/xact.h index 580b80b46..0229883c3 100644 --- a/lib/pfcp/xact.h +++ b/lib/pfcp/xact.h @@ -133,7 +133,6 @@ void ogs_pfcp_xact_final(void); ogs_pfcp_xact_t *ogs_pfcp_xact_local_create(ogs_pfcp_node_t *node, void (*cb)(ogs_pfcp_xact_t *xact, void *data), void *data); -ogs_pfcp_xact_t *ogs_pfcp_xact_cycle(ogs_pfcp_xact_t *xact); void ogs_pfcp_xact_delete_all(ogs_pfcp_node_t *node); int ogs_pfcp_xact_update_tx(ogs_pfcp_xact_t *xact, diff --git a/src/sgwc/pfcp-sm.c b/src/sgwc/pfcp-sm.c index 06d47e2cb..946d1f9a7 100644 --- a/src/sgwc/pfcp-sm.c +++ b/src/sgwc/pfcp-sm.c @@ -356,10 +356,6 @@ void sgwc_pfcp_state_associated(ogs_fsm_t *s, sgwc_event_t *e) } break; case SGWC_EVT_SXA_NO_HEARTBEAT: - - /* 'node' context was removed in ogs_pfcp_xact_delete(xact) - * So, we should not use PFCP node here */ - ogs_warn("No Heartbeat from SGW-U [%s]:%d", OGS_ADDR(addr, buf), OGS_PORT(addr)); OGS_FSM_TRAN(s, sgwc_pfcp_state_will_associate); diff --git a/src/sgwu/pfcp-sm.c b/src/sgwu/pfcp-sm.c index b14772baf..2cf5512fe 100644 --- a/src/sgwu/pfcp-sm.c +++ b/src/sgwu/pfcp-sm.c @@ -318,10 +318,6 @@ void sgwu_pfcp_state_associated(ogs_fsm_t *s, sgwu_event_t *e) } break; case SGWU_EVT_SXA_NO_HEARTBEAT: - - /* 'node' context was removed in ogs_pfcp_xact_delete(xact) - * So, we should not use PFCP node here */ - ogs_warn("No Heartbeat from SGW-C [%s]:%d", OGS_ADDR(addr, buf), OGS_PORT(addr)); OGS_FSM_TRAN(s, sgwu_pfcp_state_will_associate); diff --git a/src/smf/pfcp-path.c b/src/smf/pfcp-path.c index 700fceae6..37fdb6916 100644 --- a/src/smf/pfcp-path.c +++ b/src/smf/pfcp-path.c @@ -222,8 +222,11 @@ static void sess_5gc_timeout(ogs_pfcp_xact_t *xact, void *data) ogs_assert(xact); ogs_assert(data); - sess = data; - ogs_assert(sess); + sess = smf_sess_cycle(data); + if (!sess) { + ogs_warn("Session has already been removed"); + return; + } smf_ue = sess->smf_ue; ogs_assert(smf_ue); @@ -288,7 +291,21 @@ static void sess_5gc_timeout(ogs_pfcp_xact_t *xact, void *data) ogs_free(strerror); - smf_sess_remove(sess); + /* We mustn't remove sess here. Removing a session may delete PFCP xact + timers and we must not delete any timers from within a timer + callback. Instead, we shall emit a new event to trigger session + removal from pfcp-sm state machine. */ + e = smf_event_new(SMF_EVT_N4_TIMER); + ogs_assert(e); + e->sess = sess; + e->h.timer_id = SMF_TIMER_PFCP_NO_DELETION_RESPONSE; + e->pfcp_node = sess->pfcp_node; + + rv = ogs_queue_push(ogs_app()->queue, e); + if (rv != OGS_OK) { + ogs_error("ogs_queue_push() failed:%d", (int)rv); + ogs_event_free(e); + } break; default: ogs_error("Not implemented [type:%d]", type); diff --git a/src/smf/pfcp-sm.c b/src/smf/pfcp-sm.c index 3d6eaf022..084f985f6 100644 --- a/src/smf/pfcp-sm.c +++ b/src/smf/pfcp-sm.c @@ -117,14 +117,21 @@ void smf_pfcp_state_will_associate(ogs_fsm_t *s, smf_event_t *e) ogs_pfcp_cp_send_association_setup_request(node, node_timeout); break; case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE: - sess = e->sess; - sess = smf_sess_cycle(sess); + sess = smf_sess_cycle(e->sess); if (!sess) { ogs_warn("Session has already been removed"); break; } ogs_fsm_dispatch(&sess->sm, e); break; + case SMF_TIMER_PFCP_NO_DELETION_RESPONSE: + sess = smf_sess_cycle(e->sess); + if (!sess) { + ogs_warn("Session has already been removed"); + break; + } + SMF_SESS_CLEAR(sess); + break; default: ogs_error("Unknown timer[%s:%d]", smf_timer_get_name(e->h.timer_id), e->h.timer_id); @@ -386,14 +393,21 @@ void smf_pfcp_state_associated(ogs_fsm_t *s, smf_event_t *e) ogs_pfcp_send_heartbeat_request(node, node_timeout)); break; case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE: - sess = e->sess; - sess = smf_sess_cycle(sess); + sess = smf_sess_cycle(e->sess); if (!sess) { ogs_warn("Session has already been removed"); break; } ogs_fsm_dispatch(&sess->sm, e); break; + case SMF_TIMER_PFCP_NO_DELETION_RESPONSE: + sess = smf_sess_cycle(e->sess); + if (!sess) { + ogs_warn("Session has already been removed"); + break; + } + SMF_SESS_CLEAR(sess); + break; default: ogs_error("Unknown timer[%s:%d]", smf_timer_get_name(e->h.timer_id), e->h.timer_id); @@ -401,12 +415,19 @@ void smf_pfcp_state_associated(ogs_fsm_t *s, smf_event_t *e) } break; case SMF_EVT_N4_NO_HEARTBEAT: - - /* 'node' context was removed in ogs_pfcp_xact_delete(xact) - * So, we should not use PFCP node here */ - ogs_warn("No Heartbeat from UPF [%s]:%d", OGS_ADDR(addr, buf), OGS_PORT(addr)); + + /* + * reselect_upf() should not be executed on node_timeout + * because the timer cannot be deleted in the timer expiration function. + * + * Note that reselct_upf contains SMF_SESS_CLEAR. + */ + node = e->pfcp_node; + ogs_assert(node); + reselect_upf(node); + OGS_FSM_TRAN(s, smf_pfcp_state_will_associate); break; default: @@ -550,7 +571,6 @@ static void node_timeout(ogs_pfcp_xact_t *xact, void *data) switch (type) { case OGS_PFCP_HEARTBEAT_REQUEST_TYPE: ogs_assert(data); - reselect_upf(data); e = smf_event_new(SMF_EVT_N4_NO_HEARTBEAT); e->pfcp_node = data; diff --git a/src/smf/timer.c b/src/smf/timer.c index fa2882d75..0ee2f3ff5 100644 --- a/src/smf/timer.c +++ b/src/smf/timer.c @@ -42,6 +42,8 @@ const char *smf_timer_get_name(int timer_id) return "SMF_TIMER_PFCP_NO_HEARTBEAT"; case SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE: return "SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE"; + case SMF_TIMER_PFCP_NO_DELETION_RESPONSE: + return "SMF_TIMER_PFCP_NO_DELETION_RESPONSE"; default: break; } diff --git a/src/smf/timer.h b/src/smf/timer.h index 9a30701c7..0a3255223 100644 --- a/src/smf/timer.h +++ b/src/smf/timer.h @@ -33,6 +33,7 @@ typedef enum { SMF_TIMER_PFCP_ASSOCIATION, SMF_TIMER_PFCP_NO_HEARTBEAT, SMF_TIMER_PFCP_NO_ESTABLISHMENT_RESPONSE, + SMF_TIMER_PFCP_NO_DELETION_RESPONSE, MAX_NUM_OF_SMF_TIMER, diff --git a/src/upf/pfcp-sm.c b/src/upf/pfcp-sm.c index ba8d1ed7d..f7f788e30 100644 --- a/src/upf/pfcp-sm.c +++ b/src/upf/pfcp-sm.c @@ -322,10 +322,6 @@ void upf_pfcp_state_associated(ogs_fsm_t *s, upf_event_t *e) } break; case UPF_EVT_N4_NO_HEARTBEAT: - - /* 'node' context was removed in ogs_pfcp_xact_delete(xact) - * So, we should not use PFCP node here */ - ogs_warn("No Heartbeat from SMF [%s]:%d", OGS_ADDR(addr, buf), OGS_PORT(addr)); OGS_FSM_TRAN(s, upf_pfcp_state_will_associate);