Skip to content
This repository has been archived by the owner on May 3, 2024. It is now read-only.

Commit

Permalink
fdmi: fix source dock fom wakeup race
Browse files Browse the repository at this point in the history
Existing code:

    * fdmi source dock from (fdmi_sd_fom_tick()) waits in
      FDMI_SRC_DOCK_FOM_PHASE_WAIT after fdmi record list was exhausted;

    * fdmi source deck timer fom (fdmi_sd_timer_fom_tick()) loops in
      FDMI_SRC_DOCK_TIMER_FOM_PHASE_WAIT->FDMI_SRC_DOCK_TIMER_FOM_PHASE_ALARMED
      phases, waking the source dock fom periodically.

This wakeup is done via an ast and there is no synchronisation to guarantee that
ast execution completes before the ast is posted again. This leads to the
spurious UT failures:

Thread 5 (Thread 0x7f08a07f8700 (LWP 1686367)):
    m0_debugger_invoke () at lib/user_space/uassert.c:153
    m0_arch_panic (c=0x7f08ae448280 <signal_panic>, ap=0x7f08a07f6eb8) at lib/user_space/uassert.c:129
    m0_panic (ctx=0x7f08ae448280 <signal_panic>) at lib/assert.c:52
    sigsegv (sig=11) at lib/user_space/ucookie.c:52
    <signal handler called>
    ?? ()
    m0_sm_asts_run (grp=0x16a9010) at sm/sm.c:175
    loc_handler_thread (th=0x16a74b0) at fop/fom.c:925
(gdb) up 6
175				M0_ADDB2_HIST(grp->s_addb2->ga_forq,
(gdb) p *ast
$6 = {sa_cb = 0x7f08ade110ca <wakeup_iff_waiting>, sa_datum = 0x7f08ae5a2f18 <fdmi_global_src_dock+152>, sa_next = 0x7f08aee4d620 <eoq>, sa_mach = 0x0}
(gdb) disassemble m0_sm_asts_run
Dump of assembler code for function m0_sm_asts_run:
   0x00007f08adf9d6d3 <+0>:	push   %rbp
   0x00007f08adf9d6d4 <+1>:	mov    %rsp,%rbp
   ...
   0x00007f08adf9d865 <+402>:	mov    (%rax),%rax
   0x00007f08adf9d868 <+405>:	mov    -0x8(%rbp),%rcx
   0x00007f08adf9d86c <+409>:	mov    -0x58(%rbp),%rdx
   0x00007f08adf9d870 <+413>:	mov    %rcx,%rsi
   0x00007f08adf9d873 <+416>:	mov    %rdx,%rdi
   0x00007f08adf9d876 <+419>:	callq  *%rax
=> 0x00007f08adf9d878 <+421>:	callq  0x7f08adce6210 <m0_time_now@plt>
   0x00007f08adf9d87d <+426>:	mov    %rax,-0x30(%rbp)
   ...
(gdb) info registers rax
rax            0x0                 0

That is, ast->sa_cb was re-assigned from NULL to wakeup_iff_waiting, right at
the moment of crash.

Fix: instead of using ast, directly wake up the from by introducing a special
channel fdmi_sd_fom::fsf_wake.

Signed-off-by: Nikita Danilov <nikita.danilov@seagate.com>
  • Loading branch information
Nikita Danilov authored and rkothiya committed Jun 17, 2022
1 parent f2ea773 commit c254605
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 47 deletions.
57 changes: 12 additions & 45 deletions fdmi/source_dock_fom.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ m0_fdmi__src_dock_fom_start(struct m0_fdmi_src_dock *src_dock,
M0_PRE(!src_dock->fsdc_filters_defined);

m0_semaphore_init(&sd_fom->fsf_shutdown, 0);
m0_mutex_init(&sd_fom->fsf_chan_guard);
m0_chan_init(&sd_fom->fsf_wake, &sd_fom->fsf_chan_guard);

rpc_mach = m0_reqh_rpc_mach_tlist_head(&reqh->rh_rpc_machines);
M0_SET0(&sd_fom->fsf_conn_pool);
Expand Down Expand Up @@ -325,61 +327,19 @@ m0_fdmi__src_dock_fom_start(struct m0_fdmi_src_dock *src_dock,
return M0_RC(0);
}

static void wakeup_iff_waiting(struct m0_sm_group *grp, struct m0_sm_ast *ast)
{
struct m0_fom *fom = ast->sa_datum;

M0_ENTRY();
if (m0_fom_is_waiting(fom))
m0_fom_ready(fom);
M0_LEAVE();
}

M0_INTERNAL void m0_fdmi__src_dock_fom_wakeup(struct fdmi_sd_fom *sd_fom)
{
struct m0_fom *fom;

M0_ENTRY("sd_fom %p", sd_fom);
M0_PRE(sd_fom != NULL);

fom = &sd_fom->fsf_fom;

/**
* FOM can be uninitialized here, because posting
* is allowed even if FDMI service is not started
* @todo Small possibility of races exist (Phase 2).
*/
if (fom == NULL || fom->fo_loc == NULL) {
M0_LEAVE("FDMI FOM is not initialized yet");
return;
}
if (sd_fom->fsf_wakeup_ast.sa_next == NULL) {
sd_fom->fsf_wakeup_ast = (struct m0_sm_ast) {
.sa_cb = wakeup_iff_waiting,
.sa_datum = fom
};
m0_sm_ast_post(&fom->fo_loc->fl_group, &sd_fom->fsf_wakeup_ast);
}
m0_chan_signal_lock(&sd_fom->fsf_wake);
M0_LEAVE();
}

static void fdmi__src_dock_timer_fom_wakeup(struct fdmi_sd_timer_fom *timer_fom)
{
struct m0_fom *fom;

M0_ENTRY("sd_timer_fom %p", timer_fom);
M0_PRE(timer_fom != NULL);

fom = &timer_fom->fstf_fom;

if (timer_fom->fstf_wakeup_ast.sa_next == NULL) {
timer_fom->fstf_wakeup_ast = (struct m0_sm_ast) {
.sa_cb = wakeup_iff_waiting,
.sa_datum = fom
};
m0_sm_ast_post(&fom->fo_loc->fl_group,
&timer_fom->fstf_wakeup_ast);
}
m0_fom_wakeup(&timer_fom->fstf_fom);
M0_LEAVE();
}

Expand All @@ -403,6 +363,8 @@ m0_fdmi__src_dock_fom_stop(struct m0_fdmi_src_dock *src_dock)
/* Wait for fom finished */
m0_semaphore_down(&src_dock->fsdc_sd_fom.fsf_shutdown);
m0_semaphore_fini(&src_dock->fsdc_sd_fom.fsf_shutdown);
m0_chan_fini_lock(&src_dock->fsdc_sd_fom.fsf_wake);
m0_mutex_fini(&src_dock->fsdc_sd_fom.fsf_chan_guard);

M0_LEAVE();
}
Expand Down Expand Up @@ -988,9 +950,14 @@ static int fdmi_sd_fom_tick(struct m0_fom *fom)
if (m0_reqh_service_state_get(rsvc) == M0_RST_STOPPING)
m0_fom_phase_set(fom,
FDMI_SRC_DOCK_FOM_PHASE_FINI);
else
else {
m0_mutex_lock(&sd_fom->fsf_chan_guard);
m0_fom_wait_on(fom, &sd_fom->fsf_wake,
&fom->fo_cb);
m0_mutex_unlock(&sd_fom->fsf_chan_guard);
m0_fom_phase_set(fom,
FDMI_SRC_DOCK_FOM_PHASE_WAIT);
}
return M0_RC(M0_FSO_WAIT);
} else {
M0_LOG(M0_DEBUG, "popped from record list id =" U128X_F,
Expand Down
4 changes: 2 additions & 2 deletions fdmi/source_dock_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ M0_TL_DECLARE(fdmi_matched_filter_list, M0_EXTERN, struct m0_conf_fdmi_filter);

/** FDMI source dock FOM */
struct fdmi_sd_fom {
uint64_t fsf_magic;
struct m0_fom fsf_fom;
struct m0_sm_ast fsf_wakeup_ast;
struct m0_chan fsf_wake;
struct m0_mutex fsf_chan_guard;
struct m0_filterc_ctx fsf_filter_ctx;
struct m0_filterc_iter fsf_filter_iter;
struct m0_fdmi_eval_ctx fsf_flt_eval;
Expand Down

0 comments on commit c254605

Please sign in to comment.