From 97f8baae647b3f140313d24c2be3d6dd0dc80a03 Mon Sep 17 00:00:00 2001
From: Sawsane
Date: Thu, 8 Oct 2020 17:23:35 +0200
Subject: [PATCH 1/2] Added Fast-Forward param for config

Add a fastForward field to FTIT_configuration, read from
"Basic:fast_forward" (default 1). FTI_UpdateIterTime receives the
configuration and divides the computed checkpoint-test interval by this
value, rounding up. FTI_TestConfig rejects values outside 1..10.

---
 include/fti-defs.h   | 3 +++
 include/fti-intern.h | 1 +
 src/api.c            | 2 +-
 src/checkpoint.c     | 3 ++-
 src/checkpoint.h     | 2 +-
 src/conf.c           | 8 ++++++++
 6 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 include/fti-defs.h

diff --git a/include/fti-defs.h b/include/fti-defs.h
new file mode 100644
index 000000000..2dcdd15ce
--- /dev/null
+++ b/include/fti-defs.h
@@ -0,0 +1,3 @@
+#ifndef __FTI_DEFS__
+#define __FTI_DEFS__
+#endif // __FTI_DEFS__
diff --git a/include/fti-intern.h b/include/fti-intern.h
index 236af9015..bf9c5284c 100644
--- a/include/fti-intern.h
+++ b/include/fti-intern.h
@@ -525,6 +525,7 @@ extern "C" {
         size_t cHostBufSize; /**< Host buffer size for GPU data.*/
         char suffix[4]; /** Suffix of the checkpoint files */
         FTIT_dcpConfigurationPosix dcpInfoPosix; /**< dCP info for posix I/O */
+        int fastForward; /**< Fast forward rate for ckpt intervals */
     } FTIT_configuration;
 
     /** @typedef FTIT_topology
diff --git a/src/api.c b/src/api.c
index cd981608b..b19ebcafa 100644
--- a/src/api.c
+++ b/src/api.c
@@ -2637,7 +2637,7 @@ int FTI_Snapshot() {
         }
     } else {  // If it is a checkpoint test
         res = FTI_SCES;
-        FTI_UpdateIterTime(&FTI_Exec);
+        FTI_UpdateIterTime(&FTI_Exec, &FTI_Conf);
         if (FTI_Exec.ckptNext == FTI_Exec.ckptIcnt) {
             // If it is time to check for possible ckpt. (every minute)
             FTI_Print("Checking if it is time to checkpoint.", FTI_DBUG);
diff --git a/src/checkpoint.c b/src/checkpoint.c
index 2c34c7955..75dd2a3e4 100644
--- a/src/checkpoint.c
+++ b/src/checkpoint.c
@@ -59,7 +59,7 @@
 
 
 
-int FTI_UpdateIterTime(FTIT_execution* FTI_Exec) {
+int FTI_UpdateIterTime(FTIT_execution* FTI_Exec, FTIT_configuration* FTI_Conf) {
     int nbProcs, res;
     char str[FTI_BUFS];
     double last = FTI_Exec->iterTime;
@@ -79,6 +79,7 @@ int FTI_UpdateIterTime(FTIT_execution* FTI_Exec) {
                 FTI_Exec->ckptIntv = 1;
             } else {
                 FTI_Exec->ckptIntv = rint(60.0 / FTI_Exec->globMeanIter);
+                FTI_Exec->ckptIntv = ceil((double)FTI_Exec->ckptIntv/FTI_Conf->fastForward);
             }
             res = FTI_Exec->ckptLast + FTI_Exec->ckptIntv;
             if (FTI_Exec->ckptLast == 0) {
diff --git a/src/checkpoint.h b/src/checkpoint.h
index fd050269a..4b1222bbc 100644
--- a/src/checkpoint.h
+++ b/src/checkpoint.h
@@ -10,7 +10,7 @@
 
 #include "interface.h"
 
-int FTI_UpdateIterTime(FTIT_execution* FTI_Exec);
+int FTI_UpdateIterTime(FTIT_execution* FTI_Exec, FTIT_configuration* FTI_Conf);
 int FTI_WriteCkpt(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
  FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt,
  FTIT_keymap* FTI_Data);
diff --git a/src/conf.c b/src/conf.c
index ab1858172..9d5ada23d 100644
--- a/src/conf.c
+++ b/src/conf.c
@@ -142,6 +142,8 @@ int FTI_ReadConf(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
     // 0 -> disabled
     FTI_Ckpt[4].ckptDcpIntv = (int)iniparser_getint(ini, "Basic:dcp_l4", 0);
     FTI_Ckpt[4].ckptIntv = (int)iniparser_getint(ini, "Basic:ckpt_l4", -1);
+    // Fast Forward flag
+    FTI_Conf->fastForward = (int)iniparser_getint(ini, "Basic:fast_forward", 1);
     FTI_Ckpt[1].isInline = (int)1;
     FTI_Ckpt[2].isInline = (int)iniparser_getint(ini, "Basic:inline_l2", 1);
     FTI_Ckpt[3].isInline = (int)iniparser_getint(ini, "Basic:inline_l3", 1);
@@ -387,6 +389,12 @@ int FTI_TestConfig(FTIT_configuration* FTI_Conf, FTIT_topology* FTI_Topo,
         return FTI_NSCS;
     }
 
+    //fast forward
+    if (FTI_Conf->fastForward < 1 || FTI_Conf->fastForward > 10) {
+        FTI_Print("Fast Forward should be between 1 and 10, inclusive", FTI_WARN);
+        return FTI_NSCS;
+    }
+
     // check dCP settings only if dCP is enabled
     if (FTI_Conf->dcpPosix) {
         if (FTI_Conf->dcpInfoPosix.StackSize > MAX_STACK_SIZE) {
From d6f0fac36e5d92f9ade0da420f60af4f48cb3672 Mon Sep 17 00:00:00 2001
From: Sawsane
Date: Thu, 8 Oct 2020 18:28:28 +0200
Subject: [PATCH 2/2] updated fast_forward feature

Move fastForward from FTIT_configuration to FTIT_execution and read it
from "Advanced:fast_forward" (default 1). FTI_UpdateIterTime goes back to
taking only FTI_Exec. FTI_TestConfig still rejects values outside 1..10
and additionally prints a warning when the value is greater than 1.

---
Review notes (this section is ignored by git-am):
- The "Fast Forward flag is set." branch in FTI_TestConfig only warns; it
  does not return FTI_NSCS, so accepted values 2..10 pass the check.
- The post-image blob id on the src/conf.c index line was not recomputed
  after that change.

 include/fti-intern.h |  3 ++-
 src/api.c            |  2 +-
 src/checkpoint.c     |  4 ++--
 src/checkpoint.h     |  2 +-
 src/conf.c           | 10 ++++++++--
 5 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/fti-intern.h b/include/fti-intern.h
index bf9c5284c..216a68318 100644
--- a/include/fti-intern.h
+++ b/include/fti-intern.h
@@ -525,7 +525,7 @@ extern "C" {
         size_t cHostBufSize; /**< Host buffer size for GPU data.*/
         char suffix[4]; /** Suffix of the checkpoint files */
         FTIT_dcpConfigurationPosix dcpInfoPosix; /**< dCP info for posix I/O */
-        int fastForward; /**< Fast forward rate for ckpt intervals */
+        // int fastForward; /**< Fast forward rate for ckpt intervals */
     } FTIT_configuration;
 
     /** @typedef FTIT_topology
@@ -700,6 +700,7 @@ extern "C" {
         MPI_Comm groupComm; /**< Group communicator. */
         MPI_Comm nodeComm;
         FTIT_dcpExecutionPosix dcpInfoPosix; /**< dCP info for posix I/O */
+        int fastForward; /**< Fast forward rate for ckpt intervals */
         /** A function pointer pointing to the function which actually the
          * checkpoint file. Noticeably We need 2 function pointers, One for the
          * Level 4 checkpoint And one for the remaining cases */
diff --git a/src/api.c b/src/api.c
index b19ebcafa..cd981608b 100644
--- a/src/api.c
+++ b/src/api.c
@@ -2637,7 +2637,7 @@ int FTI_Snapshot() {
         }
     } else {  // If it is a checkpoint test
         res = FTI_SCES;
-        FTI_UpdateIterTime(&FTI_Exec, &FTI_Conf);
+        FTI_UpdateIterTime(&FTI_Exec);
         if (FTI_Exec.ckptNext == FTI_Exec.ckptIcnt) {
             // If it is time to check for possible ckpt. (every minute)
             FTI_Print("Checking if it is time to checkpoint.", FTI_DBUG);
diff --git a/src/checkpoint.c b/src/checkpoint.c
index 75dd2a3e4..631abc98b 100644
--- a/src/checkpoint.c
+++ b/src/checkpoint.c
@@ -59,7 +59,7 @@
 
 
 
-int FTI_UpdateIterTime(FTIT_execution* FTI_Exec, FTIT_configuration* FTI_Conf) {
+int FTI_UpdateIterTime(FTIT_execution* FTI_Exec) {
     int nbProcs, res;
     char str[FTI_BUFS];
     double last = FTI_Exec->iterTime;
@@ -79,7 +79,7 @@ int FTI_UpdateIterTime(FTIT_execution* FTI_Exec, FTIT_configuration* FTI_Conf) {
                 FTI_Exec->ckptIntv = 1;
             } else {
                 FTI_Exec->ckptIntv = rint(60.0 / FTI_Exec->globMeanIter);
-                FTI_Exec->ckptIntv = ceil((double)FTI_Exec->ckptIntv/FTI_Conf->fastForward);
+                FTI_Exec->ckptIntv = ceil((double)FTI_Exec->ckptIntv/FTI_Exec->fastForward);
             }
             res = FTI_Exec->ckptLast + FTI_Exec->ckptIntv;
             if (FTI_Exec->ckptLast == 0) {
diff --git a/src/checkpoint.h b/src/checkpoint.h
index 4b1222bbc..fd050269a 100644
--- a/src/checkpoint.h
+++ b/src/checkpoint.h
@@ -10,7 +10,7 @@
 
 #include "interface.h"
 
-int FTI_UpdateIterTime(FTIT_execution* FTI_Exec, FTIT_configuration* FTI_Conf);
+int FTI_UpdateIterTime(FTIT_execution* FTI_Exec);
 int FTI_WriteCkpt(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
  FTIT_topology* FTI_Topo, FTIT_checkpoint* FTI_Ckpt,
  FTIT_keymap* FTI_Data);
diff --git a/src/conf.c b/src/conf.c
index 9d5ada23d..8bdd144f8 100644
--- a/src/conf.c
+++ b/src/conf.c
@@ -143,7 +143,8 @@ int FTI_ReadConf(FTIT_configuration* FTI_Conf, FTIT_execution* FTI_Exec,
     FTI_Ckpt[4].ckptDcpIntv = (int)iniparser_getint(ini, "Basic:dcp_l4", 0);
     FTI_Ckpt[4].ckptIntv = (int)iniparser_getint(ini, "Basic:ckpt_l4", -1);
     // Fast Forward flag
-    FTI_Conf->fastForward = (int)iniparser_getint(ini, "Basic:fast_forward", 1);
+    // FTI_Conf->fastForward = (int)iniparser_getint(ini, "Basic:fast_forward", 1);
+    FTI_Exec->fastForward = (int)iniparser_getint(ini, "Advanced:fast_forward", 1);
     FTI_Ckpt[1].isInline = (int)1;
     FTI_Ckpt[2].isInline = (int)iniparser_getint(ini, "Basic:inline_l2", 1);
     FTI_Ckpt[3].isInline = (int)iniparser_getint(ini, "Basic:inline_l3", 1);
@@ -390,11 +391,16 @@ int FTI_TestConfig(FTIT_configuration* FTI_Conf, FTIT_topology* FTI_Topo,
     }
 
     //fast forward
-    if (FTI_Conf->fastForward < 1 || FTI_Conf->fastForward > 10) {
+    if (FTI_Exec->fastForward < 1 || FTI_Exec->fastForward > 10) {
         FTI_Print("Fast Forward should be between 1 and 10, inclusive", FTI_WARN);
         return FTI_NSCS;
     }
 
+    // Informational only: the value already passed the 1..10 range check above.
+    if (FTI_Exec->fastForward > 1) {
+        FTI_Print("Fast Forward flag is set.", FTI_WARN);
+    }
+
     // check dCP settings only if dCP is enabled
     if (FTI_Conf->dcpPosix) {
         if (FTI_Conf->dcpInfoPosix.StackSize > MAX_STACK_SIZE) {