diff --git a/Makefile.am b/Makefile.am index acd4c1e2..883d2f56 100644 --- a/Makefile.am +++ b/Makefile.am @@ -34,7 +34,7 @@ SPEC = $(PACKAGE_NAME).spec TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz EXTRA_DIST = autogen.sh conf/booth.conf.example \ - script/booth-keygen script/lsb script/ocf script/service-runnable.in \ + script/booth-keygen script/lsb script/ocf script/service-runnable.in script/crmv1.in \ script/unit-test.py.in script/wireshark-dissector.lua \ test/arbtests.py test/assertions.py test/booth_path test/boothrunner.py \ test/boothtestenv.py.in test/clientenv.py test/clienttests.py test/live_test.sh \ diff --git a/README.crmv1 b/README.crmv1 new file mode 100644 index 00000000..b2708a2d --- /dev/null +++ b/README.crmv1 @@ -0,0 +1,49 @@ +CRMv1 cluster +============= + +Heartbeat is a predecessor to Pacemaker and here we make a +comeback to that kind of clustering. Why should we do that? +Firstly, Pacemaker became a behemoth, something that can brew +your coffee, but also something that is rather unwieldy and +difficult to manage. Secondly, booth is a very reliable +distributed engine and in our testing it was also used in a +typical LAN and passed all the tests with flying colours. So, +this is something for people who don't need all the bells and +whistles of Pacemaker, but still want to have HA. + +STONITH is missing, but the cluster must have at least three +members. Hence, the booth arbitrator serves as a fencing +replacement. This is as it should be: a two-node cluster is +indeed very difficult to run. The booth arbitrator can be a +smallish instance running anywhere in your network. As with +fencing, it doesn't even have to be particularly reliable, it +just has to be there when we need it. + +Setup +----- + +Just like with heartbeat, CRMv1 in booth is very simple to set up. +There is a helper program called `crmv1` which is going to handle +all the details. In the simplest setup, which is anyway the most +common, there is just one group. 
The resources are run in order, +there is no parallelism. + +Here is the usage with one realistic example: + + Usage: crmv1 {group ...|group delete } + + Examples: + + crmv1 group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + + crmv1 group delete bigdb + +There is no monitoring of resources, but it is easy to run an +external monitor of the topmost resource, i.e. the service which +is actually used by the users. If that monitor fails, then it +makes sense to move the group to the other node. + diff --git a/conf/booth.conf.example b/conf/booth.conf.example index 1d63547c..440046a5 100644 --- a/conf/booth.conf.example +++ b/conf/booth.conf.example @@ -25,3 +25,7 @@ ticket="ticketA" ticket="ticketB" expire = 600 weights = 1,2,3 + +# Use the CRMv1 feature, i.e. make the booth a cluster in its own +# right (run resources, etc.) +crmv1 diff --git a/conf/crmv1.conf.example b/conf/crmv1.conf.example new file mode 100644 index 00000000..e1082657 --- /dev/null +++ b/conf/crmv1.conf.example @@ -0,0 +1,15 @@ +# The crmv1 configuration file is "/etc/booth/crmv1/conf". You need to +# prepare the same configuration file on each arbitrator and +# each node in the cluster sites where the booth daemon can be launched. + +# The configuration consists of group definitions with parameters for resources. +# It is recommended to use the crmv1 program to prepare this +# configuration file. +# Here is one example: + +group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + diff --git a/script/crmv1 b/script/crmv1 new file mode 100755 index 00000000..b9b04163 --- /dev/null +++ b/script/crmv1 @@ -0,0 +1,116 @@ +#!/bin/bash +# +# This is crmv1, a tool to configure booth as a crmv1 style +# cluster. +# It basically manages groups. 
There is no concept of a group in +# booth, but we can get by by using the before-acquire-handler. +# Essentially, the handler is used to run programs (resource +# agents). Just how the resource agents are configured is another +# matter. +# + +CONF_DIR=/etc/booth + +cnt=0 + +usage() { + cat<&2 + +Usage: $0 {group ...|group delete } + +Examples: + + crmv1 group bigdb \\ + IPaddr ip=192.168.1.1 \\ + ocf:linbit:drbd drbd_resource=bigdisk \\ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \\ + oracle sid=bigdb + + crmv1 group delete bigdb + +EOF + exit $1 +} +fatal() { + cat<&2 + +FATAL: $* + +EOF + exit 1 +} + +add_group() { + mkdir -p $CONF_DIR/crmv1/$2 + echo "$@" >> $CONF_DIR/crmv1/conf +} + +del_group() { + rm -rf $CONF_DIR/crmv1/$1 + sed -i "/group $1/d" $CONF_DIR/crmv1/conf +} + +get_ra() { + local ra + ra=$1 + set `echo $ra | sed 's/:/ /g'` + if [ $# -eq 1 ]; then + dir=/usr/lib/ocf/resource.d/heartbeat + else + # 1:2:3 + dir=/usr/lib/ocf/resource.d/$1/$2 + ra=$3 + fi + if [ -f $dir/$ra ]; then + echo $dir/$ra + else + fatal "no resource agent $1, did you install resource-agents?" 
+ fi +} + +mk_link() { + ln -fs $2 $CONF_DIR/crmv1/$1/`printf '%02d' $3`_`basename $2` +} +ln_ra() { + ra_f=`get_ra $2` + mk_link $1 $ra_f $cnt + cnt=$((cnt+1)) +} + +# this is not really creating a group, we just parse the input to +# make sure that the group is well defined; the group is then +# created by boothd on starting; consider this a document on how +# creating a group should be implemented +new_group() { + group=$2 + shift 2 + for p; do + save_ra=$p + if echo $p | grep -qs '='; then + args="$args $p" + else + if [ "$save_ra" ]; then + ln_ra $group $save_ra + save_ra='' + continue + fi + fi + ln_ra $group $p + done + add_group group $group $@ +} + +if [ $# -lt 3 ]; then + usage 1 +fi +if [ $1 != group ]; then + usage 1 +fi +if [ $2 != delete ]; then + if grep -qs "^group $2" $CONF_DIR/crmv1/conf; then + fatal "group $2 already exists" + fi + new_group $@ +else + del_group $3 +fi diff --git a/src/booth.h b/src/booth.h index 0cd43c00..65824d3b 100644 --- a/src/booth.h +++ b/src/booth.h @@ -38,6 +38,7 @@ #define BOOTH_DEFAULT_CONF_EXT ".conf" #define BOOTH_DEFAULT_CONF \ BOOTH_DEFAULT_CONF_DIR BOOTH_DEFAULT_CONF_NAME BOOTH_DEFAULT_CONF_EXT +#define BOOTH_DEFAULT_CRMV1_CONF BOOTH_DEFAULT_CONF_DIR "crmv1/conf" #define DAEMON_NAME "boothd" #define BOOTH_PATH_LEN PATH_MAX @@ -380,7 +381,4 @@ extern struct command_line cl; _a > _b ? 
_a : _b; }) - - - #endif /* _BOOTH_H */ diff --git a/src/config.c b/src/config.c index f0ca4aa9..12d2a2f9 100644 --- a/src/config.c +++ b/src/config.c @@ -538,6 +538,178 @@ static int parse_attr_prereq(char *val, struct ticket_config *tk) extern int poll_timeout; +void +get_keyval(char *key, char *val, struct args *a) { + char *p; + + strncpy(a->key, key, 16); + p = skip_while(val, isspace); + *(p-1) = '\0'; + strncpy(a->val, val, 16); +} + +struct crmv1_group { + char name[16]; + char ra[128]; + struct args { + char *key[16]; + char *val[16]; + } args[16]; +}; + +#define OCF_HB_PATH "/usr/lib/ocf/resource.d/heartbeat/" +#define OCF_PATH "/usr/lib/ocf/resource.d/" + +void ln_ra(char *ra, char *s, int cnt) +{ + char *p, *q, *r; + int fd; + char ra_target_s[128]; + + p = s; + q = strchr(":", s); + if (!q) { + strcpy(ra, OCF_HB_PATH); + strncpy(ra+strlen(OCF_HB_PATH), s, 128-strlen(OCF_HB_PATH)); + r = s; + } else { + /* s -> p ':' q ':' r + * copy to ra + */ + *q = '\0'; q++; + strcpy(ra, OCF_PATH); + strncpy(ra+strlen(OCF_PATH), p, 128-strlen(OCF_PATH)); + *(q-p+1) = '/'; + strncpy(ra+strlen(OCF_PATH)+1, q, 128-strlen(OCF_PATH)-strlen(q)); + r = strchr(":", q); + *r = '\0'; r++; + strncat(ra, r, 128-strlen(OCF_PATH)); + } + if (strlen(p) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + /* now test if there is a file containing this RA + */ + if (!(fd = open(ra))) { + log_error("RA does not exist: %s", s); + exit(1); + } + close(fd); + /* finally, create a soft link + */ + if (snprintf(ra_target_s, 128, "%02d_%s", cnt, r) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + if (symlink(BOOTH_DEFAULT_CRMV1_CONF, ra_target_s) != 0) { + log_error("failed to symlink %s: %s", ra_target_s, + strerror(errno)); + exit(1); + } +} + +/* mimic the shell parsing + */ + +int parse_crmv1_conf(struct ticket_config *current_tk) +{ + struct crmv1_group *groups[16], *curr_group; + char line[1024], *buf; + char error_str_buf[1024]; + FILE *fp; + char *s, 
*key, *val; + const char *error; + char *save_ra; + int i, grp_i = 0, key_i = 0, in_key, grp_wait; + int cnt = 0, args_cnt = 0; + + curr_group = groups[0]; + fp = fopen(BOOTH_DEFAULT_CRMV1_CONF, "r"); + if (!fp) { + log_error("failed to open %s: %s", BOOTH_DEFAULT_CRMV1_CONF, + strerror(errno)); + return -1; + } + + log_debug("reading config file %s", BOOTH_DEFAULT_CRMV1_CONF); + /* make one long line */ + while (fgets(line, sizeof(line), fp)) { + s = skip_while(line, isspace); + if (is_end_of_line(s) || *s == '#') + continue; + /* is line continued? */ + if (*(s+strlen(s)-2) == '\\' && *(s+strlen(s)-1) == '\n') { + *(s+strlen(s)-2) = ' '; + } + } + buf = line; + + /* now parse the line */ + for (s = buf; ; ) { + /* a '=' b or ra */ + s = skip_while(s, isspace); + save_ra = s; + if ( *s == '=' ) { + *s = '\0'; + s++; + get_keyval(save_ra, s, curr_group->args[args_cnt++]); + } else { + if ( save_ra ) { + ln_ra(curr_group->ra, save_ra, cnt); + curr_group->args[0] = NULL; + save_ra = NULL; + cnt++; + continue; + } + } + ln_ra(curr_group->ra, s, cnt); + cnt++; + + if (strcmp(key, "group") == 0) { + grp_wait = 1; + continue; + } + + (void)snprintf(error_str_buf, sizeof(error_str_buf), + "Unknown keyword \"%s\"", key); + error = error_str_buf; + goto err; + + curr_group++; + } + fclose(fp); + + /* Default: make config name match config filename. */ + if (!booth_conf->name[0]) { + cp = strrchr(path, '/'); + cp = cp ? 
cp+1 : (char *)path; + cp2 = strrchr(cp, '.'); + if (!cp2) + cp2 = cp + strlen(cp); + if (cp2-cp >= BOOTH_NAME_LEN) { + log_error("token too long"); + goto out; + } + strncpy(booth_conf->name, cp, cp2-cp); + *(booth_conf->name+(cp2-cp)) = '\0'; + } + + if (!postproc_ticket(current_tk)) { + goto out; + } + + return 0; + +err: + fclose(fp); +out: + log_error("%s in config file line %d", + error, lineno); + booth_conf->crmv1 = 0; + return -1; +} + int read_config(const char *path, int type) { char line[1024]; @@ -787,6 +959,12 @@ int read_config(const char *path, int type) continue; } + if (strcmp(key, "crmv1") == 0) { + if ( !parse_crmv1_conf() ) + booth_conf->crmv1 = 1; + continue; + } + /* current_tk must be allocated at this point, otherwise * we don't know to which ticket the key refers */ diff --git a/src/config.h b/src/config.h index bca73bc7..834aa4e1 100644 --- a/src/config.h +++ b/src/config.h @@ -22,6 +22,7 @@ #include #include +#include #include "booth.h" #include "timer.h" #include "raft.h" @@ -321,6 +322,7 @@ struct booth_config { int ticket_count; int ticket_allocated; struct ticket_config *ticket; + int crmv1; }; extern struct booth_config *booth_conf; diff --git a/src/handler.c b/src/handler.c index a12857eb..2f6afbcd 100644 --- a/src/handler.c +++ b/src/handler.c @@ -64,17 +64,48 @@ closefiles(void) } } +static void +wait4proc(struct ticket_config *tk, char *prog) { + int rv, status; + + while (waitpid(curr_pid, &status, 0) != curr_pid) + ; + curr_pid = 0; + if (!ignore_status) { + rv = test_exit_status(tk, prog, status, 1); + if (rv) + _exit(rv); + } else { + /* + * To make ignore_rest function signal safe log_info + * must be removed from signal function. Information + * about signal delivery is important so put it here. 
+ */ + log_info("external programs handler caught TERM, ignoring " + "status of external test programs"); + } + static void run_ext_prog(struct ticket_config *tk, char *prog) { - if (set_booth_env(tk)) { + int status, rv; + + switch(curr_pid=fork()) { + case -1: + log_error("fork: %s", strerror(errno)); _exit(1); + case 0: /* child */ + if (set_booth_env(tk)) { + _exit(1); + } + closefiles(); /* don't leak open files */ + tk_log_debug("running handler %s", prog); + execv(prog, tk_test.argv); + tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); + _exit(1); + default: /* parent */ + wait4proc(struct ticket_config *tk, char *prog); } - closefiles(); /* don't leak open files */ - tk_log_debug("running handler %s", prog); - execv(prog, tk_test.argv); - tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); - _exit(1); } static int @@ -214,30 +245,9 @@ process_ext_dir(struct ticket_config *tk) strcpy(prog, tk_test.path); strcat(prog, "/"); strcat(prog, dp->d_name); - switch(curr_pid=fork()) { - case -1: - log_error("fork: %s", strerror(errno)); - _exit(1); - case 0: /* child */ - run_ext_prog(tk, prog); - break; /* run_ext_prog effectively noreturn */ - default: /* parent */ - while (waitpid(curr_pid, &status, 0) != curr_pid) - ; - curr_pid = 0; - if (!ignore_status) { - rv = test_exit_status(tk, prog, status, 1); - if (rv) - _exit(rv); - } else { - /* - * To make ignore_rest function signal safe log_info - * must be removed from signal function. Information - * about signal delivery is important so put it here. 
- */ - log_info("external programs handler caught TERM, ignoring " - "status of external test programs"); - } + run_ext_prog(tk, prog); + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); } } _exit(0); @@ -277,6 +287,9 @@ int run_handler(struct ticket_config *tk) tk_test.pid = pid; set_progstate(tk, EXTPROG_RUNNING); rv = RUNCMD_MORE; /* program runs */ + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); + } } return rv; diff --git a/test/live_test.sh b/test/live_test.sh index bd60964d..485a32d3 100755 --- a/test/live_test.sh +++ b/test/live_test.sh @@ -1163,6 +1163,44 @@ applicable_attr_prereq_fail() { [ -n "`get_attr`" ] } +## TEST: crmv1_group_start ## + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . /usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + # # environment modifications # @@ -1251,7 +1289,8 @@ grant_site_lost grant_site_reappear revoke simultaneous_start_even slow_start_granted restart_granted reload_granted restart_granted_nocib restart_notgranted failover split_leader split_follower split_edge -external_prog_failed attr_prereq_ok attr_prereq_fail"} +external_prog_failed attr_prereq_ok attr_prereq_fail +crmv1_group_start"} : ${MANUAL_TESTS:="grant longgrant grant_noarb grant_elsewhere grant_site_lost diff --git a/unit-tests/030_crmv1.txt 
b/unit-tests/030_crmv1.txt new file mode 100644 index 00000000..42e6ed18 --- /dev/null +++ b/unit-tests/030_crmv1.txt @@ -0,0 +1,48 @@ +# vim: ft=sh et : +# +# Testing crmv1 groups + + +ticket: + name "tick1" + state ST_LEADER + current_term 40 + leader local + # may keep ticket all the time + term_duration 3000 + # but shall start renewal now + term_expires time(0) + 1000 + req_sent_at time(0) - 10 + + +gdb0: + call parse_extprog("test `set|grep ^BOOTH|wc -l` -ge 5", booth_conf->ticket+0) + +outgoing0: + header.cmd OP_HEARTBEAT + + +testgrp: + call parse_extprog("bin/crmv1") + ext_verifier 'bin/crmv1' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +# +#gdb1: +# break ticket_broadcast_proposed_state § commands § bt § c § end + + +outgoing1: + header.cmd OP_HEARTBEAT + + +# now say that we may not have it anymore. +ticket2: + ext_verifier 'test "$BOOTH_TICKET" == "tick2FOO"' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +finally: + state ST_LEADER + leader local diff --git a/unit-tests/bin/checkcrmv1 b/unit-tests/bin/checkcrmv1 new file mode 100755 index 00000000..94396cfa --- /dev/null +++ b/unit-tests/bin/checkcrmv1 @@ -0,0 +1,39 @@ +#!/bin/sh + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . /usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + +check_crmv1_group_start_ok