Skip to content

Commit

Permalink
[crmsh-4.6] Fix: qnetd fails to start when TLS is disabled (bsc#12276…
Browse files Browse the repository at this point in the history
…49) (#1492)

backport:

* #1489
  • Loading branch information
liangxin1300 authored Jul 18, 2024
2 parents c51f137 + 764ed2f commit c5fa894
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 236 deletions.
2 changes: 1 addition & 1 deletion crmsh/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,7 @@ def configure_qdevice_interactive():
valid_func=qdevice.QDevice.check_qdevice_algo)
qdevice_tie_breaker = prompt_for_string("QNetd TIE_BREAKER (lowest/highest/valid node id)", default="lowest",
valid_func=qdevice.QDevice.check_qdevice_tie_breaker)
qdevice_tls = prompt_for_string("Whether using TLS on QDevice/QNetd (on/off/required)", default="on",
qdevice_tls = prompt_for_string("Whether using TLS on QDevice (on/off/required)", default="on",
valid_func=qdevice.QDevice.check_qdevice_tls)
qdevice_heuristics = prompt_for_string("Heuristics COMMAND to run with absolute path; For multiple commands, use \";\" to separate",
valid_func=qdevice.QDevice.check_qdevice_heuristics,
Expand Down
8 changes: 6 additions & 2 deletions crmsh/prun/prun.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

_DEFAULT_CONCURRENCY = 32

_SUDO_SFTP_SERVER = 'sudo PATH=/usr/lib/ssh:/usr/lib/openssh:/usr/libexec/ssh:/usr/libexec/openssh /bin/sh -c "exec sftp-server"'


class ProcessResult:
def __init__(self, returncode: int, stdout: bytes, stderr: bytes):
Expand Down Expand Up @@ -185,9 +187,10 @@ def pcopy_to_remote(

def _build_copy_task(ssh: str, script: str, host: str):
_, remote_sudoer = UserOfHost.instance().user_pair_for_ssh(host)
cmd = "sftp {} {} -o BatchMode=yes -s 'sudo PATH=/usr/lib/ssh:/usr/libexec/ssh /bin/sh -c \"exec sftp-server\"' -b - {}@{}".format(
cmd = "sftp {} {} -o BatchMode=yes -s '{}' -b - {}@{}".format(
ssh,
crmsh.constants.SSH_OPTION,
_SUDO_SFTP_SERVER,
remote_sudoer, _enclose_inet6_addr(host),
)
return Task(
Expand Down Expand Up @@ -249,9 +252,10 @@ def pfetch_from_remote(

def _build_fetch_task( ssh: str, host: str, src: str, dst: str, flags: str) -> Task:
_, remote_sudoer = UserOfHost.instance().user_pair_for_ssh(host)
cmd = "sftp {} {} -o BatchMode=yes -s 'sudo PATH=/usr/lib/ssh:/usr/libexec/ssh /bin/sh -c \"exec sftp-server\"' -b - {}@{}".format(
cmd = "sftp {} {} -o BatchMode=yes -s '{}' -b - {}@{}".format(
ssh,
crmsh.constants.SSH_OPTION,
_SUDO_SFTP_SERVER,
remote_sudoer, _enclose_inet6_addr(host),
)
os.makedirs(f"{dst}/{host}", exist_ok=True)
Expand Down
196 changes: 65 additions & 131 deletions crmsh/qdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,29 +98,12 @@ def wrapper(*args, **kwargs):


class QDevice(object):
"""Class to manage qdevice configuration and services
Call `certificate_process_on_init` to generate all of CA, server, and client certs.
Call `certificate_process_on_join` to generate a single client cert for the local node.
"""
Class to manage qdevice configuration and services
Whole certification process:
For init
Step 1: init_db_on_qnetd
Step 2: fetch_qnetd_crt_from_qnetd
Step 3: copy_qnetd_crt_to_cluster
Step 4: init_db_on_cluster
Step 5: create_ca_request
Step 6: copy_crq_to_qnetd
Step 7: sign_crq_on_qnetd
Step 8: fetch_cluster_crt_from_qnetd
Step 9: import_cluster_crt
Step 10: copy_p12_to_cluster
Step 11: import_p12_on_cluster
For join
Step 1: fetch_qnetd_crt_from_cluster
Step 2: init_db_on_local
Step 3: fetch_p12_from_cluster
Step 4: import_p12_on_local
"""

qnetd_service = "corosync-qnetd.service"
qnetd_cacert_filename = "qnetd-cacert.crt"
qdevice_crq_filename = "qdevice-net-node.crq"
Expand Down Expand Up @@ -294,27 +277,17 @@ def valid_qnetd(self):
"""
exception_msg = ""
suggest = ""
duplicated_cluster_name = False
shell = sh.cluster_shell()
if not utils.package_is_installed("corosync-qnetd", remote_addr=self.qnetd_addr):
exception_msg = "Package \"corosync-qnetd\" not installed on {}!".format(self.qnetd_addr)
suggest = "install \"corosync-qnetd\" on {}".format(self.qnetd_addr)
elif ServiceManager().service_is_active("corosync-qnetd", remote_addr=self.qnetd_addr):
else:
self.init_tls_certs_on_qnetd()
self.start_qnetd()
cmd = "corosync-qnetd-tool -l -c {}".format(self.cluster_name)
if shell.get_stdout_or_raise_error(cmd, self.qnetd_addr):
duplicated_cluster_name = True
else:
cmd = "test -f {}".format(self.qnetd_cluster_crt_on_qnetd)
try:
shell.get_stdout_or_raise_error(cmd, self.qnetd_addr)
except ValueError:
# target file not exist
pass
else:
duplicated_cluster_name = True
if duplicated_cluster_name:
exception_msg = "This cluster's name \"{}\" already exists on qnetd server!".format(self.cluster_name)
suggest = "consider to use the different cluster-name property"
exception_msg = "This cluster's name \"{}\" already exists on qnetd server!".format(self.cluster_name)
suggest = "consider to use the different cluster-name property"

if exception_msg:
if self.is_stage:
Expand Down Expand Up @@ -342,51 +315,37 @@ def set_cluster_name(self):
raise ValueError("No cluster_name found in {}".format(corosync.conf()))

@qnetd_lock_for_multi_cluster
def init_db_on_qnetd(self):
"""
Certificate process for init
Step 1
Initialize database on QNetd server by running corosync-qnetd-certutil -i
"""
def init_tls_certs_on_qnetd(self):
"""Initialize NSS database and generates CA and server certs on QNetd server."""
cmd = "test -f {}".format(self.qnetd_cacert_on_qnetd)
try:
parallax.parallax_call([self.qnetd_addr], cmd)
return
except ValueError:
# target file not exist
pass
else:
return

logger.info('Generating QNetd CA and server certificates on %s', self.qnetd_addr)
cmd = "corosync-qnetd-certutil -i"
desc = "Step 1: Initialize database on {}".format(self.qnetd_addr)
QDevice.log_only_to_file(desc, cmd)
parallax.parallax_call([self.qnetd_addr], cmd)

def fetch_qnetd_crt_from_qnetd(self):
"""
Certificate process for init
Step 2
Fetch QNetd CA certificate(qnetd-cacert.crt) from QNetd server
"""
def fetch_qnetd_crt_from_qnetd(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Fetch QNetd CA certificate(qnetd-cacert.crt) from QNetd server"""
if os.path.exists(self.qnetd_cacert_on_local):
return

desc = "Step 2: Fetch {} from {}".format(self.qnetd_cacert_filename, self.qnetd_addr)
QDevice.log_only_to_file(desc)
desc = "Fetch {} from {}".format(self.qnetd_cacert_filename, self.qnetd_addr)
log(desc)
crmsh.parallax.parallax_slurp([self.qnetd_addr], self.qdevice_path, self.qnetd_cacert_on_qnetd)

def copy_qnetd_crt_to_cluster(self):
"""
Certificate process for init
Step 3
Copy exported QNetd CA certificate (qnetd-cacert.crt) to every node
"""
def copy_qnetd_crt_to_cluster(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Copy exported QNetd CA certificate (qnetd-cacert.crt) to every node"""
node_list = utils.list_cluster_nodes_except_me()
if not node_list:
return

desc = "Step 3: Copy exported {} to {}".format(self.qnetd_cacert_filename, node_list)
QDevice.log_only_to_file(desc)
desc = "Copy exported {} to {}".format(self.qnetd_cacert_filename, node_list)
log(desc)
self._copy_file_to_remote_hosts(
os.path.dirname(self.qnetd_cacert_on_local),
node_list, self.qdevice_path,
Expand All @@ -404,122 +363,97 @@ def _enclose_inet6_addr(addr: str):
def _copy_file_to_remote_hosts(cls, local_file, remote_hosts: typing.Iterable[str], remote_path, recursive=False):
crmsh.parallax.parallax_copy(remote_hosts, local_file, remote_path, recursive)

def init_db_on_cluster(self):
def init_db_on_cluster(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""
Certificate process for init
Step 4
On one of cluster node initialize database by running
/usr/sbin/corosync-qdevice-net-certutil -i -c qnetd-cacert.crt
"""
node_list = utils.list_cluster_nodes()
cmd = "corosync-qdevice-net-certutil -i -c {}".format(self.qnetd_cacert_on_local)
desc = "Step 4: Initialize database on {}".format(node_list)
QDevice.log_only_to_file(desc, cmd)
desc = "Initialize database on {}".format(node_list)
log(desc, cmd)
crmsh.parallax.parallax_call(node_list, cmd)

def create_ca_request(self):
"""
Certificate process for init
Step 5
Generate certificate request:
def create_ca_request(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Generate certificate request:
/usr/sbin/corosync-qdevice-net-certutil -r -n Cluster
(Cluster name must match cluster_name key in the corosync.conf)
"""
cmd = "corosync-qdevice-net-certutil -r -n {}".format(self.cluster_name)
QDevice.log_only_to_file("Step 5: Generate certificate request {}".format(self.qdevice_crq_filename), cmd)
log("Generate certificate request {}".format(self.qdevice_crq_filename), cmd)
sh.cluster_shell().get_stdout_or_raise_error(cmd)

def copy_crq_to_qnetd(self):
"""
Certificate process for init
Step 6
Copy exported CRQ to QNetd server
"""
desc = "Step 6: Copy {} to {}".format(self.qdevice_crq_filename, self.qnetd_addr)
QDevice.log_only_to_file(desc)
def copy_crq_to_qnetd(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Copy exported CRQ to QNetd server"""
desc = "Copy {} to {}".format(self.qdevice_crq_filename, self.qnetd_addr)
log(desc)
self._copy_file_to_remote_hosts(self.qdevice_crq_on_local, [self.qnetd_addr], self.qdevice_crq_on_qnetd)

def sign_crq_on_qnetd(self):
"""
Certificate process for init
Step 7
On QNetd server sign and export cluster certificate by running
def sign_crq_on_qnetd(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""On QNetd server sign and export cluster certificate by running
corosync-qnetd-certutil -s -c qdevice-net-node.crq -n Cluster
"""
desc = "Step 7: Sign and export cluster certificate on {}".format(self.qnetd_addr)
desc = "Sign and export cluster certificate on {}".format(self.qnetd_addr)
cmd = "corosync-qnetd-certutil -s -c {} -n {}".\
format(self.qdevice_crq_on_qnetd, self.cluster_name)
QDevice.log_only_to_file(desc, cmd)
log(desc, cmd)
parallax.parallax_call([self.qnetd_addr], cmd)

def fetch_cluster_crt_from_qnetd(self):
"""
Certificate process for init
Step 8
Copy exported CRT to node where certificate request was created
"""
desc = "Step 8: Fetch {} from {}".format(os.path.basename(self.qnetd_cluster_crt_on_qnetd), self.qnetd_addr)
QDevice.log_only_to_file(desc)
def fetch_cluster_crt_from_qnetd(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Copy exported CRT to node where certificate request was created"""
desc = "Fetch {} from {}".format(os.path.basename(self.qnetd_cluster_crt_on_qnetd), self.qnetd_addr)
log(desc)
crmsh.parallax.parallax_slurp([self.qnetd_addr], self.qdevice_path, self.qnetd_cluster_crt_on_qnetd)

def import_cluster_crt(self):
"""
Certificate process for init
Step 9
Import certificate on node where certificate request was created by
def import_cluster_crt(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Import certificate on node where certificate request was created by
running /usr/sbin/corosync-qdevice-net-certutil -M -c cluster-Cluster.crt
"""
cmd = "corosync-qdevice-net-certutil -M -c {}".format(self.qnetd_cluster_crt_on_local)
QDevice.log_only_to_file(
"Step 9: Import certificate file {} on local".format(os.path.basename(self.qnetd_cluster_crt_on_local)),
cmd)
log("Import certificate file {} on local".format(os.path.basename(self.qnetd_cluster_crt_on_local)), cmd)
sh.cluster_shell().get_stdout_or_raise_error(cmd)

def copy_p12_to_cluster(self):
"""
Certificate process for init
Step 10
Copy output qdevice-net-node.p12 to all other cluster nodes
"""
def copy_p12_to_cluster(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Copy output qdevice-net-node.p12 to all other cluster nodes"""
node_list = utils.list_cluster_nodes_except_me()
if not node_list:
return

desc = "Step 10: Copy {} to {}".format(self.qdevice_p12_filename, node_list)
QDevice.log_only_to_file(desc)
desc = "Copy {} to {}".format(self.qdevice_p12_filename, node_list)
log(desc)
self._copy_file_to_remote_hosts(self.qdevice_p12_on_local, node_list, self.qdevice_p12_on_local)

def import_p12_on_cluster(self):
"""
Certificate process for init
Step 11
Import cluster certificate and key on all other cluster nodes:
def import_p12_on_cluster(self, log: typing.Callable[[str, typing.Optional[str]], None]):
"""Import cluster certificate and key on all other cluster nodes:
/usr/sbin/corosync-qdevice-net-certutil -m -c qdevice-net-node.p12
"""
node_list = utils.list_cluster_nodes_except_me()
if not node_list:
return

desc = "Step 11: Import {} on {}".format(self.qdevice_p12_filename, node_list)
desc = "Import {} on {}".format(self.qdevice_p12_filename, node_list)
cmd = "corosync-qdevice-net-certutil -m -c {}".format(self.qdevice_p12_on_local)
log(desc, cmd)
QDevice.log_only_to_file(desc, cmd)
parallax.parallax_call(node_list, cmd)

def certificate_process_on_init(self):
"""
The qdevice certificate process on init node
"""
self.init_db_on_qnetd()
self.fetch_qnetd_crt_from_qnetd()
self.copy_qnetd_crt_to_cluster()
self.init_db_on_cluster()
self.create_ca_request()
self.copy_crq_to_qnetd()
self.sign_crq_on_qnetd()
self.fetch_cluster_crt_from_qnetd()
self.import_cluster_crt()
self.copy_p12_to_cluster()
self.import_p12_on_cluster()
for i, step in enumerate([
self.fetch_qnetd_crt_from_qnetd,
self.copy_qnetd_crt_to_cluster,
self.init_db_on_cluster,
self.create_ca_request,
self.copy_crq_to_qnetd,
self.sign_crq_on_qnetd,
self.fetch_cluster_crt_from_qnetd,
self.import_cluster_crt,
self.copy_p12_to_cluster,
self.import_p12_on_cluster,
]):
step(lambda s, cmd=None: self.log_only_to_file(f'Step {i+1}: {s}', cmd))

def fetch_qnetd_crt_from_cluster(self):
"""
Expand Down Expand Up @@ -695,7 +629,7 @@ def config_and_start_qdevice(self):
Wrap function to collect functions to config and start qdevice
"""
QDevice.remove_qdevice_db()
if self.tls == "on":
if self.tls == "on" or self.tls == 'required':
with logger_utils.status_long("Qdevice certification process"):
self.certificate_process_on_init()
self.adjust_sbd_watchdog_timeout_with_qdevice()
Expand Down
2 changes: 1 addition & 1 deletion crmsh/ui_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ def do_init(self, context, *args):
qdevice_group.add_argument("--qdevice-tie-breaker", dest="qdevice_tie_breaker", metavar="TIE_BREAKER", default="lowest",
help="QNetd TIE_BREAKER (lowest/highest/valid_node_id, default:lowest)")
qdevice_group.add_argument("--qdevice-tls", dest="qdevice_tls", metavar="TLS", default="on", choices=['on', 'off', 'required'],
help="Whether using TLS on QDevice/QNetd (on/off/required, default:on)")
help="Whether using TLS on QDevice (on/off/required, default:on)")
qdevice_group.add_argument("--qdevice-heuristics", dest="qdevice_heuristics", metavar="COMMAND",
help="COMMAND to run with absolute path. For multiple commands, use \";\" to separate (details about heuristics can see man 8 corosync-qdevice)")
qdevice_group.add_argument("--qdevice-heuristics-mode", dest="qdevice_heuristics_mode", metavar="MODE", choices=['on', 'sync', 'off'],
Expand Down
1 change: 1 addition & 0 deletions test/features/qdevice_options.feature
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Feature: corosync qdevice/qnetd options
When Run "crm cluster init --qnetd-hostname=qnetd-node --qdevice-tls=off -y" on "hanode1"
Then Cluster service is "started" on "hanode1"
And Service "corosync-qdevice" is "started" on "hanode1"
And Service "corosync-qnetd" is "started" on "qnetd-node"
And Show corosync qdevice configuration

@clean
Expand Down
4 changes: 2 additions & 2 deletions test/features/qdevice_usercase.feature
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ Feature: Verify usercase master survive when split-brain
And Service "corosync-qnetd" is "started" on "qnetd-node"
And Show corosync qdevice configuration
When Run "crm corosync status qnetd" on "hanode1"
Then Expected regrex "Heuristics:\s+Fail" in stdout
Then Expected regex "Heuristics:\s+Fail" in stdout
When Run "touch /tmp/heuristics.txt" on "hanode1"
When Run "sleep 30" on "hanode1"
When Run "crm corosync status qnetd" on "hanode1"
Then Expected regrex "Heuristics:\s+Pass" in stdout
Then Expected regex "Heuristics:\s+Pass" in stdout

@clean
Scenario: Master survive when split-brain
Expand Down
2 changes: 1 addition & 1 deletion test/features/qdevice_validate.feature
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Feature: corosync qdevice/qnetd options validate
When Run "crm cluster init -n cluster1 -y" on "hanode3"
Then Cluster service is "started" on "hanode3"
When Try "crm cluster init qdevice --qnetd-hostname=qnetd-node -y" on "hanode2,hanode3"
Then Except "ERROR: cluster.init: Duplicated cluster name "cluster1"!"
Then Expected regex "(?:Duplicated cluster name|cluster's name .* already exists)" in stderr
When Run "crm cluster stop" on "hanode2"
When Run "crm cluster stop" on "hanode3"

Expand Down
2 changes: 1 addition & 1 deletion test/features/steps/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
--qdevice-tie-breaker TIE_BREAKER
QNetd TIE_BREAKER (lowest/highest/valid_node_id,
default:lowest)
--qdevice-tls TLS Whether using TLS on QDevice/QNetd (on/off/required,
--qdevice-tls TLS Whether using TLS on QDevice (on/off/required,
default:on)
--qdevice-heuristics COMMAND
COMMAND to run with absolute path. For multiple
Expand Down
Loading

0 comments on commit c5fa894

Please sign in to comment.