Skip to content

Commit

Permalink
Merge pull request #3126 from GoogleCloudPlatform/pr2979
Browse files Browse the repository at this point in the history
Use sackd for the login nodes
  • Loading branch information
mr0re1 authored Oct 12, 2024
2 parents dec5719 + b59f80c commit 52bd393
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
# This file is managed by a script. Manual modifications will be overwritten.
"""

login_nodeset = "x-login"


def dict_to_conf(conf, delim=" ") -> str:
Expand Down Expand Up @@ -130,24 +129,6 @@ def get(key, default):
return dict_to_conf(conf_options, delim="\n")


def loginlines() -> str:
    """Return the config lines declaring the login nodeset and its partition.

    Two entries are rendered with ``dict_to_conf`` (default delimiter) and
    joined with a newline: first the ``NodeSet`` entry, then the
    ``PartitionName`` entry. Both are keyed on the module-level
    ``login_nodeset`` name.
    """
    # Nodeset entry: the node set name and its Feature share the same value.
    nodeset_conf = dict_to_conf(
        {
            "NodeSet": login_nodeset,
            "Feature": login_nodeset,
        }
    )
    # Partition entry: key order is kept as-is since it drives output order.
    partition_conf = dict_to_conf(
        {
            "PartitionName": login_nodeset,
            "Nodes": login_nodeset,
            "State": "UP",
            "DefMemPerCPU": 1,
            "Hidden": "YES",
            "RootOnly": "YES",
        }
    )
    return "\n".join((nodeset_conf, partition_conf))


def nodeset_lines(nodeset, lkp: util.Lookup) -> str:
Expand Down Expand Up @@ -254,7 +235,7 @@ def suspend_exc_lines(lkp: util.Lookup) -> Iterable[str]:
for p in lkp.cfg.partitions.values()
if len(p.partition_nodeset_dyn) > 0
]
suspend_exc_parts = {"SuspendExcParts": [login_nodeset, *dyn_parts]}
suspend_exc_parts = {"SuspendExcParts": [*dyn_parts]}

return filter(
None,
Expand All @@ -270,7 +251,6 @@ def make_cloud_conf(lkp: util.Lookup) -> str:
lines = [
FILE_PREAMBLE,
conflines(lkp),
loginlines(),
*(nodeset_lines(n, lkp) for n in lkp.cfg.nodeset.values()),
*(nodeset_dyn_lines(n) for n in lkp.cfg.nodeset_dyn.values()),
*(nodeset_tpu_lines(n, lkp) for n in lkp.cfg.nodeset_tpu.values()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def setup_sudoers():
content = """
# Allow SlurmUser to manage the slurm daemons
slurm ALL= NOPASSWD: /usr/bin/systemctl restart slurmd.service
slurm ALL= NOPASSWD: /usr/bin/systemctl restart sackd.service
slurm ALL= NOPASSWD: /usr/bin/systemctl restart slurmctld.service
"""
sudoers_file = Path("/etc/sudoers.d/slurm")
Expand Down Expand Up @@ -366,27 +367,25 @@ def setup_login():
slurmctld_host = f"{lookup().control_host}"
if lookup().control_addr:
slurmctld_host = f"{lookup().control_host}({lookup().control_addr})"
slurmd_options = [
sackd_options = [
f'--conf-server="{slurmctld_host}:{lookup().control_host_port}"',
f'--conf="Feature={conf.login_nodeset}"',
"-Z",
]
sysconf = f"""SLURMD_OPTIONS='{" ".join(slurmd_options)}'"""
update_system_config("slurmd", sysconf)
sysconf = f"""SACKD_OPTIONS='{" ".join(sackd_options)}'"""
update_system_config("sackd", sysconf)
install_custom_scripts()

setup_network_storage()
setup_sudoers()
run("systemctl restart munge")
run("systemctl enable slurmd", timeout=30)
run("systemctl restart slurmd", timeout=30)
run("systemctl enable sackd", timeout=30)
run("systemctl restart sackd", timeout=30)
run("systemctl enable --now slurmcmd.timer", timeout=30)

run_custom_scripts()

log.info("Check status of cluster services")
run("systemctl status munge", timeout=30)
run("systemctl status slurmd", timeout=30)
run("systemctl status sackd", timeout=30)

log.info("Done setting up login")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,11 +428,16 @@ def reconfigure_slurm():
log.exception("failed to reconfigure slurmctld")
util.run(f"wall '{update_msg}'", timeout=30)
log.debug("Done.")
elif lookup().instance_role_safe in ["compute", "login"]:
elif lookup().instance_role_safe == "compute":
log.info("Restarting slurmd to make changes take effect.")
run("systemctl restart slurmd")
util.run(f"wall '{update_msg}'", timeout=30)
log.debug("Done.")
elif lookup().instance_role_safe == "login":
log.info("Restarting sackd to make changes take effect.")
run("systemctl restart sackd")
util.run(f"wall '{update_msg}'", timeout=30)
log.debug("Done.")


def update_topology(lkp: util.Lookup) -> None:
Expand Down

0 comments on commit 52bd393

Please sign in to comment.