Skip to content

Commit

Permalink
Fix multiprocessing executor shutdown (#621)
Browse files Browse the repository at this point in the history
With this patch, the mp executor no longer hangs at application exit out
of the box; it exits gracefully instead.
  • Loading branch information
michalkuligowski authored Dec 11, 2024
2 parents df7dd05 + 07dbd34 commit d312c92
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
3 changes: 3 additions & 0 deletions vllm/executor/multiproc_hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def _check_executor_parameters(self):
f"please ensure that world_size ({world_size}) "
f"is less than than max local hpu count ({hpu_device_count})")

# Finalizer hook: when this executor instance is finalized, call its own
# shutdown() method.
# NOTE(review): shutdown() is defined outside this view; presumably it stops
# the worker processes -- confirm. It must tolerate being called more than
# once (explicit shutdown followed by finalization) and on an instance whose
# __init__ did not complete -- TODO confirm.
def __del__(self):
self.shutdown()


class MultiprocessingHPUExecutorAsync(MultiprocessingHPUExecutor,
MultiprocessingGPUExecutorAsync):
Expand Down
17 changes: 17 additions & 0 deletions vllm/executor/multiproc_worker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import vllm.envs as envs
from vllm.logger import init_logger
from vllm.platforms import current_platform
from vllm.triton_utils.importing import HAS_TRITON
from vllm.utils import cuda_is_initialized

Expand Down Expand Up @@ -291,6 +292,22 @@ def set_multiprocessing_worker_envs(parallel_config):
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

if (current_platform.is_hpu()
and parallel_config.distributed_executor_backend == 'mp'
and envs.VLLM_WORKER_MULTIPROC_METHOD == 'fork'):
if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) is not None:
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
"cause application hangs on exit. Using "
"VLLM_WORKER_MULTIPROC_METHOD=fork anyway, "
"as it was explicitly requested.")
else:
logger.warning("On HPU, VLLM_WORKER_MULTIPROC_METHOD=fork might "
"cause application hangs on exit. Setting "
"VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. "
"To override that behavior, please set "
"VLLM_WORKER_MULTIPROC_METHOD=fork explicitly.")
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

# Configure thread parallelism if OMP_NUM_THREADS isn't set
#
# Helps to avoid CPU contention. The default of spawning a thread per
Expand Down

0 comments on commit d312c92

Please sign in to comment.