diff --git a/changes/1770.fix.md b/changes/1770.fix.md new file mode 100644 index 0000000000..be05096f01 --- /dev/null +++ b/changes/1770.fix.md @@ -0,0 +1 @@ +Include the shared memory when comparing the requested memory slot to the minimum resource slot required by an image, and add a configuration option for the maximum allowed ratio of shared memory to memory. diff --git a/changes/1963.fix.md b/changes/1963.fix.md new file mode 100644 index 0000000000..044b278dad --- /dev/null +++ b/changes/1963.fix.md @@ -0,0 +1 @@ +Fix the `caf54fcc17ab` migration to drop a primary key only if it exists, and add missing table arguments in the `589c764a18f1` migration. \ No newline at end of file diff --git a/changes/1965.doc.md b/changes/1965.doc.md new file mode 100644 index 0000000000..7a478086d3 --- /dev/null +++ b/changes/1965.doc.md @@ -0,0 +1 @@ +Resize font-size of footer text in ethical ads in documentation hosted by read-the-docs \ No newline at end of file diff --git a/changes/1966.feature.md b/changes/1966.feature.md new file mode 100644 index 0000000000..2f1ead9027 --- /dev/null +++ b/changes/1966.feature.md @@ -0,0 +1 @@ +Always enable `ai.backend.accelerator.cuda_open` in the scie-based installer diff --git a/src/ai/backend/client/cli/service.py b/src/ai/backend/client/cli/service.py index 3441464a89..a56795c577 100644 --- a/src/ai/backend/client/cli/service.py +++ b/src/ai/backend/client/cli/service.py @@ -192,7 +192,11 @@ def info(ctx: CLIContext, service_name_or_id: str): metavar="KEY=VAL", type=str, multiple=True, - help="Resource options for creating compute session (e.g: shmem=64m)", + help=( + "Resource options for creating compute session (e.g: shmem=64m). " + "The session APIs compare the total resources (the sum of this value and `resources`) " + "to the minimum/maximum resources requirements specified by an image." 
+ ), ) @click.option( "--cluster-size", @@ -393,7 +397,11 @@ def create( metavar="KEY=VAL", type=str, multiple=True, - help="Resource options for creating compute session (e.g: shmem=64m)", + help=( + "Resource options for creating compute session (e.g: shmem=64m). " + "The session APIs compare the total resources (the sum of this value and `resources`) " + "to the minimum/maximum resources requirements specified by an image." + ), ) @click.option( "--cluster-size", diff --git a/src/ai/backend/client/cli/session/args.py b/src/ai/backend/client/cli/session/args.py index 250449f0cb..091f537f96 100644 --- a/src/ai/backend/client/cli/session/args.py +++ b/src/ai/backend/client/cli/session/args.py @@ -131,7 +131,11 @@ metavar="KEY=VAL", type=str, multiple=True, - help="Resource options for creating compute session (e.g: shmem=64m)", + help=( + "Resource options for creating compute session (e.g: shmem=64m). " + "The session APIs compare the total resources (the sum of this value and `resources`) " + "to the minimum/maximum resources requirements specified by an image." + ), ), # resource grouping click.option( diff --git a/src/ai/backend/client/cli/session/execute.py b/src/ai/backend/client/cli/session/execute.py index a708e2f28b..3a3e308ef6 100644 --- a/src/ai/backend/client/cli/session/execute.py +++ b/src/ai/backend/client/cli/session/execute.py @@ -365,7 +365,11 @@ def prepare_mount_arg( metavar="KEY=VAL", type=str, multiple=True, - help="Resource options for creating compute session. (e.g: shmem=64m)", + help=( + "Resource options for creating compute session (e.g: shmem=64m). " + "The session APIs compare the total resources (the sum of this value and `resources`) " + "to the minimum/maximum resources requirements specified by an image." 
+ ), ) @click.option( "--arch", diff --git a/src/ai/backend/common/defs.py b/src/ai/backend/common/defs.py index 9dadda4c90..1f5556e46b 100644 --- a/src/ai/backend/common/defs.py +++ b/src/ai/backend/common/defs.py @@ -1,3 +1,4 @@ +from decimal import Decimal from typing import Final # Redis database IDs depending on purposes @@ -10,3 +11,8 @@ DEFAULT_FILE_IO_TIMEOUT: Final = 10 + + +DEFAULT_SHARED_MEMORY_SIZE: Final[str] = "64m" +DEFAULT_ALLOWED_MAX_SHMEM_RATIO: Final[Decimal] = Decimal(1.0) +SHMEM_RATIO_KEY: Final[str] = "resources/shmem-mem-ratio" diff --git a/src/ai/backend/manager/registry.py b/src/ai/backend/manager/registry.py index e1bb4679f0..de3cad8729 100644 --- a/src/ai/backend/manager/registry.py +++ b/src/ai/backend/manager/registry.py @@ -2,7 +2,6 @@ import asyncio import base64 -import copy import itertools import logging import re @@ -18,7 +17,7 @@ Sequence, ) from datetime import datetime -from decimal import Decimal +from decimal import Decimal, InvalidOperation from io import BytesIO from typing import ( TYPE_CHECKING, @@ -55,6 +54,11 @@ from ai.backend.common import msgpack, redis_helper from ai.backend.common.asyncio import cancel_tasks +from ai.backend.common.defs import ( + DEFAULT_ALLOWED_MAX_SHMEM_RATIO, + DEFAULT_SHARED_MEMORY_SIZE, + SHMEM_RATIO_KEY, +) from ai.backend.common.docker import ImageRef from ai.backend.common.events import ( AgentHeartbeatEvent, @@ -132,7 +136,11 @@ TooManySessionsMatched, ) from .config import LocalConfig, SharedConfig -from .defs import DEFAULT_IMAGE_ARCH, DEFAULT_ROLE, INTRINSIC_SLOTS +from .defs import ( + DEFAULT_IMAGE_ARCH, + DEFAULT_ROLE, + INTRINSIC_SLOTS, +) from .exceptions import MultiAgentError, convert_to_status_data from .models import ( AGENT_RESOURCE_OCCUPYING_KERNEL_STATUSES, @@ -1125,16 +1133,21 @@ async def enqueue_session( ) # Shared memory. 
- # We need to subtract the amount of shared memory from the memory limit of - # a container, since tmpfs including /dev/shm uses host-side kernel memory - # and cgroup's memory limit does not apply. - shmem = resource_opts.get("shmem", None) - if shmem is None: - shmem = labels.get("ai.backend.resource.preferred.shmem", "64m") - shmem = BinarySize.from_str(shmem) + raw_shmem: Optional[str] = resource_opts.get("shmem") + if raw_shmem is None: + raw_shmem = labels.get("ai.backend.resource.preferred.shmem") + if not raw_shmem: + # raw_shmem is None or empty string ("") + raw_shmem = DEFAULT_SHARED_MEMORY_SIZE + try: + shmem = BinarySize.from_str(raw_shmem) + except ValueError: + log.warning( + f"Failed to convert raw `shmem({raw_shmem})` " + f"to a decimal value. Fallback to default({DEFAULT_SHARED_MEMORY_SIZE})." + ) + shmem = BinarySize.from_str(DEFAULT_SHARED_MEMORY_SIZE) resource_opts["shmem"] = shmem - image_min_slots = copy.deepcopy(image_min_slots) - image_min_slots["mem"] += shmem # Sanitize user input: does it have resource config? if (resources := creation_config.get("resources")) is not None: @@ -1187,6 +1200,30 @@ async def enqueue_session( if tpu is not None: raise InvalidAPIParameters("Client upgrade required to use TPUs (v19.03+).") + # Check if the user has allocated an "imbalanced" shared memory amount. + raw_allowed_max_shmem_ratio = self.shared_config.data.get(SHMEM_RATIO_KEY) + try: + allowed_max_shmem_ratio = ( + Decimal(raw_allowed_max_shmem_ratio) + if raw_allowed_max_shmem_ratio is not None + else DEFAULT_ALLOWED_MAX_SHMEM_RATIO + ) + except (TypeError, InvalidOperation): + log.warning( + f"Failed to convert `raw_allowed_max_shmem_ratio({raw_allowed_max_shmem_ratio})` " + "to a decimal value. Fallback to default." + ) + allowed_max_shmem_ratio = DEFAULT_ALLOWED_MAX_SHMEM_RATIO + if Decimal(shmem) >= Decimal(requested_slots["mem"]) * allowed_max_shmem_ratio: + raise InvalidAPIParameters( + f"Too large shared memory. 
Maximum ratio of 'shared memory / memory' is {str(allowed_max_shmem_ratio)}. " + f"(s:{str(shmem)}, m:{str(BinarySize(requested_slots["mem"]))}" + ) + + # Compare ai.backend.resource.min.mem to (Memory + Shared-memory) + # because for most use cases, client side hides detailed shared-memory configuration. + requested_slots["mem"] += shmem + # Check the image resource slots. log_fmt = "s:{} k:{} r:{}-{}" log_args = (session_id, kernel_id, kernel["cluster_role"], kernel["cluster_idx"]) @@ -1219,14 +1256,6 @@ async def enqueue_session( ) ) - # Check if: shmem < memory - if shmem >= requested_slots["mem"]: - raise InvalidAPIParameters( - "Shared memory should be less than the main memory. (s:{}, m:{})".format( - str(shmem), str(BinarySize(requested_slots["mem"])) - ), - ) - # Add requested resource slot data to session session_requested_slots += requested_slots