Skip to content

Commit

Permalink
Update CPU and memory details by switching to lshw method instead of …
Browse files Browse the repository at this point in the history
…cpuinfo

Add CPU information (model, vendor, frequency) and memory details (clock, size, type) to API
  • Loading branch information
aliel authored and hoh committed Mar 7, 2024
1 parent d6025f5 commit 55fae43
Show file tree
Hide file tree
Showing 10 changed files with 142 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
run: |
sudo apt-get update
sudo apt-get -y upgrade
sudo apt-get install -y python3 python3-pip python3-aiohttp python3-msgpack python3-aiodns python3-alembic python3-sqlalchemy python3-setproctitle redis python3-aioredis python3-psutil sudo acl curl systemd-container squashfs-tools debootstrap python3-packaging python3-cpuinfo python3-nftables python3-jsonschema nftables
sudo apt-get install -y python3 python3-pip python3-aiohttp python3-msgpack python3-aiodns python3-alembic python3-sqlalchemy python3-setproctitle redis python3-aioredis python3-psutil sudo acl curl systemd-container squashfs-tools debootstrap python3-packaging python3-nftables python3-jsonschema nftables lshw
pip install --upgrade typing-extensions types-PyYAML
- name: Install required Python packages
Expand Down
2 changes: 1 addition & 1 deletion docker/vm_supervisor-dev.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FROM debian:bullseye
RUN apt-get update && apt-get -y upgrade && apt-get install -y \
sudo acl curl squashfs-tools git \
python3 python3-aiohttp python3-alembic python3-msgpack python3-pip python3-aiodns python3-aioredis\
python3-nftables python3-psutil python3-setproctitle python3-sqlalchemy python3-packaging python3-cpuinfo ndppd nftables \
python3-nftables python3-psutil python3-setproctitle python3-sqlalchemy python3-packaging ndppd nftables \
&& rm -rf /var/lib/apt/lists/*

RUN useradd jailman
Expand Down
2 changes: 1 addition & 1 deletion packaging/aleph-vm/DEBIAN/control
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ Version: 0.1.8
Architecture: all
Maintainer: Aleph.im
Description: Aleph.im VM execution engine
Depends: python3,python3-pip,python3-aiohttp,python3-msgpack,python3-aiodns,python3-alembic,python3-sqlalchemy,python3-setproctitle,redis,python3-aioredis,python3-psutil,sudo,acl,curl,systemd-container,squashfs-tools,debootstrap,python3-packaging,python3-cpuinfo,python3-nftables,python3-jsonschema,cloud-image-utils,ndppd,python3-yaml,python3-dotenv,python3-schedule,qemu-system-x86,qemu-utils,python3-systemd,python3-dbus,btrfs-progs,nftables
Depends: python3,python3-pip,python3-aiohttp,python3-msgpack,python3-aiodns,python3-alembic,python3-sqlalchemy,python3-setproctitle,redis,python3-aioredis,python3-psutil,sudo,acl,curl,systemd-container,squashfs-tools,debootstrap,python3-packaging,python3-nftables,python3-jsonschema,cloud-image-utils,ndppd,python3-yaml,python3-dotenv,python3-schedule,qemu-system-x86,qemu-utils,python3-systemd,python3-dbus,btrfs-progs,nftables,lshw
Section: aleph-im
Priority: Extra
1 change: 0 additions & 1 deletion packaging/requirements-debian-11.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ multidict==5.1.0
git+https://salsa.debian.org/pkg-netfilter-team/pkg-nftables#egg=nftables&subdirectory=py
packaging==20.9
psutil==5.8.0
py-cpuinfo==5.0.0
pycares==3.1.1
pyparsing==2.4.7
pyrsistent==0.15.5
Expand Down
1 change: 0 additions & 1 deletion packaging/requirements-ubuntu-20.04.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ multidict==4.7.3
git+https://salsa.debian.org/pkg-netfilter-team/pkg-nftables#egg=nftables&subdirectory=py
packaging==20.3
psutil==5.5.1
py-cpuinfo==5.0.0
pycares==3.1.1
PyGObject==3.36.0
pyparsing==2.4.6
Expand Down
1 change: 0 additions & 1 deletion packaging/requirements-ubuntu-22.04.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ multidict==5.1.0
git+https://salsa.debian.org/pkg-netfilter-team/pkg-nftables#egg=nftables&subdirectory=py
packaging==21.3
psutil==5.9.0
py-cpuinfo==5.0.0
pycares==4.1.2
PyGObject==3.42.1
pyparsing==2.4.7
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ dependencies = [
"sentry-sdk==1.31.0",
"aioredis==1.3.1",
"psutil==5.9.5",
"py-cpuinfo==9.0.0",
"schedule==1.2.1",
"nftables @ git+https://salsa.debian.org/pkg-netfilter-team/pkg-nftables#egg=nftables&subdirectory=py",
"msgpack==1.0.7",
Expand Down
77 changes: 77 additions & 0 deletions src/aleph/vm/orchestrator/machine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json
import re
import subprocess
from functools import lru_cache

import psutil


@lru_cache
def get_hardware_info():
    """Return the raw lshw nodes describing the host CPU and system memory.

    Runs ``lshw -sanitize -json`` and inspects the children of the first
    top-level child of the reported hardware tree. The result is memoised by
    ``lru_cache``, so the external command runs at most once per process.

    Returns:
        dict: may contain the keys ``"cpu"`` and ``"memory"``, each mapped to
        the corresponding lshw node (a dict). A key is absent when no matching
        node is found.

    Raises:
        FileNotFoundError: if the ``lshw`` executable cannot be found.
        json.JSONDecodeError: if the command output is not valid JSON.
    """
    completed = subprocess.run(
        ["lshw", "-sanitize", "-json"],
        stdout=subprocess.PIPE,
        shell=False,
    )
    data = json.loads(completed.stdout)

    # NOTE(review): some lshw builds reportedly wrap the tree in a JSON array;
    # accept both layouts instead of failing on data["children"].
    if isinstance(data, list):
        data = data[0] if data else {}

    hw_info = {}

    top_level = data.get("children") or [{}]
    for hw in top_level[0].get("children", []):
        # lshw disambiguates sibling nodes sharing an id with a ":N" suffix
        # ("cpu:0", "cpu:1", "memory:0", ...); compare on the base id so that
        # multi-socket hosts are still recognised.
        base_id = str(hw.get("id", "")).partition(":")[0]
        if base_id == "cpu":
            # Keep the first matching node when several are reported.
            hw_info.setdefault("cpu", hw)
        elif hw.get("class") == "memory" and base_id == "memory":
            hw_info.setdefault("memory", hw)

    return hw_info


@lru_cache
def get_cpu_info():
    """Summarise the host CPU from the lshw ``cpu`` node.

    Returns:
        dict: with the keys

        * ``architecture``: ``"x86_64"`` or ``"arm64"`` when the matching
          capability is listed, otherwise the node's ``width`` value
          (``None`` if that is missing too).
        * ``vendor``: ``"GenuineIntel"`` / ``"AuthenticAMD"`` for the known
          lshw vendor strings, otherwise the vendor string as reported
          (``None`` if missing).
        * ``model``: the node's ``product`` value, or ``None``.
        * ``frequency``: the node's ``capacity`` value, or ``None``.
        * ``count``: the result of ``psutil.cpu_count()``.

    Raises:
        KeyError: if ``get_hardware_info()`` found no ``cpu`` node.
    """
    cpu_info = get_hardware_info()["cpu"]

    # Fields that lshw does not report are simply absent from the node, so
    # read them with .get() instead of failing the whole lookup.
    capabilities = cpu_info.get("capabilities") or {}
    if "x86_64" in capabilities or "x86-64" in capabilities:
        architecture = "x86_64"
    elif "arm64" in capabilities or "arm-64" in capabilities:
        architecture = "arm64"
    else:
        # Fall back to the reported width when no known capability is listed.
        architecture = cpu_info.get("width")

    vendor = cpu_info.get("vendor")
    # lshw vendor implementation => https://github.com/lyonel/lshw/blob/15e4ca64647ad119b69be63274e5de2696d3934f/src/core/cpuinfo.cc#L308
    if vendor is not None:
        if "Intel Corp" in vendor:
            vendor = "GenuineIntel"
        elif "Advanced Micro Devices [AMD]" in vendor:
            vendor = "AuthenticAMD"

    return {
        "architecture": architecture,
        "vendor": vendor,
        "model": cpu_info.get("product"),
        "frequency": cpu_info.get("capacity"),
        "count": psutil.cpu_count(),
    }


@lru_cache
def get_memory_info():
    """Summarise the host memory from the lshw ``memory`` node.

    The memory banks (children of the node) are scanned in order. The clock
    of each bank that reports one is recorded, and scanning stops at the
    first bank whose description mentions a DDR generation (DDR2 to DDR6).

    Returns:
        dict: with the keys

        * ``size`` / ``units``: as reported on the memory node, or ``None``
          when missing.
        * ``type``: e.g. ``"DDR4"``, or ``""`` when no bank description
          matches.
        * ``clock``: clock of the last bank scanned that reported one, or
          ``""`` when none did.
        * ``clock_units``: always ``"Hz"``.

    Raises:
        KeyError: if ``get_hardware_info()`` found no ``memory`` node.
    """
    mem_info = get_hardware_info()["memory"]

    memory_type = ""
    memory_clock = ""

    for bank in mem_info.get("children", []):
        # Not every bank carries a clock (presumably e.g. unpopulated slots);
        # keep the previous value rather than raising KeyError.
        if "clock" in bank:
            memory_clock = bank["clock"]

        match = re.search(r"(DDR[2-6])", str(bank.get("description", "")))
        if match is not None:
            memory_type = match.group(0)
            break

    return {
        "size": mem_info.get("size"),
        "units": mem_info.get("units"),
        "type": memory_type,
        "clock": memory_clock,
        "clock_units": "Hz",
    }
66 changes: 60 additions & 6 deletions src/aleph/vm/orchestrator/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
from functools import lru_cache
from typing import Optional

import cpuinfo
import psutil
from aiohttp import web
from aleph.vm.conf import settings
from aleph.vm.orchestrator.machine import get_cpu_info, get_memory_info
from aleph_message.models import ItemHash
from aleph_message.models.execution.environment import CpuProperties
from pydantic import BaseModel, Field

from aleph.vm.conf import settings


class Period(BaseModel):
datetime: datetime
Expand Down Expand Up @@ -76,18 +75,66 @@ class MachineUsage(BaseModel):
active: bool = True


# CpuProperties extended with the CPU model, frequency and core count.
# All three added fields are optional and default to None.
class ExtendedCpuProperties(CpuProperties):
"""CPU properties."""

model: Optional[str] = Field(default=None, description="CPU model")
# NOTE(review): frequency and count are declared as str, while
# get_machine_capability() fills count from psutil.cpu_count() via
# get_cpu_info() -- confirm that string coercion is intended.
frequency: Optional[str] = Field(default=None, description="CPU frequency")
count: Optional[str] = Field(default=None, description="CPU count")



# Description of the host memory; every field is optional and defaults to None.
class MemoryProperties(BaseModel):
"""MEMORY properties."""

# size/units and clock/clock_units are value/unit pairs filled from
# get_memory_info() by get_machine_capability().
# NOTE(review): all fields are declared as str -- confirm this matches the
# types of the values lshw reports for size and clock.
size: Optional[str] = Field(default=None, description="Memory size")
units: Optional[str] = Field(default=None, description="Memory size units")
type: Optional[str] = Field(default=None, description="Memory type")
clock: Optional[str] = Field(default=None, description="Memory clock")
clock_units: Optional[str] = Field(default=None, description="Memory clock units")


# Aggregate of the CPU and memory descriptions; built by
# get_machine_capability() and serialised to JSON by about_capability().
class MachineCapability(BaseModel):
cpu: ExtendedCpuProperties
memory: MemoryProperties


# NOTE(review): this span comes from a rendered diff and interleaves the
# removed lines (cpuinfo.get_cpu_info(), "raw_arch_string", "vendor_id") with
# the added ones (get_cpu_info(), "architecture", "vendor"); read accordingly.
@lru_cache
def get_machine_properties() -> MachineProperties:
"""Fetch machine properties such as architecture, CPU vendor, ...
These should not change while the supervisor is running.
In the future, some properties may have to be fetched from within a VM.
"""
cpu_info = cpuinfo.get_cpu_info() # Slow

cpu_info = get_cpu_info()
return MachineProperties(
cpu=CpuProperties(
architecture=cpu_info["raw_arch_string"],
vendor=cpu_info["vendor_id"],
architecture=cpu_info["architecture"],
vendor=cpu_info["vendor"],
),
)


@lru_cache
def get_machine_capability() -> MachineCapability:
    """Assemble the CPU and memory description of this machine.

    The result is memoised by ``lru_cache``, so the underlying CPU and memory
    lookups happen only on the first call.
    """
    cpu = get_cpu_info()
    memory = get_memory_info()

    cpu_properties = ExtendedCpuProperties(
        architecture=cpu["architecture"],
        vendor=cpu["vendor"],
        model=cpu["model"],
        frequency=cpu["frequency"],
        count=cpu["count"],
    )
    memory_properties = MemoryProperties(
        size=memory["size"],
        units=memory["units"],
        type=memory["type"],
        clock=memory["clock"],
        clock_units=memory["clock_units"],
    )

    return MachineCapability(cpu=cpu_properties, memory=memory_properties)

Expand Down Expand Up @@ -119,6 +166,13 @@ async def about_system_usage(_: web.Request):
return web.json_response(text=usage.json(exclude_none=True), headers={"Access-Control-Allow-Origin:": "*"})


async def about_capability(_: web.Request):
    """Public endpoint to expose information about the CRN capability.

    Returns the cached ``MachineCapability`` serialised as JSON; fields whose
    value is ``None`` are kept in the payload (``exclude_none=False``).
    """
    capability: MachineCapability = get_machine_capability()
    return web.json_response(
        text=capability.json(exclude_none=False),
        # The header name must not carry a trailing colon, otherwise the
        # response does not contain the standard CORS header.
        headers={"Access-Control-Allow-Origin": "*"},
    )


class Allocation(BaseModel):
"""An allocation is the set of resources that are currently allocated on this orchestrator.
It contains the item_hashes of all persistent VMs, instances, on-demand VMs and jobs.
Expand Down
4 changes: 2 additions & 2 deletions src/aleph/vm/orchestrator/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@

import aiohttp_cors
from aiohttp import web

from aleph.vm.conf import settings
from aleph.vm.pool import VmPool
from aleph.vm.version import __version__

from .metrics import create_tables, setup_engine
from .resources import about_system_usage
from .resources import about_capability, about_system_usage
from .tasks import (
start_payment_monitoring_task,
start_watch_for_messages_task,
Expand Down Expand Up @@ -94,6 +93,7 @@ async def allow_cors_on_endpoint(request: web.Request):
web.get("/about/executions/records", about_execution_records),
web.get("/about/usage/system", about_system_usage),
web.get("/about/config", about_config),
web.get("/about/capability", about_capability),
# /control APIs are used to control the VMs and access their logs
web.post("/control/allocations", update_allocations),
web.post("/control/allocation/notify", notify_allocation),
Expand Down

0 comments on commit 55fae43

Please sign in to comment.