Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CWS] Allow specifying cgroup managers for dumps generation #32287

Merged
merged 14 commits into from
Dec 18, 2024
1 change: 1 addition & 0 deletions pkg/config/setup/system_probe_cws.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func initCWSSystemProbeConfig(cfg pkgconfigmodel.Config) {
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.min_timeout", "10m")
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.max_dump_size", 1750)
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.traced_cgroups_count", 5)
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.cgroup_managers", []string{"docker", "podman", "containerd", "cri-o"})
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.traced_event_types", []string{"exec", "open", "dns", "imds"})
cfg.BindEnv("runtime_security_config.activity_dump.cgroup_dump_timeout") // deprecated in favor of dump_duration
cfg.BindEnvAndSetDefault("runtime_security_config.activity_dump.dump_duration", "900s")
Expand Down
4 changes: 4 additions & 0 deletions pkg/security/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ type RuntimeSecurityConfig struct {
// ActivityDumpTracedCgroupsCount defines the maximum count of cgroups that should be monitored concurrently. Set this parameter to 0 to prevent the generation
// of activity dumps based on cgroups.
ActivityDumpTracedCgroupsCount int
// ActivityDumpCgroupsManagers defines the cgroup managers we generate dumps for.
ActivityDumpCgroupsManagers []string

// ActivityDumpTracedEventTypes defines the list of events that should be captured in an activity dump. Leave this
// parameter empty to monitor all event types. If not already present, the `exec` event will automatically be added
// to this list.
Expand Down Expand Up @@ -368,6 +371,7 @@ func NewRuntimeSecurityConfig() (*RuntimeSecurityConfig, error) {
ActivityDumpLoadControlPeriod: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.activity_dump.load_controller_period"),
ActivityDumpLoadControlMinDumpTimeout: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.activity_dump.min_timeout"),
ActivityDumpTracedCgroupsCount: pkgconfigsetup.SystemProbe().GetInt("runtime_security_config.activity_dump.traced_cgroups_count"),
ActivityDumpCgroupsManagers: pkgconfigsetup.SystemProbe().GetStringSlice("runtime_security_config.activity_dump.cgroup_managers"),
spikat marked this conversation as resolved.
Show resolved Hide resolved
ActivityDumpTracedEventTypes: parseEventTypeStringSlice(pkgconfigsetup.SystemProbe().GetStringSlice("runtime_security_config.activity_dump.traced_event_types")),
ActivityDumpCgroupDumpTimeout: pkgconfigsetup.SystemProbe().GetDuration("runtime_security_config.activity_dump.dump_duration"),
ActivityDumpRateLimiter: pkgconfigsetup.SystemProbe().GetInt("runtime_security_config.activity_dump.rate_limiter"),
Expand Down
4 changes: 4 additions & 0 deletions pkg/security/ebpf/c/include/constants/custom.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,8 @@ static __attribute__((always_inline)) u64 get_imds_ip() {
#define CGROUP_MANAGER_CRI 4
#define CGROUP_MANAGER_SYSTEMD 5

#define CGROUP_MANAGER_MASK 0b111
#define CGROUP_SYSTEMD_SERVICE (0 << 8)
#define CGROUP_SYSTEMD_SCOPE (1 << 8)

#endif
22 changes: 14 additions & 8 deletions pkg/security/ebpf/c/include/helpers/activity_dump.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,16 @@ __attribute__((always_inline)) struct cgroup_tracing_event_t *get_cgroup_tracing
return evt;
}

// is_cgroup_activity_dumps_supported returns 1 when the cgroup has non-zero flags and an entry
// exists in activity_dump_config_defaults for its cgroup manager (the flags masked with
// CGROUP_MANAGER_MASK); it returns 0 otherwise.
__attribute__((always_inline)) u32 is_cgroup_activity_dumps_supported(struct cgroup_context_t *cgroup) {
    // no flags at all: nothing to look up
    if (cgroup->cgroup_flags == 0) {
        return 0;
    }

    // the defaults map is looked up with the manager bits only
    u32 manager_key = cgroup->cgroup_flags & CGROUP_MANAGER_MASK;
    if (bpf_map_lookup_elem(&activity_dump_config_defaults, &manager_key) == NULL) {
        return 0;
    }
    return 1;
}

__attribute__((always_inline)) bool reserve_traced_cgroup_spot(struct cgroup_context_t *cgroup, u64 now, u64 cookie, struct activity_dump_config *config) {
// insert dump config defaults
u32 defaults_key = 0;
struct activity_dump_config *defaults = bpf_map_lookup_elem(&activity_dump_config_defaults, &defaults_key);
u32 cgroup_flags = cgroup->cgroup_flags;
struct activity_dump_config *defaults = bpf_map_lookup_elem(&activity_dump_config_defaults, &cgroup_flags);
if (defaults == NULL) {
// should never happen, ignore
return false;
Expand Down Expand Up @@ -102,11 +108,15 @@ __attribute__((always_inline)) u64 trace_new_cgroup(void *ctx, u64 now, struct c
return 0;
}

if ((container->cgroup_context.cgroup_flags & 0b111) == CGROUP_MANAGER_SYSTEMD) {
if (!is_cgroup_activity_dumps_supported(&container->cgroup_context)) {
return 0;
}

copy_container_id(container->container_id, evt->container.container_id);
if ((container->cgroup_context.cgroup_flags&CGROUP_MANAGER_MASK) != CGROUP_MANAGER_SYSTEMD) {
copy_container_id(container->container_id, evt->container.container_id);
} else {
evt->container.container_id[0] = '\0';
}
evt->container.cgroup_context = container->cgroup_context;
evt->cookie = cookie;
evt->config = config;
Expand All @@ -115,10 +125,6 @@ __attribute__((always_inline)) u64 trace_new_cgroup(void *ctx, u64 now, struct c
return cookie;
}

__attribute__((always_inline)) u64 is_cgroup_activity_dumps_supported(struct cgroup_context_t *cgroup) {
return (cgroup->cgroup_flags != 0) && ((cgroup->cgroup_flags&0b111) != CGROUP_MANAGER_SYSTEMD);
}

__attribute__((always_inline)) u64 should_trace_new_process_cgroup(void *ctx, u64 now, u32 pid, struct container_context_t *container) {
// should we start tracing this cgroup ?
struct cgroup_context_t cgroup_context;
Expand Down
14 changes: 7 additions & 7 deletions pkg/security/ebpf/c/include/hooks/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,13 @@ static __attribute__((always_inline)) int trace__cgroup_write(ctx_t *ctx) {
#endif

int length = bpf_probe_read_str(prefix, sizeof(cgroup_prefix_t), container_id) & 0xff;
if (cgroup_flags == 0 && (
(length >= 9 && (*prefix)[length-9] == '.' && (*prefix)[length-8] == 's' && (*prefix)[length-7] == 'e' && (*prefix)[length-6] == 'r' && (*prefix)[length-5] == 'v' && (*prefix)[length-4] == 'i' && (*prefix)[length-3] == 'c' && (*prefix)[length-2] == 'e')
||
(length >= 7 && (*prefix)[length-7] == '.' && (*prefix)[length-6] == 's' && (*prefix)[length-5] == 'c' && (*prefix)[length-4] == 'o' && (*prefix)[length-3] == 'p' && (*prefix)[length-2] == 'e')
)) {
cgroup_flags = CGROUP_MANAGER_SYSTEMD;
} else if (cgroup_flags != 0) {
if (cgroup_flags == 0) {
if (length >= 9 && (*prefix)[length-9] == '.' && (*prefix)[length-8] == 's' && (*prefix)[length-7] == 'e' && (*prefix)[length-6] == 'r' && (*prefix)[length-5] == 'v' && (*prefix)[length-4] == 'i' && (*prefix)[length-3] == 'c' && (*prefix)[length-2] == 'e') {
cgroup_flags = CGROUP_MANAGER_SYSTEMD | CGROUP_SYSTEMD_SERVICE;
} else if (length >= 7 && (*prefix)[length-7] == '.' && (*prefix)[length-6] == 's' && (*prefix)[length-5] == 'c' && (*prefix)[length-4] == 'o' && (*prefix)[length-3] == 'p' && (*prefix)[length-2] == 'e') {
cgroup_flags = CGROUP_MANAGER_SYSTEMD | CGROUP_SYSTEMD_SCOPE;
}
} else {
bpf_probe_read(&new_entry.container.container_id, sizeof(new_entry.container.container_id), container_id);
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/security/ebpf/c/include/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ BPF_ARRAY_MAP(syscall_ctx_gen_id, u32, 1)
BPF_ARRAY_MAP(syscall_ctx, char[MAX_SYSCALL_CTX_SIZE], MAX_SYSCALL_CTX_ENTRIES)

BPF_HASH_MAP(activity_dumps_config, u64, struct activity_dump_config, 1) // max entries will be overridden at runtime
BPF_HASH_MAP(activity_dump_config_defaults, u32, struct activity_dump_config, 1)
BPF_HASH_MAP(activity_dump_config_defaults, u32, struct activity_dump_config, 5)
BPF_HASH_MAP(traced_cgroups, struct path_key_t, u64, 1) // max entries will be overridden at runtime
BPF_HASH_MAP(cgroup_wait_list, struct path_key_t, u64, 1) // max entries will be overridden at runtime
BPF_HASH_MAP(traced_pids, u32, u64, 8192) // max entries will be overridden at runtime
Expand Down
8 changes: 2 additions & 6 deletions pkg/security/probe/probe_ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,11 @@ func (p *EBPFProbe) handleEvent(CPU int, data []byte) {
return
}

if cgroupContext, err := p.Resolvers.ResolveCGroupContext(event.CgroupTracing.CGroupContext.CGroupFile, containerutils.CGroupFlags(event.CgroupTracing.CGroupContext.CGroupFlags)); err != nil {
cgroupContext, err := p.Resolvers.ResolveCGroupContext(event.CgroupTracing.CGroupContext.CGroupFile, containerutils.CGroupFlags(event.CgroupTracing.CGroupContext.CGroupFlags))
if err != nil {
seclog.Debugf("Failed to resolve cgroup: %s", err)
} else {
event.CgroupTracing.CGroupContext = *cgroupContext
if cgroupContext.CGroupFlags.IsContainer() {
containerID, _ := containerutils.FindContainerID(cgroupContext.CGroupID)
event.CgroupTracing.ContainerContext.ContainerID = containerID
}

p.profileManagers.activityDumpManager.HandleCGroupTracingEvent(&event.CgroupTracing)
}

Expand Down
18 changes: 17 additions & 1 deletion pkg/security/resolvers/cgroup/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ type ResolverInterface interface {
type Resolver struct {
*utils.Notifier[Event, *cgroupModel.CacheEntry]
sync.Mutex
cgroups *simplelru.LRU[model.PathKey, *model.CGroupContext]
hostWorkloads *simplelru.LRU[containerutils.CGroupID, *cgroupModel.CacheEntry]
containerWorkloads *simplelru.LRU[containerutils.ContainerID, *cgroupModel.CacheEntry]
}
Expand Down Expand Up @@ -80,6 +81,11 @@ func NewResolver() (*Resolver, error) {
return nil, err
}

cr.cgroups, err = simplelru.NewLRU(2048, func(_ model.PathKey, _ *model.CGroupContext) {})
if err != nil {
return nil, err
}

return cr, nil
}

Expand Down Expand Up @@ -121,10 +127,19 @@ func (cr *Resolver) AddPID(process *model.ProcessCacheEntry) {
} else {
cr.hostWorkloads.Add(process.CGroup.CGroupID, newCGroup)
}
cr.cgroups.Add(process.CGroup.CGroupFile, &process.CGroup)

cr.NotifyListeners(CGroupCreated, newCGroup)
}

// GetCGroupContext looks up the cgroup context stored under the provided path key.
// The boolean result reports whether an entry was found. The resolver lock is held
// for the duration of the lookup.
func (cr *Resolver) GetCGroupContext(cgroupPath model.PathKey) (*model.CGroupContext, bool) {
	cr.Lock()
	defer cr.Unlock()

	cgroupContext, found := cr.cgroups.Get(cgroupPath)
	return cgroupContext, found
}

// GetWorkload returns the workload referenced by the provided ID
func (cr *Resolver) GetWorkload(id containerutils.ContainerID) (*cgroupModel.CacheEntry, bool) {
if id == "" {
Expand Down Expand Up @@ -171,6 +186,7 @@ func (cr *Resolver) deleteWorkloadPID(pid uint32, workload *cgroupModel.CacheEnt

// check if the workload should be deleted
if len(workload.PIDs) <= 0 {
cr.cgroups.Remove(workload.CGroupFile)
cr.hostWorkloads.Remove(workload.CGroupID)
if workload.ContainerID != "" {
cr.containerWorkloads.Remove(workload.ContainerID)
Expand All @@ -183,5 +199,5 @@ func (cr *Resolver) Len() int {
cr.Lock()
defer cr.Unlock()

return cr.hostWorkloads.Len() + cr.containerWorkloads.Len()
return cr.cgroups.Len()
}
2 changes: 1 addition & 1 deletion pkg/security/resolvers/process/resolver_ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ func (p *EBPFResolver) enrichEventFromProc(entry *model.ProcessCacheEntry, proc
// Retrieve the container ID of the process from /proc
containerID, cgroup, err := p.containerResolver.GetContainerContext(pid)
if err != nil {
return fmt.Errorf("snapshot failed for %d: couldn't parse container ID: %w", proc.Pid, err)
return fmt.Errorf("snapshot failed for %d: couldn't parse container and cgroup context: %w", proc.Pid, err)
}

entry.ContainerID = containerID
Expand Down
4 changes: 4 additions & 0 deletions pkg/security/resolvers/resolvers_ebpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ func (r *EBPFResolvers) Start(ctx context.Context) error {

// ResolveCGroupContext resolves the cgroup context from a cgroup path key
func (r *EBPFResolvers) ResolveCGroupContext(pathKey model.PathKey, cgroupFlags containerutils.CGroupFlags) (*model.CGroupContext, error) {
if cgroupContext, found := r.CGroupResolver.GetCGroupContext(pathKey); found {
return cgroupContext, nil
}

path, err := r.DentryResolver.Resolve(pathKey, true)
if err != nil {
return nil, fmt.Errorf("failed to resolve cgroup file %v: %w", pathKey, err)
Expand Down
6 changes: 6 additions & 0 deletions pkg/security/secl/containerutils/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ const (
CGroupManagerSystemd // systemd
)

// CGroup flags
const (
SystemdService CGroupFlags = (0 << 8)
spikat marked this conversation as resolved.
Show resolved Hide resolved
SystemdScope CGroupFlags = (1 << 8)
)

const (
// ContainerRuntimeDocker is used to specify that a container is managed by Docker
ContainerRuntimeDocker = "docker"
Expand Down
33 changes: 18 additions & 15 deletions pkg/security/secl/containerutils/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,39 +29,42 @@ func init() {
containerIDPattern = regexp.MustCompile(ContainerIDPatternStr)
}

func isSystemdCgroup(cgroup CGroupID) bool {
return strings.HasSuffix(string(cgroup), ".service") || strings.HasSuffix(string(cgroup), ".scope")
// isSystemdScope reports whether the cgroup ID ends with the ".scope" suffix.
func isSystemdScope(cgroup CGroupID) bool {
	const scopeSuffix = ".scope"
	return strings.HasSuffix(string(cgroup), scopeSuffix)
}

// isSystemdService reports whether the cgroup ID ends with the ".service" suffix.
func isSystemdService(cgroup CGroupID) bool {
	const serviceSuffix = ".service"
	return strings.HasSuffix(string(cgroup), serviceSuffix)
}

// getSystemdCGroupFlags returns the systemd cgroup manager value combined with the
// scope or service flag, depending on the suffix of the cgroup ID. The scope suffix
// is checked first. It returns 0 when the cgroup ID carries neither suffix.
func getSystemdCGroupFlags(cgroup CGroupID) uint64 {
	switch {
	case isSystemdScope(cgroup):
		return uint64(CGroupManagerSystemd) | uint64(SystemdScope)
	case isSystemdService(cgroup):
		return uint64(CGroupManagerSystemd) | uint64(SystemdService)
	default:
		return 0
	}
}

// FindContainerID extracts the first substring that matches the pattern of a container ID along with the container flags derived from the container runtime prefix
func FindContainerID(s CGroupID) (ContainerID, uint64) {
match := containerIDPattern.FindIndex([]byte(s))
if match == nil {
if isSystemdCgroup(s) {
return "", uint64(CGroupManagerSystemd)
}

return "", 0
return "", getSystemdCGroupFlags(s)
}

// first, check what's before
if match[0] != 0 {
previousChar := string(s[match[0]-1])
if strings.ContainsAny(previousChar, containerIDCoreChars) {
if isSystemdCgroup(s) {
return "", uint64(CGroupManagerSystemd)
}
return "", 0
return "", getSystemdCGroupFlags(s)
}
}
// then, check what's after
if match[1] < len(s) {
nextChar := string(s[match[1]])
if strings.ContainsAny(nextChar, containerIDCoreChars) {
if isSystemdCgroup(s) {
return "", uint64(CGroupManagerSystemd)
}
return "", 0
return "", getSystemdCGroupFlags(s)
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/security/secl/containerutils/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func TestFindContainerID(t *testing.T) {
{ // Some random path which could match garden format
input: "/user.slice/user-1000.slice/user@1000.service/apps.slice/apps-org.gnome.Terminal.slice/vte-spawn-f9176c6a-2a34-4ce2-86af-60d16888ed8e.scope",
output: "",
flags: CGroupManagerSystemd,
flags: CGroupManagerSystemd | CGroupManager(SystemdScope),
},
{ // GARDEN with prefix / suffix
input: "prefix01234567-0123-4567-890a-bcdesuffix",
Expand Down
42 changes: 36 additions & 6 deletions pkg/security/security_profile/dump/load_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (

"github.com/DataDog/datadog-agent/pkg/security/config"
"github.com/DataDog/datadog-agent/pkg/security/metrics"
"github.com/DataDog/datadog-agent/pkg/security/secl/containerutils"
"github.com/DataDog/datadog-agent/pkg/security/secl/model"
"github.com/DataDog/datadog-agent/pkg/security/seclog"
)
Expand All @@ -34,6 +35,7 @@ type ActivityDumpLoadController struct {

// eBPF maps
activityDumpConfigDefaults *ebpf.Map
activityDumpLoadConfig map[containerutils.CGroupManager]*model.ActivityDumpLoadConfig
}

// NewActivityDumpLoadController returns a new activity dump load controller
Expand All @@ -58,7 +60,11 @@ func NewActivityDumpLoadController(adm *ActivityDumpManager) (*ActivityDumpLoadC
}, nil
}

func (lc *ActivityDumpLoadController) getDefaultLoadConfig() *model.ActivityDumpLoadConfig {
func (lc *ActivityDumpLoadController) getDefaultLoadConfigs() (map[containerutils.CGroupManager]*model.ActivityDumpLoadConfig, error) {
spikat marked this conversation as resolved.
Show resolved Hide resolved
if lc.activityDumpLoadConfig != nil {
return lc.activityDumpLoadConfig, nil
}

defaults := NewActivityDumpLoadConfig(
lc.adm.config.RuntimeSecurity.ActivityDumpTracedEventTypes,
lc.adm.config.RuntimeSecurity.ActivityDumpCgroupDumpTimeout,
Expand All @@ -68,14 +74,38 @@ func (lc *ActivityDumpLoadController) getDefaultLoadConfig() *model.ActivityDump
lc.adm.resolvers.TimeResolver,
)
defaults.WaitListTimestampRaw = uint64(lc.adm.config.RuntimeSecurity.ActivityDumpCgroupWaitListTimeout)
return defaults

allDefaultConfigs := map[string]containerutils.CGroupManager{
containerutils.CGroupManagerDocker.String(): containerutils.CGroupManagerDocker,
containerutils.CGroupManagerPodman.String(): containerutils.CGroupManagerPodman,
containerutils.CGroupManagerCRI.String(): containerutils.CGroupManagerCRI,
containerutils.CGroupManagerCRIO.String(): containerutils.CGroupManagerCRIO,
containerutils.CGroupManagerSystemd.String(): containerutils.CGroupManagerSystemd,
}
defaultConfigs := make(map[containerutils.CGroupManager]*model.ActivityDumpLoadConfig)
// Register the shared default config for every configured cgroup manager.
// The configured name is kept in its own variable (managerName) so that it is not
// shadowed by the looked-up CGroupManager value: on a failed lookup the error must
// report the name from the configuration, not the zero-value manager.
for _, managerName := range lc.adm.config.RuntimeSecurity.ActivityDumpCgroupsManagers {
	cgroupManager, found := allDefaultConfigs[managerName]
	if !found {
		return nil, fmt.Errorf("unsupported cgroup manager '%s'", managerName)
	}
	defaultConfigs[cgroupManager] = defaults
}
lc.activityDumpLoadConfig = defaultConfigs
return defaultConfigs, nil
}

// PushCurrentConfig pushes the current load controller config to kernel space
func (lc *ActivityDumpLoadController) PushCurrentConfig() error {
// PushDefaultCurrentConfigs pushes the current load controller configs to kernel space
func (lc *ActivityDumpLoadController) PushDefaultCurrentConfigs() error {
defaultConfigs, err := lc.getDefaultLoadConfigs()
if err != nil {
return err
}

// push default load config values
if err := lc.activityDumpConfigDefaults.Put(uint32(0), lc.getDefaultLoadConfig()); err != nil {
return fmt.Errorf("couldn't update default activity dump load config: %w", err)
for cgroupManager, defaultConfig := range defaultConfigs {
if err := lc.activityDumpConfigDefaults.Put(uint32(cgroupManager), defaultConfig); err != nil {
return fmt.Errorf("couldn't update default activity dump load config: %w", err)
}
}
return nil
}
Expand Down
Loading
Loading