Skip to content

Commit

Permalink
feat: improved resource manager based on Rainbow
Browse files Browse the repository at this point in the history
  • Loading branch information
hacdias committed May 17, 2024
1 parent 366c4cd commit 6f73bc6
Show file tree
Hide file tree
Showing 10 changed files with 315 additions and 37 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ The following emojis are used to highlight certain changes:

### Changed

- The resource manager's defaults have been improved based on Rainbow's and Kubo's defaults. In addition, you can now customize a few options using flags, or [environment variables](./docs/environment-variables.md).

### Removed

### Fixed
Expand All @@ -29,8 +31,6 @@ The following emojis are used to highlight certain changes:

- The `/routing/v1/peers` endpoint correctly filters out private addresses.

### Security

## [v0.2.1]

### Fixed
Expand Down
35 changes: 35 additions & 0 deletions docs/environment-variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
- [`SOMEGUY_PROVIDER_ENDPOINTS`](#someguy_provider_endpoints)
- [`SOMEGUY_PEER_ENDPOINTS`](#someguy_peer_endpoints)
- [`SOMEGUY_IPNS_ENDPOINTS`](#someguy_ipns_endpoints)
- [`SOMEGUY_CONNMGR_LOW`](#someguy_connmgr_low)
- [`SOMEGUY_CONNMGR_HIGH`](#someguy_connmgr_high)
- [`SOMEGUY_CONNMGR_GRACE_PERIOD`](#someguy_connmgr_grace_period)
- [`SOMEGUY_MAX_MEMORY`](#someguy_max_memory)
- [`SOMEGUY_MAX_FD`](#someguy_max_fd)
- [Logging](#logging)
- [`GOLOG_LOG_LEVEL`](#golog_log_level)
- [`GOLOG_LOG_FMT`](#golog_log_fmt)
Expand Down Expand Up @@ -46,6 +51,36 @@ Comma-separated list of other Delegated Routing V1 endpoints to proxy IPNS reque

Default: none

### `SOMEGUY_CONNMGR_LOW`

Minimum number of connections to keep.

Default: 100

### `SOMEGUY_CONNMGR_HIGH`

Maximum number of connections to keep.

Default: 3000

### `SOMEGUY_CONNMGR_GRACE_PERIOD`

Minimum connection TTL.

Default: 1m

### `SOMEGUY_MAX_MEMORY`

Maximum memory to use.

Default: 0 (85% of the system's total RAM)

### `SOMEGUY_MAX_FD`

Maximum number of file descriptors.

Default: 0 (50% of the process' limit)

## Logging

### `GOLOG_LOG_LEVEL`
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.21

require (
github.com/CAFxX/httpcompression v0.0.9
github.com/dustin/go-humanize v1.0.1
github.com/felixge/httpsnoop v1.0.4
github.com/ipfs/boxo v0.19.1-0.20240515083429-ac0bab3926a8
github.com/ipfs/go-cid v0.4.1
Expand All @@ -14,11 +15,13 @@ require (
github.com/multiformats/go-multiaddr v0.12.3
github.com/multiformats/go-multibase v0.2.0
github.com/multiformats/go-multihash v0.2.3
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58
github.com/prometheus/client_golang v1.19.0
github.com/rs/cors v1.10.1
github.com/slok/go-http-metrics v0.11.0
github.com/stretchr/testify v1.9.0
github.com/urfave/cli/v2 v2.27.1
golang.org/x/sys v0.19.0
)

require (
Expand Down Expand Up @@ -89,7 +92,6 @@ require (
github.com/onsi/ginkgo/v2 v2.17.1 // indirect
github.com/opencontainers/runtime-spec v1.2.0 // indirect
github.com/opentracing/opentracing-go v1.2.0 // indirect
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/polydawn/refmt v0.89.0 // indirect
Expand Down Expand Up @@ -120,7 +122,6 @@ require (
golang.org/x/mod v0.17.0 // indirect
golang.org/x/net v0.24.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.19.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.20.0 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDD
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo=
github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs=
Expand Down
7 changes: 7 additions & 0 deletions internal/fd/sys_not_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build !linux && !darwin && !windows

package fd

// GetNumFDs reports the file descriptor limit of the current process.
// This variant is built for platforms other than Linux, macOS and Windows
// (see the build constraint at the top of the file) and always returns 0.
func GetNumFDs() int {
	const unknownLimit = 0
	return unknownLimit
}
17 changes: 17 additions & 0 deletions internal/fd/sys_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//go:build linux || darwin
// +build linux darwin

// Package fd provides the file descriptor limit of the current process on different platforms.
package fd

import (
"golang.org/x/sys/unix"
)

func GetNumFDs() int {
var l unix.Rlimit
if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &l); err != nil {
return 0

Check warning on line 14 in internal/fd/sys_unix.go

View check run for this annotation

Codecov / codecov/patch

internal/fd/sys_unix.go#L11-L14

Added lines #L11 - L14 were not covered by tests
}
return int(l.Cur)

Check warning on line 16 in internal/fd/sys_unix.go

View check run for this annotation

Codecov / codecov/patch

internal/fd/sys_unix.go#L16

Added line #L16 was not covered by tests
}
11 changes: 11 additions & 0 deletions internal/fd/sys_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//go:build windows

package fd

import (
"math"
)

// GetNumFDs reports the file descriptor limit of the current process.
// This variant is built for Windows and always returns math.MaxInt.
func GetNumFDs() int {
	const noLimit = math.MaxInt
	return noLimit
}
48 changes: 47 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"log"
"os"
"time"

"github.com/ipfs/boxo/ipns"
"github.com/ipfs/go-cid"
Expand Down Expand Up @@ -53,9 +54,54 @@ func main() {
EnvVars: []string{"SOMEGUY_IPNS_ENDPOINTS"},
Usage: "other Delegated Routing V1 endpoints to proxy IPNS requests to",
},
&cli.IntFlag{
Name: "connmgr-low",
Value: 100,
EnvVars: []string{"SOMEGUY_CONNMGR_LOW"},
Usage: "minimum number of connections to keep",
},
&cli.IntFlag{
Name: "connmgr-high",
Value: 3000,
EnvVars: []string{"SOMEGUY_CONNMGR_HIGH"},
Usage: "maximum number of connections to keep",
},
&cli.DurationFlag{
Name: "connmgr-grace",
Value: time.Minute,
EnvVars: []string{"SOMEGUY_CONNMGR_GRACE_PERIOD"},
Usage: "minimum connection TTL",
},
&cli.Uint64Flag{
Name: "max-memory",
Value: 0,
EnvVars: []string{"SOMEGUY_MAX_MEMORY"},
Usage: "maximum memory to use. Defaults to 85% of the system's available RAM",
},
&cli.Uint64Flag{
Name: "max-fd",
Value: 0,
EnvVars: []string{"SOMEGUY_MAX_FD"},
Usage: "maximum number of file descriptors. Defaults to 50% of the process' limit",
},

Check warning on line 86 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L57-L86

Added lines #L57 - L86 were not covered by tests
},
Action: func(ctx *cli.Context) error {
return start(ctx.Context, ctx.String("listen-address"), ctx.Bool("accelerated-dht"), ctx.StringSlice("provider-endpoints"), ctx.StringSlice("peer-endpoints"), ctx.StringSlice("ipns-endpoints"))
cfg := &config{
listenAddress: ctx.String("listen-address"),
acceleratedDHTClient: ctx.Bool("accelerated-dht"),

Check warning on line 91 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L89-L91

Added lines #L89 - L91 were not covered by tests

contentEndpoints: ctx.StringSlice("provider-endpoints"),
peerEndpoints: ctx.StringSlice("peer-endpoints"),
ipnsEndpoints: ctx.StringSlice("ipns-endpoints"),

Check warning on line 95 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L93-L95

Added lines #L93 - L95 were not covered by tests

connMgrLow: ctx.Int("connmgr-low"),
connMgrHi: ctx.Int("connmgr-high"),
connMgrGrace: ctx.Duration("connmgr-grace"),
maxMemory: ctx.Uint64("max-memory"),
maxFD: ctx.Int("max-fd"),

Check warning on line 101 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L97-L101

Added lines #L97 - L101 were not covered by tests
}

return start(ctx.Context, cfg)

Check warning on line 104 in main.go

View check run for this annotation

Codecov / codecov/patch

main.go#L104

Added line #L104 was not covered by tests
},
},
{
Expand Down
155 changes: 155 additions & 0 deletions rcmgr.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
package main

import (
"log"

"github.com/dustin/go-humanize"
"github.com/pbnjay/memory"

"github.com/ipfs/someguy/internal/fd"
"github.com/libp2p/go-libp2p"
"github.com/libp2p/go-libp2p/core/network"
rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
)

// Note: this comes from rainbow/rcmgr.go with minimal adaptations.

// infiniteResourceLimits is the System scope of rcmgr.InfiniteLimits expressed
// as partial resource limits. It is assigned to every scope in this file that
// is intentionally left unconstrained.
var infiniteResourceLimits = rcmgr.InfiniteLimits.ToPartialLimitConfig().System

func makeResourceMgrs(maxMemory uint64, maxFD int, connMgrHighWater int) (rm network.ResourceManager, err error) {
if maxMemory == 0 {
maxMemory = uint64((float64(memory.TotalMemory()) * 0.85))

Check warning on line 21 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L19-L21

Added lines #L19 - L21 were not covered by tests
}
if maxFD == 0 {
maxFD = fd.GetNumFDs() / 2

Check warning on line 24 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L23-L24

Added lines #L23 - L24 were not covered by tests
}
return rcmgr.NewResourceManager(rcmgr.NewFixedLimiter(makeResourceManagerConfig(maxMemory, maxFD, connMgrHighWater)))

Check warning on line 26 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L26

Added line #L26 was not covered by tests
}

func makeResourceManagerConfig(maxMemory uint64, maxFD int, connMgrHighWater int) (limitConfig rcmgr.ConcreteLimitConfig) {
if maxMemory == 0 {
maxMemory = uint64((float64(memory.TotalMemory()) * 0.85))

Check warning on line 31 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L29-L31

Added lines #L29 - L31 were not covered by tests
}
if maxFD == 0 {
maxFD = fd.GetNumFDs() / 2

Check warning on line 34 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L33-L34

Added lines #L33 - L34 were not covered by tests
}

maxMemoryMB := maxMemory / (1024 * 1024)

Check warning on line 37 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L37

Added line #L37 was not covered by tests

// At least as of 2023-01-25, it's possible to open a connection that
// doesn't ask for any memory usage with the libp2p Resource Manager/Accountant
// (see https://github.com/libp2p/go-libp2p/issues/2010#issuecomment-1404280736).
// As a result, we can't currently rely on Memory limits to full protect us.
// Until https://github.com/libp2p/go-libp2p/issues/2010 is addressed,
// we take a proxy now of restricting to 1 inbound connection per MB.
// Note: this is more generous than go-libp2p's default autoscaled limits which do
// 64 connections per 1GB
// (see https://github.com/libp2p/go-libp2p/blob/master/p2p/host/resource-manager/limit_defaults.go#L357 ).
systemConnsInbound := int(1 * maxMemoryMB)

Check warning on line 48 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L48

Added line #L48 was not covered by tests

partialLimits := rcmgr.PartialLimitConfig{
System: rcmgr.ResourceLimits{
Memory: rcmgr.LimitVal64(maxMemory),
FD: rcmgr.LimitVal(maxFD),

Check warning on line 53 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L50-L53

Added lines #L50 - L53 were not covered by tests

Conns: rcmgr.Unlimited,
ConnsInbound: rcmgr.LimitVal(systemConnsInbound),
ConnsOutbound: rcmgr.Unlimited,

Check warning on line 57 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L55-L57

Added lines #L55 - L57 were not covered by tests

Streams: rcmgr.Unlimited,
StreamsOutbound: rcmgr.Unlimited,
StreamsInbound: rcmgr.Unlimited,
},

Check warning on line 62 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L59-L62

Added lines #L59 - L62 were not covered by tests

// Transient connections won't cause any memory to be accounted for by the resource manager/accountant.
// Only established connections do.
// As a result, we can't rely on System.Memory to protect us from a bunch of transient connection being opened.
// We limit the same values as the System scope, but only allow the Transient scope to take 25% of what is allowed for the System scope.
Transient: rcmgr.ResourceLimits{
Memory: rcmgr.LimitVal64(maxMemory / 4),
FD: rcmgr.LimitVal(maxFD / 4),

Check warning on line 70 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L68-L70

Added lines #L68 - L70 were not covered by tests

Conns: rcmgr.Unlimited,
ConnsInbound: rcmgr.LimitVal(systemConnsInbound / 4),
ConnsOutbound: rcmgr.Unlimited,

Check warning on line 74 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L72-L74

Added lines #L72 - L74 were not covered by tests

Streams: rcmgr.Unlimited,
StreamsInbound: rcmgr.Unlimited,
StreamsOutbound: rcmgr.Unlimited,
},

Check warning on line 79 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L76-L79

Added lines #L76 - L79 were not covered by tests

// Lets get out of the way of the allow list functionality.
// If someone specified "Swarm.ResourceMgr.Allowlist" we should let it go through.
AllowlistedSystem: infiniteResourceLimits,

Check warning on line 83 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L83

Added line #L83 was not covered by tests

AllowlistedTransient: infiniteResourceLimits,

Check warning on line 85 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L85

Added line #L85 was not covered by tests

// Keep it simple by not having Service, ServicePeer, Protocol, ProtocolPeer, Conn, or Stream limits.
ServiceDefault: infiniteResourceLimits,

Check warning on line 88 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L88

Added line #L88 was not covered by tests

ServicePeerDefault: infiniteResourceLimits,

Check warning on line 90 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L90

Added line #L90 was not covered by tests

ProtocolDefault: infiniteResourceLimits,

Check warning on line 92 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L92

Added line #L92 was not covered by tests

ProtocolPeerDefault: infiniteResourceLimits,

Check warning on line 94 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L94

Added line #L94 was not covered by tests

Conn: infiniteResourceLimits,

Check warning on line 96 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L96

Added line #L96 was not covered by tests

Stream: infiniteResourceLimits,

Check warning on line 98 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L98

Added line #L98 was not covered by tests

// Limit the resources consumed by a peer.
// This doesn't protect us against intentional DoS attacks since an attacker can easily spin up multiple peers.
// We specify this limit against unintentional DoS attacks (e.g., a peer has a bug and is sending too much traffic intentionally).
// In that case we want to keep that peer's resource consumption contained.
// To keep this simple, we only constrain inbound connections and streams.
PeerDefault: rcmgr.ResourceLimits{
Memory: rcmgr.Unlimited64,
FD: rcmgr.Unlimited,
Conns: rcmgr.Unlimited,
ConnsInbound: rcmgr.DefaultLimit,
ConnsOutbound: rcmgr.Unlimited,
Streams: rcmgr.Unlimited,
StreamsInbound: rcmgr.DefaultLimit,
StreamsOutbound: rcmgr.Unlimited,
},

Check warning on line 114 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L105-L114

Added lines #L105 - L114 were not covered by tests
}

scalingLimitConfig := rcmgr.DefaultLimits
libp2p.SetDefaultServiceLimits(&scalingLimitConfig)

Check warning on line 118 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L117-L118

Added lines #L117 - L118 were not covered by tests

// Anything set above in partialLimits that had a value of rcmgr.DefaultLimit will be overridden.
// Anything in scalingLimitConfig that wasn't defined in partialLimits above will be added (e.g., libp2p's default service limits).
partialLimits = partialLimits.Build(scalingLimitConfig.Scale(int64(maxMemory), maxFD)).ToPartialLimitConfig()

Check warning on line 122 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L122

Added line #L122 was not covered by tests

// Simple checks to override autoscaling ensuring limits make sense versus the connmgr values.
// There are ways to break this, but this should catch most problems already.
// We might improve this in the future.
// See: https://github.com/ipfs/kubo/issues/9545
if partialLimits.System.ConnsInbound > rcmgr.DefaultLimit {
maxInboundConns := int(partialLimits.System.ConnsInbound)
if connmgrHighWaterTimesTwo := connMgrHighWater * 2; maxInboundConns < connmgrHighWaterTimesTwo {
maxInboundConns = connmgrHighWaterTimesTwo

Check warning on line 131 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L128-L131

Added lines #L128 - L131 were not covered by tests
}

if maxInboundConns < 800 {
maxInboundConns = 800

Check warning on line 135 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L134-L135

Added lines #L134 - L135 were not covered by tests
}

// Scale System.StreamsInbound as well, but use the existing ratio of StreamsInbound to ConnsInbound
if partialLimits.System.StreamsInbound > rcmgr.DefaultLimit {
partialLimits.System.StreamsInbound = rcmgr.LimitVal(int64(maxInboundConns) * int64(partialLimits.System.StreamsInbound) / int64(partialLimits.System.ConnsInbound))

Check warning on line 140 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L139-L140

Added lines #L139 - L140 were not covered by tests
}
partialLimits.System.ConnsInbound = rcmgr.LimitVal(maxInboundConns)

Check warning on line 142 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L142

Added line #L142 was not covered by tests
}

log.Printf(`

Check warning on line 145 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L145

Added line #L145 was not covered by tests
go-libp2p Resource Manager limits based on:
- --max-memory: %s
- --max-fd: %d

Check warning on line 149 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L147-L149

Added lines #L147 - L149 were not covered by tests
`, humanize.Bytes(maxMemory), maxFD)

Check warning on line 151 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L151

Added line #L151 was not covered by tests

// We already have a complete value thus pass in an empty ConcreteLimitConfig.
return partialLimits.Build(rcmgr.ConcreteLimitConfig{})

Check warning on line 154 in rcmgr.go

View check run for this annotation

Codecov / codecov/patch

rcmgr.go#L154

Added line #L154 was not covered by tests
}
Loading

0 comments on commit 6f73bc6

Please sign in to comment.