Skip to content

Commit

Permalink
Implement transition function for MANIFEST_PULLED state (#4152)
Browse files Browse the repository at this point in the history
  • Loading branch information
amogh09 committed Apr 29, 2024
1 parent 960b7ed commit 9c66114
Show file tree
Hide file tree
Showing 21 changed files with 1,504 additions and 71 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ additional details on each available environment variable.
| `ECS_APPARMOR_CAPABLE` | `true` | Whether AppArmor is available on the container instance. | `false` | `false` |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` | 10m | Default time to wait to delete containers for a stopped task (see also `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER`). If set to less than 1 second, the value is ignored. | 3h | 3h |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER` | 1h | Jitter value for the task engine cleanup wait duration. When specified, the actual cleanup wait duration time for each task will be the duration specified in `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` plus a random duration between 0 and the jitter duration. | blank | blank |
| `ECS_MANIFEST_PULL_TIMEOUT` | 10m | Timeout before giving up on fetching image manifest for a container image. | 1m | 1m |
| `ECS_CONTAINER_STOP_TIMEOUT` | 10m | Instance scoped configuration for time to wait for the container to exit normally before being forcibly killed. | 30s | 30s |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Timeout before giving up on starting a container. | 3m | 8m |
| `ECS_CONTAINER_CREATE_TIMEOUT` | 10m | Timeout before giving up on creating a container. Minimum value is 1m. If user sets a value below minimum it will be set to min. | 4m | 4m |
Expand Down
3 changes: 3 additions & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ func environmentConfig() (Config, error) {
DeleteNonECSImagesEnabled: parseBooleanDefaultFalseConfig("ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP"),
TaskCPUMemLimit: parseBooleanDefaultTrueConfig("ECS_ENABLE_TASK_CPU_MEM_LIMIT"),
DockerStopTimeout: parseDockerStopTimeout(),
ManifestPullTimeout: parseManifestPullTimeout(),
ContainerStartTimeout: parseContainerStartTimeout(),
ContainerCreateTimeout: parseContainerCreateTimeout(),
DependentContainersPullUpfront: parseBooleanDefaultFalseConfig("ECS_PULL_DEPENDENT_CONTAINERS_UPFRONT"),
Expand Down Expand Up @@ -625,6 +626,7 @@ func (cfg *Config) String() string {
"PollingMetricsWaitDuration: %v, "+
"ReservedMem: %v, "+
"TaskCleanupWaitDuration: %v, "+
"ManifestPullTimeout: %v, "+
"DockerStopTimeout: %v, "+
"ContainerStartTimeout: %v, "+
"ContainerCreateTimeout: %v, "+
Expand All @@ -644,6 +646,7 @@ func (cfg *Config) String() string {
cfg.PollingMetricsWaitDuration,
cfg.ReservedMemory,
cfg.TaskCleanupWaitDuration,
cfg.ManifestPullTimeout,
cfg.DockerStopTimeout,
cfg.ContainerStartTimeout,
cfg.ContainerCreateTimeout,
Expand Down
26 changes: 26 additions & 0 deletions agent/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func TestEnvironmentConfig(t *testing.T) {
defer setTestEnv("ECS_CLUSTER", "myCluster")()
defer setTestEnv("ECS_RESERVED_PORTS_UDP", "[42,99]")()
defer setTestEnv("ECS_RESERVED_MEMORY", "20")()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "85s")()
defer setTestEnv("ECS_CONTAINER_STOP_TIMEOUT", "60s")()
defer setTestEnv("ECS_CONTAINER_START_TIMEOUT", "5m")()
defer setTestEnv("ECS_CONTAINER_CREATE_TIMEOUT", "4m")()
Expand Down Expand Up @@ -176,6 +177,7 @@ func TestEnvironmentConfig(t *testing.T) {
assert.Contains(t, conf.ReservedPortsUDP, uint16(42))
assert.Contains(t, conf.ReservedPortsUDP, uint16(99))
assert.Equal(t, uint16(20), conf.ReservedMemory)
assert.Equal(t, 85*time.Second, conf.ManifestPullTimeout)
expectedDurationDockerStopTimeout, _ := time.ParseDuration("60s")
assert.Equal(t, expectedDurationDockerStopTimeout, conf.DockerStopTimeout)
expectedDurationContainerStartTimeout, _ := time.ParseDuration("5m")
Expand Down Expand Up @@ -361,6 +363,14 @@ func TestInvalidFormatContainerCreateTimeout(t *testing.T) {
assert.Equal(t, defaultContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreateTimeout")
}

func TestInvalidFormatManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "invalid")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, defaultManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestInvalidFormatDockerInactivityTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_INACTIVITY_TIMEOUT", "invalid")()
Expand Down Expand Up @@ -402,6 +412,14 @@ func TestZeroValueContainerCreateTimeout(t *testing.T) {
assert.Equal(t, defaultContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreateTimeout")
}

func TestZeroValueManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "0s")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, defaultManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestInvalidValueContainerStartTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_CONTAINER_START_TIMEOUT", "-10s")()
Expand All @@ -418,6 +436,14 @@ func TestInvalidValueContainerCreateTimeout(t *testing.T) {
assert.Equal(t, minimumContainerCreateTimeout, conf.ContainerCreateTimeout, "Wrong value for ContainerCreataeTimeout")
}

func TestInvalidValueManifestPullTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_MANIFEST_PULL_TIMEOUT", "-10s")()
conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, minimumManifestPullTimeout, conf.ManifestPullTimeout)
}

func TestZeroValueDockerPullInactivityTimeout(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_DOCKER_PULL_INACTIVITY_TIMEOUT", "0s")()
Expand Down
5 changes: 5 additions & 0 deletions agent/config/config_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ const (
// Default cgroup memory system root path, this is the default used if the
// path has not been configured through ECS_CGROUP_PATH
defaultCgroupPath = "/sys/fs/cgroup"
// minimumManifestPullTimeout is the minimum timeout allowed for manifest pulls
minimumManifestPullTimeout = 30 * time.Second
// defaultManifestPullTimeout is the default timeout for manifest pulls
defaultManifestPullTimeout = 1 * time.Minute
// defaultContainerStartTimeout specifies the value for container start timeout duration
defaultContainerStartTimeout = 3 * time.Minute
// minimumContainerStartTimeout specifies the minimum value for starting a container
Expand All @@ -70,6 +74,7 @@ func DefaultConfig() Config {
ReservedMemory: 0,
AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver},
TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration,
ManifestPullTimeout: defaultManifestPullTimeout,
DockerStopTimeout: defaultDockerStopTimeout,
ContainerStartTimeout: defaultContainerStartTimeout,
ContainerCreateTimeout: defaultContainerCreateTimeout,
Expand Down
1 change: 1 addition & 0 deletions agent/config/config_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestConfigDefault(t *testing.T) {
assert.Equal(t, 30*time.Second, cfg.DockerStopTimeout, "Default docker stop container timeout set incorrectly")
assert.Equal(t, 3*time.Minute, cfg.ContainerStartTimeout, "Default docker start container timeout set incorrectly")
assert.Equal(t, 4*time.Minute, cfg.ContainerCreateTimeout, "Default docker create container timeout set incorrectly")
assert.Equal(t, 1*time.Minute, cfg.ManifestPullTimeout, "Default manifest pull timeout set incorrectly")
assert.False(t, cfg.PrivilegedDisabled.Enabled(), "Default PrivilegedDisabled set incorrectly")
assert.Equal(t, []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver},
cfg.AvailableLoggingDrivers, "Default logging drivers set incorrectly")
Expand Down
5 changes: 5 additions & 0 deletions agent/config/config_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ const (
dnsPort = 53
// NetBIOS over TCP/IP
netBIOSPort = 139
// minimumManifestPullTimeout is the minimum timeout allowed for manifest pulls
minimumManifestPullTimeout = 30 * time.Second
// defaultManifestPullTimeout is the default timeout for manifest pulls
defaultManifestPullTimeout = 1 * time.Minute
// defaultContainerStartTimeout specifies the value for container start timeout duration
defaultContainerStartTimeout = 8 * time.Minute
// minimumContainerStartTimeout specifies the minimum value for starting a container
Expand Down Expand Up @@ -121,6 +125,7 @@ func DefaultConfig() Config {
ReservedMemory: 0,
AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver, dockerclient.AWSLogsDriver},
TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration,
ManifestPullTimeout: defaultManifestPullTimeout,
DockerStopTimeout: defaultDockerStopTimeout,
ContainerStartTimeout: defaultContainerStartTimeout,
ContainerCreateTimeout: defaultContainerCreateTimeout,
Expand Down
13 changes: 13 additions & 0 deletions agent/config/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ func parseDockerStopTimeout() time.Duration {
return dockerStopTimeout
}

func parseManifestPullTimeout() time.Duration {
var timeout time.Duration
parsedTimeout := parseEnvVariableDuration("ECS_MANIFEST_PULL_TIMEOUT")
if parsedTimeout >= minimumManifestPullTimeout {
timeout = parsedTimeout
} else if parsedTimeout != 0 {
// Parsed timeout too low
timeout = minimumManifestPullTimeout
seelog.Warnf("Discarded invalid value for manifest pull timeout, parsed as: %v", parsedTimeout)
}
return timeout
}

func parseContainerStartTimeout() time.Duration {
var containerStartTimeout time.Duration
parsedStartTimeout := parseEnvVariableDuration("ECS_CONTAINER_START_TIMEOUT")
Expand Down
3 changes: 3 additions & 0 deletions agent/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ type Config struct {
// This doesn't reserve memory usage on the instance
ReservedMemory uint16

// ManifestPullTimeout is the amount of time to wait for a manifest pull
ManifestPullTimeout time.Duration

// DockerStopTimeout specifies the amount of time before a SIGKILL is issued to
// containers managed by ECS
DockerStopTimeout time.Duration
Expand Down
6 changes: 6 additions & 0 deletions agent/dockerclient/dockerapi/docker_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ func (dg *dockerGoClient) WithVersion(version dockerclient.DockerVersion) (Docke
versionedClient := &dockerGoClient{
sdkClientFactory: dg.sdkClientFactory,
version: version,
ecrClientFactory: dg.ecrClientFactory,
auth: dg.auth,
ecrTokenCache: dg.ecrTokenCache,
config: dg.config,
context: dg.context,
manifestPullBackoff: dg.manifestPullBackoff,
Expand Down Expand Up @@ -371,6 +373,10 @@ func (dg *dockerGoClient) PullImageManifest(
})

if err != nil {
if errors.Is(err, context.DeadlineExceeded) {
timeoutErr := &DockerTimeoutError{time.Since(startTime), "MANIFEST_PULLED"}
return registry.DistributionInspect{}, timeoutErr
}
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, err)
}
if ctxErr := ctx.Err(); ctxErr != nil {
Expand Down
9 changes: 9 additions & 0 deletions agent/engine/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/aws/aws-sdk-go/aws"
"github.com/cihub/seelog"
dockercontainer "github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/registry"
"github.com/golang/mock/gomock"
"github.com/pborman/uuid"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -146,6 +147,7 @@ func waitForTaskStoppedByCheckStatus(task *apitask.Task) {
// removed matches with the generated container name during cleanup operation in the
// test.
func validateContainerRunWorkflow(t *testing.T,
ctrl *gomock.Controller,
container *apicontainer.Container,
task *apitask.Task,
imageManager *mock_engine.MockImageManager,
Expand All @@ -156,6 +158,13 @@ func validateContainerRunWorkflow(t *testing.T,
createdContainerName chan<- string,
assertions func(),
) {
// Prepare mock image manifest digest for test
manifestPullClient := mock_dockerapi.NewMockDockerClient(ctrl)
client.EXPECT().WithVersion(dockerclient.Version_1_35).Return(manifestPullClient, nil)
manifestPullClient.EXPECT().
PullImageManifest(gomock.Any(), container.Image, container.RegistryAuthentication).
Return(registry.DistributionInspect{}, nil)

imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes()
client.EXPECT().PullImage(gomock.Any(), container.Image, nil, gomock.Any()).Return(dockerapi.DockerContainerMetadata{})
imageManager.EXPECT().RecordContainerReference(container).Return(nil)
Expand Down
8 changes: 6 additions & 2 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,12 @@ func (imageManager *dockerImageManager) RecordContainerReference(container *apic
return err
}
container.ImageID = imageInspected.ID
imageDigest := imageManager.fetchRepoDigest(imageInspected, container)
container.SetImageDigest(imageDigest)
// For older Docker versions imageDigest is not populated during transition to
// MANIFEST_PULLED state. Populate it here if that's the case.
if container.GetImageDigest() == "" {
imageDigest := imageManager.fetchRepoDigest(imageInspected, container)
container.SetImageDigest(imageDigest)
}
added := imageManager.addContainerReferenceToExistingImageState(container)
if !added {
imageManager.addContainerReferenceToNewImageState(container, imageInspected.Size)
Expand Down
Loading

0 comments on commit 9c66114

Please sign in to comment.