From 9632953d528ab0754ceb5e9ff8c0fab7af40d501 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Thu, 9 Sep 2021 17:37:40 -0500 Subject: [PATCH 01/11] feat: add IdleTimeLimit to options --- hatchery/hatchery.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index e2770dd7..3dd1c781 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -84,10 +84,11 @@ func status(w http.ResponseWriter, r *http.Request) { func options(w http.ResponseWriter, r *http.Request) { type container struct { - Name string `json:"name"` - CPULimit string `json:"cpu-limit"` - MemoryLimit string `json:"memory-limit"` - ID string `json:"id"` + Name string `json:"name"` + CPULimit string `json:"cpu-limit"` + MemoryLimit string `json:"memory-limit"` + ID string `json:"id"` + IdleTimeLimit int `json:"idle-time-limit"` } var options []container for k, v := range Config.ContainersMap { @@ -97,6 +98,16 @@ func options(w http.ResponseWriter, r *http.Request) { MemoryLimit: v.MemoryLimit, ID: k, } + for _, arg := range v.Args { + if strings.Contains(arg, "shutdown_no_activity_timeout=") { + argSplit := strings.Split(arg, "=") + idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) + if err == nil { + c.IdleTimeLimit = idleTimeLimit + } + break + } + } options = append(options, c) } From dc3137c5479bd6675459c2c33a41aa08b6c6602e Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Sat, 11 Sep 2021 14:21:49 -0500 Subject: [PATCH 02/11] feat: idle time limit and last act time --- hatchery/ecs.go | 52 ++++++++++++++++++++++++++++++++++++-- hatchery/hatchery.go | 60 ++++++++++++++++++++++++++++++++++++-------- hatchery/pods.go | 48 ++++++++++++++++++++++++++--------- 3 files changed, 135 insertions(+), 25 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 38f3c665..71a28e3f 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "strconv" "strings" "github.com/aws/aws-sdk-go/aws" @@ -104,7 +105,7 @@ func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { } // Status of workspace running in ECS -func (sess *CREDS) statusEcsWorkspace(userName string) (*WorkspaceStatus, error) { +func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, accessToken string) (*WorkspaceStatus, error) { status := WorkspaceStatus{} cluster, err := sess.findEcsCluster(userName) if err != nil { @@ -121,8 +122,54 @@ func (sess *CREDS) statusEcsWorkspace(userName string) (*WorkspaceStatus, error) } statusMessage := "INACTIVE" + status.IdleTimeLimit = -1 + status.LastActivityTime = -1 + var taskDefName string if len(service.Services) > 0 { statusMessage = *service.Services[0].Status + taskDefName = *service.Services[0].TaskDefinition + if taskDefName == "" { + Config.Logger.Printf("No task definition found for user %s", userName) + } else { + desTaskDefOutput, err := sess.svc.DescribeTaskDefinition(&ecs.DescribeTaskDefinitionInput{ + TaskDefinition: &taskDefName, + }) + if err == nil { + containerDefs := desTaskDefOutput.TaskDefinition.ContainerDefinitions + if len(containerDefs) > 0 { + args := containerDefs[0].Command + if len(args) > 0 { + for i, arg := range args { + if strings.Contains(*arg, "shutdown_no_activity_timeout=") { + Config.Logger.Printf("Found kernel idle shutdown time in args. Attempting to get last activity time\n") + argSplit := strings.Split(*arg, "=") + idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) + if err == nil { + status.IdleTimeLimit = idleTimeLimit + lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) + status.LastActivityTime = lastActivityTime + if err != nil { + Config.Logger.Println(err.Error()) + } + } else { + Config.Logger.Println(err.Error()) + } + break + } + if i == len(args)-1 { + Config.Logger.Printf("Unable to find kernel idle shutdown time in args\n") + } + } + } else { + Config.Logger.Printf("No env vars found for task definition %s\n", taskDefName) + } + } else { + Config.Logger.Printf("No container definition found for task definition %s\n", taskDefName) + } + } + } + } else { + Config.Logger.Printf("No service found for user %s", userName) } statusMap := map[string]string{ @@ -133,6 +180,7 @@ func (sess *CREDS) statusEcsWorkspace(userName string) (*WorkspaceStatus, error) } status.Status = statusMap[statusMessage] + // TODO: last activity time for ECS return &status, nil } @@ -188,7 +236,7 @@ func terminateEcsWorkspace(ctx context.Context, userName string, accessToken str break } if i == len(envVars)-1 { - Config.Logger.Printf("Unable to fund API Key ID in env vars for user %s\n", userName) + Config.Logger.Printf("Unable to find API Key ID in env vars for user %s\n", userName) } } } else { diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index eaf9f176..dbd3bf32 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -22,6 +22,10 @@ type APIKeyStruct struct { KeyID string `json:"key_id"` } +type WorkspaceKernelStatusStruct struct { + LastActivityTime string `json:"last_activity"` +} + // Config package-global shared hatchery config var Config *FullHatcheryConfig @@ -70,12 +74,13 @@ func paymodels(w http.ResponseWriter, r *http.Request) { func status(w http.ResponseWriter, r *http.Request) { userName := r.Header.Get("REMOTE_USER") + accessToken := getBearerToken(r) pm := Config.PayModelMap[userName] if pm.Ecs == "true" { - statusEcs(w, r) + statusEcs(r.Context(), w, userName, accessToken) } else { - result, err := statusK8sPod(r.Context(), userName) + result, err := statusK8sPod(r.Context(), userName, accessToken) if err != nil { http.Error(w, err.Error(), 500) return @@ -107,6 +112,7 @@ func options(w http.ResponseWriter, r *http.Request) { MemoryLimit: v.MemoryLimit, ID: k, } + c.IdleTimeLimit = -1 for _, arg := range v.Args { if strings.Contains(arg, "shutdown_no_activity_timeout=") { argSplit := strings.Split(arg, "=") @@ -148,7 +154,7 @@ func launch(w http.ResponseWriter, r *http.Request) { if pm.Ecs == "true" { launchEcs(w, r) } else { - err := createK8sPod(r.Context(), string(hash), accessToken, userName) + err := createK8sPod(r.Context(), string(hash), userName, accessToken) if err != nil { http.Error(w, err.Error(), 500) return @@ -168,7 +174,7 @@ func terminate(w http.ResponseWriter, r *http.Request) { if pm.Ecs == "true" { terminateEcs(w, r) } else { - err := deleteK8sPod(r.Context(), accessToken, userName) + err := deleteK8sPod(r.Context(), userName, accessToken) if err != nil { http.Error(w, err.Error(), 500) return @@ -267,8 +273,7 @@ func ecsCluster(w http.ResponseWriter, r *http.Request) { } // Function to check status of ECS workspace. -func statusEcs(w http.ResponseWriter, r *http.Request) { - userName := r.Header.Get("REMOTE_USER") +func statusEcs(ctx context.Context, w http.ResponseWriter, userName string, accessToken string) { if payModelExistsForUser(userName) { pm := Config.PayModelMap[userName] roleARN := "arn:aws:iam::" + pm.AWSAccountId + ":role/csoc_adminvm" @@ -277,7 +282,7 @@ func statusEcs(w http.ResponseWriter, r *http.Request) { Region: aws.String("us-east-1"), })) svc := NewSession(sess, roleARN) - result, err := svc.statusEcsWorkspace(userName) + result, err := svc.statusEcsWorkspace(ctx, userName, accessToken) if err != nil { Config.Logger.Printf("Error: %s", err) fmt.Fprintf(w, fmt.Sprintf("%s", err)) @@ -333,10 +338,16 @@ func getFenceURL() string { fenceURL := "http://fence-service/" _, ok := os.LookupEnv("BASE_URL") if ok { - fenceURL = "https://" + os.Getenv("BASE_URL") + "/user" + fenceURL = "https://" + os.Getenv("BASE_URL") + "/user/" } - if !strings.HasSuffix(fenceURL, "/") { - fenceURL += "/" + return fenceURL +} + +func getAmbassadorURL() string { + fenceURL := "http://ambassador-service/" + _, ok := os.LookupEnv("BASE_URL") + if ok { + fenceURL = "https://" + os.Getenv("BASE_URL") + "/lw-workspace/proxy/" } return fenceURL } @@ -373,7 +384,7 @@ func deleteAPIKeyWithContext(ctx context.Context, accessToken string, apiKeyID s } fenceDeleteAPIKeyURL := getFenceURL() + "credentials/api/" + apiKeyID - resp, err := MakeARequestWithContext(ctx, "DELETE", fenceDeleteAPIKeyURL, accessToken, "application/json", nil, nil) + resp, err := MakeARequestWithContext(ctx, "DELETE", fenceDeleteAPIKeyURL, accessToken, "", nil, nil) if err != nil { return err } @@ -382,3 +393,30 @@ func deleteAPIKeyWithContext(ctx context.Context, accessToken string, apiKeyID s } return nil } + +func getKernelIdleTimeWithContext(ctx context.Context, accessToken string) (lastActivityTime int64, err error) { + if accessToken == "" { + return -1, errors.New("No valid access token") + } + + workspaceKernelStatusURL := getAmbassadorURL() + "api/status" + resp, err := MakeARequestWithContext(ctx, "GET", workspaceKernelStatusURL, accessToken, "", nil, nil) + if err != nil { + return -1, err + } + if resp != nil && resp.StatusCode != 200 { + return -1, errors.New("Error occurred when getting workspace kernel status with error code " + strconv.Itoa(resp.StatusCode)) + } + defer resp.Body.Close() + + workspaceKernelStatusResponse := new(WorkspaceKernelStatusStruct) + err = json.NewDecoder(resp.Body).Decode(workspaceKernelStatusResponse) + if err != nil { + return -1, errors.New("Unable to decode workspace kernel status response: " + err.Error()) + } + lastAct, err := time.Parse(time.RFC3339, workspaceKernelStatusResponse.LastActivityTime) + if err != nil { + return -1, errors.New("Unable to parse last activity time: " + err.Error()) + } + return lastAct.Unix(), nil +} diff --git a/hatchery/pods.go b/hatchery/pods.go index b050a164..e5931e60 100644 --- a/hatchery/pods.go +++ b/hatchery/pods.go @@ -4,7 +4,10 @@ import ( "context" "encoding/base64" "fmt" + "log" "os" + "strconv" + "strings" k8sv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -60,9 +63,11 @@ type ContainerStates struct { } type WorkspaceStatus struct { - Status string `json:"status"` - Conditions []PodConditions `json:"conditions"` - ContainerStates []ContainerStates `json:"containerStates"` + Status string `json:"status"` + Conditions []PodConditions `json:"conditions"` + ContainerStates []ContainerStates `json:"containerStates"` + IdleTimeLimit int `json:"idleTimeLimit"` + LastActivityTime int64 `json:"lastActivityTime"` } func getPodClient(ctx context.Context, userName string) (corev1.CoreV1Interface, bool, error) { @@ -157,7 +162,7 @@ func checkPodReadiness(pod *k8sv1.Pod) bool { return true } -func podStatus(ctx context.Context, userName string) (*WorkspaceStatus, error) { +func podStatus(ctx context.Context, userName string, accessToken string) (*WorkspaceStatus, error) { status := WorkspaceStatus{} podClient, isExternalClient, err := getPodClient(ctx, userName) if err != nil { @@ -220,6 +225,25 @@ func podStatus(ctx context.Context, userName string) (*WorkspaceStatus, error) { } status.ContainerStates = containerStates } + for _, container := range pod.Spec.Containers { + for _, arg := range container.Args { + if strings.Contains(arg, "shutdown_no_activity_timeout=") { + argSplit := strings.Split(arg, "=") + idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) + if err == nil { + status.IdleTimeLimit = idleTimeLimit + lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) + status.LastActivityTime = lastActivityTime + if err != nil { + log.Println(err.Error()) + } + } else { + log.Println(err.Error()) + } + break + } + } + } default: fmt.Printf("Unknown pod status for %s: %s\n", podName, string(pod.Status.Phase)) } @@ -227,8 +251,8 @@ func podStatus(ctx context.Context, userName string) (*WorkspaceStatus, error) { return &status, nil } -func statusK8sPod(ctx context.Context, userName string) (*WorkspaceStatus, error) { - status, err := podStatus(ctx, userName) +func statusK8sPod(ctx context.Context, userName string, accessToken string) (*WorkspaceStatus, error) { + status, err := podStatus(ctx, userName, accessToken) if err != nil { status.Status = fmt.Sprintf("%v", err) Config.Logger.Printf("Error getting status: %v", err) @@ -236,7 +260,7 @@ func statusK8sPod(ctx context.Context, userName string) (*WorkspaceStatus, error return status, nil } -func deleteK8sPod(ctx context.Context, accessToken string, userName string) error { +func deleteK8sPod(ctx context.Context, userName string, accessToken string) error { podClient, _, err := getPodClient(ctx, userName) if err != nil { return err @@ -630,15 +654,15 @@ func scaleEKSNodes(ctx context.Context, userName string, scale int) { } } -func createK8sPod(ctx context.Context, hash string, accessToken string, userName string) error { +func createK8sPod(ctx context.Context, hash string, userName string, accessToken string) error { if payModelExistsForUser(userName) { - return createExternalK8sPod(ctx, hash, accessToken, userName) + return createExternalK8sPod(ctx, hash, userName, accessToken) } else { - return createLocalK8sPod(ctx, hash, accessToken, userName) + return createLocalK8sPod(ctx, hash, userName, accessToken) } } -func createLocalK8sPod(ctx context.Context, hash string, accessToken string, userName string) error { +func createLocalK8sPod(ctx context.Context, hash string, userName string, accessToken string) error { hatchApp := Config.ContainersMap[hash] var extraVars []k8sv1.EnvVar @@ -744,7 +768,7 @@ func createLocalK8sPod(ctx context.Context, hash string, accessToken string, use return nil } -func createExternalK8sPod(ctx context.Context, hash string, accessToken string, userName string) error { +func createExternalK8sPod(ctx context.Context, hash string, userName string, accessToken string) error { hatchApp := Config.ContainersMap[hash] podClient, err := NewEKSClientset(ctx, userName) From fc3ae21cf963ce00c9aba492bc817f71e359c820 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Sep 2021 16:52:21 -0500 Subject: [PATCH 03/11] fix: typo --- hatchery/hatchery.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index dbd3bf32..4f088665 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -344,12 +344,12 @@ func getFenceURL() string { } func getAmbassadorURL() string { - fenceURL := "http://ambassador-service/" + ambassadorURL := "http://ambassador-service/" _, ok := os.LookupEnv("BASE_URL") if ok { - fenceURL = "https://" + os.Getenv("BASE_URL") + "/lw-workspace/proxy/" + ambassadorURL = "https://" + os.Getenv("BASE_URL") + "/lw-workspace/proxy/" } - return fenceURL + return ambassadorURL } func getAPIKeyWithContext(ctx context.Context, accessToken string) (apiKey *APIKeyStruct, err error) { From f4e8a6e06b6cc94400446862ec7cfc9a4b97d191 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Mon, 13 Sep 2021 17:39:57 -0500 Subject: [PATCH 04/11] fix: status --- hatchery/ecs.go | 25 +++++++++++++------------ hatchery/hatchery.go | 15 +++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 71a28e3f..512ff35d 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -107,9 +107,20 @@ func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { // Status of workspace running in ECS func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, accessToken string) (*WorkspaceStatus, error) { status := WorkspaceStatus{} + statusMap := map[string]string{ + "ACTIVE": "Running", + "DRAINING": "Terminating", + "STOPPED": "Not Found", + "INACTIVE": "Not Found", + } + statusMessage := "INACTIVE" + status.Status = statusMap[statusMessage] + status.IdleTimeLimit = -1 + status.LastActivityTime = -1 + cluster, err := sess.findEcsCluster(userName) if err != nil { - return nil, err + return &status, err } service, err := sess.svc.DescribeServices(&ecs.DescribeServicesInput{ Cluster: cluster.ClusterName, @@ -118,12 +129,9 @@ func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, acce }, }) if err != nil { - return nil, err + return &status, err } - statusMessage := "INACTIVE" - status.IdleTimeLimit = -1 - status.LastActivityTime = -1 var taskDefName string if len(service.Services) > 0 { statusMessage = *service.Services[0].Status @@ -172,13 +180,6 @@ func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, acce Config.Logger.Printf("No service found for user %s", userName) } - statusMap := map[string]string{ - "ACTIVE": "Running", - "DRAINING": "Terminating", - "STOPPED": "Not Found", - "INACTIVE": "Not Found", - } - status.Status = statusMap[statusMessage] // TODO: last activity time for ECS return &status, nil diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index 4f088665..a7446d36 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -286,15 +286,14 @@ func statusEcs(ctx context.Context, w http.ResponseWriter, userName string, acce if err != nil { Config.Logger.Printf("Error: %s", err) fmt.Fprintf(w, fmt.Sprintf("%s", err)) - } else { - out, err := json.Marshal(result) - if err != nil { - http.Error(w, err.Error(), 500) - return - } - - fmt.Fprintf(w, string(out)) } + out, err := json.Marshal(result) + if err != nil { + http.Error(w, err.Error(), 500) + return + } + + fmt.Fprintf(w, string(out)) } else { http.Error(w, "Paymodel has not been setup for user", 404) } From dae0decf45cd654f8102ca7f2aa763149c183134 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Sep 2021 09:48:04 -0500 Subject: [PATCH 05/11] fix up --- hatchery/hatchery.go | 1 - 1 file changed, 1 deletion(-) diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index a7446d36..259288d8 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -285,7 +285,6 @@ func statusEcs(ctx context.Context, w http.ResponseWriter, userName string, acce result, err := svc.statusEcsWorkspace(ctx, userName, accessToken) if err != nil { Config.Logger.Printf("Error: %s", err) - fmt.Fprintf(w, fmt.Sprintf("%s", err)) } out, err := json.Marshal(result) if err != nil { From 810ec16ff3c0554ed77d8f09dae51673fb23d290 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Sep 2021 11:24:13 -0500 Subject: [PATCH 06/11] fix: check time when running --- hatchery/ecs.go | 64 +++++++++++++++++++++++--------------------- hatchery/hatchery.go | 2 +- hatchery/pods.go | 38 +++++++++++++------------- 3 files changed, 53 insertions(+), 51 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 512ff35d..3d3c3fdb 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -135,44 +135,46 @@ func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, acce var taskDefName string if len(service.Services) > 0 { statusMessage = *service.Services[0].Status - taskDefName = *service.Services[0].TaskDefinition - if taskDefName == "" { - Config.Logger.Printf("No task definition found for user %s", userName) - } else { - desTaskDefOutput, err := sess.svc.DescribeTaskDefinition(&ecs.DescribeTaskDefinitionInput{ - TaskDefinition: &taskDefName, - }) - if err == nil { - containerDefs := desTaskDefOutput.TaskDefinition.ContainerDefinitions - if len(containerDefs) > 0 { - args := containerDefs[0].Command - if len(args) > 0 { - for i, arg := range args { - if strings.Contains(*arg, "shutdown_no_activity_timeout=") { - Config.Logger.Printf("Found kernel idle shutdown time in args. Attempting to get last activity time\n") - argSplit := strings.Split(*arg, "=") - idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) - if err == nil { - status.IdleTimeLimit = idleTimeLimit - lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) - status.LastActivityTime = lastActivityTime - if err != nil { + if statusMessage == "ACTIVE" { + taskDefName = *service.Services[0].TaskDefinition + if taskDefName == "" { + Config.Logger.Printf("No task definition found for user %s", userName) + } else { + desTaskDefOutput, err := sess.svc.DescribeTaskDefinition(&ecs.DescribeTaskDefinitionInput{ + TaskDefinition: &taskDefName, + }) + if err == nil { + containerDefs := desTaskDefOutput.TaskDefinition.ContainerDefinitions + if len(containerDefs) > 0 { + args := containerDefs[0].Command + if len(args) > 0 { + for i, arg := range args { + if strings.Contains(*arg, "shutdown_no_activity_timeout=") { + Config.Logger.Printf("Found kernel idle shutdown time in args. Attempting to get last activity time\n") + argSplit := strings.Split(*arg, "=") + idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) + if err == nil { + status.IdleTimeLimit = idleTimeLimit + lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) + status.LastActivityTime = lastActivityTime + if err != nil { + Config.Logger.Println(err.Error()) + } + } else { Config.Logger.Println(err.Error()) } - } else { - Config.Logger.Println(err.Error()) + break + } + if i == len(args)-1 { + Config.Logger.Printf("Unable to find kernel idle shutdown time in args\n") } - break - } - if i == len(args)-1 { - Config.Logger.Printf("Unable to find kernel idle shutdown time in args\n") } + } else { + Config.Logger.Printf("No env vars found for task definition %s\n", taskDefName) } } else { - Config.Logger.Printf("No env vars found for task definition %s\n", taskDefName) + Config.Logger.Printf("No container definition found for task definition %s\n", taskDefName) } - } else { - Config.Logger.Printf("No container definition found for task definition %s\n", taskDefName) } } } diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index 259288d8..3a94e98c 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -236,7 +236,7 @@ func launchEcs(w http.ResponseWriter, r *http.Request) { if payModelExistsForUser(userName) { result, err := launchEcsWorkspace(r.Context(), userName, hash, accessToken) if err != nil { - fmt.Fprintf(w, fmt.Sprintf("%s", err)) + http.Error(w, fmt.Sprintf("%s", err), 500) Config.Logger.Printf("Error: %s", err) } diff --git a/hatchery/pods.go b/hatchery/pods.go index e5931e60..2fbeca4c 100644 --- a/hatchery/pods.go +++ b/hatchery/pods.go @@ -209,6 +209,25 @@ func podStatus(ctx context.Context, userName string, accessToken string) (*Works allReady := checkPodReadiness(pod) if allReady == true { status.Status = "Running" + for _, container := range pod.Spec.Containers { + for _, arg := range container.Args { + if strings.Contains(arg, "shutdown_no_activity_timeout=") { + argSplit := strings.Split(arg, "=") + idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) + if err == nil { + status.IdleTimeLimit = idleTimeLimit + lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) + status.LastActivityTime = lastActivityTime + if err != nil { + log.Println(err.Error()) + } + } else { + log.Println(err.Error()) + } + break + } + } + } } else { status.Status = "Launching" conditions := make([]PodConditions, len(pod.Status.Conditions)) @@ -225,25 +244,6 @@ func podStatus(ctx context.Context, userName string, accessToken string) (*Works } status.ContainerStates = containerStates } - for _, container := range pod.Spec.Containers { - for _, arg := range container.Args { - if strings.Contains(arg, "shutdown_no_activity_timeout=") { - argSplit := strings.Split(arg, "=") - idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) - if err == nil { - status.IdleTimeLimit = idleTimeLimit - lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) - status.LastActivityTime = lastActivityTime - if err != nil { - log.Println(err.Error()) - } - } else { - log.Println(err.Error()) - } - break - } - } - } default: fmt.Printf("Unknown pod status for %s: %s\n", podName, string(pod.Status.Phase)) } From 034aad5c44a2a16f7fc6a4f1cad8861ade3877f2 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Sep 2021 15:10:31 -0500 Subject: [PATCH 07/11] feat: go 1.17 and milli time --- Dockerfile | 2 +- go.mod | 18 +++++++++++++++++- go.sum | 3 --- hatchery/hatchery.go | 2 +- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index c4071150..d94b3313 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/cdis/golang:1.14 as build-deps +FROM quay.io/cdis/golang:1.17-bullseye as build-deps RUN apt-get update && apt-get install -y --no-install-recommends \ vim diff --git a/go.mod b/go.mod index b13a4766..72d91f92 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/uc-cdis/hatchery -go 1.14 +go 1.17 require ( github.com/DataDog/datadog-go v4.8.1+incompatible // indirect @@ -36,6 +36,22 @@ require ( sigs.k8s.io/aws-iam-authenticator v0.5.3 ) +require ( + github.com/DataDog/gostackparse v0.5.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/google/gofuzz v1.2.0 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/philhofer/fwd v1.1.1 // indirect + golang.org/x/text v0.3.6 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect + sigs.k8s.io/yaml v1.2.0 // indirect +) + replace ( github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible github.com/go-logr/logr => github.com/go-logr/logr v0.2.1 diff --git a/go.sum b/go.sum index cfad2265..77ed047a 100644 --- a/go.sum +++ b/go.sum @@ -124,7 +124,6 @@ github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dp github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/gofrs/flock v0.7.0 h1:pGFUjl501gafK9HBt1VGL1KCOd/YhIooID+xgyJCf3g= github.com/gofrs/flock v0.7.0/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= @@ -192,7 +191,6 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20210125172800-10e9aeb4a998 h1:ruQkWz0PK91vTVrWtzAgv3VqTMgIN1FAIvwWr5MY+GQ= github.com/google/pprof v0.0.0-20210125172800-10e9aeb4a998/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210804190019-f964ff605595 h1:uNrRgpnKjTfxu4qHaZAAs3eKTYV1EzGF3dAykpnxgDE= github.com/google/pprof v0.0.0-20210804190019-f964ff605595/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= @@ -311,7 +309,6 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.7.0 h1:ShrD1U9pZB12TX0cVy0DtePoCH97K8EtX+mg7ZARUtM= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index 3a94e98c..4e898bd6 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -416,5 +416,5 @@ func getKernelIdleTimeWithContext(ctx context.Context, accessToken string) (last if err != nil { return -1, errors.New("Unable to parse last activity time: " + err.Error()) } - return lastAct.Unix(), nil + return lastAct.UnixMilli(), nil } From 8ea8a0dc904ca50a8bd8119cd520e80e05304d96 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Tue, 14 Sep 2021 15:14:04 -0500 Subject: [PATCH 08/11] chore: all milli sec --- hatchery/ecs.go | 2 +- hatchery/hatchery.go | 2 +- hatchery/pods.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 3d3c3fdb..1e904c83 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -154,7 +154,7 @@ func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, acce argSplit := strings.Split(*arg, "=") idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) if err == nil { - status.IdleTimeLimit = idleTimeLimit + status.IdleTimeLimit = idleTimeLimit * 1000 lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) status.LastActivityTime = lastActivityTime if err != nil { diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index 4e898bd6..38ba01d6 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -118,7 +118,7 @@ func options(w http.ResponseWriter, r *http.Request) { argSplit := strings.Split(arg, "=") idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) if err == nil { - c.IdleTimeLimit = idleTimeLimit + c.IdleTimeLimit = idleTimeLimit * 1000 } break } diff --git a/hatchery/pods.go b/hatchery/pods.go index 2fbeca4c..fc890439 100644 --- a/hatchery/pods.go +++ b/hatchery/pods.go @@ -215,7 +215,7 @@ func podStatus(ctx context.Context, userName string, accessToken string) (*Works argSplit := strings.Split(arg, "=") idleTimeLimit, err := strconv.Atoi(argSplit[len(argSplit)-1]) if err == nil { - status.IdleTimeLimit = idleTimeLimit + status.IdleTimeLimit = idleTimeLimit * 1000 lastActivityTime, err := getKernelIdleTimeWithContext(ctx, accessToken) status.LastActivityTime = lastActivityTime if err != nil { From 82e63f94156c1dc44e32155307f32c9cb1b5d194 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Thu, 16 Sep 2021 14:42:07 -0500 Subject: [PATCH 09/11] stay with go 1.14 for now --- Dockerfile | 2 +- go.mod | 18 +----------------- hatchery/ecs.go | 1 - hatchery/hatchery.go | 2 +- 4 files changed, 3 insertions(+), 20 deletions(-) diff --git a/Dockerfile b/Dockerfile index d94b3313..c4071150 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/cdis/golang:1.17-bullseye as build-deps +FROM quay.io/cdis/golang:1.14 as build-deps RUN apt-get update && apt-get install -y --no-install-recommends \ vim diff --git a/go.mod b/go.mod index 72d91f92..b13a4766 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/uc-cdis/hatchery -go 1.17 +go 1.14 require ( github.com/DataDog/datadog-go v4.8.1+incompatible // indirect @@ -36,22 +36,6 @@ require ( sigs.k8s.io/aws-iam-authenticator v0.5.3 ) -require ( - github.com/DataDog/gostackparse v0.5.0 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/google/gofuzz v1.2.0 // indirect - github.com/jmespath/go-jmespath v0.4.0 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.1 // indirect - github.com/philhofer/fwd v1.1.1 // indirect - golang.org/x/text v0.3.6 // indirect - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect -) - replace ( github.com/dgrijalva/jwt-go => github.com/golang-jwt/jwt v3.2.1+incompatible github.com/go-logr/logr => github.com/go-logr/logr v0.2.1 diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 1279404f..a2b078b7 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -183,7 +183,6 @@ func (sess *CREDS) statusEcsWorkspace(ctx context.Context, userName string, acce } status.Status = statusMap[statusMessage] - // TODO: last activity time for ECS return &status, nil } diff --git a/hatchery/hatchery.go b/hatchery/hatchery.go index 38ba01d6..43b1f964 100644 --- a/hatchery/hatchery.go +++ b/hatchery/hatchery.go @@ -416,5 +416,5 @@ func getKernelIdleTimeWithContext(ctx context.Context, accessToken string) (last if err != nil { return -1, errors.New("Unable to parse last activity time: " + err.Error()) } - return lastAct.UnixMilli(), nil + return lastAct.Unix() * 1000, nil } From de62002a8b21ad3148d389151c03e54f9ee55fdd Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Thu, 16 Sep 2021 16:03:51 -0500 Subject: [PATCH 10/11] fix: try create ECS cluster if not found --- hatchery/ecs.go | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index a2b078b7..61be3426 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -57,9 +57,9 @@ func (input *CreateTaskDefinitionInput) Environment() []*ecs.KeyValuePair { // TODO: Evaluate if this is still this needed.. func (sess *CREDS) launchEcsCluster(userName string) (*ecs.Cluster, error) { svc := sess.svc - cluster_name := strings.ReplaceAll(Config.Config.Sidecar.Env["BASE_URL"], ".", "-") + "-cluster" + clusterName := strings.ReplaceAll(Config.Config.Sidecar.Env["BASE_URL"], ".", "-") + "-cluster" input := &ecs.CreateClusterInput{ - ClusterName: aws.String(cluster_name), + ClusterName: aws.String(clusterName), } result, err := svc.CreateCluster(input) @@ -77,16 +77,37 @@ func (sess *CREDS) launchEcsCluster(userName string) (*ecs.Cluster, error) { func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { svc := sess.svc - cluster_name := strings.ReplaceAll(Config.Config.Sidecar.Env["HOSTNAME"], ".", "-") + "-cluster" - cluster_input := &ecs.DescribeClustersInput{ + clusterName := strings.ReplaceAll(Config.Config.Sidecar.Env["BASE_URL"], ".", "-") + "-cluster" + clusterInput := &ecs.DescribeClustersInput{ Clusters: []*string{ - aws.String(cluster_name), + aws.String(clusterName), }, } - result, err := svc.DescribeClusters(cluster_input) + describeClusterResult, err := svc.DescribeClusters(clusterInput) if err != nil { if aerr, ok := err.(awserr.Error); ok { switch aerr.Code() { + case ecs.ErrCodeClusterNotFoundException: + input := &ecs.CreateClusterInput{ + ClusterName: aws.String(clusterName), + } + + _, err := svc.CreateCluster(input) + if err != nil { + if aerr, ok := err.(awserr.Error); ok { + switch aerr.Code() { + default: + return nil, aerr + } + } + return nil, err + } + Config.Logger.Printf("ECS cluster %s created for user %s", clusterName, userName) + describeClusterResult, err = svc.DescribeClusters(clusterInput) + if err != nil { + Config.Logger.Printf("Error: %s", err) + return nil, err + } default: return nil, aerr } @@ -96,11 +117,11 @@ func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { Config.Logger.Println(err.Error()) } } - if len(result.Failures) > 0 { - Config.Logger.Printf("ECS cluster named %s not found", cluster_name) - return nil, errors.New(fmt.Sprintf("ECS cluster named %s not found", cluster_name)) + if len(describeClusterResult.Failures) > 0 { + Config.Logger.Printf("ECS cluster named %s not found", clusterName) + return nil, errors.New(fmt.Sprintf("ECS cluster named %s not found", clusterName)) } else { - return result.Clusters[0], nil + return describeClusterResult.Clusters[0], nil } } From 24879b8e1fd0cb567faabcdc4138f994ea0943b2 Mon Sep 17 00:00:00 2001 From: Mingfei Shao Date: Thu, 16 Sep 2021 16:39:06 -0500 Subject: [PATCH 11/11] fix: logic --- hatchery/ecs.go | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/hatchery/ecs.go b/hatchery/ecs.go index 61be3426..c2644f56 100644 --- a/hatchery/ecs.go +++ b/hatchery/ecs.go @@ -87,7 +87,19 @@ func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { if err != nil { if aerr, ok := err.(awserr.Error); ok { switch aerr.Code() { - case ecs.ErrCodeClusterNotFoundException: + default: + return nil, aerr + } + } else { + // Print the error, cast err to awserr.Error to get the Code and + // Message from an error. + Config.Logger.Println(err.Error()) + } + } + if len(describeClusterResult.Failures) > 0 { + for _, failure := range describeClusterResult.Failures { + if *failure.Reason == "MISSING" { + Config.Logger.Printf("ECS cluster named %s not found, trying to create this ECS cluster", clusterName) input := &ecs.CreateClusterInput{ ClusterName: aws.String(clusterName), } @@ -97,29 +109,21 @@ func (sess *CREDS) findEcsCluster(userName string) (*ecs.Cluster, error) { if aerr, ok := err.(awserr.Error); ok { switch aerr.Code() { default: - return nil, aerr + return nil, errors.New(fmt.Sprintf("Cannot create ECS cluster named %s: %s", clusterName, aerr.Code())) } } - return nil, err + return nil, errors.New(fmt.Sprintf("Cannot create ECS cluster named %s: %s", clusterName, err.Error())) } Config.Logger.Printf("ECS cluster %s created for user %s", clusterName, userName) describeClusterResult, err = svc.DescribeClusters(clusterInput) - if err != nil { - Config.Logger.Printf("Error: %s", err) - return nil, err + if err != nil || len(describeClusterResult.Failures) > 0 { + return nil, errors.New(fmt.Sprintf("Still cannot find ECS cluster named %s: %s", clusterName, err.Error())) } - default: - return nil, aerr + return describeClusterResult.Clusters[0], nil } - } else { - // Print the error, cast err to awserr.Error to get the Code and - // Message from an error. - Config.Logger.Println(err.Error()) } - } - if len(describeClusterResult.Failures) > 0 { - Config.Logger.Printf("ECS cluster named %s not found", clusterName) - return nil, errors.New(fmt.Sprintf("ECS cluster named %s not found", clusterName)) + Config.Logger.Printf("ECS cluster named %s cannot be described", clusterName) + return nil, errors.New(fmt.Sprintf("ECS cluster named %s cannot be described", clusterName)) } else { return describeClusterResult.Clusters[0], nil } @@ -410,11 +414,13 @@ func launchEcsWorkspace(ctx context.Context, userName string, hash string, acces } taskDefResult, err := svc.CreateTaskDefinition(&taskDef, userName, hash) if err != nil { + deleteAPIKeyWithContext(ctx, accessToken, apiKey.KeyID) return "", err } launchTask, err := svc.launchService(ctx, taskDefResult, userName, hash) if err != nil { + deleteAPIKeyWithContext(ctx, accessToken, apiKey.KeyID) return "", err }