From 4ae11edbe725c52005587091b39e3f84816641fb Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Thu, 19 Sep 2024 20:47:11 -0700 Subject: [PATCH] check the logs of container 'pytorch' and 'storage-initializer' Signed-off-by: helenxie-bit --- .github/workflows/e2e-test-tune-api.yaml | 1 - .../scripts/gh-actions/run-e2e-tune-api.py | 16 ++++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e-test-tune-api.yaml b/.github/workflows/e2e-test-tune-api.yaml index 32cf0a12add..7b41130f499 100644 --- a/.github/workflows/e2e-test-tune-api.yaml +++ b/.github/workflows/e2e-test-tune-api.yaml @@ -52,7 +52,6 @@ jobs: run: | kubectl get pods -n default POD_NAME=$(kubectl get pods -n default --no-headers -o custom-columns=":metadata.name" | grep tune-example-2 | grep master) - echo "Fetching logs for pod: $POD_NAME" kubectl describe pod $POD_NAME -n default kubectl top pods $POD_NAME kubectl get events -n default | grep "tune-example-2" diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py index 4168a8e3786..e5eb39c0d4a 100644 --- a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py +++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py @@ -31,12 +31,24 @@ def get_experiment_pods_logs(katib_client: KatibClient, exp_name: str, exp_names logging.info(f"Fetching logs for pod: {pod.metadata.name}") try: # Specify the container name when retrieving logs - pod_logs = v1.read_namespaced_pod_log( + pod_logs1 = v1.read_namespaced_pod_log( name=pod.metadata.name, namespace=exp_namespace, container="metrics-logger-and-collector" # Specify the desired container ) - logging.info(f"Logs for pod {pod.metadata.name}:\n{pod_logs}") + logging.info(f"Logs for pod {pod.metadata.name}:\n{pod_logs1}") + pod_logs2 = v1.read_namespaced_pod_log( + name=pod.metadata.name, + namespace=exp_namespace, + container="pytorch" + ) + logging.info(f"Logs for pod 
{pod.metadata.name}:\n{pod_logs2}") + pod_logs3 = v1.read_namespaced_pod_log( + name=pod.metadata.name, + namespace=exp_namespace, + container="storage-initializer" + ) + logging.info(f"Logs for pod {pod.metadata.name}:\n{pod_logs3}") except Exception as e: logging.error(f"Failed to get logs for pod {pod.metadata.name}: {str(e)}")