diff --git a/.gitignore b/.gitignore
index 30c060ec47..37c834877e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,7 @@ __pycache__
*.swp
*.swo
.DS_Store
+*.sln
+*.pyproj.user
+*.pyproj
+*.vs
diff --git a/build/core/build_center.py b/build/core/build_center.py
index 73d89530aa..d1c4f437f1 100644
--- a/build/core/build_center.py
+++ b/build/core/build_center.py
@@ -23,6 +23,7 @@
import os
import sys
+import traceback
import logging
import logging.config
@@ -122,8 +123,9 @@ def build_center(self):
build_worker.build_single_component(self.graph.services[item])
self.logger.info("Build all components succeed")
- except:
+        except Exception as err:
self.logger.error("Build all components failed")
+ traceback.print_exc()
sys.exit(1)
finally:
@@ -156,7 +158,3 @@ def push_center(self):
self.docker_cli.docker_image_tag(image,self.build_config['dockerRegistryInfo']['dockerTag'])
self.docker_cli.docker_image_push(image,self.build_config['dockerRegistryInfo']['dockerTag'])
self.logger.info("Push image:{0} successfully".format(image))
-
-
-
-
diff --git a/deployment/k8sPaiLibrary/maintaintool/restart-etcd-server.sh b/deployment/k8sPaiLibrary/maintaintool/restart-etcd-server.sh
index 70f6c377e3..34212318f3 100755
--- a/deployment/k8sPaiLibrary/maintaintool/restart-etcd-server.sh
+++ b/deployment/k8sPaiLibrary/maintaintool/restart-etcd-server.sh
@@ -18,3 +18,4 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
cp etcd-reconfiguration-restart/etcd.yaml /etc/kubernetes/manifests/
+
diff --git a/deployment/k8sPaiLibrary/template/kubelet.sh.template b/deployment/k8sPaiLibrary/template/kubelet.sh.template
index 22e6125bb4..8cb2d59b79 100644
--- a/deployment/k8sPaiLibrary/template/kubelet.sh.template
+++ b/deployment/k8sPaiLibrary/template/kubelet.sh.template
@@ -66,7 +66,9 @@ docker run \
--image-pull-progress-deadline=10m \
--docker-root=${DOCKER_ROOT_DIR_FOR_KUBELET} \
--system-reserved=memory=3Gi \
- --eviction-hard="memory.available<5%,nodefs.available<5%,imagefs.available<5%,nodefs.inodesFree<5%,imagefs.inodesFree<5%" \
+ --eviction-hard= \
+ --image-gc-high-threshold=100 \
+ --image-gc-low-threshold=95 \
--healthz-bind-address="0.0.0.0" \
--healthz-port="10248" \
--feature-gates="DevicePlugins=true,TaintBasedEvictions=true" \
diff --git a/deployment/quick-start/services-configuration.yaml.template b/deployment/quick-start/services-configuration.yaml.template
index 842e455214..81f0993f70 100644
--- a/deployment/quick-start/services-configuration.yaml.template
+++ b/deployment/quick-start/services-configuration.yaml.template
@@ -128,3 +128,8 @@ rest-server:
# uncomment following section if you want to customize the port of pylon
# pylon:
# port: 80
+
+# uncomment following section if you want to customize the threshold and interval of cleaner
+# cleaner:
+# threshold: 94
+# interval: 60
\ No newline at end of file
diff --git a/examples/cluster-configuration/services-configuration.yaml b/examples/cluster-configuration/services-configuration.yaml
index 8e22bdc8d0..2c6fed8bba 100644
--- a/examples/cluster-configuration/services-configuration.yaml
+++ b/examples/cluster-configuration/services-configuration.yaml
@@ -128,3 +128,8 @@ rest-server:
# uncomment following section if you want to customize the port of pylon
# pylon:
# port: 80
+
+# uncomment following section if you want to customize the threshold and interval of cleaner
+# cleaner:
+# threshold: 94
+# interval: 60
\ No newline at end of file
diff --git a/paictl.py b/paictl.py
index 1f68602c38..ba05218549 100755
--- a/paictl.py
+++ b/paictl.py
@@ -99,4 +99,3 @@ def main(args):
setup_logging()
main(sys.argv[1:])
-
diff --git a/src/base-image/build/base-image.dockerfile b/src/base-image/build/base-image.dockerfile
index 4ffaef3d3f..8d76018a81 100644
--- a/src/base-image/build/base-image.dockerfile
+++ b/src/base-image/build/base-image.dockerfile
@@ -38,8 +38,8 @@ RUN apt-get -y update && \
python-dev \
python-pip \
python-mysqldb \
- openjdk-8-jre=8u191-b12-0ubuntu0.16.04.1 \
- openjdk-8-jdk=8u191-b12-0ubuntu0.16.04.1 \
+ openjdk-8-jre \
+ openjdk-8-jdk \
openssh-server \
openssh-client \
git \
diff --git a/src/cleaner/cleaner_main.py b/src/cleaner/cleaner_main.py
index d4e8806d7e..2af78875ff 100644
--- a/src/cleaner/cleaner_main.py
+++ b/src/cleaner/cleaner_main.py
@@ -19,7 +19,7 @@
import argparse
import os
from datetime import timedelta
-from cleaner.scripts import clean_docker_cache, check_deleted_files
+from cleaner.scripts.clean_docker import DockerCleaner
from cleaner.worker import Worker
from cleaner.utils.logger import LoggerMixin
from cleaner.utils import common
@@ -76,33 +76,21 @@ def sync(self):
time.sleep(1)
-def get_worker(arg):
- if arg == "docker_cache":
- worker = Worker(clean_docker_cache.check_and_clean, 50, timeout=timedelta(minutes=10), cool_down_time=1800)
- elif arg == "deleted_files":
- worker = Worker(check_deleted_files.list_and_check_files, None, timeout=timedelta(minutes=10), cool_down_time=1800)
- else:
- raise ValueError("arguments %s is not supported.", arg)
- return worker
-
-
-liveness_files = {
- "docker_cache": "docker-cache-cleaner-healthy",
- "deleted_files": "deleted-files-cleaner-healthy"
-}
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("option", help="the functions currently supported: [docker_cache | deleted_files]")
+    parser.add_argument("-t", "--threshold", help="the disk usage percentage that triggers the cleaner")
+    parser.add_argument("-i", "--interval", help="the interval in seconds between disk usage checks")
args = parser.parse_args()
common.setup_logging()
- cleaner = Cleaner(liveness_files[args.option])
- cleaner.add_worker(args.option, get_worker(args.option))
- cleaner.start()
- cleaner.sync()
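+    # argparse yields strings here; DockerCleaner converts threshold and interval to int internally.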
+ cleaner = DockerCleaner(args.threshold, args.interval, timedelta(minutes=10))
+ cleaner.run()
if __name__ == "__main__":
diff --git a/src/cleaner/config/cleaner.md b/src/cleaner/config/cleaner.md
new file mode 100644
index 0000000000..4acd44a15b
--- /dev/null
+++ b/src/cleaner/config/cleaner.md
@@ -0,0 +1,54 @@
+## Cleaner section parser
+
+- [Default Configuration](#D_Config)
+- [How to Configure](#HT_Config)
+- [Generated Configuration](#G_Config)
+- [Data Table](#T_config)
+
+#### Default configuration
+
+[cleaner default configuration](cleaner.yaml)
+
+#### How to configure cleaner section in service-configuration.yaml
+
+All configuration in this section is optional. If you want to customize these values, configure them in service-configuration.yaml.
+
+For example, if you want to use a threshold different from the default value 94, add the following to your service-configuration.yaml:
+```yaml
+cleaner:
+ threshold: new-value
+ interval: new-value
+```
+
+#### Generated Configuration
+
+After parsing, the object model looks like:
+```yaml
+cleaner:
+ threshold: 94
+ interval: 60
+```
+
+
+#### Data Table
+
+| Data in Configuration File | Data in Cluster Object Model | Data in Jinja2 Template | Data type |
+| --- | --- | --- | --- |
+| cleaner.threshold | com["cleaner"]["threshold"] | cluster_cfg["cleaner"]["threshold"] | Int |
+| cleaner.interval | com["cleaner"]["interval"] | cluster_cfg["cleaner"]["interval"] | Int |
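+
+For reference, the deploy template in this change consumes these values roughly as follows (a sketch, excerpted from `src/cleaner/deploy/cleaner.yaml.template`):
+
+```yaml
+# The cleaner container receives the configured threshold and interval as command-line arguments.
+args:
+  - -t {{ cluster_cfg["cleaner"]["threshold"] }}
+  - -i {{ cluster_cfg["cleaner"]["interval"] }}
+```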
diff --git a/src/cleaner/config/cleaner.py b/src/cleaner/config/cleaner.py
index 71c8050638..2eccd24f8a 100644
--- a/src/cleaner/config/cleaner.py
+++ b/src/cleaner/config/cleaner.py
@@ -18,23 +18,40 @@
import logging
import logging.config
+import copy
+class Cleaner(object):
-class Cleaner:
-
- def __init__(self, cluster_configuration, service_configuration, default_service_configuraiton):
+ def __init__(self, cluster_conf, service_conf, default_service_conf):
self.logger = logging.getLogger(__name__)
-
- self.cluster_configuration = cluster_configuration
+ self.cluster_conf = cluster_conf
+ self.service_conf = service_conf
+ self.default_service_conf = default_service_conf
def validation_pre(self):
return True, None
def run(self):
- com = {}
-
- return com
-
- def validation_post(self, cluster_object_model):
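+        # Overlay the user-provided cleaner section from service-configuration.yaml on top of the defaults in cleaner.yaml.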
+ result = copy.deepcopy(self.default_service_conf)
+ result.update(self.service_conf)
+ return result
+
+ def validation_post(self, conf):
+ threshold = conf["cleaner"].get("threshold")
+        if not isinstance(threshold, int):
+            msg = "expect threshold in cleaner to be int but got %s with type %s" % \
+                (threshold, type(threshold))
+            return False, msg
+        if threshold < 0 or threshold > 100:
+            msg = "expect threshold in [0, 100] but got %s" % threshold
+            return False, msg
+
+        interval = conf["cleaner"].get("interval")
+        if not isinstance(interval, int):
+            msg = "expect interval in cleaner to be int but got %s with type %s" % \
+                (interval, type(interval))
+            return False, msg
+
return True, None
diff --git a/src/cleaner/config/cleaner.yaml b/src/cleaner/config/cleaner.yaml
index dbde8d9a14..4f9a94e88b 100644
--- a/src/cleaner/config/cleaner.yaml
+++ b/src/cleaner/config/cleaner.yaml
@@ -15,3 +15,5 @@
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+threshold: 94
+interval: 60
\ No newline at end of file
diff --git a/src/cleaner/deploy/cleaner.yaml.template b/src/cleaner/deploy/cleaner.yaml.template
index 147729677b..0eb259f576 100644
--- a/src/cleaner/deploy/cleaner.yaml.template
+++ b/src/cleaner/deploy/cleaner.yaml.template
@@ -31,42 +31,17 @@ spec:
hostPID: true
hostNetwork: true
containers:
- - name: docker-cache-cleaner
+ - name: docker-cleaner
image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}cleaner:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }}
args:
- - 'docker_cache'
+ - -t {{ cluster_cfg["cleaner"]["threshold"] }}
+ - -i {{ cluster_cfg["cleaner"]["interval"] }}
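+          # -t/-i are rendered from the cleaner section of the cluster configuration (see src/cleaner/config/cleaner.md).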
imagePullPolicy: Always
securityContext:
privileged: True
volumeMounts:
- mountPath: /var/run/docker.sock
name: docker-socket
- livenessProbe:
- exec:
- command:
- - test
- - '`find /tmp/docker-cache-cleaner-healthy -mmin -1`'
- initialDelaySeconds: 60
- periodSeconds: 30
- {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
- resources:
- limits:
- memory: "1Gi"
- {%- endif %}
- - name: deleted-files-cleaner
- image: {{ cluster_cfg["cluster"]["docker-registry"]["prefix"] }}cleaner:{{ cluster_cfg["cluster"]["docker-registry"]["tag"] }}
- args:
- - 'deleted_files'
- imagePullPolicy: Always
- securityContext:
- privileged: True
- livenessProbe:
- exec:
- command:
- - test
- - '`find /tmp/deleted-files-cleaner-healthy -mmin -1`'
- initialDelaySeconds: 60
- periodSeconds: 30
{%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
resources:
limits:
@@ -78,3 +53,8 @@ spec:
- name: docker-socket
hostPath:
path: /var/run/docker.sock
+ tolerations:
+ - key: node.kubernetes.io/memory-pressure
+ operator: "Exists"
+ - key: node.kubernetes.io/disk-pressure
+ operator: "Exists"
\ No newline at end of file
diff --git a/src/cleaner/scripts/clean_docker.py b/src/cleaner/scripts/clean_docker.py
new file mode 100644
index 0000000000..d70251686d
--- /dev/null
+++ b/src/cleaner/scripts/clean_docker.py
@@ -0,0 +1,103 @@
+# Copyright (c) Microsoft Corporation
+# All rights reserved.
+#
+# MIT License
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
+# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+from cleaner.utils.logger import LoggerMixin
+from cleaner.utils.timer import CountdownTimer, Timeout
+from cleaner.utils import common
+from datetime import timedelta
+import subprocess
+import multiprocessing
+import re
+import time
+
+class DockerCleaner(LoggerMixin):
+ def __init__(self, threshold, interval, timeout=timedelta(hours=1)):
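+        # threshold: disk usage percentage that triggers cleaning.
+        # interval: number of seconds to sleep between checks.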
+ self.__threshold = int(threshold)
+ self.__interval = int(interval)
+ self.__timeout = timeout
+
+ def _exec(self):
+ exc = None
+ try:
+ with CountdownTimer(duration=self.__timeout):
+ self.check_and_clean()
+ except Timeout as e:
+ self.logger.error("Cleaner timeout.")
+ exc = e
+ except Exception as e:
+ self.logger.error("Unexpected error to run cleaner.")
+ exc = e
+
+ if exc is not None:
+ self.logger.exception(exc)
+
+ def run(self):
+ while True:
+ # allow a delay before the cleaning
+ time.sleep(self.__interval)
+ self._exec()
+
+
+ def check_disk_usage(self, partition):
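+        # Parse the output of `df -h <partition>`: field 5 (Use%) holds the usage
+        # percentage and field 6 (Mounted on) identifies the partition's line.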
+ df = subprocess.Popen(["df","-h", partition], stdout=subprocess.PIPE)
+ size = 0
+ try:
+ for line in df.stdout:
+ splitline = line.decode().split()
+ if splitline[5] == partition:
+ size = int(splitline[4][:-1])
+ except ValueError:
+ self.logger.error("cannot get disk size, reset size to 0")
+ size = 0
+ self.logger.info("Checking disk, disk usage = {0}%".format(size))
+ return size
+
+
+ def check_and_clean(self):
+ if self.check_disk_usage("/") >= self.__threshold:
+            self.logger.info("Disk usage is above {0}%, trying to remove containers".format(self.__threshold))
+ self.kill_largest_container()
+
+
+ # Clean logic v1: kill largest container
+ white_list = ["k8s_POD", "k8s_kube", "k8s_pylon", "k8s_zookeeper", "k8s_rest-server", "k8s_yarn", "k8s_hadoop", "k8s_job-exporter", "k8s_watchdog", "k8s_grafana", "k8s_node-exporter", "k8s_webportal", "k8s_prometheus", "k8s_nvidia-drivers", "k8s_etcd-container", "k8s_apiserver-container", "k8s_docker-cleaner", "kubelet", "dev-box"]
+ def kill_largest_container(self):
+ containers = []
+        # Only try to stop PAI job containers and user-created containers
+ containers_source = subprocess.Popen(["docker", "ps", "-a", "--format", r'{{.ID}}\t{{.Image}}\t{{.Size}}\t{{.Names}}'], stdout=subprocess.PIPE)
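+        # Each output line is tab-separated: ID, Image, Size, Names. The for/else below keeps a
+        # container only when its name does not start with any white-listed prefix.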
+ for line in containers_source.stdout:
+ splitline = line.split("\t")
+ for prefix in self.white_list:
+ if (splitline[3].startswith(prefix)):
+ break
+ else:
+ size = common.calculate_size(splitline[2].split()[0])
+ containers.append([size, splitline[0], splitline[1]])
+
+ containers.sort(key=lambda x:x[0], reverse=True)
+
+        if len(containers) > 0 and containers[0][0] > 1024**3:
+ self.logger.warning("Kill container {0} due to disk pressure. Container size: {1}".format(containers[0][1], containers[0][0]))
+ subprocess.Popen(["docker", "kill", "--signal=10", containers[0][1]])
+
+            # The container will not stop immediately after the signal, so we cannot remove its docker image right away
+            #container_image = subprocess.Popen(["docker", "inspect", containers[0][1], r"--format='{{.Image}}'"], stdout=subprocess.PIPE).stdout.readline()
+            #subprocess.Popen(["docker", "rmi", container_image])
+ return True
+ else:
+ return False
+
diff --git a/src/cleaner/scripts/reclaimable_docker_cache.sh b/src/cleaner/scripts/reclaimable_docker_cache.sh
index baa85db0f2..a907394d0e 100644
--- a/src/cleaner/scripts/reclaimable_docker_cache.sh
+++ b/src/cleaner/scripts/reclaimable_docker_cache.sh
@@ -28,14 +28,16 @@
#
-# We summer up the result in column 5 (RECLAIMABLE) and return the size in gigabytes.
+# We sum up the RECLAIMABLE sizes reported by `docker system df` and return the total in gigabytes.
-docker system df | \
+docker system df --format "{{.Reclaimable}}" | \
gawk 'BEGIN {s=0}
END {print s}
- match($5, /([0-9]+\.?[0-9]*)(M|G|B)/, a) {
+ match($1, /([0-9]+\.?[0-9]*)(M|G|B|T)/, a) {
if(a[2] == "M")
s += a[1]/1024;
else if(a[2] == "B")
s += a[1]/1024/1024;
+ else if(a[2] == "T")
+ s += a[1]*1024;
else
s += a[1];
}'
diff --git a/src/cleaner/test/cleaner-test-job.json b/src/cleaner/test/cleaner-test-job.json
new file mode 100644
index 0000000000..c71a71e375
--- /dev/null
+++ b/src/cleaner/test/cleaner-test-job.json
@@ -0,0 +1,22 @@
+{
+ "jobName": "cleaner-test-job",
+ "image": "ubuntu",
+ "authFile": "",
+ "dataDir": "",
+ "outputDir": "",
+ "codeDir": "",
+ "virtualCluster": "default",
+ "gpuType": "",
+ "retryCount": 0,
+ "taskRoles": [
+ {
+ "name": "main",
+ "taskNumber": 1,
+ "cpuNumber": 4,
+ "memoryMB": 8192,
+ "gpuNumber": 0,
+ "command": "fallocate -l 200G \"fake_base\"; for var in {1..100}; do fallocate -l 1G \"fake$var\"; sleep 5; done"
+ }
+ ],
+ "jobEnvs": {}
+}
\ No newline at end of file
diff --git a/src/cleaner/utils/common.py b/src/cleaner/utils/common.py
index 70b98942e7..875d74c097 100644
--- a/src/cleaner/utils/common.py
+++ b/src/cleaner/utils/common.py
@@ -22,7 +22,7 @@
import os
import psutil
import signal
-
+import re
def kill_process_tree(pid, time_to_die, logger):
"""
@@ -129,3 +129,9 @@ def setup_logging():
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
+
+
+size_defs={'B':1, 'K':1024, 'M':1024**2, 'G':1024**3, 'T':1024**4, 'b':1, 'k':1024, 'm':1024**2, 'g':1024**3, 't':1024**4}
+def calculate_size(size_str):
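+    # Convert a docker size string such as "1.2GB" or "512kB" into a number of bytes.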
+ size_search = re.search(r"[BbKkMmGgTt]", size_str)
+ return float(size_str[0:size_search.start()]) * size_defs[size_search.group()]
\ No newline at end of file
diff --git a/src/dev-box/build/dev-box.dockerfile b/src/dev-box/build/dev-box.dockerfile
index 8d0e97ff29..3427011a20 100644
--- a/src/dev-box/build/dev-box.dockerfile
+++ b/src/dev-box/build/dev-box.dockerfile
@@ -39,8 +39,8 @@ RUN apt-get -y update && \
python-dev \
python-pip \
python-mysqldb \
- openjdk-8-jre=8u191-b12-0ubuntu0.16.04.1 \
- openjdk-8-jdk=8u191-b12-0ubuntu0.16.04.1 \
+ openjdk-8-jre \
+ openjdk-8-jdk \
openssh-server \
openssh-client \
git \
diff --git a/src/hadoop-ai/build/hadoop-ai b/src/hadoop-ai/build/hadoop-ai
index dda11091e1..3a9d0709ce 100644
--- a/src/hadoop-ai/build/hadoop-ai
+++ b/src/hadoop-ai/build/hadoop-ai
@@ -38,8 +38,8 @@ RUN apt-get -y update && \
python-dev \
python-pip \
python-mysqldb \
- openjdk-8-jre=8u191-b12-0ubuntu0.16.04.1 \
- openjdk-8-jdk=8u191-b12-0ubuntu0.16.04.1 \
+ openjdk-8-jre \
+ openjdk-8-jdk \
openssh-server \
openssh-client \
git \
diff --git a/src/job-exporter/src/collector.py b/src/job-exporter/src/collector.py
index 2df34dc29d..6e85e68fa9 100644
--- a/src/job-exporter/src/collector.py
+++ b/src/job-exporter/src/collector.py
@@ -331,7 +331,8 @@ class ContainerCollector(Collector):
"node-exporter",
"job-exporter",
"yarn-exporter",
- "nvidia-drivers"
+ "nvidia-drivers",
+ "docker-cleaner"
]))
def __init__(self, name, sleep_time, atomic_ref, iteration_counter, gpu_info_ref,
diff --git a/src/rest-server/src/templates/dockerContainerScript.mustache b/src/rest-server/src/templates/dockerContainerScript.mustache
index 753e768163..fce0b9bb60 100644
--- a/src/rest-server/src/templates/dockerContainerScript.mustache
+++ b/src/rest-server/src/templates/dockerContainerScript.mustache
@@ -29,10 +29,18 @@ function exit_handler()
"[DEBUG]" "Docker container exit handler: EXIT signal received in docker container, exiting ..."
}
+# The cleaner sends SIGUSR1 (signal 10) to the container to kill it. Trap the signal here and exit with code 1.
+function kill_handler()
+{
+ printf "%s %s\n" \
+ "[INFO]" "Docker container killed by cleaner due to disk pressure."
+ exit 1
+}
+
set -x
PS4="+[\t] "
trap exit_handler EXIT
-
+trap kill_handler 10
touch "/alive/docker_$PAI_CONTAINER_ID"
@@ -44,7 +52,6 @@ HDFS_LAUNCHER_PREFIX=$PAI_DEFAULT_FS_URI/Container
export CLASSPATH="$(hadoop classpath --glob)"
task_role_no={{ idx }}
-
printf "%s %s\n%s\n\n" "[INFO]" "ENV" "$(printenv | sort)"
cp -r /pai/code/* ./
@@ -101,7 +108,6 @@ function get_ssh_key_files()
info_source="webhdfs"
localKeyPath=/root/.ssh/{{ jobData.jobName }}.pub
localPrivateKeyPath=/root/.ssh/{{ jobData.jobName }}
-
if [[ -f $localKeyPath ]]; then
rm -f $localKeyPath
fi
@@ -164,14 +170,12 @@ function azure_rdma_preparation()
{{# paiMachineList }}
echo {{hostip}} {{hostname}} >> /hosts.tmp
{{/ paiMachineList }}
-
cat /hosts.tmp > /etc/hosts
}
azure_rdma_preparation
{{/ azRDMA }}
{{/ reqAzRDMA }}
-
# Write env to system-wide environment
env | grep -E "^PAI|PATH|PREFIX|JAVA|HADOOP|NVIDIA|CUDA" > /etc/environment