diff --git a/deployment/utility/__init__.py b/deployment/utility/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/deployment/utility/sftp_copy.py b/deployment/utility/sftp_copy.py new file mode 100644 index 0000000000..37eda3fca6 --- /dev/null +++ b/deployment/utility/sftp_copy.py @@ -0,0 +1,93 @@ +# Copyright (c) Microsoft Corporation +# All rights reserved. +# +# MIT License +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +# to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
from ..clusterObjectModel import cluster_object_model
from ..k8sPaiLibrary.maintainlib import common

import sys
import logging
import logging.config


# raw_input() was renamed to input() in Python 3; keep a single alias so the
# interactive confirmation below works under both interpreter versions.
try:
    _prompt = raw_input  # noqa: F821  (Python 2)
except NameError:
    _prompt = input      # Python 3


class OpenPaiSftpCopy:
    """Copy a file over SFTP to every cluster machine matching the filter rules.

    Parameters of ``__init__``:
        filename: name of the file to copy.
        source: local directory that contains ``filename``.
        dest: absolute destination directory on the remote machines.
        machine_list: dict mapping hostname -> machine-info dict; each info
            dict is expected to contain at least "hostip" plus the ssh
            credentials consumed by ``common.sftp_paramiko``.
        filter: list of "key=value" rule strings, or None to target every
            machine.  (The parameter name shadows the built-in ``filter``;
            kept for backward compatibility with existing callers.)
    """

    def __init__(self, filename, source, dest, machine_list, filter):
        self.filename = filename
        self.source = source
        self.dest = dest
        self.origin_machine_list = machine_list
        self.filter_rule = filter
        # Filled by construct_machine_list() with the machines that matched
        # every filter rule.
        self.machine_list = {}

        self.logger = logging.getLogger(__name__)

    def _parse_filter_rules(self):
        """Parse the "key=value" rule strings, log them, and return them as dicts."""
        rule_list = []
        self.logger.info("=============================================")
        self.logger.info("================ Filter Rule ================")
        self.logger.info("=============================================")
        if self.filter_rule is not None:
            for rule in self.filter_rule:
                # paictl's rule_check guarantees exactly one '=' per rule.
                kv = rule.split("=")
                rule_list.append({"key": kv[0], "value": kv[1]})
                self.logger.info("key = {0}, value = {1}".format(kv[0], kv[1]))
        else:
            self.logger.info("No filter rule.")
        self.logger.info("\n")
        self.logger.info("\n")
        return rule_list

    def _confirm_or_exit(self):
        """Ask the operator to confirm; abort after 3 invalid answers or on 'N'."""
        count_input = 0
        while True:
            user_input = _prompt("Do you want to continue this operation? (Y/N) ")
            if user_input == "N":
                sys.exit(1)
            elif user_input == "Y":
                break
            else:
                print(" Please type Y or N.")
            count_input = count_input + 1
            if count_input == 3:
                self.logger.warning("3 Times......... Sorry, we will force stop your operation.")
                sys.exit(1)

    def construct_machine_list(self):
        """Filter the machine list, show the targets, and ask for confirmation."""
        rule_list = self._parse_filter_rules()

        self.logger.info("=============================================")
        self.logger.info("======= Machine List After filtered =========")
        self.logger.info("=============================================")
        for hostname in self.origin_machine_list:
            host = self.origin_machine_list[hostname]
            for rule in rule_list:
                # A machine is excluded if it misses the key or carries a
                # different value for it.
                if rule["key"] not in host:
                    break
                if host[rule["key"]] != rule["value"]:
                    break
            else:
                self.machine_list[hostname] = host
                self.logger.info("Machine Host Name: {0}, Machine Ip Address: {1}".format(hostname, host["hostip"]))
        self.logger.info("\n")
        self.logger.info("\n")

        self._confirm_or_exit()

    def run(self):
        """Copy the file to every selected machine and log a per-host result."""
        self.construct_machine_list()

        for hostname in self.machine_list:
            host = self.machine_list[hostname]
            # Explicit '== False' kept on purpose: sftp_paramiko's failure
            # contract is a literal False return.
            if common.sftp_paramiko(self.source, self.dest, self.filename, host) == False:
                self.logger.error("[ Failed ]: Task on the machine [ hostname: {0}, ip-address: {1} ]".format(hostname, host["hostip"]))
            else:
                self.logger.info("[ Successful ]: Task on the machine [ hostname: {0}, ip-address: {1} ]".format(hostname, host["hostip"]))
from ..clusterObjectModel import cluster_object_model
from ..k8sPaiLibrary.maintainlib import common

import sys
import logging
import logging.config


# raw_input() was renamed to input() in Python 3; keep a single alias so the
# interactive confirmation below works under both interpreter versions.
try:
    _prompt = raw_input  # noqa: F821  (Python 2)
except NameError:
    _prompt = input      # Python 3


class OpenPaiSSH:
    """Run a shell command over SSH on every cluster machine matching the filter rules.

    Parameters of ``__init__``:
        command: the shell command to execute remotely.
        machine_list: dict mapping hostname -> machine-info dict; each info
            dict is expected to contain at least "hostip" plus the ssh
            credentials consumed by ``common.ssh_shell_with_password_input_paramiko``.
        filter: list of "key=value" rule strings, or None to target every
            machine.  (The parameter name shadows the built-in ``filter``;
            kept for backward compatibility with existing callers.)
    """

    def __init__(self, command, machine_list, filter):
        self.cmd = command
        self.origin_machine_list = machine_list
        self.filter_rule = filter
        # Filled by construct_machine_list() with the machines that matched
        # every filter rule.
        self.machine_list = {}

        self.logger = logging.getLogger(__name__)

    def _parse_filter_rules(self):
        """Parse the "key=value" rule strings, log them, and return them as dicts."""
        rule_list = []
        self.logger.info("=============================================")
        self.logger.info("================ Filter Rule ================")
        self.logger.info("=============================================")
        if self.filter_rule is not None:
            for rule in self.filter_rule:
                # paictl's rule_check guarantees exactly one '=' per rule.
                kv = rule.split("=")
                rule_list.append({"key": kv[0], "value": kv[1]})
                self.logger.info("key = {0}, value = {1}".format(kv[0], kv[1]))
        else:
            self.logger.info("No filter rule.")
        self.logger.info("\n")
        self.logger.info("\n")
        return rule_list

    def _confirm_or_exit(self):
        """Ask the operator to confirm; abort after 3 invalid answers or on 'N'."""
        count_input = 0
        while True:
            user_input = _prompt("Do you want to continue this operation? (Y/N) ")
            if user_input == "N":
                sys.exit(1)
            elif user_input == "Y":
                break
            else:
                print(" Please type Y or N.")
            count_input = count_input + 1
            if count_input == 3:
                self.logger.warning("3 Times......... Sorry, we will force stop your operation.")
                sys.exit(1)

    def construct_machine_list(self):
        """Filter the machine list, show the targets, and ask for confirmation."""
        rule_list = self._parse_filter_rules()

        self.logger.info("=============================================")
        self.logger.info("======= Machine List After filtered =========")
        self.logger.info("=============================================")
        for hostname in self.origin_machine_list:
            host = self.origin_machine_list[hostname]
            for rule in rule_list:
                # A machine is excluded if it misses the key or carries a
                # different value for it.
                if rule["key"] not in host:
                    break
                if host[rule["key"]] != rule["value"]:
                    break
            else:
                self.machine_list[hostname] = host
                self.logger.info("Machine Host Name: {0}, Machine Ip Address: {1}".format(hostname, host["hostip"]))
        self.logger.info("\n")
        self.logger.info("\n")

        self._confirm_or_exit()

    def run(self):
        """Execute the command on every selected machine and log a per-host result."""
        self.construct_machine_list()

        for hostname in self.machine_list:
            host = self.machine_list[hostname]
            # Explicit '== False' kept on purpose: the helper's failure
            # contract is a literal False return.
            if common.ssh_shell_with_password_input_paramiko(host, self.cmd) == False:
                self.logger.error("[ Failed ]: Task on the machine [ hostname: {0}, ip-address: {1} ]".format(hostname, host["hostip"]))
            else:
                self.logger.info("[ Successful ]: Task on the machine [ hostname: {0}, ip-address: {1} ]".format(hostname, host["hostip"]))
diff --git a/docs/pai-management/doc/azure/enable-az-rdma.md b/docs/pai-management/doc/azure/enable-az-rdma.md new file mode 100644 index 0000000000..6b38dc3151 --- /dev/null +++ b/docs/pai-management/doc/azure/enable-az-rdma.md @@ -0,0 +1,102 @@ + + + +### Enable the capability of RDMA for your VM in azure + +#### Knowledge +The RDMA-capable instances + (Important): https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-hpc#rdma-capable-instances + +The cluster configuraiton options to enable rdma (Important): https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-hpc#cluster-configuration-options + +The network topology considerations(Important): https://docs.microsoft.com/en-us/azure/virtual-machines/linux/sizes-hpc#network-topology-considerations + +#### Steps + +###### ``` 1. Mark the RDMA capable machines with label``` + + +Based on [the knowledge section](#knowledge), you should mark your RDMA-capable machine with a specify label in [cluster-configuration.yaml](../../../../examples/cluster-configuration/cluster-configuration.yaml). Of course, you could customize the label as what you like. + +For example, in this tutorial, the following label will be used. + +```YAML +machine-list: + - hostname: example-hosts + hostip: x.x.x.x + machine-type: example + k8s-role: worker + pai-worker: "true" + # The lable of RDMA capable machines in this example + rdma: "true" +``` + +###### ``` 2. Copy the Azure RDMA enable to the target path ``` + +```bash + +cd pai/ +sudo ./paictl.py utility sftp-copy -p /path/to/cluster/config -n Azure-RDMA-enable.sh -s src/azure-rdma -d /tmp -f rdma=true + +``` + +###### ``` 3. Enable Azure RDMA with the script ``` + +```bash + +cd pai/ +sudo ./paictl.py utility ssh -p /path/to/cluster/config -f rdma=true -c "sudo /bin/bash /tmp/Azure-RDMA-enable.sh" + +``` + + +###### ``` 4. 
Restart all your rdma capable machines in azure portal``` + +Please communicate with your cluster owner to reboot the rdma machines after the following steps. + +###### ``` 5. Open the switch configuration for az-rdma whose default value is false``` + +In the [services-configuration.yaml](../../../../examples/cluster-configuration/services-configuration.yaml), please uncomment the configuration field ```cluster.common.az-rdma``` and set its value as ```"true""```. + + +For example, you should modify it as following. +```YAML +cluster: +# + common: +# clusterid: pai +# +# # HDFS, zookeeper data path on your cluster machine. +# data-path: "/datastorage" +# +# # Enable QoS feature or not. Default value is "true" +# qos-switch: "true" +# +# # If your cluster is created by Azure and the machine is rdma enabled. +# # Set this configuration as "true", the rdma environment will be set into your container. + az-rdma: "true" +``` + + +###### Note +- If you wanna enable azure rdma feature in your cluster, please ensure all the worker machines in your cluster is azure rdma capable! + - TODO: YARN should only schedule the rdma job to the machine with azure rdma machine. +- After enabling azure rdma feature in your cluster, everytime adding new machine or remove machine from the cluster, you should restart restserver to refresh the machinelist in it. + - TODO: Make restserver able to update the machinelist through configmap in a loop. diff --git a/docs/pai-management/doc/distributed-deploy.md b/docs/pai-management/doc/distributed-deploy.md index e9f79aff25..3fd843494e 100644 --- a/docs/pai-management/doc/distributed-deploy.md +++ b/docs/pai-management/doc/distributed-deploy.md @@ -26,6 +26,7 @@ - [Step 4. Update cluster configuration into kubernetes](#c-step-4) - [Step 5. Start all OpenPAI services](#c-step-5) - [appendix. Validate deployment](#appendix) +- [appendix. 
Azure RDMA](#az_rdma) *** @@ -53,6 +54,9 @@ If you are very familiar with OpenPAI, you could directly write your configurait ### Step 3. Deploy Kubernetes + +If your cluster is deployed in Azure, and there are azure rdma capable machines. Please go to this [section](#az_rdma) first. + - [A Guide to deploy kubenretes with paictl](./how-to-bootup-k8s.md) *** @@ -77,3 +81,6 @@ If you are very familiar with OpenPAI, you could directly write your configurait *** +### appendix. Azure RDMA + +- [A Guide to Enable Azure RDMA](./azure/enable-az-rdma.md) \ No newline at end of file diff --git a/docs/pai-management/doc/how-to-congiure-service-config.md b/docs/pai-management/doc/how-to-congiure-service-config.md index f862658dce..94e37fbcb4 100644 --- a/docs/pai-management/doc/how-to-congiure-service-config.md +++ b/docs/pai-management/doc/how-to-congiure-service-config.md @@ -59,7 +59,7 @@ According to your requirements, choose the component which you wanna customized. | Service | Description | Tutorial | | --- | --- | --- | -| cluster | Configure data-path, cluster-id and docker-registry to pull image. | [Link](../../../src/cluster/config/cluster.md)| +| cluster | Configure data-path, cluster-id, azure rdma switch and docker-registry to pull image. | [Link](../../../src/cluster/config/cluster.md)| | drivers | Configure drivers version and nvidia runtime. 
| [Link](../../../src/drivers/config/drivers.md)| | hadoop-resource-manager | yarn exporter port and default vc configuration | [Link](../../../src/hadoop-resource-manager/config/hadoop-resource-manager.md)| | yarn-frameworklauncher | frameworklauncher port configuration | [Link](../../../src/yarn-frameworklauncher/config/yarn-frameworkerlauncher.md)| diff --git a/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/DOCKER.md b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/DOCKER.md new file mode 100644 index 0000000000..3768f6c571 --- /dev/null +++ b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/DOCKER.md @@ -0,0 +1,36 @@ +## Basic environment + +First of all, PAI runs all jobs in Docker container. + +1: Install Docker CE + +2: Get an account of docker hub to store your image. + +# AzureRDMA && IntelMpi on PAI docker env + +## Contents + + +We need to build a AzureRDMA&IntelMPI image to run intel benchmark workload on OpenPAI, this can be done with following steps: + + +- Get a license for your intel mpi. And then modify the ```ACTIVATION_TYPE``` in the [silent.cfg](./silent.cfg) + +- Write an AzureRDMA&IntelMPI Dockerfile and save it to `Dockerfile.example.horovod-intelmpi-az-rdma`: + + - You could refer to this [Dockerfile](./Dockerfile.example.horovod-intelmpi-az-rdma) + - If your intel MPI is activated by a license file. You should copy it to the docker image, when building it. + - You'd better keep the image in a private registry. Because you build the license in the image. + +- Build the Docker image from `Dockerfile.example.horovod-intelmpi-az-rdma`: + +```bash +$ sudo docker build -f Dockerfile.example.horovod-intelmpi-az-rdma -t USER/pai.example.horovod-intelmpi-az-rdma . +``` + +- Push the Docker image to a Docker registry: + +```bash +$ sudo docker push USER/pai.example.horovod-intelmpi-az-rdma +``` +Note: Replace USER with the Docker Hub username you registered, you will be required to login before pushing Docker image. 
diff --git a/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/Dockerfile.example.horovod-intelmpi-az-rdma b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/Dockerfile.example.horovod-intelmpi-az-rdma new file mode 100644 index 0000000000..9b4398c866 --- /dev/null +++ b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/Dockerfile.example.horovod-intelmpi-az-rdma @@ -0,0 +1,101 @@ +# Tag: nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 +# Label: com.nvidia.cuda.version: 9.0.176 +# Label: com.nvidia.cudnn.version: 7.1.2.21 +# Label: com.nvidia.volumes.needed: nvidia_driver +# Label: maintainer: NVIDIA CORPORATION +# Ubuntu 16.04 +FROM nvidia/cuda@sha256:40db1c98b66e133f54197ba1a66312b9c29842635c8cba5ae66fb56ded695b7c + +ENV TENSORFLOW_VERSION=1.12.0 + +RUN apt-get -y update && \ + apt-get -y install \ + nano \ + vim \ + joe \ + wget \ + curl \ + jq \ + gawk \ + psmisc \ + python \ + python-dev \ + python-pip \ + python3 \ + python3-dev \ + python3-pip \ + # SSH library is necessary for mpi workload. + openssh-server \ + openssh-client \ + build-essential \ + autotools-dev \ + cmake \ + git \ + bash-completion \ + ca-certificates \ + inotify-tools \ + rsync \ + realpath \ + libjpeg-dev \ + libpng-dev \ + net-tools \ + libsm6 \ + libxext6 \ + rpm \ + #For Azure RDMA and intel MPI installation + cpio \ + net-tools \ + libdapl2 \ + dapl2-utils \ + libmlx4-1 \ + libmlx5-1 \ + ibutils \ + librdmacm1 \ + libibverbs1 \ + libmthca1 \ + ibverbs-utils \ + rdmacm-utils \ + perftest \ + kmod + + +# Install NCCL v2.3.7, for CUDA 9.0 +RUN wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb && \ + dpkg -i nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb && \ + apt install libnccl2=2.3.7-1+cuda9.0 libnccl-dev=2.3.7-1+cuda9.0 + + +# Install intel MPI with the version which azure suggests. 
+COPY silent.cfg /silent.cfg +ENV MANPATH=/usr/share/man:/usr/local/man \ + COMPILERVARS_ARCHITECTURE=intel64 \ + COMPILERVARS_PLATFORM=linux \ + INTEL_MPI_PATH=/opt/intel/compilers_and_libraries/linux/mpi + +# Install Intel MPI in the Docker Image. +# You should prepare your own intel mpi license to active your intel MPI, and modify the file silent.cfg to set the configuration of activation type. +RUN wget http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/9278/l_mpi_p_5.1.3.223.tgz && \ + tar -xvf /l_mpi_p_5.1.3.223.tgz && \ + cd /l_mpi_p_5.1.3.223 && \ + ./install.sh -s /silent.cfg && \ + . /opt/intel/bin/compilervars.sh && \ + . /opt/intel/compilers_and_libraries/linux/mpi/bin64/mpivars.sh && \ + echo "source /opt/intel/compilers_and_libraries/linux/mpi/bin64/mpivars.sh" >> /root/.bashrc && \ + echo LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:'$LD_LIBRARY_PATH' >> /root/.bashrc + +ENV PATH $PATH:/opt/intel/compilers_and_libraries/linux/mpi/bin64 + +# Install TensorFlow +RUN pip3 install tensorflow-gpu==${TENSORFLOW_VERSION} h5py && \ + pip install tensorflow-gpu==${TENSORFLOW_VERSION} h5py + +# Install Dependencies +RUN pip3 install --no-cache-dir scipy jupyter ipykernel numpy toolz pandas scikit-learn pillow && \ + pip install --no-cache-dir scipy numpy toolz pandas scikit-learn pillow + +# Install Horovod, temporarily using CUDA stubs +RUN ldconfig /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs && \ + /bin/bash -c "source /opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh" && \ + pip3 install --no-cache-dir horovod==0.15.2 && \ + pip install --no-cache-dir horovod==0.15.2 && \ + ldconfig \ No newline at end of file diff --git a/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/README.md b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/README.md new file mode 100644 index 0000000000..768fe8fbd4 --- /dev/null +++ b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/README.md @@ -0,0 +1,75 @@ + + +## Intel MPI 
benchmark in OpenPAI based on Azure's RDMA Capable machines. + +#### Prerequisites + +Please contact with your cluster admin to ensure that whether azure rdma is enabled in your cluster or not. + +#### Prepare build an image with necessary library. + +To run AzureRDMA&IntelMPI benchmark in OpenPAI, you need to prepare a job configuration file and submit it through webportal. + +Users should prepare their own intel mpi licences to build the docker image. When building docker image, please refer to [DOCKER.md](./DOCKER.md) + +###### +```json +{ + "jobName": "intel-mpi-benchmark", + "image": "your/images/url", + "virtualCluster": "default", + "taskRoles": [ + { + "name": "master", + "taskNumber": 1, + "cpuNumber": 2, + "memoryMB": 10240, + "shmMB": 256, + "gpuNumber": 1, + "minFailedTaskCount": 1, + "minSucceededTaskCount": 1, + "command": "sleep 1000 && /opt/intel/impi/5.1.3.223/bin64/mpirun -d -hosts worker-0,master-0 -n 2 -ppn 1 -env I_MPI_FABRICS=shm:dapl -env I_MPI_DAPL_PROVIDER=ofa-v2-ib0 /opt/intel/impi/5.1.3.223/bin64/IMB-MPI1" + }, + { + "name": "worker", + "taskNumber": 1, + "cpuNumber": 2, + "memoryMB": 10240, + "shmMB": 256, + "gpuNumber": 1, + "minFailedTaskCount": 1, + "minSucceededTaskCount": 1, + "command": "sleep infinity" + } + ], + "jobEnvs": { + "paiAzRDMA": true + } +} +``` + +- For more details on how to write a job configuration file, please refer to [job tutorial](../../docs/job_tutorial.md#json-config-file-for-job-submission). +- Sleep 1000 in ```master-0``` is a hack to ensure that all work containers are ready. You could optimize it in a better way. For example try to ssh all the work containers with the hostanme ```${taskname}-${taskid}``` until sccessful. +- If user wanna a AZ-RDMA capable container. The following parameter is necessary. 
+```bash +"jobEnvs": { + "paiAzRDMA": true + } +``` \ No newline at end of file diff --git a/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/silent.cfg b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/silent.cfg new file mode 100644 index 0000000000..a3133979c6 --- /dev/null +++ b/examples/azure-rdma-inte-mpi-benchmark-with-horovod-image/silent.cfg @@ -0,0 +1,49 @@ +# Patterns used to check silent configuration file +# +# anythingpat - any string +# filepat - the file location pattern (/file/location/to/license.lic) +# lspat - the license server address pattern (0123@hostname) +# snpat - the serial number pattern (ABCD-01234567) + +# Accept EULA, valid values are: {accept, decline} +ACCEPT_EULA=accept + +# Optional error behavior, valid values are: {yes, no} +CONTINUE_WITH_OPTIONAL_ERROR=yes + +# Install location, valid values are: {/opt/intel, filepat} +PSET_INSTALL_DIR=/opt/intel + +# Continue with overwrite of existing installation directory, valid values are: {yes, no} +CONTINUE_WITH_INSTALLDIR_OVERWRITE=yes + +# List of components to install, valid values are: {ALL, anythingpat} +COMPONENTS=ALL + +# Installation mode, valid values are: {install, modify, repair, uninstall} +PSET_MODE=install + +# Directory for non-RPM database, valid values are: {filepat} +#NONRPM_DB_DIR=filepat + +# Serial number, valid values are: {snpat} +#ACTIVATION_SERIAL_NUMBER=snpat + +# License file or license server, valid values are: {lspat, filepat} +#ACTIVATION_LICENSE_FILE= + +# Activation type, valid values are: {exist_lic, license_server, license_file, trial_lic, serial_number} +ACTIVATION_TYPE=trial_lic + +# Path to the cluster description file, valid values are: {filepat} +#CLUSTER_INSTALL_MACHINES_FILE=filepat + +# Intel(R) Software Improvement Program opt-in, valid values are: {yes, no} +PHONEHOME_SEND_USAGE_DATA=no + +# Perform validation of digital signatures of RPM files, valid values are: {yes, no} +SIGNING_ENABLED=yes + +# Select target architecture 
class Utility(SubCmd):
    """paictl sub-commands for ad-hoc cluster maintenance (remote ssh / sftp-copy)."""

    def register(self, parser):
        """Register the `ssh` and `sftp-copy` sub-commands on the given parser."""
        utility_parser = parser.add_subparsers(help="utility for maintaining in an easy way.")

        ssh_parser = SubCmd.add_handler(utility_parser, self.cluster_ssh, "ssh")
        ssh_parser.add_argument("-p", "--config-path", dest="config_path", required=True, help="path of cluster configuration file")
        ssh_parser.add_argument("-f", "--filter", dest="filter", nargs='+', help="Rule to filter machine. Format: key1=value1 key2=value2 ...")
        ssh_parser.add_argument("-c", "--command", dest="command", required=True, help="The command to be executed remotely.")

        sftp_cp_parser = SubCmd.add_handler(utility_parser, self.cluster_sftp_copy, "sftp-copy")
        sftp_cp_parser.add_argument("-f", "--filter", dest="filter", nargs='+', help="Rule to filter machine. Format: key1=value1 key2=value2 ...")
        sftp_cp_parser.add_argument("-n", "--file-name", dest="file_name", required=True, help="File Name.")
        sftp_cp_parser.add_argument("-s", "--source", dest="source", required=True, help="The source path of the file.")
        sftp_cp_parser.add_argument("-d", "--destination", dest="dest", required=True, help="The target path of the file in the remote machines.")
        sftp_cp_parser.add_argument("-p", "--config-path", dest="config_path", required=True, help="path of cluster configuration file")

    def get_machine_list(self, config_path):
        """Build the cluster object model and return its machine-list dict."""
        objectModelFactoryHandler = cluster_object_model(configuration_path=config_path)
        return objectModelFactoryHandler.run()["machine"]["machine-list"]

    def rule_check(self, rule_list):
        """Exit with an error unless every filter rule has the form key=value."""
        if rule_list is None:
            return
        for rule in rule_list:
            kv = rule.split("=")
            if len(kv) != 2:
                logger.error("Please check the filter rule {0}. It's invalid.".format(rule))
                sys.exit(1)

    def cluster_ssh(self, args):
        """Handler for `paictl utility ssh`: run a command on the filtered machines."""
        if args.config_path is not None:
            args.config_path = os.path.expanduser(args.config_path)
            machine_list = self.get_machine_list(args.config_path)
        else:
            # --config-path is declared required, so this branch is a
            # defensive fallback only.
            machine_list = {}
        self.rule_check(args.filter)
        ssh_handler = OpenPaiSSH(args.command, machine_list, args.filter)
        ssh_handler.run()

    def cluster_sftp_copy(self, args):
        """Handler for `paictl utility sftp-copy`: copy a file to the filtered machines."""
        if args.config_path is not None:
            args.config_path = os.path.expanduser(args.config_path)
            machine_list = self.get_machine_list(args.config_path)
        else:
            # --config-path is declared required, so this branch is a
            # defensive fallback only.
            machine_list = {}
        if args.source is not None:
            args.source = os.path.expanduser(args.source)
        # The remote destination must be absolute; sftp copy resolves it on
        # the target machine, not relative to any known working directory.
        if args.dest is not None and not os.path.isabs(args.dest):
            logger.error("The path of destination should be an absolute path.")
            sys.exit(1)
        self.rule_check(args.filter)
        sftp_copy_handler = OpenPaiSftpCopy(args.file_name, args.source, args.dest, machine_list, args.filter)
        sftp_copy_handler.run()
#!/bin/bash
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Enable Azure RDMA support on an RDMA-capable Azure Ubuntu VM.
# NOTE(review): the shebang must be the very first line of the file to take
# effect; it previously appeared after the license block and was ignored.
# A reboot is required afterwards for waagent to bring the RDMA device up.

apt-get update -y
apt-get install -y libdapl2 libmlx4-1

# Turn on RDMA support in the Azure Linux agent configuration.
sed -i "s/# OS.EnableRDMA=y/OS.EnableRDMA=y/g" /etc/waagent.conf
sed -i "s/# OS.UpdateRdmaDriver=y/OS.UpdateRdmaDriver=y/g" /etc/waagent.conf

# Allow unlimited locked memory (required by RDMA verbs).  The leading '*'
# must be escaped: an unescaped '*' at the start of an ERE is a dangling
# quantifier, so the original pattern never matched reliably.
if grep -qE '\* hard memlock unlimited' /etc/security/limits.conf; then
    echo "Configuration has been changed in /etc/security/limits.conf. Skip it"
else
    sed -i '/# End of file/i\* hard memlock unlimited' /etc/security/limits.conf
    sed -i '/# End of file/i\* soft memlock unlimited' /etc/security/limits.conf
fi

# Relax ptrace scope (needed by Intel MPI's shared-memory transport).
echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
system services like frameworklauncher, hadoop, etc. docker-registry: diff --git a/src/rest-server/deploy/rest-server.yaml.template b/src/rest-server/deploy/rest-server.yaml.template index 78113d6ba3..aac7a86317 100644 --- a/src/rest-server/deploy/rest-server.yaml.template +++ b/src/rest-server/deploy/rest-server.yaml.template @@ -35,6 +35,9 @@ spec: - name: rest-server image: {{ cluster_cfg['cluster']['docker-registry']['prefix'] }}rest-server:{{ cluster_cfg['cluster']['docker-registry']['tag'] }} imagePullPolicy: Always + volumeMounts: + - mountPath: /pai-cluster-config + name: pai-configuration-rest-server env: - name: LAUNCHER_WEBSERVICE_URI value: {{ cluster_cfg['yarn-frameworklauncher']['webservice'] }} @@ -52,6 +55,8 @@ spec: value: {{ cluster_cfg['rest-server']['default-pai-admin-password'] }} - name: K8S_APISERVER_URI value: {{ cluster_cfg['kubernetes']['api-servers-url'] }} + - name: AZ_RDMA + value: "{{ cluster_cfg['cluster']['common']['az-rdma']}}" {% if cluster_cfg['rest-server']['github-owner'] %} - name: GITHUB_OWNER value: {{ cluster_cfg['rest-server']['github-owner'] }} @@ -75,3 +80,7 @@ spec: {%- endif %} imagePullSecrets: - name: {{ cluster_cfg["cluster"]["docker-registry"]["secret-name"] }} + volumes: + - name: pai-configuration-rest-server + configMap: + name: pai-configuration diff --git a/src/rest-server/src/config/azure.js b/src/rest-server/src/config/azure.js new file mode 100644 index 0000000000..6a275b0d7d --- /dev/null +++ b/src/rest-server/src/config/azure.js @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation +// All rights reserved. 
+// +// MIT License +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +// to permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +// module dependencies +const Joi = require('joi'); + +let azureData = { + azRDMA: process.env.AZ_RDMA, +}; + +// define the schema for azure +const azureSchema = Joi.object().keys({ + azRDMA: Joi.string().empty('') + .valid('false', 'true'), +}).required(); + + +const {error, value} = Joi.validate(azureData, azureSchema); +if (error) { + throw new Error(`config error\n${error}`); +} +azureData = value; + +module.exports = azureData; diff --git a/src/rest-server/src/config/paiConfig.js b/src/rest-server/src/config/paiConfig.js new file mode 100644 index 0000000000..54c5d03577 --- /dev/null +++ b/src/rest-server/src/config/paiConfig.js @@ -0,0 +1,50 @@ +// Copyright (c) Microsoft Corporation +// All rights reserved. 
+// +// MIT License +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and +// to permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING +// BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +// module dependencies +const Joi = require('joi'); +const yaml = require('js-yaml'); +const fs = require('fs'); +const logger = require('./logger'); + +let paiMachineList = []; +try { + paiMachineList = yaml.safeLoad(fs.readFileSync('/pai-cluster-config/cluster-configuration.yaml', 'utf8'))['machine-list']; +} catch (err) { + paiMachineList = []; + logger.info('Unable to load machine list from cluster-configuration.'); + logger.info(err.stack); +} + +let paiConfigData = { + machineList: paiMachineList, +}; + + +// define the schema for pai configuration +const paiConfigSchema = Joi.object().keys({ + machineList: Joi.array(), +}).required(); + + +const {error, value} = Joi.validate(paiConfigData, paiConfigSchema); +if (error) { + throw new Error(`config error\n${error}`); +} +paiConfigData = value; + +module.exports = paiConfigData; diff --git a/src/rest-server/src/models/job.js b/src/rest-server/src/models/job.js index 892ae11468..7978703a2d 100644 --- a/src/rest-server/src/models/job.js +++ b/src/rest-server/src/models/job.js @@ -28,6 +28,8 @@ const dockerContainerScriptTemplate = require('../templates/dockerContainerScrip const createError = require('../util/error'); const logger = require('../config/logger'); const Hdfs = require('../util/hdfs'); +const azureEnv = require('../config/azure'); +const paiConfig = require('../config/paiConfig'); class Job { constructor(name, namespace, next) { @@ -425,6 +427,8 @@ class Job { 'jobData': data, 'inspectFormat': '{{.State.Pid}}', 'jobEnvs': jobEnvs, + 'azRDMA': azureEnv.azRDMA === 'false' ? false : true, + 'reqAzRDMA': data.jobEnvs && data.jobEnvs.paiAzRDMA === true ? true : false, }); return yarnContainerScript; } @@ -437,6 +441,9 @@ class Job { 'taskData': data.taskRoles[idx], 'jobData': data, 'webHdfsUri': launcherConfig.webhdfsUri, + 'azRDMA': azureEnv.azRDMA === 'false' ? false : true, + 'paiMachineList': paiConfig.machineList, + 'reqAzRDMA': data.jobEnvs && data.jobEnvs.paiAzRDMA === true ? 
true : false, }); return dockerContainerScript; } diff --git a/src/rest-server/src/templates/dockerContainerScript.mustache b/src/rest-server/src/templates/dockerContainerScript.mustache index 174cffe599..753e768163 100644 --- a/src/rest-server/src/templates/dockerContainerScript.mustache +++ b/src/rest-server/src/templates/dockerContainerScript.mustache @@ -153,6 +153,25 @@ else "[INFO]" "Please refer to PAI dockerfile example to get ssh service enabled. https://github.com/Microsoft/pai/tree/master/examples/Dockerfiles" fi +{{# reqAzRDMA }} +{{# azRDMA }} +function azure_rdma_preparation() +{ + # https://unix.stackexchange.com/questions/404189/find-and-sed-string-in-docker-got-error-device-or-resource-busy + cp /etc/hosts /hosts.tmp + + sed -i "/127.0.0.1/c\127.0.0.1 localhost" /hosts.tmp +{{# paiMachineList }} + echo {{hostip}} {{hostname}} >> /hosts.tmp +{{/ paiMachineList }} + + cat /hosts.tmp > /etc/hosts +} + +azure_rdma_preparation +{{/ azRDMA }} +{{/ reqAzRDMA }} + # Write env to system-wide environment env | grep -E "^PAI|PATH|PREFIX|JAVA|HADOOP|NVIDIA|CUDA" > /etc/environment diff --git a/src/rest-server/src/templates/yarnContainerScript.mustache b/src/rest-server/src/templates/yarnContainerScript.mustache index 898835a0e5..d7669f51f1 100644 --- a/src/rest-server/src/templates/yarnContainerScript.mustache +++ b/src/rest-server/src/templates/yarnContainerScript.mustache @@ -266,6 +266,16 @@ docker run --name $docker_name \ $nvidia_devices \ --device=/dev/fuse \ --security-opt apparmor:unconfined \ +{{# reqAzRDMA }} +{{# azRDMA }} + --volume /var/lib/hyperv:/var/lib/hyperv \ + --volume /etc/dat.conf:/etc/dat.conf \ + --volume /opt/intel:/opt/intel \ + --device=/dev/infiniband/rdma_cm \ + --device=/dev/infiniband/uverbs0 \ + --ulimit memlock=-1 \ +{{/ azRDMA }} +{{/ reqAzRDMA }} --volume $container_local_dir/$alive_dir:/alive \ --volume $container_local_dir/$alive_dir/yarn_$CONTAINER_ID:/alive/yarn_$CONTAINER_ID:ro \ --volume 
$container_local_dir/$log_dir:/pai/log \ diff --git a/src/rest-server/test/setup.js b/src/rest-server/test/setup.js index f8de5ce20b..652ff0d085 100644 --- a/src/rest-server/test/setup.js +++ b/src/rest-server/test/setup.js @@ -27,6 +27,7 @@ process.env.DEFAULT_PAI_ADMIN_USERNAME = 'paiAdmin'; process.env.DEFAULT_PAI_ADMIN_PASSWORD = 'adminis'; process.env.YARN_URI = 'http://yarn.test.pai:8088'; process.env.K8S_APISERVER_URI = 'http://kubernetes.test.pai:8080'; +process.env.AZ_RDMA = 'false'; // module dependencies