Skip to content

Commit

Permalink
Demo script.
Browse files Browse the repository at this point in the history
Signed-off-by: Klaus Ma <mada3@huawei.com>
  • Loading branch information
Klaus Ma committed Jun 19, 2019
1 parent e42994d commit 04c2046
Show file tree
Hide file tree
Showing 21 changed files with 612 additions and 111 deletions.
23 changes: 15 additions & 8 deletions docs/samples/kubecon-2019-china/deploys/controllers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,19 @@ spec:
spec:
serviceAccount: vk-controllers
imagePullSecrets:
- name: default-secret
- name: default-secret
containers:
- name: vk-controllers
image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/vk-controllers:latest
args:
- --alsologtostderr
- -v=4
- 2>&1
imagePullPolicy: "IfNotPresent"
- name: vk-controllers
image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/vk-controllers:latest
args:
- --alsologtostderr
- -v=3
- 2>&1
imagePullPolicy: "IfNotPresent"
resources:
requests:
cpu: "1000m"
memory: "2048Mi"
limits:
cpu: "1000m"
memory: "2048Mi"
34 changes: 21 additions & 13 deletions docs/samples/kubecon-2019-china/deploys/scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ metadata:
name: vk-scheduler-configmap
namespace: volcano
data:
kube-batch.conf: |
actions: "enqueue, allocate, backfill"
tiers:
- plugins:
- name: priority
- name: gang
- name: conformance
- plugins:
- name: drf
- name: predicates
- name: proportion
- name: nodeorder
kube-batch.conf: |
actions: "enqueue, allocate, backfill"
tiers:
- plugins:
- name: priority
- name: gang
- name: conformance
- plugins:
- name: drf
- name: predicates
- name: proportion
- name: nodeorder
---
apiVersion: v1
kind: ServiceAccount
Expand Down Expand Up @@ -113,13 +113,21 @@ spec:
- name: default-secret
containers:
- name: vk-scheduler
image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/vk-scheduler:latest
image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/vk-kube-batch:latest
args:
- --alsologtostderr
- --scheduler-conf=/volcano.scheduler/kube-batch.conf
- --scheduler-name=volcano
- -v=3
- 2>&1
imagePullPolicy: "IfNotPresent"
resources:
requests:
cpu: "1000m"
memory: "2048Mi"
limits:
cpu: "1000m"
memory: "2048Mi"
volumeMounts:
- name: scheduler-config
mountPath: /volcano.scheduler
Expand Down
26 changes: 26 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx-1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ReplicaSet "nginx-1": a set of identical nginx pods that are handed to the
# scheduler named "volcano" rather than the default Kubernetes scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-1
  labels:
    app: nginx-1
spec:
  # modify replicas according to your case
  replicas: 8
  selector:
    # Must match the labels set on the pod template below.
    matchLabels:
      app: nginx-1
  template:
    metadata:
      labels:
        app: nginx-1
    spec:
      schedulerName: volcano
      containers:
        - name: nginx-1
          # No tag is given, so the registry's default tag is pulled.
          image: nginx
          resources:
            # CPU request equals the CPU limit: one full core per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
26 changes: 26 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx-2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ReplicaSet "nginx-2": a second set of nginx pods, also handed to the
# scheduler named "volcano", with the same per-pod CPU request as "nginx-1".
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx-2
  labels:
    app: nginx-2
spec:
  # modify replicas according to your case
  replicas: 8
  selector:
    # Must match the labels set on the pod template below.
    matchLabels:
      app: nginx-2
  template:
    metadata:
      labels:
        app: nginx-2
    spec:
      schedulerName: volcano
      containers:
        - name: nginx-2
          # No tag is given, so the registry's default tag is pulled.
          image: nginx
          resources:
            # CPU request equals the CPU limit: one full core per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
25 changes: 25 additions & 0 deletions docs/samples/kubecon-2019-china/drf/nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# ReplicaSet "nginx": plain nginx pods with no schedulerName set, so they are
# placed by the cluster's default scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # modify replicas according to your case
  replicas: 8
  selector:
    # Must match the labels set on the pod template below.
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          # No tag is given, so the registry's default tag is pulled.
          image: nginx
          resources:
            # CPU request equals the CPU limit: one full core per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
77 changes: 77 additions & 0 deletions docs/samples/kubecon-2019-china/gang/mpi-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Volcano Job "lm-mpi-job": one MPI master task plus three MPI worker tasks.
# minAvailable is 4, i.e. all four pods (1 master + 3 workers) are required.
apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
  name: lm-mpi-job
  labels:
    # Set the job type according to your workload.
    "volcano.sh/job-type": "MPI"
spec:
  # Minimum number of pods the job needs in order to run
  # (must not exceed the total number of replicas across all tasks).
  minAvailable: 4
  schedulerName: volcano
  plugins:
    # Provides passwordless SSH authentication between the pods.
    ssh: []
    # Provides the network information the job needs to run:
    # hosts files, headless service, etc.
    svc: []
  # If any pod is evicted, restart the whole job.
  policies:
    - event: PodEvicted
      action: RestartJob
  tasks:
    - replicas: 1
      name: mpimaster
      # When mpiexec finishes, treat the whole MPI job as finished.
      policies:
        - event: TaskCompleted
          action: CompleteJob
      template:
        spec:
          # Volcano's information is all placed under the /etc/volcano directory.
          containers:
            - command:
                - /bin/sh
                - -c
                # Builds a comma-separated host list from the worker host
                # file, starts sshd, then launches mpiexec with 3 processes
                # (one per mpiworker replica).
                - |
                  MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
                  mkdir -p /var/run/sshd; /usr/sbin/sshd;
                  mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpimaster
              ports:
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "500m"
                limits:
                  cpu: "500m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret
    - replicas: 3
      name: mpiworker
      template:
        spec:
          containers:
            - command:
                - /bin/sh
                - -c
                # sshd -D does not detach, so it stays the container's
                # foreground process while the master connects over SSH.
                - |
                  mkdir -p /var/run/sshd; /usr/sbin/sshd -D;
              image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
              name: mpiworker
              ports:
                - containerPort: 22
                  name: mpijob-port
              workingDir: /home
              resources:
                requests:
                  cpu: "1000m"
                limits:
                  cpu: "1000m"
          restartPolicy: OnFailure
          imagePullSecrets:
            - name: default-secret

25 changes: 25 additions & 0 deletions docs/samples/kubecon-2019-china/gang/nginx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# ReplicaSet "nginx": plain nginx pods with no schedulerName set, so they are
# placed by the cluster's default scheduler.
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: nginx
  labels:
    app: nginx
spec:
  # modify replicas according to your case
  replicas: 6
  selector:
    # Must match the labels set on the pod template below.
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          # No tag is given, so the registry's default tag is pulled.
          image: nginx
          resources:
            # CPU request equals the CPU limit: one full core per pod.
            requests:
              cpu: "1000m"
            limits:
              cpu: "1000m"
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,18 @@ apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
name: lm-horovod-job
labels:
"volcano.sh/job-type": Horovod
spec:
minAvailable: 4
schedulerName: kube-batch
schedulerName: volcano
plugins:
ssh: []
svc: []
# If any pod is evicted, restart the whole job
policies:
- event: PodEvicted
action: RestartJob
tasks:
- replicas: 1
name: master
Expand Down Expand Up @@ -56,11 +62,11 @@ spec:
name: job-port
resources:
requests:
cpu: "2048m"
memory: "4096Mi"
cpu: "1000m"
memory: "2048Mi"
limits:
cpu: "2048m"
memory: "4096Mi"
cpu: "1000m"
memory: "2048Mi"
restartPolicy: OnFailure
imagePullSecrets:
- name: default-secret
Expand Down
30 changes: 21 additions & 9 deletions docs/samples/kubecon-2019-china/mpi-sample/mpi-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,40 @@ apiVersion: batch.volcano.sh/v1alpha1
kind: Job
metadata:
name: lm-mpi-job
labels:
# Set the job type according to your workload
"volcano.sh/job-type": "MPI"
spec:
minAvailable: 4
schedulerName: kube-batch
# Minimum number of pods the job needs in order to run (must not exceed the total number of replicas)
minAvailable: 3
schedulerName: volcano
plugins:
# Provides passwordless SSH authentication
ssh: []
# Provides the network information the job needs to run: hosts files, headless service, etc.
svc: []
# If any pod is evicted, restart the whole job
policies:
- event: PodEvicted
action: RestartJob
tasks:
- replicas: 1
name: mpimaster
# When mpiexec finishes, treat the whole MPI job as finished
policies:
- event: TaskCompleted
action: CompleteJob
template:
spec:
# Volcano's information is all placed under the /etc/volcano directory
containers:
- command:
- /bin/sh
- -c
- |
MPI_HOST=`cat /etc/volcano/mpiworker.host | tr "\n" ","`;
mkdir -p /var/run/sshd; /usr/sbin/sshd;
mpiexec --allow-run-as-root --host ${MPI_HOST} -np 3 mpi_hello_world;
mpiexec --allow-run-as-root --host ${MPI_HOST} -np 2 mpi_hello_world;
image: swr.cn-north-1.myhuaweicloud.com/hwstaff_z00383385/example-mpi:0.0.1
name: mpimaster
ports:
Expand All @@ -40,7 +52,7 @@ spec:
restartPolicy: OnFailure
imagePullSecrets:
- name: default-secret
- replicas: 3
- replicas: 2
name: mpiworker
template:
spec:
Expand All @@ -58,12 +70,12 @@ spec:
workingDir: /home
resources:
requests:
cpu: "2048m"
memory: "4096Mi"
cpu: "1024m"
memory: "2048Mi"
limits:
cpu: "2048m"
memory: "4096Mi"
cpu: "1024m"
memory: "2048Mi"
restartPolicy: OnFailure
imagePullSecrets:
- name: default-secret
---

Loading

0 comments on commit 04c2046

Please sign in to comment.