From 15dc54de13d66576bc075480eda66eae6f389bc5 Mon Sep 17 00:00:00 2001 From: Pankaj Garg Date: Tue, 3 Jul 2018 10:40:11 -0700 Subject: [PATCH] Start drone on remote leader node - Also, enables CUDA for leader node --- multinode-demo/start_nodes.sh | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/multinode-demo/start_nodes.sh b/multinode-demo/start_nodes.sh index 1d3bb07d81c3b4..7c5b44992d1d03 100755 --- a/multinode-demo/start_nodes.sh +++ b/multinode-demo/start_nodes.sh @@ -23,18 +23,24 @@ if [[ -z "$remote_user" ]]; then usage fi +echo "Build started at $(date)" +SECONDS=0 # Build and install locally PATH="$HOME"/.cargo/bin:"$PATH" cargo install --force +echo "Build took $SECONDS seconds" + ip_addr_array=() # Get IP address array # shellcheck source=/dev/null source "$ip_addr_file" # shellcheck disable=SC2089,SC2016 -ssh_command_prefix='export PATH="$HOME/.cargo/bin:$PATH"; cd solana; USE_INSTALL=1 ./multinode-demo/' +ssh_command_prefix='export PATH="$HOME/.cargo/bin:$PATH"; cd solana; USE_INSTALL=1' +echo "Deployment started at $(date)" +SECONDS=0 count=0 leader= for ip_addr in "${ip_addr_array[@]}"; do @@ -43,7 +49,7 @@ for ip_addr in "${ip_addr_array[@]}"; do ssh-keygen -R "$ip_addr" ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts - ssh "$remote_user@$ip_addr" 'mkdir -p ~/.ssh ~/solana ~/.cargo/bin' + ssh -n -f "$remote_user@$ip_addr" 'mkdir -p ~/.ssh ~/solana ~/.cargo/bin' # Killing sshguard for now. TODO: Find a better solution # sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts @@ -61,35 +67,41 @@ for ip_addr in "${ip_addr_array[@]}"; do fi # Stop current nodes - ssh "$remote_user@$ip_addr" 'pkill -9 solana-fullnode' - ssh "$remote_user@$ip_addr" 'pkill -9 solana-client-demo' + ssh "$remote_user@$ip_addr" 'pkill -9 solana-' if [[ -n $leader ]]; then echo "Adding known hosts for $ip_addr" ssh -n -f "$remote_user@$ip_addr" "ssh-keygen -R $leader" ssh -n -f "$remote_user@$ip_addr" "ssh-keyscan $leader >> ~/.ssh/known_hosts" - ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/.cargo/bin/solana* ~/.cargo/bin/" - ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/solana/multinode-demo ~/solana/" + ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/.cargo/bin/solana* ~/.cargo/bin/" + ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/solana/multinode-demo ~/solana/" + ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/solana/fetch-perf-libs.sh ~/solana/" else # Deploy build and scripts to remote node rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/ rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/ + rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/ fi # Run setup - ssh "$remote_user@$ip_addr" "$ssh_command_prefix"'setup.sh -p "$ip_addr"' + ssh "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"' if ((!count)); then # Start the leader on the first node echo "Starting leader node $ip_addr" - ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"'leader.sh > leader.log 2>&1' + ssh -n -f "$remote_user@$ip_addr" 'cd solana; ./fetch-perf-libs.sh' + ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' SOLANA_CUDA=1 ./multinode-demo/leader.sh > leader.log 2>&1' + ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/drone.sh > drone.log 2>&1' leader=${ip_addr_array[0]} else # Start validator on all other nodes echo "Starting validator node $ip_addr" - ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix""validator.sh $remote_user@$leader:~/solana $leader > validator.log 2>&1" + ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"" ./multinode-demo/validator.sh $remote_user@$leader:~/solana $leader > validator.log 2>&1" fi ((count++)) done + +echo "Deployment finished at $(date)" +echo "Deployment took $SECONDS seconds"