Switch from ovn-nbctl, ovn-sbctl, and ovs-vsctl to using ovsdbapp #136

Status: Closed. Wants to merge 36 commits (changes shown from 25 commits).

Commits (36):
65d9ece generate_hosts: Small refactor (putnopvut, Aug 29, 2022)
ac7c7e2 Add get-config-value.py script. (putnopvut, Sep 21, 2022)
ca5ee1e ovn-tester: Containerize the tester. (putnopvut, Sep 21, 2022)
c1c8996 ovn-tester: Use ovsdbapp in place of ovs-vsctl (putnopvut, Aug 24, 2022)
f830f74 ovn-tester: Use ovsdbapp in place of ovn-nbctl. (putnopvut, Sep 2, 2022)
6134fd6 ovn-tester: Use ovsdbapp in place of ovs-sbctl. (putnopvut, Sep 2, 2022)
7c37a7a ci: Fix low-scale CI. (putnopvut, Sep 22, 2022)
f4a4ac6 Address PR comments. (putnopvut, Oct 4, 2022)
82a5cb4 Properly quote jinja variable in ansible playbook. (putnopvut, Oct 4, 2022)
476efcd Fix problems found during PR review. (putnopvut, Oct 5, 2022)
097aa1e Un-stringify the things I stringified before. (putnopvut, Oct 6, 2022)
d4ace38 Add garbage collection debugging. (putnopvut, Oct 6, 2022)
9d04740 Use full path for SSH key in example deployment file. (putnopvut, Oct 6, 2022)
5aa3e71 Only monitor SB tables we care about. (putnopvut, Oct 6, 2022)
908aac1 Copy HTML files in mine_data(). (putnopvut, Oct 6, 2022)
3310aac Run process monitor in the ovn-tester container. (putnopvut, Oct 6, 2022)
0805ecb Use poll instead of select (putnopvut, Oct 6, 2022)
14bcd83 Remove timedelta stuff from latency.py (putnopvut, Oct 6, 2022)
a2da1a2 Fix spurious double insert exception. (putnopvut, Oct 6, 2022)
a1af949 Address some more PR comments: (putnopvut, Oct 11, 2022)
887477b Start process monitor from ansible. (putnopvut, Oct 11, 2022)
1dc44c1 Use the first IP address in the node_net for the tester. (putnopvut, Oct 11, 2022)
fd5429f Log in UTC. (putnopvut, Oct 11, 2022)
98d1422 README: Indicate the need to re-install after updating ovn-tester. (putnopvut, Oct 17, 2022)
0653426 process-monitor: Get it working for ovn-tester. (putnopvut, Oct 17, 2022)
6cfcf1a Move physical deployment to tester container when running test. (putnopvut, Oct 31, 2022)
d54b2b2 Fix incorrect copying of html files. (putnopvut, Oct 31, 2022)
c9bddc4 Use $(hostname) instead of other methods for cirrus CI. (putnopvut, Oct 31, 2022)
1d157dc Stay in the rundir when installing ovn-tester. (putnopvut, Oct 31, 2022)
1985b31 Die if running the test fails. (putnopvut, Oct 31, 2022)
78fa2fc Use /usr/bin/env python instead of python3. (putnopvut, Oct 31, 2022)
f64c623 Don't delete ovn-tester ports from br-ovn when deploying. (putnopvut, Oct 31, 2022)
2e19451 Add some comments in places where the logic may be non-obvious. (putnopvut, Oct 31, 2022)
b878247 Do not tag tester image after pulling. (putnopvut, Nov 1, 2022)
67d6407 Account for potential overflow of nb_cfg in the DB. (putnopvut, Nov 2, 2022)
b0d9ade Fix pep8 issues. (putnopvut, Nov 2, 2022)
.cirrus.yml (15 changes: 12 additions & 3 deletions)

@@ -8,18 +8,22 @@ low_scale_task:
 env:
   DEPENDENCIES: git ansible podman podman-docker
-  PHYS_DEPLOYMENT: ${CIRRUS_WORKING_DIR}/physical-deployments/localhost.yml
+  PHYS_DEPLOYMENT: ${CIRRUS_WORKING_DIR}/physical-deployments/ci.yml

 runtime_cache:
   folder: runtime-cache

 configure_ssh_script:
+  - |
+    export IP_ADDR=$(ip route get 8.8.8.8 | \
+                     head -1 | \
+                     sed 's/.*src \([0-9\.]*\).*/\1/')
   - mkdir -p /root/.ssh/
   - ssh-keygen -t rsa -N '' -q -f /root/.ssh/id_rsa
-  - ssh-keyscan localhost >> /root/.ssh/known_hosts
+  - ssh-keyscan ${IP_ADDR} >> /root/.ssh/known_hosts
   - cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
   - chmod og-wx /root/.ssh/authorized_keys
-  - ssh root@localhost -v echo Hello
+  - ssh root@${IP_ADDR} -v echo Hello

 install_dependencies_script:
   - dnf install -y ${DEPENDENCIES}
@@ -30,6 +34,11 @@ low_scale_task:
   - tar -xzf runtime-cache/runtime.tar.gz || true

 install_script:
+  - |
+    export IP_ADDR=$(ip route get 8.8.8.8 | \
+                     head -1 | \
+                     sed 's/.*src \([0-9\.]*\).*/\1/')
+  - 'sed -i "s/<ip>/${IP_ADDR}/g" ${PHYS_DEPLOYMENT}'
   - ./do.sh install

 pack_caches_script:
Dockerfile (new file, 14 additions)

@@ -0,0 +1,14 @@
+FROM ovn/ovn-multi-node
+
+ARG SSH_KEY
+ARG PHYS_DEPLOYMENT
+
+COPY ovn-tester /ovn-tester
+
+RUN mkdir -p /root/.ssh/
+COPY $SSH_KEY /root/.ssh/
+
+COPY $PHYS_DEPLOYMENT /physical-deployment.yml
+COPY ovn-fake-multinode-utils/process-monitor.py /tmp/
+
+RUN pip3 install -r /ovn-tester/requirements.txt
README.md (58 changes: 41 additions & 17 deletions)

@@ -12,20 +12,22 @@ insecure docker registries, cleanup existing docker containers).
 ## Physical topology

-* TESTER: One machine to run the tests which needs to be able to SSH
-  paswordless (preferably as `root`) to all other machines in the topology.
-  Performs the following:
+* ORCHESTRATOR: One machine that needs to be able to SSH paswordless
+  (preferably as `root`) to all other machines in the topology. Performs the
+  following:
   - prepares the test enviroment: clone the specified versions of `OVS` and
     `OVN` and build the `ovn-fake-multinode` image to be used by the `OVN`
     nodes.
   - provisions all other `OVN` nodes with the required software packages
     and with the correct version of `ovn-fake-multinode` to run simulated/fake
     `OVN` chassis.
   - runs a docker registry where the `ovn-fake-multinode` (i.e.,
-    `ovn/ovn-multi-node`) image is pushed and from which all other `OVN`
-    nodes will pull the image.
-  - runs the scale test scenarios.
+    `ovn/ovn-multi-node`) and `ovn-tester` images are pushed and from which all
+    other `OVN` nodes will pull the image.
+
+* TESTER: One machine to run the `ovn-tester` container which runs the python
+  ovn-tester code. Like the ORCHESTRATOR, the TESTER also needs to be able to
+  SSH passwordless to all other machines in the topology.
 * OVN-CENTRAL: One machine to run the `ovn-central` container(s) which
   run `ovn-northd` and the `Northbound` and `Southbound` databases.
 * OVN-WORKER-NODE(s): Machines to run `ovn-netlab` container(s), each of

@@ -41,12 +43,12 @@ single L2 switch. This interface will be used for traffic to/from the
 `Northbound` and `Southbound` databases and for tunneled traffic.

 **NOTE**: there's no restriction regarding physical machine roles so for
-debugging issues the TESTER, OVN-CENTRAL and OVN-WORKER-NODEs can all
-be the same physical machine in which case there's no need for the secondary
-Ethernet interface to exist.
+debugging issues the ORCHESTRATOR, TESTER, OVN-CENTRAL and OVN-WORKER-NODEs can
+all be the same physical machine in which case there's no need for the
+secondary Ethernet interface to exist.

 ## Sample physical topology:
-* TESTER: `host01.mydomain.com`
+* ORCHESTRATOR: `host01.mydomain.com`
 * OVN-CENTRAL: `host02.mydomain.com`
 * OVN-WORKER-NODEs:
   - `host03.mydomain.com`

@@ -55,14 +57,22 @@ Ethernet interface to exist.
 OVN-CENTRAL and OVN-WORKER-NODEs all have Ethernet interface `eno1`
 connected to a physical switch in a separate VLAN, as untagged interfaces.

-## Minimal requirements on the TESTER node (tested on Fedora 32)
+**NOTE**: The hostnames specified in the physical topology are used by both
+the ORCHESTRATOR and by the `ovn-tester` container running in the TESTER.
+Therefore, the values need to be resolvable by both of these entities and
+need to resolve to the same host. `localhost` will not work since this does
+not resolve to a unique host.
+
+## Minimal requirements on the ORCHESTRATOR node (tested on Fedora 32)

 ### Install required packages:
 ```
 dnf install -y git ansible
 ```

-### Make docker work with Fedora 32 (disable cgroup hierarchy):
+## Minimal requirements on the TESTER node (tested on Fedora 36)
+
+### Make docker work with Fedora 32+ (disable cgroup hierarchy):

 ```
 dnf install -y grubby

@@ -107,10 +117,16 @@ A sample file written for the deployment described above is available at
 The file should contain the following mandatory sections and fields:
 - `registry-node`: the hostname (or IP) of the node that will store the
-  docker private registry. In usual cases this is should be the TESTER
+  docker private registry. In usual cases this is should be the ORCHESTRATOR
   machine.
 - `internal-iface`: the name of the Ethernet interface used by the underlay
   (DB and tunnel traffic). This can be overridden per node if needed.
+- `tester-node`:
+  - `name`: the hostname (or IP) of the node that will run `ovn-tester` (the
+    python code that performs the actual test)
+  - `ssh_key`: An ssh private key to install in the TESTER that can be used
+    to communicate with the other machines in the cluster.
+    Default: `~/.ssh/id_rsa`
 - `central-node`:
   - `name`: the hostname (or IP) of the node that will run `ovn-central`
     (`ovn-northd` and databases).
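
For illustration (this is not part of the diff), a minimal physical deployment
file matching the fields above might look like the sketch below. All hostnames,
the interface name, and the key path are placeholders, and the `worker-nodes`
section name is an assumption since that part of the README is not shown here:

```
# physical-deployment.yml (hypothetical example)
registry-node: host01.mydomain.com   # usually the ORCHESTRATOR
internal-iface: eno1                 # underlay interface (DB + tunnel traffic)

tester-node:
  name: host05.mydomain.com          # runs the ovn-tester container
  ssh_key: /root/.ssh/id_rsa         # private key baked into the tester image

central-node:
  name: host02.mydomain.com          # runs ovn-central (ovn-northd + NB/SB DBs)

worker-nodes:
  - host03.mydomain.com
  - host04.mydomain.com
```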
@@ -162,6 +178,9 @@ This step will:
   can be enabled by setting the `EXTRA_OPTIMIZE=yes` environment variable
   (`EXTRA_OPTIMIZE=yes ./do.sh install`).
 - push the container image to all other nodes and prepare the test environment.
+- build the `ovn/ovn-tester` container image which will be used by the TESTER
+  node to run the ovn-tester application.
+- push the `ovn/ovn-tester` container image to the TESTER node.

 To override the OVS, OVN or ovn-fake-multinode repos/branches use the
 following environment variables:

@@ -176,6 +195,10 @@ cd ~/ovn-heater
 OVS_REPO=https://github.com/dceara/ovs OVS_BRANCH=tmp-branch OVN_REPO=https://github.com/dceara/ovn OVN_BRANCH=tmp-branch-2 ./do.sh install
 ```

+NOTE: Because the installation step is responsible for deploying the ovn-tester
+container to the TESTER, this means that if any changes are made to the
+ovn-tester application, the installation step must be re-run.
+
 ## Perform a reinstallation (e.g., new OVS/OVN versions are needed):

 For OVS, OVN or ovn-fake-multinode code changes to be reflected the

@@ -231,10 +254,11 @@ cd ~/ovn-heater
 ./do.sh run <scenario> <results-dir>
 ```

-This executes `<scenario>` on the physical deployment. Current
-scenarios also cleanup the environment, i.e., remove all docker containers
-from all physical nodes. **NOTE**: If the environment needs to be explictly
-cleaned up, we can also execute before running the scenario:
+This executes `<scenario>` on the physical deployment (specifically on the
+`ovn-tester` container on the TESTER). Current scenarios also cleanup the
+environment, i.e., remove all docker containers from all physical nodes.
+**NOTE**: If the environment needs to be explictly cleaned up, we can also
+execute before running the scenario:

 ```
 cd ~/ovn-heater
do.sh (70 changes: 57 additions & 13 deletions)

@@ -17,6 +17,8 @@ ovn_fmn_playbooks=${ovn_fmn_utils}/playbooks
 ovn_fmn_generate=${ovn_fmn_utils}/generate-hosts.py
 ovn_fmn_docker=${ovn_fmn_utils}/generate-docker-cfg.py
 ovn_fmn_podman=${ovn_fmn_utils}/generate-podman-cfg.py
+ovn_fmn_get=${ovn_fmn_utils}/get-config-value.py
+ovn_fmn_ip=${rundir}/ovn-fake-multinode/ip_gen.py
 hosts_file=${rundir}/hosts
 installer_log_file=${rundir}/installer-log
 docker_daemon_file=${rundir}/docker-daemon.json

@@ -26,7 +28,6 @@ log_perf_file=${rundir}/perf.sh
 process_monitor_file=${rundir}/process-monitor.py

 ovn_tester=${topdir}/ovn-tester
-ovn_tester_log_file=test-log

 EXTRA_OPTIMIZE=${EXTRA_OPTIMIZE:-no}
 USE_OVSDB_ETCD=${USE_OVSDB_ETCD:-no}

@@ -92,7 +93,7 @@ function install_venv() {
Expand Down Expand Up @@ -92,7 +93,7 @@ function install_venv() {
python3 -m virtualenv ${ovn_heater_venv}
fi
source ${ovn_heater_venv}/bin/activate
pip install -r ${ovn_tester}/requirements.txt
pip install -r ${topdir}/utils/requirements.txt
deactivate
popd
}
Expand Down Expand Up @@ -228,6 +229,22 @@ function install_ovn_fake_multinode() {
popd
}

function install_ovn_tester() {
rm -rf tester_files
mkdir tester_files
ssh_key=$(${ovn_fmn_get} ${phys_deployment} tester-node ssh_key)
putnopvut marked this conversation as resolved.
Show resolved Hide resolved
# We need to copy the files into a known directory within the Docker
# context directory. Otherwise, Docker can't find the files we reference.
cp ${ssh_key} tester_files
cp ${phys_deployment} tester_files
ssh_key_file=tester_files/$(basename ${ssh_key})
phys_deployment_file=tester_files/$(basename ${phys_deployment})
docker build -t ovn/ovn-tester --build-arg SSH_KEY=${ssh_key_file} --build-arg PHYS_DEPLOYMENT=${phys_deployment_file} -f ${topdir}/Dockerfile .
putnopvut marked this conversation as resolved.
Show resolved Hide resolved
docker tag ovn/ovn-tester localhost:5000/ovn/ovn-tester
docker push localhost:5000/ovn/ovn-tester
rm -rf tester_files
}

# Prepare OVS bridges and cleanup containers.
function init_ovn_fake_multinode() {
echo "-- Initializing ovn-fake-multinode cluster on all nodes"
Expand All @@ -240,6 +257,10 @@ function pull_ovn_fake_multinode() {
     ansible-playbook ${ovn_fmn_playbooks}/pull-fake-multinode.yml -i ${hosts_file}
 }

+function pull_ovn_tester() {
+    ansible-playbook ${ovn_fmn_playbooks}/pull-ovn-tester.yml -i ${hosts_file}
+}
+
 function install() {
     pushd ${rundir}
     install_deps

@@ -249,6 +270,10 @@ function install() {
     init_ovn_fake_multinode
     pull_ovn_fake_multinode
     popd
+    pushd ${topdir}
+    install_ovn_tester
+    pull_ovn_tester
+    popd
 }

 function record_test_config() {

@@ -279,11 +304,12 @@
 function mine_data() {
     out_dir=$1
+    tester_host=$2

     echo "-- Mining data from logs in: ${out_dir}"

     pushd ${out_dir}

     mkdir -p mined-data
     for p in ovn-northd ovn-controller ovn-nbctl; do
         logs=$(find ${out_dir}/logs -name ${p}.log)

@@ -301,7 +327,8 @@ function mine_data() {
     grep ovn-installed ${logs} | cut -d ':' -f 2- | tr '|' ' ' \
         | cut -d ' ' -f 1,7 | tr 'T' ' ' | sort > mined-data/ovn-installed.log

-    python3 ${topdir}/utils/latency.py "$(date +%z)" \
+    source ${rundir}/${ovn_heater_venv}/bin/activate
+    python3 ${topdir}/utils/latency.py \
         ./mined-data/ovn-binding.log ./mined-data/ovn-installed.log \
         > mined-data/binding-to-ovn-installed-latency

@@ -321,9 +348,23 @@ function mine_data() {
         | grep ovn-scale | head -3)
     python3 ${topdir}/utils/process-stats.py \
         resource-usage-report-worker.html ${resource_usage_logs}
+    deactivate

     popd
 }

+function get_tester_ip() {
+    local test_file=$1
+
+    # The tester gets the first IP address in the configured node_net.
+    node_net=$(${ovn_fmn_get} ${test_file} cluster node_net --default=192.16.0.0/16)
[Review thread on the `--default=192.16.0.0/16` line above]

Collaborator: Nit: now we hardcode 192.16.0.0/16 in two places: here and in
ovn_tester.py. Do you think there's an easy way to just define this default in
a single place?

Collaborator Author: I was thinking about this, and there are a few ways that
could work.

1. Use an environment variable to define default values and ensure this same
   environment variable is used both in the orchestrator and the tester
   container.
2. Define default values in some file, and refer to this file from do.sh and
   ovn-tester.
3. Write a python module similar to ovn-fake-multinode-tools/get-config-value.py
   and put it in ovn-tester and define defaults in that module. Then do.sh and
   ovn-tester could both call into this module to get configuration values.
4. Use a template value in ovn_tester.py that can be filled in by do.sh at
   install-time.

Of these, I think 1 is clearly the worst. 2 is simple but also a bit clumsy.
For me, it's a toss-up whether 3 or 4 is better. I think I would lean more
towards option 3 myself. What do you think?

Contributor: Not sure if that is what you meant in 4, but one thing I had in
mind previously was:

1. Make ovn-tester fail if at least one variable is not defined in the yml
   file, i.e. no defaults.
2. Make do.sh generate the full yml from a test scenario yml, filling all the
   undefined values with defaults.
3. Run ovn-tester with a generated, fully filled yml.

This will ensure that we do not have defaults defined in multiple places. It
will also be easier to track down undefined/misconfigured values if ovn-tester
just fails and does not proceed using unexpected config values.

Collaborator: Sounds interesting! It also sounds like something we can do as a
follow up in my opinion. Shall we open an issue to track this?

Collaborator Author: @igsilya Sorry for the ambiguity on number 4. What I
meant was that ovn_tester.py would have something like this in it:

    DEFAULT_NODE_NET = {{ default_node_net }}
    node_net = netaddr.IPNetwork(cluster_args.get('node_net', DEFAULT_NODE_NET))

We might also rename it to ovn_tester.py.in to make it clear that it can't be
run as-is. Then, when running do.sh install, the do.sh code will populate
{{ default_node_net }} with the proper default. This way, the default is only
defined in do.sh. It's not my favorite suggestion, but it's something I
thought of.

Collaborator: @putnopvut Hmm, I'm not sure I like the idea of an
ovn_tester.py.in. I think I'd prefer @igsilya's suggestion of do.sh filling in
all required input values in the yml we finally pass to ovn-tester.

Collaborator Author: Yes, that's why from the 4 suggestions I came up with, 3
was the one I preferred most. Mine and @igsilya's are more-or-less the same,
except that in his, do.sh controls the defaults, and in mine, ovn-tester
controls the defaults.

Collaborator: I see, your option "3" is also fine, thanks!

Collaborator Author: I've actually implemented @igsilya's idea at
https://github.com/putnopvut/ovn-heater/tree/default_dumper . I was hoping to
be able to open a PR here, but github does not allow using a PR as the basis
for another PR. Therefore, when I created the PR, it was way larger and more
confusing than necessary. I'll hold off and post that PR once this one is
merged.
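As a concrete illustration of option 3 above, a shared defaults module might
look like the following minimal sketch. The module name, the CLI shape, and
the use of PyYAML are assumptions; this code is not part of the PR:

```
# config_defaults.py (hypothetical): single source of truth for configuration
# defaults, importable by ovn-tester and callable from do.sh as a small CLI.
import sys

import yaml  # PyYAML; assumed available since ovn-tester already parses yml

DEFAULTS = {
    ('cluster', 'node_net'): '192.16.0.0/16',
}


def get_value(config, section, key):
    """Return config[section][key], falling back to the shared default."""
    try:
        return config[section][key]
    except (KeyError, TypeError):
        return DEFAULTS[(section, key)]


if __name__ == '__main__':
    # CLI usage from do.sh: python3 config_defaults.py <file.yml> <section> <key>
    with open(sys.argv[1]) as f:
        cfg = yaml.safe_load(f) or {}
    print(get_value(cfg, sys.argv[2], sys.argv[3]))
```

With something like this in place, do.sh could call the module instead of
hardcoding `--default=192.16.0.0/16`, and ovn_tester.py could import
`get_value` directly, keeping the default in one place.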

+    node_cidr=${node_net#*/}
+    node_ip=${node_net%/*}
+    ip_index=1
+    tester_ip=$(${ovn_fmn_ip} ${node_net} ${node_ip} ${ip_index})
+    echo "${tester_ip}/${node_cidr}"
+}
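`ip_gen.py` ships with ovn-fake-multinode and is not part of this diff. Based
purely on the call site above, a sketch of an equivalent helper (assumed CLI:
`ip_gen.py <node_net> <base_ip> <index>`) could look like this:

```
#!/usr/bin/env python3
# Hypothetical stand-in for ovn-fake-multinode's ip_gen.py, inferred from the
# call site in get_tester_ip(): print <base_ip> offset by <index>.
import ipaddress
import sys

net = ipaddress.ip_network(sys.argv[1], strict=False)  # e.g. 192.16.0.0/16
base = ipaddress.ip_address(sys.argv[2])               # e.g. 192.16.0.0
index = int(sys.argv[3])                               # e.g. 1

ip = base + index                  # first usable address when index is 1
assert ip in net, f"{ip} is outside {net}"
print(ip)
```

With the defaults above, get_tester_ip() would echo `192.16.0.1/16` for the
tester.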

 function run_test() {
     local test_file=$1
     local out_dir=$2

@@ -338,11 +379,14 @@ function run_test() {
     # Perform a fast cleanup by doing a minimal redeploy.
     init_ovn_fake_multinode

-    source ${rundir}/${ovn_heater_venv}/bin/activate
     pushd ${out_dir}
+    tester_ip=$(get_tester_ip ${test_file})
+    if ! ansible-playbook ${ovn_fmn_playbooks}/run-tester.yml -i ${hosts_file} --extra-vars "test_file=${test_file} tester_ip=${tester_ip}" ; then
+        echo "-- Failed to set up test!"
+    fi

-    if ! python -u ${ovn_tester}/ovn_tester.py $phys_deployment ${test_file} 2>&1 | tee ${ovn_tester_log_file}; then
-        echo "-- Failed to run test! Check logs at: $PWD/${ovn_tester_log_file}"
+    tester_host=$(${ovn_fmn_get} ${phys_deployment} tester-node name)
+    if ! ssh root@${tester_host} docker exec ovn-tester python3 -u /ovn-tester/ovn_tester.py /physical-deployment.yml /test-scenario.yml 2>&1 | tee ${out_dir}/test-log ; then
+        echo "-- Failed to run test. Check logs at: ${out_dir}/test-log"
     fi

     echo "-- Collecting logs to: ${out_dir}"

@@ -352,19 +396,19 @@ function run_test() {
     for f in *.tgz; do
         tar xvfz $f
     done
+    popd
+    # Prior to containerization of ovn-tester, HTML files written by ovn-tester
+    # were written directly to ${out_dir}. To make things easier for tools, we
+    # copy the HTML files back to this original location.
+    cp logs/${tester_host}/ovn-tester/*.html ${out_dir} || true

     # Once we successfully ran the test and collected its logs, the post
     # processing (e.g., data mining) can run in a subshell with errexit
     # disabled. We don't want the whole thing to error out if the post
     # processing fails.
     (
         set +o errexit
-        mine_data ${out_dir}
+        mine_data ${out_dir} ${tester_host}
     )
-
-    popd
-    deactivate
 }

 function usage() {