Skip to content

Commit

Permalink
Merge pull request #1636 from NexGenAnalytics/5-google-bench-dot-test
Browse files Browse the repository at this point in the history
#5 Refactor blas1 test for benchmark feature
  • Loading branch information
lucbv authored Mar 16, 2023
2 parents afd686e + 3ec0cb7 commit ff097ec
Show file tree
Hide file tree
Showing 5 changed files with 434 additions and 0 deletions.
3 changes: 3 additions & 0 deletions perf_test/Benchmark_Context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#ifndef KOKKOSKERNELS_PERFTEST_BENCHMARK_CONTEXT_HPP
#define KOKKOSKERNELS_PERFTEST_BENCHMARK_CONTEXT_HPP

#include "KokkosKernels_PrintConfiguration.hpp"

#include <string>

#include <benchmark/benchmark.h>
Expand Down Expand Up @@ -46,6 +48,7 @@ std::string remove_unwanted_characters(std::string str) {
void add_kokkos_configuration(bool verbose) {
std::ostringstream msg;
Kokkos::print_configuration(msg, verbose);
KokkosKernels::print_configuration(msg);

// Iterate over lines returned from kokkos and extract key:value pairs
std::stringstream ss{msg.str()};
Expand Down
3 changes: 3 additions & 0 deletions perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ IF(KokkosKernels_ENABLE_BENCHMARK)
SET(
BENCHMARK_SOURCES
BenchmarkMain.cpp
blas/blas1/KokkosBlas_dot_perf_test_benchmark.cpp
blas/blas1/KokkosBlas_dot_mv_perf_test_benchmark.cpp
blas/blas1/KokkosBlas_team_dot_perf_test_benchmark.cpp
)

KOKKOSKERNELS_ADD_BENCHMARK(
Expand Down
142 changes: 142 additions & 0 deletions perf_test/blas/blas1/KokkosBlas_dot_mv_perf_test_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>

#include "KokkosBlas_dot_perf_test.hpp"
#include <benchmark/benchmark.h>

///////////////////////////////////////////////////////////////////////////////////////////////////
// The Level 1 BLAS perform scalar, vector and vector-vector operations;
//
// https://github.com/kokkos/kokkos-kernels/wiki/BLAS-1%3A%3Adot
//
// Usage: result = KokkosBlas::dot(x,y); KokkosBlas::dot(r,x,y);
// Multiplies each value of x(i) [x(i,j)] with y(i) or [y(i,j)] and computes the
// sum. (If x and y have scalar type Kokkos::complex, the complex conjugate of
// x(i) or x(i,j) will be used.) VectorX: A rank-1 Kokkos::View VectorY: A
// rank-1 Kokkos::View ReturnVector: A rank-0 or rank-1 Kokkos::View
//
// REQUIREMENTS:
// Y.rank == 1 or X.rank == 1
// Y.extent(0) == X.extent(0)

// Dot Test design:
// 1) create a 2D View of m rows by n columns, i.e. n vectors of length m;
//    this will be your X input multivector;
// 2) create a second 2D View of the same m-by-n shape; this will be your Y
//    input multivector;
// 3) perform the dot operation on the two inputs, and capture the n results
//    in the 1D View "result"

// Here, m represents the desired length of each vector and n the number of
// vectors in each multivector;
// "m" is used here, because code from another test was adapted for this test.
///////////////////////////////////////////////////////////////////////////////////////////////////

/// \brief Google Benchmark driver that times the multivector form of
///        KokkosBlas::dot(result, x, y).
///
/// Benchmark arguments read from \p state:
///   - range(0): m, the first extent of x and y
///   - range(1): n, the second extent of x and y and the extent of result
///   - range(2): repeat, the number of timed dot calls per benchmark iteration
///
/// Each benchmark iteration performs one untimed warm-up call, then times
/// `repeat` fenced calls with a Kokkos::Timer. The elapsed time of those calls
/// is reported to Google Benchmark as the manual iteration time, and the
/// per-call average time and FLOP rate are exposed as user counters.
///
/// \tparam ExecSpace Kokkos execution space the benchmark runs on.
/// \param state Google Benchmark state providing the arguments and receiving
///              the manual time and counters.
template <class ExecSpace>
static void run(benchmark::State& state) {
  const auto m      = state.range(0);
  const auto n      = state.range(1);
  const auto repeat = state.range(2);

  // Declare type aliases
  using Scalar   = double;
  using MemSpace = typename ExecSpace::memory_space;
  using Device   = Kokkos::Device<ExecSpace, MemSpace>;

  std::cout << "Running BLAS Level 1 DOT perfomrance experiment ("
            << ExecSpace::name() << ")\n";

  std::cout << "Each test input vector has a length of " << m << std::endl;

  // Input multivectors: m-by-n, column-major (LayoutLeft). Allocated without
  // initialization because they are filled with random data below.
  Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device> x(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "x"), m, n);

  Kokkos::View<Scalar**, Kokkos::LayoutLeft, Device> y(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "y"), m, n);

  // Output of the dot call: one entry per column of x and y.
  Kokkos::View<Scalar*, Device> result(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "x dot y"), n);

  // Pool of random number generators seeded with 123, used to fill the inputs.
  Kokkos::Random_XorShift64_Pool<ExecSpace> pool(123);

  Kokkos::fill_random(x, pool, 10.0);
  Kokkos::fill_random(y, pool, 10.0);

  for (auto _ : state) {
    // Untimed warm-up run of dot.
    KokkosBlas::dot(result, x, y);

    // Make sure the warm-up (and any other outstanding work) has finished
    // before the timer starts.
    Kokkos::fence();
    Kokkos::Timer timer;

    for (int i = 0; i < repeat; i++) {
      KokkosBlas::dot(result, x, y);
      // Fence after every call so each kernel is complete before the next
      // one is launched and before the timer is read.
      ExecSpace().fence();
    }

    // Read the timer exactly once, immediately after the timed loop, so the
    // console output below is not included in any reported number.
    const double total = timer.seconds();
    const double avg   = total / repeat;

    // Flops per dot call: one multiply and one add per entry of the m-by-n
    // inputs.
    const size_t flopsPerRun = static_cast<size_t>(2) * m * n;
    printf("Avg DOT time: %f s.\n", avg);
    printf("Avg DOT FLOP/s: %.3e\n", flopsPerRun / avg);

    // Report the measured loop time. Re-reading the timer here would add the
    // cost of the printf calls above to the benchmark's manual time.
    state.SetIterationTime(total);

    state.counters["Avg DOT time (s):"] =
        benchmark::Counter(avg, benchmark::Counter::kDefaults);
    state.counters["Avg DOT FLOP/s:"] =
        benchmark::Counter(flopsPerRun / avg, benchmark::Counter::kDefaults);
  }
}

// Register the multivector dot benchmark on the default Kokkos execution
// space. It runs with m = 100000, n = 5 and repeat = 20, which run() reads as
// state.range(0), state.range(1) and state.range(2). UseManualTime() makes
// Google Benchmark use the time passed to state.SetIterationTime() inside
// run() rather than its own timer.
BENCHMARK(run<Kokkos::DefaultExecutionSpace>)
->Name("KokkosBlas_dot_mv")
->ArgNames({"m", "n", "repeat"})
->Args({100000, 5, 20})
->UseManualTime();
140 changes: 140 additions & 0 deletions perf_test/blas/blas1/KokkosBlas_dot_perf_test_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Siva Rajamanickam (srajama@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>

#include "KokkosBlas_dot_perf_test.hpp"
#include <benchmark/benchmark.h>

///////////////////////////////////////////////////////////////////////////////////////////////////
// The Level 1 BLAS perform scalar, vector and vector-vector operations;
//
// https://github.com/kokkos/kokkos-kernels/wiki/BLAS-1%3A%3Adot
//
// Usage: result = KokkosBlas::dot(x,y); KokkosBlas::dot(r,x,y);
// Multiplies each value of x(i) [x(i,j)] with y(i) or [y(i,j)] and computes the
// sum. (If x and y have scalar type Kokkos::complex, the complex conjugate of
// x(i) or x(i,j) will be used.) VectorX: A rank-1 Kokkos::View VectorY: A
// rank-1 Kokkos::View ReturnVector: A rank-0 or rank-1 Kokkos::View
//
// REQUIREMENTS:
// Y.rank == 1 or X.rank == 1
// Y.extent(0) == X.extent(0)

// Dot Test design:
// 1) create a 1D View containing a 1D matrix, aka a vector; this will be your
//    X input;
// 2) create a 1D View containing a 1D matrix, aka a vector; this will be your
//    Y input;
// 3) perform the dot operation on the two inputs; this benchmark only times
//    the call and does not keep the value returned by dot

// Here, m represents the desired length for each 1D matrix;
// "m" is used here, because code from another test was adapted for this test.
///////////////////////////////////////////////////////////////////////////////////////////////////

/// \brief Google Benchmark driver that times the single-vector form of
///        KokkosBlas::dot(x, y).
///
/// Benchmark arguments read from \p state:
///   - range(0): m, the length of the input vectors x and y
///   - range(1): repeat, the number of timed dot calls per benchmark iteration
///
/// Each benchmark iteration performs one untimed warm-up call, then times
/// `repeat` fenced calls with a Kokkos::Timer. The elapsed time of those calls
/// is reported to Google Benchmark as the manual iteration time, and the
/// per-call average time and FLOP rate are exposed as user counters. The value
/// returned by dot is not used.
///
/// \tparam ExecSpace Kokkos execution space the benchmark runs on.
/// \param state Google Benchmark state providing the arguments and receiving
///              the manual time and counters.
template <class ExecSpace>
static void run(benchmark::State& state) {
  const auto m      = state.range(0);
  const auto repeat = state.range(1);

  // Declare type aliases
  using Scalar   = double;
  using MemSpace = typename ExecSpace::memory_space;
  using Device   = Kokkos::Device<ExecSpace, MemSpace>;

  std::cout << "Running BLAS Level 1 DOT perfomrance experiment ("
            << ExecSpace::name() << ")\n";

  std::cout << "Each test input vector has a length of " << m << std::endl;

  // First input vector of length m. Allocated without initialization because
  // it is filled with random data below.
  Kokkos::View<Scalar*, Device> x(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "x"), m);

  // Second input vector of length m (also an input, not an output).
  Kokkos::View<Scalar*, Device> y(
      Kokkos::view_alloc(Kokkos::WithoutInitializing, "y"), m);

  // Pool of random number generators seeded with 123, used to fill the inputs.
  Kokkos::Random_XorShift64_Pool<ExecSpace> pool(123);

  Kokkos::fill_random(x, pool, 10.0);
  Kokkos::fill_random(y, pool, 10.0);

  for (auto _ : state) {
    // Untimed warm-up run of dot.
    KokkosBlas::dot(x, y);

    // Make sure the warm-up (and any other outstanding work) has finished
    // before the timer starts.
    Kokkos::fence();
    Kokkos::Timer timer;

    for (int i = 0; i < repeat; i++) {
      KokkosBlas::dot(x, y);
      // Fence after every call so each kernel is complete before the next
      // one is launched and before the timer is read.
      ExecSpace().fence();
    }

    // Read the timer exactly once, immediately after the timed loop, so the
    // console output below is not included in any reported number.
    const double total = timer.seconds();
    const double avg   = total / repeat;

    // Flops per dot call: one multiply and one add per vector entry.
    const size_t flopsPerRun = static_cast<size_t>(2) * m;
    printf("Avg DOT time: %f s.\n", avg);
    printf("Avg DOT FLOP/s: %.3e\n", flopsPerRun / avg);

    // Report the measured loop time. Re-reading the timer here would add the
    // cost of the printf calls above to the benchmark's manual time.
    state.SetIterationTime(total);

    state.counters["Avg DOT time (s):"] =
        benchmark::Counter(avg, benchmark::Counter::kDefaults);
    state.counters["Avg DOT FLOP/s:"] =
        benchmark::Counter(flopsPerRun / avg, benchmark::Counter::kDefaults);
  }
}

// Register the single-vector dot benchmark on the default Kokkos execution
// space. It runs with m = 100000 and repeat = 1, which run() reads as
// state.range(0) and state.range(1). UseManualTime() makes Google Benchmark
// use the time passed to state.SetIterationTime() inside run() rather than
// its own timer.
BENCHMARK(run<Kokkos::DefaultExecutionSpace>)
->Name("KokkosBlas_dot")
->ArgNames({"m", "repeat"})
->Args({100000, 1})
->UseManualTime();
Loading

0 comments on commit ff097ec

Please sign in to comment.