Add a new benchmark for mat-vec multiplication

gha3mi · Aug 21, 2023 · 620240f · 620240f
1 parent b9df723
commit 620240f
Show file tree

Hide file tree

Showing 3 changed files with 236 additions and 1 deletion.
diff --git a/benchmark/benchmark4.py b/benchmark/benchmark4.py
@@ -0,0 +1,136 @@
+# ForMatmul: plot elapsed time and performance read from benchmark/benchmark4_im1.data
+# Seyed Ali Ghasemi
+import matplotlib.pyplot as plt
+from matplotlib.ticker import ScalarFormatter
+from itertools import cycle
+from collections import defaultdict
+import numpy as np
+
+# Parse the data file: 3 skipped rows, one info row, then one whitespace-separated record per line
+data = []  # list of (identifier, x_value, elapsed_time, performance) tuples
+with open('benchmark/benchmark4_im1.data', 'r') as file:
+    for _ in range(3):
+        next(file)  # Skip the first 3 rows
+    constant_value_row = next(file).strip().split()  # Read the constant value row (the 4th row)
+    number_images = int(constant_value_row[1])  # Second field of that row is taken as the number of images
+    for line in file:
+        values = line.strip().split()
+        identifier = values[0]  # Series name; NOTE(review): a blank line would raise IndexError here
+        x_value = int(values[1])*int(values[3])  # m*o: product of fields 1 and 3, used as the x coordinate
+        elapsed_time = float(values[7])  # Extract elapsed time (y-value for elapsed time plot)
+        performance = float(values[8])  # Extract performance (y-value for performance plot)
+        data.append((identifier, x_value, elapsed_time, performance))
+
+# Group the records per identifier as (x, y) points, one mapping for each plotted quantity
+organized_data_elapsed_time = defaultdict(list)
+organized_data_performance = defaultdict(list)
+for row in data:
+    identifier = row[0]
+    organized_data_elapsed_time[identifier].append((row[1], row[2]))
+    organized_data_performance[identifier].append((row[1], row[3]))
+
+# Create a figure with two side-by-side subplots (elapsed time on axes[0], performance on axes[1])
+fig, axes = plt.subplots(1, 2, figsize=(15, 6))
+
+# Marker/color pair per identifier; filled in by the elapsed-time loop and reused by the performance loop
+identifier_marker_color = {}
+marker_cycle = cycle(['o','.', 'o', 's', '^', 'v', 'D', 'p', '*', '+', 'x', '<', '>','|'])  # NOTE(review): 'o' is listed twice
+color_cycle = cycle(['teal','black', 'blue', 'green', 'red', 'purple', 'orange', 'brown', 'pink', 'gray', 'gold', 'magenta', 'olive','maroon'])
+
+# Plot elapsed time per identifier and remember the marker/color drawn for it
+for key, values in organized_data_elapsed_time.items():
+    x_values = [v[0] for v in values]
+    elapsed_time_values = [v[1] for v in values]
+    marker = next(marker_cycle)
+    color = next(color_cycle)
+    identifier_marker_color[key] = (marker, color)
+
+    axes[0].plot(x_values, elapsed_time_values, marker=marker, color=color, linestyle='-', label=key)
+
+# Configure the first subplot: title, axis labels, legend, grid and scientific notation on the x axis
+axes[0].set_title(f'Coarray Matmul Benchmark - Average Elapsed Time\nnumber_images={number_images}')
+axes[0].set_xlabel('Number of Elements in the Resulting Matrix')
+axes[0].set_ylabel('Average Elapsed Time [s]')
+axes[0].legend(loc='upper left', fontsize='small')
+axes[0].grid(True)
+axes[0].xaxis.set_major_formatter(ScalarFormatter(useMathText=True))
+axes[0].ticklabel_format(axis='x', style='sci', scilimits=(-2,2))
+
+# Plot performance per identifier with the same marker/color as in the elapsed-time subplot
+for key, values in organized_data_performance.items():
+    x_values = [v[0] for v in values]
+    performance_values = [v[1] for v in values]
+
+    marker, color = identifier_marker_color[key]
+
+    axes[1].plot(x_values, performance_values, marker=marker, color=color, linestyle='-', label=key)
+
+# Configure the second subplot the same way as the first one
+axes[1].set_title(f'Coarray Matmul Benchmark - Total Performance\nnumber_images={number_images}')
+axes[1].set_xlabel('Number of Elements in the Resulting Matrix')
+axes[1].set_ylabel('Total Performance [GFLOPS]')
+axes[1].legend(loc='upper left', fontsize='small')
+axes[1].grid(True)
+axes[1].xaxis.set_major_formatter(ScalarFormatter(useMathText=True))
+axes[1].ticklabel_format(axis='x', style='sci', scilimits=(-2,2))
+
+# Adjust layout, save the line plots to benchmark/benchmark4a_nim<number_images>.png, and display
+plt.tight_layout()
+plt.savefig(f'benchmark/benchmark4a_nim{number_images}.png', dpi=300)
+plt.show()
+
+# Per-identifier mean over all recorded points, for elapsed time and for performance
+average_values_elapsed_time = {}
+average_values_performance = {}
+for key, values in organized_data_elapsed_time.items():
+    elapsed_time_values = [v[1] for v in values]
+    average_elapsed_time = np.mean(elapsed_time_values)
+    average_values_elapsed_time[key] = average_elapsed_time
+
+for key, values in organized_data_performance.items():
+    performance_values = [v[1] for v in values]
+    average_performance = np.mean(performance_values)
+    average_values_performance[key] = average_performance
+
+# Find methods with the highest and lowest average elapsed time and performance (used to color the bars)
+max_elapsed_time_method = max(average_values_elapsed_time, key=average_values_elapsed_time.get)
+min_elapsed_time_method = min(average_values_elapsed_time, key=average_values_elapsed_time.get)
+
+max_performance_method = max(average_values_performance, key=average_values_performance.get)
+min_performance_method = min(average_values_performance, key=average_values_performance.get)
+
+# Overall averages: unweighted mean of the per-identifier means
+overall_average_elapsed_time = np.mean(list(average_values_elapsed_time.values()))
+overall_average_performance = np.mean(list(average_values_performance.values()))
+
+# Create a second figure with two bar plots for the average values
+fig, axes = plt.subplots(1, 2, figsize=(15, 6))
+
+# Bar plot for average elapsed time: lowest bar green, highest bar red, all others blue
+bar_colors_elapsed_time = ['green' if method == min_elapsed_time_method else 'red' if method == max_elapsed_time_method else 'blue' for method in average_values_elapsed_time.keys()]
+bars = axes[0].bar(average_values_elapsed_time.keys(), average_values_elapsed_time.values(), color=bar_colors_elapsed_time)  # NOTE(review): bars is not read again in this script
+
+# Dashed line at the overall average; NOTE(review): its label is set but no legend() call follows on this axes
+axes[0].axhline(y=overall_average_elapsed_time, color='gray', linestyle='dashed', label='Overall Average')
+axes[0].set_title(f'Coarray Matmul Benchmark - Average Elapsed Time\nnumber_images={number_images}')
+axes[0].set_xlabel('Methods')
+axes[0].set_ylabel('Average Elapsed Time [s]')
+axes[0].tick_params(axis='x', rotation=45)
+axes[0].grid(True)
+
+# Bar plot for average performance: highest bar green, lowest bar red, all others blue
+bar_colors_performance = ['green' if method == max_performance_method else 'red' if method == min_performance_method else 'blue' for method in average_values_performance.keys()]
+bars = axes[1].bar(average_values_performance.keys(), average_values_performance.values(), color=bar_colors_performance)  # NOTE(review): bars is not read again in this script
+
+# Dashed line at the overall average; NOTE(review): its label is set but no legend() call follows on this axes
+axes[1].axhline(y=overall_average_performance, color='gray', linestyle='dashed', label='Overall Average')
+axes[1].set_title(f'Coarray Matmul Benchmark - Average Total Performance\nnumber_images={number_images}')
+axes[1].set_xlabel('Methods')
+axes[1].set_ylabel('Average Total Performance [GFLOPS]')
+axes[1].tick_params(axis='x', rotation=45)
+axes[1].grid(True)
+
+# Adjust layout, save the bar plots to benchmark/benchmark4b_nim<number_images>.png, and display
+plt.tight_layout()
+plt.savefig(f'benchmark/benchmark4b_nim{number_images}.png', dpi=300)
+plt.show()
diff --git a/example/benchmark4.f90 b/example/benchmark4.f90
@@ -0,0 +1,94 @@
+program benchmark4 ! Times matmul(A,v) and its 'coarray' options m1..m6 for square A of growing size
+
+   use kinds,                         only: rk
+   use fortime,                       only: timer
+   use formatmul,                     only: matmul
+   use formatmul_benchmark,           only: start_benchmark, stop_benchmark, write_benchmark
+   use, intrinsic :: iso_fortran_env, only: compiler_version, compiler_options
+
+   implicit none
+
+   real(rk), allocatable         :: A(:,:), v(:)       ! input matrix and vector
+   real(rk), allocatable         :: w_ref(:), w(:)     ! reference product and product under test
+   type(timer)                   :: t[*]               ! coarray timer; this image only uses t[im]
+   integer                       :: m, n, i ,nloops, p, unit_num, im, nim
+   character(len=:), allocatable :: file_name
+   character(len=1000)           :: im_chr
+
+   nloops = 10 ! number of repetitions of each timed call
+
+   im  = this_image() ! index of this image
+   nim = num_images() ! total number of images
+
+   write (im_chr, '(i0)') im ! image index as text, used in the file name
+
+   file_name = "benchmark/benchmark4_im"//trim(im_chr)//".data" ! one data file per image
+
+   open (newunit = unit_num, file = file_name) ! write the 4-row header, then close the file again
+   write(unit_num,'(a)') 'ForMatmul'
+   write(unit_num,'(a)') compiler_version()
+   write(unit_num,'(a)') compiler_options()
+   write(unit_num,"(g0,' ',g0)") im, nim ! 4th row: image index and number of images
+   close(unit_num)
+
+   do p = 250,30000,250 ! sizes 250, 500, ..., 30000
+
+      ! w(m) = A(m,n).v(n), with m = n = p
+      m = p
+      n = p
+
+      if (allocated(A))     deallocate(A) ! release the arrays of the previous size
+      if (allocated(v))     deallocate(v)
+      if (allocated(w))     deallocate(w)
+      if (allocated(w_ref)) deallocate(w_ref)
+      allocate(A(m,n))
+      allocate(v(n))
+      allocate(w(m))
+      allocate(w_ref(m))
+      call random_number(A) ! fresh random inputs for every size
+      call random_number(v)
+
+      call start_benchmark(t[im],m,n,1,"w_ref = matmul(A,v)") ! reference product, reused as w_ref below
+      do i = 1,nloops
+         w_ref = matmul(A,v)
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w_ref,w_ref,'Matmul',file_name) ! w_ref is passed as both result and reference; presumably appends a record to file_name - confirm in formatmul_benchmark
+
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m1')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m1')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_Matmul',file_name) ! option m1 is recorded under the label 'coarray_Matmul'
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m2')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m2')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_dgemm',file_name) ! NOTE(review): label says dgemm although this is a matrix-vector product - confirm the intended name
+
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m3')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m3')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_m3',file_name)
+
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m4')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m4')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_m4',file_name)
+
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m5')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m5')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_m5',file_name)
+
+      call start_benchmark(t[im],m,n,1,"w = matmul(A,v,'coarray','m6')")
+      do i = 1,nloops
+         w = matmul(A,v,'coarray','m6')
+      end do
+      call stop_benchmark(t[im],m,n,1,nloops,w,w_ref,'coarray_m6',file_name)
+
+   end do
+
+end program benchmark4
diff --git a/fpm.toml b/fpm.toml
@@ -91,4 +91,9 @@ main       = "benchmark2.f90"
 [[example]]
 name       = "benchmark3"
 source-dir = "example"
-main       = "benchmark3.f90"
+main       = "benchmark3.f90"
+
+[[example]]
+name       = "benchmark4"  # example target built from example/benchmark4.f90
+source-dir = "example"
+main       = "benchmark4.f90"