mnt: re-arranged codes and added many new versions

Benchmarks should not rely on convergence, rather a fixed number of iterations is more appropriate. The problem of inconsistent iterations between the C and Fortran versions was also fixed because of an indexing error when calculating the cell coordinates (C is 0-based, F is 1-based). So they now run the same number of iterations, regardless of M. Note, this issue still persists in 2 and 3, but not in 4, I can't seem to figure out why. Moved timings about to make them more comparable. All different benchmarks write out the same format output to more easily compare them. The later versions are more realistic since they use allocate/malloc which is more appropriate. Heavily updated the README.md file to explain what is going on.
fortran-lang · Jul 2, 2021 · 33d5b68 · 33d5b68
1 parent 795e462
commit 33d5b68
Show file tree

Hide file tree

Showing 17 changed files with 916 additions and 294 deletions.
diff --git a/poisson2d/README.md b/poisson2d/README.md
@@ -3,14 +3,48 @@
 This solver uses a simple Jacobi iteration to solve a Poisson equation. For details, see, for example,
 "Computational Physics" by Mark Newman, Chap 9. Compiler commands were:
 
-```gfortran sourcefile.f95 -Ofast -o program```
+```gfortran sourcefile.f95 -O3 -o program```
 
-```gcc sourcefile.c -Ofast -o program```
+```gcc sourcefile.c -O3 -o program```
 
 For Cython module (import it to run):
 
 ```python setup.py build_ext --inplace```
 
+
+There are various implementations using various strategies.
+All codes are named with a 1-1 correspondance, i.e. `c_i_*` has the same strategy as `fortran_i_*` files.
+The suffixed `_*` is only used if there are multiple alternatives that utilizes the same strategy between
+the different languages. For example the `c_4_1d` and `c_4_2d` are both using the same strategy for
+solving the Poisson equation, while the former uses a 1D array for the data while the latter uses a 2D
+array with some slight overhead of another pointer level.
+
+The two files `c_common.c` and `fortran_common.f90` are baseline programs used for printing
+out data and for parsing the command line arguments.
+
+All executables accept 2 arguments:
+
+    # N = 100
+    # N_ITER = 2000
+    ./c_1 100 2000
+    100             2000  5.658391137993336e+04  1.448613750000000e-03
+
+which will use a 2D array of size `100x100` and run for 2000 iterations.
+The output line consists of 4 values, 1) N, 2) N_ITER, 3) max-difference, 4) time per iteration.
+
+Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html. Also, there was
+discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 
+with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk).
+
+The entire code base has been re-written to allow command-line arguments allowing to sample different
+matrix sizes with the same executable (user zerothi).
+
+To run a preset benchmark suite, simply execute `bash run.sh` which will run the currently
+implemented benchmarks with dimensions up to 600.
+
+
+## Old timings
+
 The grid is a 2-dimensional array of size MxM. The timings for different values of M are, in seconds:
 
 | Language            | M=100           | M=200                 | M=300                  |
@@ -25,7 +59,3 @@ The grid is a 2-dimensional array of size MxM. The timings for different values
 * For all of these results, the amount of iterations performed by the respective codes was approximately
 the same, with the exception of the 100x100 grid C codes, which did nearly a double amount of iterations
 compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6GHz, using WSL2 on Windows 10.
-
-Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was
-discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 
-with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk)
diff --git a/poisson2d/c_1.c b/poisson2d/c_1.c
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double (*a)[M], double (*b)[M])
+{
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double (*phi)[M], double (*phip)[M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(phi[i][j]-phip[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+clock_t run(int M, int N_ITER) {
+
+  // COMMENT:
+  // I wouldn't even imagine any novice in C would
+  // do something like this. It really makes no sense :)
+  // END OF COMMENT
+  double (*phi)[M];
+  double (*phip)[M]; 
+  double (*rhoa)[M]; 
+  phi = malloc(sizeof(double[M][M]));
+  phip = malloc(sizeof(double[M][M]));
+  rhoa = malloc(sizeof(double[M][M]));
+
+  clock_t tic = clock();
+
+  // initialize matrices
+  memset(phip, 0, sizeof(phip[0][0])*M*M);
+  memset(phi, 0, sizeof(phi[0][0])*M*M);
+
+  double delta = 1.0;
+  double a2 = pow(SPACE,2.0);
+
+  int iter = 0;
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE);
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  free(phi);
+  free(phip);
+  free(rhoa);
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+  run(N, N_ITER);
+}
diff --git a/poisson2d/c_2.c b/poisson2d/c_2.c
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double a[M][M], double b[M][M]) {
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double a[M][M], double b[M][M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(a[i][j]-b[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+void run(int M, int N_ITER) {
+
+  double phi[M][M];
+  double phip[M][M]; 
+
+  clock_t tic = clock();
+  int iter = 0;
+
+  // initialize matrices
+  memset(&phip[0][0], 0, sizeof(double)*M*M);
+  memset(&phi[0][0], 0, sizeof(double)*M*M);
+
+  double delta;
+  double a2 = pow(SPACE,2.0);
+
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE);
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+
+  run(N, N_ITER);
+}
diff --git a/poisson2d/c_3.c b/poisson2d/c_3.c
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "c_common.h"
+
+// COMMENT
+// I know, this is extremely bad practice.
+// But to accommodate argument sizes for the arguments
+// I felt this was ok.
+// It makes it much more versatile when testing
+// END COMMENT
+void swap(int M, double a[M][M], double b[M][M]) {
+  double swp;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      swp = a[i][j];
+      a[i][j] = b[i][j];
+      b[i][j] = swp;
+    }
+  }
+}
+
+// See comment above
+double mmax(int M, double a[M][M], double b[M][M]) {
+  double max = 0.0;
+  double diff = 0.0;
+  for (int i = 0; i < M; i++) {
+    for (int j = 0; j < M; j++) {
+      diff = fabs(a[i][j]-b[i][j]);
+      if (diff > max)
+        max = diff;
+    }
+  }
+  return max;
+}
+
+double rho(double x, double y) {
+  double s1 = 0.6;
+  double e1 = 0.8;
+  double s2 = 0.2;
+  double e2 = 0.4;
+
+  if (x > s1 && x < e1 && y > s1 && y < e1) {
+    return 1.0;
+  } else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
+    return -1.0;
+  } else {
+    return 0.0;
+  }
+}
+
+void run(int M, int N_ITER) {
+
+  double phi[M][M];
+  double phip[M][M]; 
+  double rhoa[M][M]; 
+
+  clock_t tic = clock();
+  int iter = 0;
+
+  // initialize matrices
+  memset(&phip[0][0], 0, sizeof(double)*M*M);
+  memset(&phi[0][0], 0, sizeof(double)*M*M);
+  for (int i=1; i<M-1; i++)  {
+    for (int j=1; j<M-1; j++) {
+      rhoa[i][j] =  rho(i*SPACE,j*SPACE);
+    }
+  }
+
+  double delta;
+  double a2 = pow(SPACE,2.0);
+
+  while (iter < N_ITER ) {
+    iter += 1;
+
+    for (int i=1; i < M-1; i++) {
+      for (int j=1; j < M-1; j++) {
+        phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
+                      phi[i][j+1] + phi[i][j-1])/4.0 +
+	  a2/(4.0 * EPSILON0)*rhoa[i][j];
+      }
+    }
+    delta = mmax(M, phi, phip);
+    swap(M, phi, phip);
+
+  }
+
+  clock_t toc = clock();
+
+  write_timing(M, iter, delta, toc - tic);
+}
+
+
+int main(int argc, char *argv[]) {
+
+  int N, N_ITER;
+
+  // Retrieve arguments
+  parse_args(argc, argv, &N, &N_ITER);
+
+  run(N, N_ITER);
+}