diff --git a/poisson2d/README.md b/poisson2d/README.md index 14ae309..fa2d742 100644 --- a/poisson2d/README.md +++ b/poisson2d/README.md @@ -3,14 +3,48 @@ This solver uses a simple Jacobi iteration to solve a Poisson equation. For details, see, for example, "Computational Physics" by Mark Newman, Chap 9. Compiler commands were: -```gfortran sourcefile.f95 -Ofast -o program``` +```gfortran sourcefile.f95 -O3 -o program``` -```gcc sourcefile.c -Ofast -o program``` +```gcc sourcefile.c -O3 -o program``` For Cython module (import it to run): ```python setup.py build_ext --inplace``` + +There are various implementations using various strategies. +All codes are named with a 1-1 correspondance, i.e. `c_i_*` has the same strategy as `fortran_i_*` files. +The suffixed `_*` is only used if there are multiple alternatives that utilizes the same strategy between +the different languages. For example the `c_4_1d` and `c_4_2d` are both using the same strategy for +solving the Poisson equation, while the former uses a 1D array for the data while the latter uses a 2D +array with some slight overhead of another pointer level. + +The two files `c_common.c` and `fortran_common.f90` are baseline programs used for printing +out data and for parsing the command line arguments. + +All executables accept 2 arguments: + + # N = 100 + # N_ITER = 2000 + ./c_1 100 2000 + 100 2000 5.658391137993336e+04 1.448613750000000e-03 + +which will use a 2D array of size `100x100` and run for 2000 iterations. +The output line consists of 4 values, 1) N, 2) N_ITER, 3) max-difference, 4) time per iteration. + +Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html. Also, there was +discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 +with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk). + +The entire code base has been re-written to allow command-line arguments allowing to sample different +matrix sizes with the same executable (user zerothi). + +To run a preset benchmark suite, simply execute `bash run.sh` which will run the currently +implemented benchmarks with dimensions up to 600. + + +## Old timings + The grid is a 2-dimensional array of size MxM. The timings for different values of M are, in seconds: | Language | M=100 | M=200 | M=300 | @@ -25,7 +59,3 @@ The grid is a 2-dimensional array of size MxM. The timings for different values * For all of these results, the amount of iterations performed by the respective codes was approximately the same, with the exception of the 100x100 grid C codes, which did nearly a double amount of iterations compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6GHz, using WSL2 on Windows 10. - -Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was -discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461 -with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk) diff --git a/poisson2d/c_1.c b/poisson2d/c_1.c new file mode 100644 index 0000000..8430ee1 --- /dev/null +++ b/poisson2d/c_1.c @@ -0,0 +1,110 @@ +#include +#include +#include +#include +#include +#include "c_common.h" + +// COMMENT +// I know, this is extremely bad practice. +// But to accommodate argument sizes for the arguments +// I felt this was ok. +// It makes it much more versatile when testing +// END COMMENT +void swap(int M, double (*a)[M], double (*b)[M]) +{ + double swp; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + swp = a[i][j]; + a[i][j] = b[i][j]; + b[i][j] = swp; + } + } +} + +// See comment above +double mmax(int M, double (*phi)[M], double (*phip)[M]) { + double max = 0.0; + double diff = 0.0; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + diff = fabs(phi[i][j]-phip[i][j]); + if (diff > max) + max = diff; + } + } + return max; +} + +double rho(double x, double y) { + double s1 = 0.6; + double e1 = 0.8; + double s2 = 0.2; + double e2 = 0.4; + + if (x > s1 && x < e1 && y > s1 && y < e1) { + return 1.0; + } else if (x > s2 && x < e2 && y > s2 && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +clock_t run(int M, int N_ITER) { + + // COMMENT: + // I wouldn't even imagine any novice in C would + // do something like this. It really makes no sense :) + // END OF COMMENT + double (*phi)[M]; + double (*phip)[M]; + double (*rhoa)[M]; + phi = malloc(sizeof(double[M][M])); + phip = malloc(sizeof(double[M][M])); + rhoa = malloc(sizeof(double[M][M])); + + clock_t tic = clock(); + + // initialize matrices + memset(phip, 0, sizeof(phip[0][0])*M*M); + memset(phi, 0, sizeof(phi[0][0])*M*M); + + double delta = 1.0; + double a2 = pow(SPACE,2.0); + + int iter = 0; + while (iter < N_ITER ) { + iter += 1; + + for (int i=1; i < M-1; i++) { + for (int j=1; j < M-1; j++) { + phip[i][j] = (phi[i+1][j] + phi[i-1][j] + + phi[i][j+1] + phi[i][j-1])/4.0 + + a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE); + } + } + delta = mmax(M, phi, phip); + swap(M, phi, phip); + + } + + clock_t toc = clock(); + + free(phi); + free(phip); + free(rhoa); + + write_timing(M, iter, delta, toc - tic); +} + + +int main(int argc, char *argv[]) { + + int N, N_ITER; + + // Retrieve arguments + parse_args(argc, argv, &N, &N_ITER); + run(N, N_ITER); +} diff --git a/poisson2d/c_2.c b/poisson2d/c_2.c new file mode 100644 index 0000000..05a0c42 --- /dev/null +++ b/poisson2d/c_2.c @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include +#include "c_common.h" + +// COMMENT +// I know, this is extremely bad practice. +// But to accommodate argument sizes for the arguments +// I felt this was ok. +// It makes it much more versatile when testing +// END COMMENT +void swap(int M, double a[M][M], double b[M][M]) { + double swp; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + swp = a[i][j]; + a[i][j] = b[i][j]; + b[i][j] = swp; + } + } +} + +// See comment above +double mmax(int M, double a[M][M], double b[M][M]) { + double max = 0.0; + double diff = 0.0; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + diff = fabs(a[i][j]-b[i][j]); + if (diff > max) + max = diff; + } + } + return max; +} + +double rho(double x, double y) { + double s1 = 0.6; + double e1 = 0.8; + double s2 = 0.2; + double e2 = 0.4; + + if (x > s1 && x < e1 && y > s1 && y < e1) { + return 1.0; + } else if (x > s2 && x < e2 && y > s2 && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +void run(int M, int N_ITER) { + + double phi[M][M]; + double phip[M][M]; + + clock_t tic = clock(); + int iter = 0; + + // initialize matrices + memset(&phip[0][0], 0, sizeof(double)*M*M); + memset(&phi[0][0], 0, sizeof(double)*M*M); + + double delta; + double a2 = pow(SPACE,2.0); + + while (iter < N_ITER ) { + iter += 1; + + for (int i=1; i < M-1; i++) { + for (int j=1; j < M-1; j++) { + phip[i][j] = (phi[i+1][j] + phi[i-1][j] + + phi[i][j+1] + phi[i][j-1])/4.0 + + a2/(4.0 * EPSILON0)*rho(i*SPACE,j*SPACE); + } + } + delta = mmax(M, phi, phip); + swap(M, phi, phip); + + } + + clock_t toc = clock(); + + write_timing(M, iter, delta, toc - tic); +} + + +int main(int argc, char *argv[]) { + + int N, N_ITER; + + // Retrieve arguments + parse_args(argc, argv, &N, &N_ITER); + + run(N, N_ITER); +} diff --git a/poisson2d/c_3.c b/poisson2d/c_3.c new file mode 100644 index 0000000..8806cd7 --- /dev/null +++ b/poisson2d/c_3.c @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include "c_common.h" + +// COMMENT +// I know, this is extremely bad practice. +// But to accommodate argument sizes for the arguments +// I felt this was ok. +// It makes it much more versatile when testing +// END COMMENT +void swap(int M, double a[M][M], double b[M][M]) { + double swp; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + swp = a[i][j]; + a[i][j] = b[i][j]; + b[i][j] = swp; + } + } +} + +// See comment above +double mmax(int M, double a[M][M], double b[M][M]) { + double max = 0.0; + double diff = 0.0; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + diff = fabs(a[i][j]-b[i][j]); + if (diff > max) + max = diff; + } + } + return max; +} + +double rho(double x, double y) { + double s1 = 0.6; + double e1 = 0.8; + double s2 = 0.2; + double e2 = 0.4; + + if (x > s1 && x < e1 && y > s1 && y < e1) { + return 1.0; + } else if (x > s2 && x < e2 && y > s2 && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +void run(int M, int N_ITER) { + + double phi[M][M]; + double phip[M][M]; + double rhoa[M][M]; + + clock_t tic = clock(); + int iter = 0; + + // initialize matrices + memset(&phip[0][0], 0, sizeof(double)*M*M); + memset(&phi[0][0], 0, sizeof(double)*M*M); + for (int i=1; i +#include +#include +#include +#include +#include "c_common.h" + +#define IDX(i,j) (i)*M+j + + +double rho(const double x, const double y) { + const double s1 = 0.6; + const double e1 = 0.8; + const double s2 = 0.2; + const double e2 = 0.4; + + if (s1 < x && x < e1 && s1 < y && y < e1) { + return 1.0; + } else if ( s2 < x && x < e2 && s2 < y && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +double iterate(int M, double *restrict phi, double *restrict phinew, double *restrict rhoa) { + double delta = 0, err; + for (int i=1; i < M-1; i++) { + for (int j=1; j < M-1; j++) { + phinew[IDX(i,j)] = (phi[IDX(i+1,j)] + phi[IDX(i-1,j)] + phi[IDX(i,j+1)] + phi[IDX(i,j-1)] + rhoa[IDX(i,j)])*0.25; + err = fabs(phinew[IDX(i,j)] - phi[IDX(i,j)]); + if ( err > delta ) delta = err; + } + } + return delta; +} + +void init_rho(int M, double *restrict rhoa, const double epsilon, const double a) { + const double a2 = a * a / epsilon; + for (int i=1; i +#include +#include +#include +#include +#include "c_common.h" + +double ** malloc_2d(int m, int n) { + if (m <= 0 || n <= 0) + return NULL; + + // Allocate pointers for columns + double **array2D = malloc(m * sizeof(double *)); + if (array2D == NULL) { + return NULL; + } + + // Allocate actual data + array2D[0] = malloc(m * n * sizeof(double)); + if (array2D[0] == NULL) { + free(array2D); + return NULL; + } + + // assign pointers to make it 2D + for(int i = 1; i < m; i++) { + array2D[i] = array2D[0] + i * n ; + } + + return array2D; +} + +void free_2d(double **array2D) { + free(array2D[0]); + free(array2D); +} + + + +double rho(const double x, const double y) { + const double s1 = 0.6; + const double e1 = 0.8; + const double s2 = 0.2; + const double e2 = 0.4; + + if (s1 < x && x < e1 && s1 < y && y < e1) { + return 1.0; + } else if ( s2 < x && x < e2 && s2 < y && y < e2 ) { + return -1.0; + } else { + return 0.0; + } +} + +double iterate(int M, double *restrict*restrict phi, double *restrict*restrict phinew, double *restrict*restrict rhoa) { + double delta = 0, err; + for (int i=1; i < M-1; i++) { + for (int j=1; j < M-1; j++) { + phinew[i][j] = (phi[i+1][j] + phi[i-1][j] + phi[i][j+1] + phi[i][j-1] + rhoa[i][j])*0.25; + err = fabs(phinew[i][j] - phi[i][j]); + if ( err > delta ) delta = err; + } + } + return delta; +} + +void init_rho(int M, double *restrict*restrict rhoa, const double epsilon, const double a) { + const double a2 = a * a / epsilon; + for (int i=1; i +#include +#include +#include + +void parse_args(int argc, char **argv, int *N, int *N_iter) { + if ( argc < 3 ) { + printf("Not enough arguments [N N_ITER]\n"); + } + + *N = atoi(argv[1]); + *N_iter = atoi(argv[2]); +} + +void write_timing(int N, int iter, double delta, clock_t timing) { + printf("%16d %16d %22.15e %22.15e\n", N, iter, delta, ((double)timing)/CLOCKS_PER_SEC/iter); +} diff --git a/poisson2d/c_common.h b/poisson2d/c_common.h new file mode 100644 index 0000000..374921d --- /dev/null +++ b/poisson2d/c_common.h @@ -0,0 +1,10 @@ +#ifndef _COMMON_HEADER_ +#define _COMMON_HEADER_ + +#define EPSILON0 8.85e-12 +#define SPACE 0.01 + + +void parse_args(int, char **, int *, int *); +void write_timing(int, int, double, clock_t); +#endif diff --git a/poisson2d/fortran_2.f90 b/poisson2d/fortran_2.f90 new file mode 100644 index 0000000..22c1bcd --- /dev/null +++ b/poisson2d/fortran_2.f90 @@ -0,0 +1,67 @@ +module poisson_m + use common_m + implicit none + +contains + + pure real(dp) function rho(x,y) + real(dp), intent(in) :: x,y + if (x > 0.6_dp .and. x < 0.8_dp .and. y > 0.6_dp .and. y<0.8) then + rho = 1.0_dp + else if (x> 0.2_dp .and. x<0.4_dp .and. y>0.2_dp .and. y<0.4_dp) then + rho = -1.0_dp + else + rho = 0.0_dp + end if + end function rho + + subroutine run(M, N_ITER) + integer, intent(in) :: M, N_ITER + + integer :: i,j, iter + real(dp) :: tic, toc + real(dp) :: delta, phiprime(M,M), phi(M,M), a2, temp(M,M) + + ! Since arrays are allocated on the stack + call cpu_time(tic) + + phiprime(:,:) = 0.0_dp + phi(:,:) = 0.0_dp + + a2 = a**2 + + iter = 0 + do while (iter < N_ITER ) + iter = iter + 1 + do j=2, M-1 + do i=2, M-1 + phiprime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4.0_dp & + + a2/4.0_dp/epsilon0*rho((i-1)*a,(j-1)*a) + end do + end do + + delta = maxval(abs(phiprime - phi)) + temp = phi + phi = phiprime + phiprime = temp + + end do + call cpu_time(toc) + + call write_timing(M, iter, delta, toc - tic) + + end subroutine run + +end module poisson_m + +program poisson + use common_m, only : parse_args + use poisson_m, only: run + implicit none + integer :: M, N_ITER + + call parse_args(M, N_ITER) + + call run(M, N_iter) + +end program poisson diff --git a/poisson2d/fortran_3.f90 b/poisson2d/fortran_3.f90 new file mode 100644 index 0000000..9b9153c --- /dev/null +++ b/poisson2d/fortran_3.f90 @@ -0,0 +1,74 @@ +module poisson_m + use common_m + implicit none + +contains + + pure real(dp) function rho(x,y) + real(dp), intent(in) :: x,y + if (x > 0.6_dp .and. x < 0.8_dp .and. y > 0.6_dp .and. y<0.8) then + rho = 1.0_dp + else if (x> 0.2_dp .and. x<0.4_dp .and. y>0.2_dp .and. y<0.4_dp) then + rho = -1.0_dp + else + rho = 0.0_dp + end if + end function rho + + subroutine run(M, N_ITER) + integer, intent(in) :: M, N_ITER + + integer :: i,j, iter + real(dp) :: tic, toc + real(dp) :: delta, a2 + real(dp) :: phiprime(M,M), phi(M,M), rhoa(M,M), temp(M,M) + + ! Since arrays are allocated on the stack + call cpu_time(tic) + + phiprime(:,:) = 0.0_dp + phi(:,:) = 0.0_dp + do j=2, M-1 + do i=2, M-1 + rhoa(i,j) = rho((i-1)*a,(j-1)*a) + end do + end do + + a2 = a**2 + + iter = 0 + do while (iter < N_ITER ) + + iter = iter + 1 + do j=2, M-1 + do i=2, M-1 + phiprime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1) + & + a2 / epsilon0 * rhoa(i,j))*0.25_dp + end do + end do + + delta = maxval(abs(phiprime - phi)) + temp = phi + phi = phiprime + phiprime = temp + + end do + call cpu_time(toc) + + call write_timing(M, iter, delta, toc - tic) + + end subroutine run + +end module poisson_m + +program poisson + use common_m, only : parse_args + use poisson_m, only: run + implicit none + integer :: M, N_ITER + + call parse_args(M, N_ITER) + + call run(M, N_iter) + +end program poisson diff --git a/poisson2d/fortran_4.f90 b/poisson2d/fortran_4.f90 new file mode 100644 index 0000000..4ecb6ed --- /dev/null +++ b/poisson2d/fortran_4.f90 @@ -0,0 +1,88 @@ +program poisson + use common_m + + implicit none + + integer :: M, N_ITER + integer :: i,j, iter + real(dp) :: tic, toc + real(dp) :: delta, a2 + + real(dp), allocatable :: rhoa(:,:) + real(dp), allocatable, target :: phiprime(:,:), phi(:,:) + real(dp), pointer :: p(:,:), pnew(:,:), tmp(:,:) + + call parse_args(M, N_ITER) + + allocate(phi(M,M)) + allocate(phiprime(M,M)) + allocate(rhoa(M,M)) + + ! we don't time allocation/deallocation + call cpu_time(tic) + + ! Fortran doesn't care too much about pow + ! since it figures out if the exponent is an integer or not + a2 = a*a / epsilon0 + do j=2, M-1 + do i=2, M-1 + rhoa(i,j) = rho((i-1)*a,(j-1)*a) * a2 + end do + end do + + phi(:,:) = 0.0_dp + phiprime(:,:) = 0.0_dp + p => phi + pnew => phiprime + iter = 0 + do while ( iter < N_ITER ) + iter = iter + 1 + + call iterate(rhoa, p, pnew, delta) + + ! Easer to swap pointer than copying stuff around + ! In fortran we could even use pointers + ! But we can just call + tmp => p + p => pnew + pnew => tmp + + end do + + call cpu_time(toc) + + deallocate(phi, phiprime, rhoa) + + call write_timing(M, iter, delta, toc - tic) + +contains + + subroutine iterate(rhoa, p, pnew, delta) + real(dp), intent(in) :: p(:,:), rhoa(:,:) + real(dp), intent(out) :: pnew(:,:) + real(dp), intent(out) :: delta + + integer :: i, j + + delta = 0._dp + do j=2, M-1 + do i=2, M-1 + pnew(i,j) = (p(i+1,j) + p(i-1,j) + p(i,j+1) + p(i,j-1) + rhoa(i,j)) * 0.25_dp + delta = max(delta, abs(pnew(i,j) - p(i,j))) + end do + end do + + end subroutine iterate + + pure real(dp) function rho(x,y) + real(dp), intent(in) :: x,y + if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then + rho = 1.0_dp + else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then + rho = -1.0_dp + else + rho = 0.0_dp + end if + end function rho + +end program poisson diff --git a/poisson2d/fortran_common.f90 b/poisson2d/fortran_common.f90 new file mode 100644 index 0000000..eaf46fd --- /dev/null +++ b/poisson2d/fortran_common.f90 @@ -0,0 +1,43 @@ +!< Module for handling common things such as input arguments and timings +module common_m + + implicit none + + public + integer, parameter :: sp = kind(0.) + integer, parameter :: dp = kind(0.d0) + + real(dp), parameter :: a = 0.01_dp + real(dp), parameter :: epsilon0 = 8.85E-12_dp + +contains + + subroutine parse_args(N, N_ITER) + integer, intent(out) :: N, N_ITER + + character(len=128) :: input + integer :: length + + if ( command_argument_count() < 2 ) then + stop 'Not enough arguments [N N_ITER]' + end if + + call get_command_argument(1,input,length) + read(input,*) N + call get_command_argument(2,input,length) + read(input,*) N_ITER + + end subroutine parse_args + + subroutine write_timing(N, iter, delta, timing) + integer, intent(in) :: N, iter + real(dp), intent(in) :: delta, timing + + write(*,'(2(i16,tr1),2(e22.15,tr1))') N, iter, delta, timing/real(iter,dp) + + end subroutine write_timing + +end module common_m + + + diff --git a/poisson2d/naive.c b/poisson2d/naive.c deleted file mode 100644 index 9d72f22..0000000 --- a/poisson2d/naive.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include -#include -#include -#define M 300 -void swap(double (*a)[M], double (*b)[M], int n) -{ - double swp; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - swp = a[i][j]; - a[i][j] = b[i][j]; - b[i][j] = swp; - } - } -} -double mmax(double (*phi)[M], double (*phip)[M], int n) -{ - double max = 0.0; - double diff = 0.0; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - diff = fabs(phi[i][j]-phip[i][j]); - if (diff > max) - max = diff; - } - } - return max; -} -double rho(double x, double y) -{ - double s1 = 0.6; - double e1 = 0.8; - double s2 = 0.2; - double e2 = 0.4; - - if (x > s1 && x < e1 && y > s1 && y < e1) { - return 1.0; - } else if (x > s2 && x < e2 && y > s2 && y < e2 ) { - return -1.0; - } else { - return 0.0; - } -} -void run(double toler, double a) -{ - double epsilon0 = 8.85e-12; - double a2; - - double (*phi)[M]; - double (*phip)[M]; - double (*rhoa)[M]; - phi = malloc(sizeof(double[M][M])); - phip = malloc(sizeof(double[M][M])); - rhoa = malloc(sizeof(double[M][M])); - - int iter = 0; - - memset(phip, 0, sizeof(phip[0][0])*M*M); - memset(phi, 0, sizeof(phi[0][0])*M*M); - - double delta = 1.0; - a2 = pow(a,2.0); - while (delta > toler) { - iter += 1; - - for (int i=1; i < M-1; i++) { - for (int j=1; j < M-1; j++) { - phip[i][j] = (phi[i+1][j] + phi[i-1][j] + - phi[i][j+1] + phi[i][j-1])/4.0 + - a2/(4.0 * epsilon0)*rho(i*a,j*a); - } - } - delta = mmax(phi, phip, M); - swap(phi, phip, M); - - } - printf("iters %d", iter); - -} - -int main(int argc, char *argv[]) -{ - clock_t start = clock(); - run(1e-6, 0.01); - clock_t end = clock(); - double total = ((double)(end - start)) / CLOCKS_PER_SEC; - printf("Execution time: %f\n",total); -} diff --git a/poisson2d/naive.f90 b/poisson2d/naive.f90 deleted file mode 100644 index 3c34692..0000000 --- a/poisson2d/naive.f90 +++ /dev/null @@ -1,51 +0,0 @@ -module rhofunc -implicit none -public -integer, parameter :: dp=kind(0.d0) -contains - pure real(dp) function rho(x,y) - real(dp), intent(in) :: x,y - if (x > 0.6_dp .and. x < 0.8_dp .and. y > 0.6_dp .and. y<0.8) then - rho = 1.0_dp - else if (x> 0.2_dp .and. x<0.4_dp .and. y>0.2_dp .and. y<0.4_dp) then - rho = -1.0_dp - else - rho = 0.0_dp - end if - end function - -end module - -program poisson -use rhofunc, only: rho -implicit none -integer, parameter :: dp=kind(0.d0), M=300 -integer :: i,j, iter -real(dp),parameter :: epsilon0=8.85E-12_dp, target=1E-6_dp, a=0.01 -real(dp) :: delta, b, e, phiprime(M,M), phi(M,M), a2, rhoarr(M,M), temp(M,M) - - -delta = 1.0_dp -iter = 0 -call cpu_time(b) -phiprime(:,:) = 0.0_dp -phi(:,:) = 0.0_dp - -do while (delta > target ) - iter = iter + 1 - a2 = a**2.0_dp - do i=2, M-1 - do j=2, M-1 - phiprime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4.0_dp & - + a2/4.0_dp/epsilon0*rho(i*a,j*a) - end do - end do - delta = maxval(abs(phiprime - phi)) - temp = phi - phi = phiprime - phiprime = temp - -end do -call cpu_time(e) -print *, e-b, iter -end program \ No newline at end of file diff --git a/poisson2d/optimized.c b/poisson2d/optimized.c deleted file mode 100644 index 0dddfec..0000000 --- a/poisson2d/optimized.c +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include -#define M 300 -void swap(double (*a)[M], double (*b)[M], int n) -{ - double swp; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - a[i][j] = b[i][j]; - } - } -} -double mmax(double (*phi)[M], double (*phip)[M], int n) -{ - double max = 0.0; - double diff = 0.0; - for (int i = 0; i < n; i++) { - for (int j = 0; j < n; j++) { - diff = fabs(phi[i][j]-phip[i][j]); - if (diff > max) - max = diff; - } - } - return max; -} -double rho(double x, double y) -{ - double s1 = 0.6; - double e1 = 0.8; - double s2 = 0.2; - double e2 = 0.4; - - if (x > s1 && x < e1 && y > s1 && y < e1) { - return 1.0; - } else if (x > s2 && x < e2 && y > s2 && y < e2 ) { - return -1.0; - } else { - return 0.0; - } -} -void run(double toler, double a) -{ - double epsilon0 = 8.85e-12; - double a2; - - double (*phi)[M]; - double (*phip)[M]; - double (*rhoa)[M]; - phi = malloc(sizeof(double[M][M])); - phip = malloc(sizeof(double[M][M])); - rhoa = malloc(sizeof(double[M][M])); - - int iter = 0; - - memset(phip, 0, sizeof(phip[0][0])*M*M); - memset(phi, 0, sizeof(phi[0][0])*M*M); - for (int i=1; i toler) { - iter += 1; - - for (int i=1; i < M-1; i++) { - for (int j=1; j < M-1; j++) { - phip[i][j] = (phi[i+1][j] + phi[i-1][j] + - phi[i][j+1] + phi[i][j-1])/4.0 + - a2/(4.0 * epsilon0)*rhoa[i][j]; - } - } - delta = mmax(phi, phip, M); - swap(phi, phip, M); - - } - printf("iters %d", iter); - -} - -int main(int argc, char *argv[]) -{ - clock_t start = clock(); - run(1e-6, 0.01); - clock_t end = clock(); - double total = ((double)(end - start)) / CLOCKS_PER_SEC; - printf("Execution time: %f\n",total); -} diff --git a/poisson2d/optimized.f90 b/poisson2d/optimized.f90 deleted file mode 100644 index 3bc1045..0000000 --- a/poisson2d/optimized.f90 +++ /dev/null @@ -1,55 +0,0 @@ -module rhofunc -implicit none -public -integer, parameter :: dp=kind(0.d0) -contains - pure real(dp) function rho(x,y) - real(dp), intent(in) :: x,y - if (x > 0.6_dp .and. x < 0.8_dp .and. y > 0.6_dp .and. y<0.8) then - rho = 1.0_dp - else if (x> 0.2_dp .and. x<0.4_dp .and. y>0.2_dp .and. y<0.4_dp) then - rho = -1.0_dp - else - rho = 0.0_dp - end if - end function - -end module - -program poisson -use rhofunc, only: rho -implicit none -integer, parameter :: dp=kind(0.d0), M=300 -integer :: i,j, iter -real(dp),parameter :: epsilon0=8.85E-12_dp, target=1E-6_dp, a=0.01 -real(dp) :: delta, b, e, phiprime(M,M), phi(M,M), a2, rhoarr(M,M) - - -delta = 1.0_dp -iter = 0 -call cpu_time(b) -phiprime(:,:) = 0.0_dp -phi(:,:) = 0.0_dp -do i=1, M - do j=1, M - rhoarr(i,j) = rho(i*a,j*a) - end do -end do - -do while (delta > target ) - iter = iter + 1 - a2 = a**2.0_dp - do i=2, M-1 - do j=2, M-1 - phiprime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4.0_dp & - + a2/4.0_dp/epsilon0*rhoarr(i,j) - end do - end do - delta = maxval(abs(phiprime - phi)) - phi = phiprime - - -end do -call cpu_time(e) -print *, e-b, iter -end program \ No newline at end of file diff --git a/poisson2d/run.sh b/poisson2d/run.sh new file mode 100644 index 0000000..234dae4 --- /dev/null +++ b/poisson2d/run.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# This may not apply to your distribution +# change or comment out at will, but remark +# the stack-size. +ulimit -s unlimited + +OPTS="-O3 -march=native" +FC=gfortran +CC=gcc + +$CC $OPTS -c c_common.c +for c in 1 2 3 4_1d 4_2d +do + echo "Compiling and running: c_${c}" + $CC $OPTS -o c_${c} c_${c}.c c_common.o + + # Now run for + { + for N in 100 200 300 ; do + ./c_${c} $N 10000 + done + for N in 400 500 600; do + ./c_${c} $N 4000 + done + } > c_${c}.data +done + +$FC $OPTS -c fortran_common.f90 +for f in 2 3 4 +do + echo "Compiling and running: fortran_${f}" + $FC $OPTS -o fortran_${f} fortran_${f}.f90 fortran_common.o + + # Now run for + { + for N in 100 200 300 ; do + ./fortran_${f} $N 10000 + done + for N in 400 500 600; do + ./fortran_${f} $N 4000 + done + } > fortran_${f}.data +done