Skip to content

Commit

Permalink
mnt: added optimize1d.c
Browse files Browse the repository at this point in the history
Also benchmarks should not rely on convergence, rather a fixed
number of iterations is more appropriate.

The problem of inconsistent iterations between the C and Fortran versions
was also fixed because of an indexing error when calculating the cell
coordinates (C is 0-based, F is 1-based). So they now run the same number
of iterations, regardless of M.

Moved timings about to make them more comparable.

Ensured that all examples now use allocate/malloc which is much more
realistic. Even though it does not have the same performance it
is much more appropriate for benchmark cases.

Lastly, made calls more similar, C and F both
call a function for the iteration step.
  • Loading branch information
zerothi committed Jul 2, 2021
1 parent 2cffc7b commit 5184dda
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 48 deletions.
2 changes: 2 additions & 0 deletions poisson2d/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6G
Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was
discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461
with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk)

The codes were rewritten for more realistic cases by zerothi.
96 changes: 52 additions & 44 deletions poisson2d/optimized.f90
Original file line number Diff line number Diff line change
@@ -1,44 +1,26 @@
module rhofunc
implicit none
public
integer, parameter :: dp=kind(0.d0)

contains

pure real(dp) function rho(x,y)
real(dp), intent(in) :: x,y
if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then
rho = 1.0_dp
else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then
rho = -1.0_dp
else
rho = 0.0_dp
end if
end function rho

end module rhofunc

program poisson
use rhofunc, only: rho

implicit none

integer, parameter :: M = 1000
integer, parameter :: dp=kind(0.d0)
integer, parameter :: M = 300
integer, parameter :: N_ITER = 10000
real(dp),parameter :: target=1E-1_dp
real(dp),parameter :: a=0.01_dp

integer :: i,j, iter
real(dp),parameter :: epsilon0=8.85E-12_dp
real(dp) :: delta, b, e, a2

integer :: i,j, iter
real(dp) :: delta, b, e, a2

real(dp), allocatable :: rhoarr(:,:)
real(dp), allocatable, target :: phiprime(:,:), phi(:,:)
real(dp), pointer :: p(:,:), pnew(:,:)
real(dp), pointer :: p(:,:), pnew(:,:), tmp(:,:)

call cpu_time(b)

allocate(phiprime(M,M), phi(M,M))
allocate(rhoarr(M,M))

call cpu_time(b)

! Fortran doesn't care too much about pow
! since it figures out if the exponent is an integer or not
a2 = a*a / epsilon0
Expand All @@ -48,21 +30,43 @@ program poisson
end do
end do

! We only need to initialize 1 array
phi(:,:) = 0.0_dp
phiprime(:,:) = 0.0_dp
p => phi
pnew => phiprime
iter = 0
delta = target + 1._dp
do while ( delta > target )
do while ( iter < N_ITER )
iter = iter + 1

call iterate(rhoarr, p, pnew, delta)

! Easer to swap pointer than copying stuff around
if ( mod(iter, 2) == 1 ) then
p => phi
pnew => phiprime
else
p => phiprime
pnew => phi
end if
! In fortran we could even use pointers
! But we can just call
tmp => p
p => pnew
pnew => tmp

end do

deallocate(phi, phiprime, rhoarr)

call cpu_time(e)

write(*,'(a)') 'fortran version'
write(*,'(a,f20.10)') 'delta = ',delta
write(*,'(a,i0)') 'Iterations = ',iter
write(*,'(a,f20.10)') 'Time = ',e - b

contains

subroutine iterate(rhoarr, p, pnew, delta)
real(dp), intent(in) :: p(:,:), rhoarr(:,:)
real(dp), intent(out) :: pnew(:,:)
real(dp), intent(out) :: delta

integer :: i, j

delta = 0._dp
do j=2, M-1
Expand All @@ -72,13 +76,17 @@ program poisson
end do
end do

end do

call cpu_time(e)
end subroutine iterate

deallocate(phi, phiprime, rhoarr)

write(*,'(a,i0)') 'Iterations = ',iter
write(*,'(a,f20.10)') 'Time = ',e - b
pure real(dp) function rho(x,y)
real(dp), intent(in) :: x,y
if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then
rho = 1.0_dp
else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then
rho = -1.0_dp
else
rho = 0.0_dp
end if
end function rho

end program poisson
106 changes: 106 additions & 0 deletions poisson2d/optimized1d.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
#define M 1000
#define N_ITER 10000

#define IDX(i,j) (i)*M+j


double rho(const double x, const double y) {
const double s1 = 0.6;
const double e1 = 0.8;
const double s2 = 0.2;
const double e2 = 0.4;

if (s1 < x && x < e1 && s1 < y && y < e1) {
return 1.0;
} else if ( s2 < x && x < e2 && s2 < y && y < e2 ) {
return -1.0;
} else {
return 0.0;
}
}

double iterate(double *restrict phi, double *restrict phinew, double *restrict rhoa) {
double delta = 0, err;
for (int i=1; i < M-1; i++) {
for (int j=1; j < M-1; j++) {
phinew[IDX(i,j)] = (phi[IDX(i+1,j)] + phi[IDX(i-1,j)] + phi[IDX(i,j+1)] + phi[IDX(i,j-1)] + rhoa[IDX(i,j)])*0.25;
err = fabs(phinew[IDX(i,j)] - phi[IDX(i,j)]);
if ( err > delta ) delta = err;
}
}
return delta;
}

void init_rho(double *restrict rhoa, const double epsilon, const double a) {
const double a2 = a * a / epsilon;
for (int i=1; i<M-1; i++) {
for (int j=1; j<M-1; j++) {
rhoa[IDX(i,j)] = rho(i*a,j*a) * a2;
}
}
}


void run(const double toler, const double a)
{
double epsilon0 = 8.85e-12;

double *phi;
double *phip;
double *rhoa;
double *tmp;

// A real world program will definitely use malloc
phi = malloc(M*M*sizeof(double));
phip = malloc(M*M*sizeof(double));
rhoa = malloc(M*M*sizeof(double));

// Only need to initialize one of them!
for (int i = 0 ; i < M*M ; i++ )
phi[i] = 0.;
for (int i = 0 ; i < M*M ; i++ )
phip[i] = 0.;

// In C one tries to avoid using pow because
// it assumes floating point powers (integers are faster)
// So better do it directly
init_rho(rhoa, epsilon0, a);

int iter = 0;
double delta;
while ( iter < N_ITER ) {
iter += 1;

delta = iterate(phi, phip, rhoa);

// swap pointers (no copies)
tmp = phi;
phi = phip;
phip = tmp;
}

free(phi);
free(phip);
free(rhoa);

printf("delta = %20.10f\n",delta);
printf("Iterations = %d\n", iter);
}

int main(int argc, char *argv[])
{
const double target = 1e-1;
const double a = 0.01;

clock_t start = clock();
printf("c version [1d]\n");
run(target, a);
clock_t end = clock();
double total = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("Time = %20.10f\n",total);
}
12 changes: 8 additions & 4 deletions poisson2d/optimized.c → poisson2d/optimized2d.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
#include <string.h>
#include <math.h>
#include <time.h>
#define M 300
#define M 1000
#define N_ITER 10000


double ** malloc_2d(int m, int n) {
Expand Down Expand Up @@ -99,7 +100,7 @@ void run(double toler, double a)

int iter = 0;
double delta;
do {
while ( iter < N_ITER ) {
iter += 1;

delta = iterate(phi, phip, rhoa);
Expand All @@ -108,20 +109,23 @@ void run(double toler, double a)
tmp = phi;
phi = phip;
phip = tmp;
} while ( delta > toler );
}

free_2d(phi);
free_2d(phip);
free_2d(rhoa);

printf("delta = %20.10f\n",delta);
printf("Iterations = %d\n", iter);
}

int main(int argc, char *argv[])
{
clock_t start = clock();
const double target = 1e-1;
const double a = 0.01;

clock_t start = clock();
printf("c version [2d]\n");
run(target, a);
clock_t end = clock();
double total = ((double)(end - start)) / CLOCKS_PER_SEC;
Expand Down

0 comments on commit 5184dda

Please sign in to comment.