━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
               PERFORMANCE WITH ALLOCATABLE TRACER ARRAY
              ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━


This document tests the performance of the NorESM system when the static
array for ocean tracers is replaced with an allocatable array.

The static array source code is `release-noresm2.0.5' and is referred to
as `noresm2.0.5'. The allocatable array source code is in a development
GitHub branch and is referred to as `noresm-dev'. All runs are made on
Betzy (unless specified otherwise).


1 NOINYOC_T62_tn21
══════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment                    noresm2.0.5  noresm-dev  note                
  ────────────────────────────────────────────────────────────────────────────
   NOINYOC - 10y                      208.67      207.86                      
   NOINYOC - 1y startup (run 1)       201.16      200.04                      
   NOINYOC - 1y startup (run 2)       199.78      198.74                      
   NOINYOC - 1y restart (run 1)       204.88      205.55  Average for 9 years 
   NOINYOC - 1y restart (run 2)       204.95      203.25  Average for 9 years 
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 1: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).


2 NOINYOC_T62_tn14
══════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment            noresm2.0.5  noresm-dev  note                
  ────────────────────────────────────────────────────────────────────
   NOINYOC - 10y               46.71       44.99                      
   NOINYOC - 1y startup        45.75       44.14                      
   NOINYOC - 1y restart        46.06       44.30  Average for 9 years 
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 2: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).


3 NOINYOC_T62_tn0254
════════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment            noresm2.0.5  noresm-dev  note               
  ───────────────────────────────────────────────────────────────────
   NOINYOC - 1y                 5.78        5.55                     
   NOINYOC - 1m (run 1)         4.73        4.62                     
   NOINYOC - 1m (run 2)         4.75        4.61                     
   NOINYOC - 1m (run 3)  (*)    3.99        4.63  (*) Long init time 
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 3: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).


4 NOINYOC_T62_tn14 on Fram
══════════════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment            noresm2.0.5  noresm-dev  note                
  ────────────────────────────────────────────────────────────────────
   NOINYOC - 10y               50.86       46.17                      
   NOINYOC - 1y startup        50.08       44.91                      
   NOINYOC - 1y restart        50.02       46.12  Average for 9 years 
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 4: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).


5 N1850_f19_tn14
════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment          noresm2.0.5    noresm-dev  note                   
  ───────────────────────────────────────────────────────────────────────
   N1850 - 5y                24.03  (*)    22.55  (*) Performance issue? 
   N1850 - 1y startup        24.56         24.47                         
   N1850 - 1y restart        25.08         24.54  Average for 4 years    
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 5: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).


6 N1850_f19_tn14 on Fram
════════════════════════

  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
   Experiment          noresm2.0.5  noresm-dev  note                 
  ───────────────────────────────────────────────────────────────────
   N1850 - 1m startup        19.26       19.36                       
   N1850 - 1m restart        19.54       19.42  Average for 4 months 
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
  Table 6: Throughput numbers (simulated years/day) from the timings
  report (bigger is better).

  The results for Betzy show no consistent performance loss for the
  NOINYOC_T62_tn21 case, and a 2% to 4% performance loss for
  NOINYOC_T62_tn14 and NOINYOC_T62_tn0254. Performance loss is mainly
  noticed at startup; longer runs and restart runs generally perform
  better, although the performance loss seems to persist also for the
  longer duration runs. Note also that performance loss due to external
  factors can have a significantly larger impact on individual runs
  (see NOINYOC_T62_tn0254 run 3).

  The results for NOINYOC_T62_tn14 on Fram show a significant
  performance loss (8% to 10%) when replacing the static tracer array
  with an equivalent allocatable array. The performance loss is fairly
  consistent both for the 1 year startup and restart runs and for the
  10 year run, so this seems to be a permanent feature for the model
  setup on Fram.

  The N1850_f19_tn14 results on Betzy appear to be somewhat contrary to
  the NOINYOC runs, with better performance for the shorter 1 year runs
  than for the 5 year run (in particular for `noresm-dev', perhaps due
  to external performance issues?). The 1 year runs seem to indicate a
  performance loss in the range of 0% to 2% for N1850 when using
  allocatable arrays. On Fram there is practically no difference when
  running N1850_f19_tn14 for 1 month intervals.

  These are results for a relatively small sample size on Betzy and
  Fram, using only one compiler (Intel) and no tuning of source code or
  compiler options to improve performance, so the results provide only
  a rough indication of what to expect when replacing a large static
  array with an equivalent allocatable array in the code. There seems
  to be a clear difference on Fram for NOINYOC_T62_tn14, but not for
  N1850_f19_tn14. This suggests that significant performance loss could
  occur for some specific compiler + case setup parameters, but
  otherwise no clear pattern seems to emerge from these tests.