Skip to content

Commit 74ea10a

Browse files
committed
Reorganise ectrans_benchmark
1 parent d1db60b commit 74ea10a

File tree

1 file changed

+35
-50
lines changed

1 file changed

+35
-50
lines changed

src/programs/ectrans-benchmark.F90

Lines changed: 35 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,6 @@ program ectrans_benchmark
1515
! This test performs spectral to real and real to spectral transforms repeated in
1616
! a timed loop.
1717
!
18-
! 1) One "surface" field is always transformed:
19-
! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngpblk)
20-
!
21-
! 2) Multiple "3d" fields are transformed and can be disabled with "--nfld 0"
22-
!
23-
! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk)
24-
!
25-
! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and
26-
! can be enabled with "--vordiv"
27-
!
28-
! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk)
29-
!
30-
! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2)
31-
! This must be enabled with "--scders"
32-
!
33-
! 5) Optionally uv East-West derivatives can be computed from vorticity/divergence.
34-
! This must be enabled with "--vordiv --uvders"
35-
!
36-
!
3718
! Authors : George Mozdzynski
3819
! Willem Deconinck
3920
! Ioan Hadade
@@ -52,7 +33,7 @@ program ectrans_benchmark
5233
integer(kind=jpim), parameter :: min_octa_points = 20
5334

5435
integer(kind=jpim) :: istack, getstackusage
55-
real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5)
36+
real(kind=jprd) :: zmaxerr(5)
5637
real(kind=jprd) :: zmaxerrg
5738

5839
! Output unit numbers
@@ -61,15 +42,14 @@ program ectrans_benchmark
6142
integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output
6243

6344
! Default parameters
64-
integer(kind=jpim) :: nsmax = 79 ! Spectral truncation
6545
integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test
6646
integer(kind=jpim) :: nfld = 1 ! Number of 3D scalar fields
6747
integer(kind=jpim) :: nlev = 1 ! Number of vertical levels
6848

69-
integer(kind=jpim) :: nflevg ! Total number of vertical levels
70-
integer(kind=jpim) :: ndgl ! Number of latitudes
71-
integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
72-
integer(kind=jpim) :: ngptot ! Total number of grid points on this task
49+
integer(kind=jpim) :: nflevg ! Total number of vertical levels
50+
51+
integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
52+
integer(kind=jpim) :: ngptot ! Total number of grid points on this task
7353
integer(kind=jpim) :: ngptotg ! Total number of grid points across all tasks
7454

7555
integer(kind=jpim) :: ifld
@@ -81,7 +61,7 @@ program ectrans_benchmark
8161
integer(kind=jpim) :: ib
8262
integer(kind=jpim) :: jprtrv
8363

84-
integer(kind=jpim), allocatable :: nloen(:), nprcids(:)
64+
integer(kind=jpim), allocatable :: nprcids(:)
8565
integer(kind=jpim) :: myproc, jj
8666
integer :: jstep
8767

@@ -110,13 +90,25 @@ program ectrans_benchmark
11090
real(kind=jprb), allocatable :: zgp2(:,:,:)
11191

11292
logical :: lstack = .false. ! Output stack info
113-
logical :: luserpnm = .false.
114-
logical :: lkeeprpnm = .false.
93+
94+
! setup_trans options
95+
integer(kind=jpim) :: nsmax = 79 ! Spectral truncation
96+
integer(kind=jpim) :: ndgl ! Number of latitudes
97+
integer(kind=jpim), allocatable :: nloen(:) ! Number of points on each latitude
98+
logical :: luserpnm = .false. ! Use Belousov algorithm to compute RPNM array instead of per m
11599
logical :: luseflt = .false. ! Use fast legendre transforms
100+
101+
! Extra inv_trans options
102+
logical :: lvordiv = .false. ! Compute vorticity and divergence in grid point space
103+
logical :: lscders = .false. ! Compute derivatives of scalar (North-South and East-West) in grid
104+
! point space
105+
logical :: luvder = .false. ! Compute East-West derivatives of U and V wind in grid point space
106+
107+
! GSTATS options
108+
logical :: lstats = .true. ! gstats statistics
116109
logical :: ltrace_stats = .false.
117110
logical :: lstats_omp = .false.
118111
logical :: lstats_comms = .false.
119-
logical :: lstats = .true. ! gstats statistics
120112
logical :: lbarrier_stats = .false.
121113
logical :: lbarrier_stats2 = .false.
122114
logical :: ldetailed_stats = .false.
@@ -125,16 +117,13 @@ program ectrans_benchmark
125117
logical :: lstatscpu = .false.
126118
logical :: lstats_mem = .false.
127119
logical :: lxml_stats = .false.
128-
logical :: lvordiv = .false.
129-
logical :: lscders = .false.
130-
logical :: luvder = .false.
131-
logical :: lprint_norms = .false. ! Calculate and print spectral norms
132-
logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end
133-
134120
integer(kind=jpim) :: nstats_mem = 0
135121
integer(kind=jpim) :: ntrace_stats = 0
136122
integer(kind=jpim) :: nprnt_stats = 1
137123

124+
logical :: lprint_norms = .false. ! Calculate and print spectral norms
125+
logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end
126+
138127
! The multiplier of the machine epsilon used as a tolerance for correctness checking
139128
! ncheck = 0 (the default) means that correctness checking is disabled
140129
integer(kind=jpim) :: ncheck = 0
@@ -144,11 +133,6 @@ program ectrans_benchmark
144133
! Verbosity level (0 or 1)
145134
integer :: verbosity = 0
146135

147-
real(kind=jprd) :: zra = 6371229._jprd
148-
149-
integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions
150-
integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib
151-
152136
integer(kind=jpim) :: nproc ! Number of procs
153137
integer(kind=jpim) :: nthread
154138
integer(kind=jpim) :: nprgpns ! Grid-point decomp
@@ -170,7 +154,6 @@ program ectrans_benchmark
170154
logical :: lsync_trans = .true. ! Activate barrier sync
171155
logical :: leq_regions = .true. ! Eq regions flag
172156

173-
174157
integer(kind=jpim) :: nproma = 0
175158
integer(kind=jpim) :: ngpblks
176159
! locals
@@ -363,18 +346,16 @@ program ectrans_benchmark
363346

364347
call gstats(1, 0)
365348
call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), &
366-
& kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, &
367-
& kprtrw=nprtrw, ldsync_trans=lsync_trans, &
368-
& ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi)
349+
& kprgpns=nprgpns, kprgpew=nprgpew, kprtrw=nprtrw, ldsync_trans=lsync_trans, &
350+
& ldeq_regions=leq_regions, ldalloperm=.true., ldmpoff=.not.luse_mpi)
369351
call gstats(1, 1)
370352

371353
call gstats(2, 0)
372354
! IFS spectral fields are dimensioned NFLEVL, Nils !!
373355
call set_ectrans_gpu_nflev(nflevl)
374356
! We pass nflevl via environment variable in order not to change API
375357
! In the long run, ectrans should grow its internal buffers automatically
376-
call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., &
377-
& lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, &
358+
call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., lduserpnm=luserpnm, &
378359
& lduseflt=luseflt)
379360
call gstats(2, 1)
380361

@@ -426,11 +407,13 @@ program ectrans_benchmark
426407
! Allocate and initialize spectral arrays
427408
!===================================================================================================
428409

410+
! Initialize vorticity and divergence - same for both call modes
429411
allocate(zspvor(nflevl,nspec2))
430412
allocate(zspdiv(nflevl,nspec2))
431413
call initialize_spectral_field(nsmax, zspvor)
432414
call initialize_spectral_field(nsmax, zspdiv)
433415

416+
! Initialize spectral arrays differently depending on call mode
434417
if (icall_mode == 1) then
435418
allocate(zspscalar(nfld*nflevl+1,nspec2))
436419
call initialize_spectral_field(nsmax, zspscalar)
@@ -443,9 +426,8 @@ program ectrans_benchmark
443426
call initialize_spectral_field(nsmax, zspsc2)
444427
endif
445428

429+
! Compute spectral distribution variables
446430
allocate(ivset(nflevg))
447-
448-
! Compute spectral distribution
449431
ilev = 0
450432
do jb = 1, nprtrv
451433
do jlev=1, numll(jb)
@@ -470,15 +452,17 @@ program ectrans_benchmark
470452
! Allocate gridpoint arrays
471453
!===================================================================================================
472454

455+
! Determine start and end slice points for grid point arrays when they are passed back to dir_trans
473456
ipgp_start = 1
474457
ipgp_end = (2 + nfld) * nflevg + 1
475458
ipgpuv_start = 1
476459
ipgpuv_end = 2
477460

478461
! Also enable vorticity divergence?
479462
if (lvordiv) then
480-
inum_wind_fields = 4
481-
! If lvordiv, skip the vor and div elements when passing zgp
463+
inum_wind_fields = 4 ! Four fields - U, V, vorticity, divergence
464+
! If lvordiv, skip the vorticity and divergence elements when passing zgp
465+
! These two come first when enabled
482466
ipgp_start = ipgp_start + 2 * nflevg
483467
ipgp_end = ipgp_end + 2 * nflevg
484468
ipgpuv_start = ipgpuv_start + 2
@@ -503,6 +487,7 @@ program ectrans_benchmark
503487
inum_sc_2d_fields = inum_sc_2d_fields * 3
504488
endif
505489

490+
! Finally, allocate grid point arrays
506491
if (icall_mode == 1) then
507492
itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields
508493
allocate(zgp(nproma,itotal_fields,ngpblks))

0 commit comments

Comments
 (0)