@@ -15,25 +15,6 @@ program ectrans_benchmark
15
15
! This test performs spectral to real and real to spectral transforms repeated in
16
16
! a timed loop.
17
17
!
18
- ! 1) One "surface" field is always transformed:
19
- ! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngpblk)
20
- !
21
- ! 2) Multiple "3d" fields are transformed and can be disabled with "--nfld 0"
22
- !
23
- ! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk)
24
- !
25
- ! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and
26
- ! can be enabled with "--vordiv"
27
- !
28
- ! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk)
29
- !
30
- ! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2)
31
- ! This must be enabled with "--scders"
32
- !
33
- ! 5) Optionally uv East-West derivatives can be computed from vorticity/divergence.
34
- ! This must be enabled with "--vordiv --uvders"
35
- !
36
- !
37
18
! Authors : George Mozdzynski
38
19
! Willem Deconinck
39
20
! Ioan Hadade
@@ -52,7 +33,7 @@ program ectrans_benchmark
52
33
integer (kind= jpim), parameter :: min_octa_points = 20
53
34
54
35
integer (kind= jpim) :: istack, getstackusage
55
- real (kind= jprd), dimension ( 1 ) :: zmaxerr( 5 ), zerr (5 )
36
+ real (kind= jprd) :: zmaxerr(5 )
56
37
real (kind= jprd) :: zmaxerrg
57
38
58
39
! Output unit numbers
@@ -61,15 +42,14 @@ program ectrans_benchmark
61
42
integer (kind= jpim), parameter :: noutdump = 7 ! Unit number for field output
62
43
63
44
! Default parameters
64
- integer (kind= jpim) :: nsmax = 79 ! Spectral truncation
65
45
integer (kind= jpim) :: iters = 10 ! Number of iterations for transform test
66
46
integer (kind= jpim) :: nfld = 1 ! Number of 3D scalar fields
67
47
integer (kind= jpim) :: nlev = 1 ! Number of vertical levels
68
48
69
- integer (kind= jpim) :: nflevg ! Total number of vertical levels
70
- integer (kind = jpim) :: ndgl ! Number of latitudes
71
- integer (kind= jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
72
- integer (kind= jpim) :: ngptot ! Total number of grid points on this task
49
+ integer (kind= jpim) :: nflevg ! Total number of vertical levels
50
+
51
+ integer (kind= jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
52
+ integer (kind= jpim) :: ngptot ! Total number of grid points on this task
73
53
integer (kind= jpim) :: ngptotg ! Total number of grid points across all tasks
74
54
75
55
integer (kind= jpim) :: ifld
@@ -81,7 +61,7 @@ program ectrans_benchmark
81
61
integer (kind= jpim) :: ib
82
62
integer (kind= jpim) :: jprtrv
83
63
84
- integer (kind= jpim), allocatable :: nloen(:), nprcids(:)
64
+ integer (kind= jpim), allocatable :: nprcids(:)
85
65
integer (kind= jpim) :: myproc, jj
86
66
integer :: jstep
87
67
@@ -110,13 +90,25 @@ program ectrans_benchmark
110
90
real (kind= jprb), allocatable :: zgp2(:,:,:)
111
91
112
92
logical :: lstack = .false. ! Output stack info
113
- logical :: luserpnm = .false.
114
- logical :: lkeeprpnm = .false.
93
+
94
+ ! setup_trans options
95
+ integer (kind= jpim) :: nsmax = 79 ! Spectral truncation
96
+ integer (kind= jpim) :: ndgl ! Number of latitudes
97
+ integer (kind= jpim), allocatable :: nloen(:) ! Number of points on each latitude
98
+ logical :: luserpnm = .false. ! Use Belousov algorithm to compute RPNM array instead of per m
115
99
logical :: luseflt = .false. ! Use fast legendre transforms
100
+
101
+ ! Extra inv_trans options
102
+ logical :: lvordiv = .false. ! Compute vorticity and divergence in grid point space
103
+ logical :: lscders = .false. ! Compute derivatives of scalar (North-South and East-West) in grid
104
+ ! point space
105
+ logical :: luvder = .false. ! Compute East-West derivatives of U and V wind in grid point space
106
+
107
+ ! GSTATS options
108
+ logical :: lstats = .true. ! gstats statistics
116
109
logical :: ltrace_stats = .false.
117
110
logical :: lstats_omp = .false.
118
111
logical :: lstats_comms = .false.
119
- logical :: lstats = .true. ! gstats statistics
120
112
logical :: lbarrier_stats = .false.
121
113
logical :: lbarrier_stats2 = .false.
122
114
logical :: ldetailed_stats = .false.
@@ -125,16 +117,13 @@ program ectrans_benchmark
125
117
logical :: lstatscpu = .false.
126
118
logical :: lstats_mem = .false.
127
119
logical :: lxml_stats = .false.
128
- logical :: lvordiv = .false.
129
- logical :: lscders = .false.
130
- logical :: luvder = .false.
131
- logical :: lprint_norms = .false. ! Calculate and print spectral norms
132
- logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end
133
-
134
120
integer (kind= jpim) :: nstats_mem = 0
135
121
integer (kind= jpim) :: ntrace_stats = 0
136
122
integer (kind= jpim) :: nprnt_stats = 1
137
123
124
+ logical :: lprint_norms = .false. ! Calculate and print spectral norms
125
+ logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end
126
+
138
127
! The multiplier of the machine epsilon used as a tolerance for correctness checking
139
128
! ncheck = 0 (the default) means that correctness checking is disabled
140
129
integer (kind= jpim) :: ncheck = 0
@@ -144,11 +133,6 @@ program ectrans_benchmark
144
133
! Verbosity level (0 or 1)
145
134
integer :: verbosity = 0
146
135
147
- real (kind= jprd) :: zra = 6371229._jprd
148
-
149
- integer (kind= jpim) :: nmax_resol = 37 ! Max number of resolutions
150
- integer (kind= jpim) :: npromatr = 0 ! nproma for trans lib
151
-
152
136
integer (kind= jpim) :: nproc ! Number of procs
153
137
integer (kind= jpim) :: nthread
154
138
integer (kind= jpim) :: nprgpns ! Grid-point decomp
@@ -170,7 +154,6 @@ program ectrans_benchmark
170
154
logical :: lsync_trans = .true. ! Activate barrier sync
171
155
logical :: leq_regions = .true. ! Eq regions flag
172
156
173
-
174
157
integer (kind= jpim) :: nproma = 0
175
158
integer (kind= jpim) :: ngpblks
176
159
! locals
@@ -363,18 +346,16 @@ program ectrans_benchmark
363
346
364
347
call gstats(1 , 0 )
365
348
call setup_trans0(kout= nout, kerr= nerr, kprintlev= merge (2 , 0 , verbosity == 1 ), &
366
- & kmax_resol= nmax_resol, kpromatr= npromatr, kprgpns= nprgpns, kprgpew= nprgpew, &
367
- & kprtrw= nprtrw, ldsync_trans= lsync_trans, &
368
- & ldeq_regions= leq_regions, prad= zra, ldalloperm= .true. , ldmpoff= .not. luse_mpi)
349
+ & kprgpns= nprgpns, kprgpew= nprgpew, kprtrw= nprtrw, ldsync_trans= lsync_trans, &
350
+ & ldeq_regions= leq_regions, ldalloperm= .true. , ldmpoff= .not. luse_mpi)
369
351
call gstats(1 , 1 )
370
352
371
353
call gstats(2 , 0 )
372
354
! IFS spectral fields are dimensioned NFLEVL, Nils !!
373
355
call set_ectrans_gpu_nflev(nflevl)
374
356
! We pass nflevl via environment variable in order not to change API
375
357
! In the long run, ectrans should grow its internal buffers automatically
376
- call setup_trans(ksmax= nsmax, kdgl= ndgl, kloen= nloen, ldsplit= .true. , &
377
- & lduserpnm= luserpnm, ldkeeprpnm= lkeeprpnm, &
358
+ call setup_trans(ksmax= nsmax, kdgl= ndgl, kloen= nloen, ldsplit= .true. , lduserpnm= luserpnm, &
378
359
& lduseflt= luseflt)
379
360
call gstats(2 , 1 )
380
361
@@ -426,11 +407,13 @@ program ectrans_benchmark
426
407
! Allocate and initialize spectral arrays
427
408
! ===================================================================================================
428
409
410
+ ! Initialize vorticity and divergence - same for both call modes
429
411
allocate (zspvor(nflevl,nspec2))
430
412
allocate (zspdiv(nflevl,nspec2))
431
413
call initialize_spectral_field(nsmax, zspvor)
432
414
call initialize_spectral_field(nsmax, zspdiv)
433
415
416
+ ! Initialize spectral arrays differently depending on call mode
434
417
if (icall_mode == 1 ) then
435
418
allocate (zspscalar(nfld* nflevl+1 ,nspec2))
436
419
call initialize_spectral_field(nsmax, zspscalar)
@@ -443,9 +426,8 @@ program ectrans_benchmark
443
426
call initialize_spectral_field(nsmax, zspsc2)
444
427
endif
445
428
429
+ ! Compute spectral distribution variables
446
430
allocate (ivset(nflevg))
447
-
448
- ! Compute spectral distribution
449
431
ilev = 0
450
432
do jb = 1 , nprtrv
451
433
do jlev= 1 , numll(jb)
@@ -470,15 +452,17 @@ program ectrans_benchmark
470
452
! Allocate gridpoint arrays
471
453
! ===================================================================================================
472
454
455
+ ! Determine start and end slice points for grid point arrays when they are passed back to dir_trans
473
456
ipgp_start = 1
474
457
ipgp_end = (2 + nfld) * nflevg + 1
475
458
ipgpuv_start = 1
476
459
ipgpuv_end = 2
477
460
478
461
! Also enable vorticity divergence?
479
462
if (lvordiv) then
480
- inum_wind_fields = 4
481
- ! If lvordiv, skip the vor and div elements when passing zgp
463
+ inum_wind_fields = 4 ! Four fields - U, V, vorticity, divergence
464
+ ! If lvordiv, skip the vorticity and divergence elements when passing zgp
465
+ ! These two come first when enabled
482
466
ipgp_start = ipgp_start + 2 * nflevg
483
467
ipgp_end = ipgp_end + 2 * nflevg
484
468
ipgpuv_start = ipgpuv_start + 2
@@ -503,6 +487,7 @@ program ectrans_benchmark
503
487
inum_sc_2d_fields = inum_sc_2d_fields * 3
504
488
endif
505
489
490
+ ! Finally, allocate grid point arrays
506
491
if (icall_mode == 1 ) then
507
492
itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields
508
493
allocate (zgp(nproma,itotal_fields,ngpblks))
0 commit comments