-
Notifications
You must be signed in to change notification settings - Fork 101
/
quda.h
1879 lines (1542 loc) · 80.7 KB
/
quda.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
/**
* @file quda.h
* @brief Main header file for the QUDA library
*
* Note to QUDA developers: When adding new members to QudaGaugeParam
* and QudaInvertParam, be sure to update lib/check_params.h as well
* as the Fortran interface in lib/quda_fortran.F90.
*/
#include <stdbool.h> // bool support
#include <enum_quda.h>
#include <stdio.h> /* for FILE */
#include <quda_define.h>
#include <quda_constants.h>
#ifndef __CUDACC_RTC__
#define double_complex double _Complex
#else // keep NVRTC happy since it can't handle C types
#define double_complex double2
#endif
#ifdef __cplusplus
extern "C" {
#endif
/**
* Parameters having to do with the gauge field or the
* interpretation of the gauge field by various Dirac operators
*/
typedef struct QudaGaugeParam_s {
size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */
QudaFieldLocation location; /**< The location of the gauge field */
int X[4]; /**< The local space-time dimensions (without checkboarding) */
double anisotropy; /**< Used for Wilson and Wilson-clover */
double tadpole_coeff; /**< Used for staggered only */
double scale; /**< Used by staggered long links */
QudaLinkType type; /**< The link type of the gauge field (e.g., Wilson, fat, long, etc.) */
QudaGaugeFieldOrder gauge_order; /**< The ordering on the input gauge field */
QudaTboundary t_boundary; /**< The temporal boundary condition that will be used for fermion fields */
QudaPrecision cpu_prec; /**< The precision used by the caller */
QudaPrecision cuda_prec; /**< The precision of the cuda gauge field */
QudaReconstructType reconstruct; /**< The reconstruction type of the cuda gauge field */
QudaPrecision cuda_prec_sloppy; /**< The precision of the sloppy gauge field */
QudaReconstructType reconstruct_sloppy; /**< The recontruction type of the sloppy gauge field */
QudaPrecision cuda_prec_refinement_sloppy; /**< The precision of the sloppy gauge field for the refinement step in multishift */
QudaReconstructType reconstruct_refinement_sloppy; /**< The recontruction type of the sloppy gauge field for the refinement step in multishift*/
QudaPrecision cuda_prec_precondition; /**< The precision of the preconditioner gauge field */
QudaReconstructType reconstruct_precondition; /**< The recontruction type of the preconditioner gauge field */
QudaPrecision cuda_prec_eigensolver; /**< The precision of the eigensolver gauge field */
QudaReconstructType reconstruct_eigensolver; /**< The recontruction type of the eigensolver gauge field */
QudaGaugeFixed gauge_fix; /**< Whether the input gauge field is in the axial gauge or not */
int ga_pad; /**< The pad size that native GaugeFields will use (default=0) */
int site_ga_pad; /**< Used by link fattening and the gauge and fermion forces */
int staple_pad; /**< Used by link fattening */
int llfat_ga_pad; /**< Used by link fattening */
int mom_ga_pad; /**< Used by the gauge and fermion forces */
QudaStaggeredPhase staggered_phase_type; /**< Set the staggered phase type of the links */
int staggered_phase_applied; /**< Whether the staggered phase has already been applied to the links */
double i_mu; /**< Imaginary chemical potential */
int overlap; /**< Width of overlapping domains */
int overwrite_gauge; /**< When computing gauge, should we overwrite it or accumulate to it */
int overwrite_mom; /**< When computing momentum, should we overwrite it or accumulate to it */
int use_resident_gauge; /**< Use the resident gauge field as input */
int use_resident_mom; /**< Use the resident momentum field as input*/
int make_resident_gauge; /**< Make the result gauge field resident */
int make_resident_mom; /**< Make the result momentum field resident */
int return_result_gauge; /**< Return the result gauge field */
int return_result_mom; /**< Return the result momentum field */
size_t gauge_offset; /**< Offset into MILC site struct to the gauge field (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
size_t mom_offset; /**< Offset into MILC site struct to the momentum field (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
size_t site_size; /**< Size of MILC site struct (only if gauge_order=MILC_SITE_GAUGE_ORDER) */
} QudaGaugeParam;
/**
* Parameters relating to the solver and the choice of Dirac operator.
*/
typedef struct QudaInvertParam_s {
/** Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */
size_t struct_size;
QudaFieldLocation input_location; /**< The location of the input field */
QudaFieldLocation output_location; /**< The location of the output field */
QudaDslashType dslash_type; /**< The Dirac Dslash type that is being used */
QudaInverterType inv_type; /**< Which linear solver to use */
double mass; /**< Used for staggered only */
double kappa; /**< Used for Wilson and Wilson-clover */
double m5; /**< Domain wall height */
int Ls; /**< Extent of the 5th dimension (for domain wall) */
double_complex b_5[QUDA_MAX_DWF_LS]; /**< Mobius coefficients - only real part used if regular Mobius */
double_complex c_5[QUDA_MAX_DWF_LS]; /**< Mobius coefficients - only real part used if regular Mobius */
/**<
* The following specifies the EOFA parameters. Notation follows arXiv:1706.05843
* eofa_shift: the "\beta" in the paper
* eofa_pm: plus or minus for the EOFA operator
* mq1, mq2, mq3 are the three masses corresponds to Hasenbusch mass spliting.
* As far as I know mq1 is always the same as "mass" but it's here just for consistence.
* */
double eofa_shift;
int eofa_pm;
double mq1;
double mq2;
double mq3;
double mu; /**< Twisted mass parameter */
double tm_rho; /**< Hasenbusch mass shift applied like twisted mass to diagonal (but not inverse) */
double epsilon; /**< Twisted mass parameter */
double evmax; /** maximum of the eigenvalues of the ndeg twisted mass operator needed for fermionic forces **/
QudaTwistFlavorType twist_flavor; /**< Twisted mass flavor */
int laplace3D; /**< omit this direction from laplace operator: x,y,z,t -> 0,1,2,3 (-1 is full 4D) */
int covdev_mu; /**< Apply forward/backward covariant derivative in direction mu(mu<=3)/mu-4(mu>3) */
double tol; /**< Solver tolerance in the L2 residual norm */
double tol_restart; /**< Solver tolerance in the L2 residual norm (used to restart InitCG) */
double tol_hq; /**< Solver tolerance in the heavy quark residual norm */
int compute_true_res; /** Whether to compute the true residual post solve */
double true_res[QUDA_MAX_MULTI_SRC]; /**< Actual L2 residual norm achieved in the solver */
double true_res_hq[QUDA_MAX_MULTI_SRC]; /**< Actual heavy quark residual norm achieved in the solver */
int maxiter; /**< Maximum number of iterations in the linear solver */
double reliable_delta; /**< Reliable update tolerance */
double reliable_delta_refinement; /**< Reliable update tolerance used in post multi-shift solver refinement */
int use_alternative_reliable; /**< Whether to use alternative reliable updates */
int use_sloppy_partial_accumulator; /**< Whether to keep the partial solution accumuator in sloppy precision */
/**< This parameter determines how often we accumulate into the
solution vector from the direction vectors in the solver.
E.g., running with solution_accumulator_pipeline = 4, means we
will update the solution vector every four iterations using the
direction vectors from the prior four iterations. This
increases performance of mixed-precision solvers since it means
less high-precision vector round-trip memory travel, but
requires more low-precision memory allocation. */
int solution_accumulator_pipeline;
/**< This parameter determines how many consecutive reliable update
residual increases we tolerate before terminating the solver,
i.e., how long do we want to keep trying to converge */
int max_res_increase;
/**< This parameter determines how many total reliable update
residual increases we tolerate before terminating the solver,
i.e., how long do we want to keep trying to converge */
int max_res_increase_total;
/**< This parameter determines how many consecutive heavy-quark
residual increases we tolerate before terminating the solver,
i.e., how long do we want to keep trying to converge */
int max_hq_res_increase;
/**< This parameter determines how many total heavy-quark residual
restarts we tolerate before terminating the solver, i.e., how long
do we want to keep trying to converge */
int max_hq_res_restart_total;
/**< After how many iterations shall the heavy quark residual be updated */
int heavy_quark_check;
int pipeline; /**< Whether to use a pipelined solver with less global sums */
int num_offset; /**< Number of offsets in the multi-shift solver */
int num_src; /**< Number of sources in the multiple source solver */
int num_src_per_sub_partition; /**< Number of sources in the multiple source solver, but per sub-partition */
/**< The grid of sub-partition according to which the processor grid will be partitioned.
Should have:
split_grid[0] * split_grid[1] * split_grid[2] * split_grid[3] * num_src_per_sub_partition == num_src. **/
int split_grid[QUDA_MAX_DIM];
int overlap; /**< Width of domain overlaps */
/** Offsets for multi-shift solver */
double offset[QUDA_MAX_MULTI_SHIFT];
/** Solver tolerance for each offset */
double tol_offset[QUDA_MAX_MULTI_SHIFT];
/** Solver tolerance for each shift when refinement is applied using the heavy-quark residual */
double tol_hq_offset[QUDA_MAX_MULTI_SHIFT];
/** Actual L2 residual norm achieved in solver for each offset */
double true_res_offset[QUDA_MAX_MULTI_SHIFT];
/** Iterated L2 residual norm achieved in multi shift solver for each offset */
double iter_res_offset[QUDA_MAX_MULTI_SHIFT];
/** Actual heavy quark residual norm achieved in solver for each offset */
double true_res_hq_offset[QUDA_MAX_MULTI_SHIFT];
/** Residuals in the partial faction expansion */
double residue[QUDA_MAX_MULTI_SHIFT];
/** Whether we should evaluate the action after the linear solver*/
int compute_action;
/** Computed value of the bilinear action (complex-valued)
invert: \phi^\dagger A^{-1} \phi
multishift: \phi^\dagger r(x) \phi = \phi^\dagger (sum_k residue[k] * (A + offset[k])^{-1} ) \phi */
double action[2];
QudaSolutionType solution_type; /**< Type of system to solve */
QudaSolveType solve_type; /**< How to solve it */
QudaMatPCType matpc_type; /**< The preconditioned matrix type */
QudaDagType dagger; /**< Whether we are using the Hermitian conjugate system or not */
QudaMassNormalization mass_normalization; /**< The mass normalization is being used by the caller */
QudaSolverNormalization solver_normalization; /**< The normalization desired in the solver */
QudaPreserveSource preserve_source; /**< Preserve the source or not in the linear solver (deprecated) */
QudaPrecision cpu_prec; /**< The precision used by the input fermion fields */
QudaPrecision cuda_prec; /**< The precision used by the QUDA solver */
QudaPrecision cuda_prec_sloppy; /**< The precision used by the QUDA sloppy operator */
QudaPrecision cuda_prec_refinement_sloppy; /**< The precision of the sloppy gauge field for the refinement step in multishift */
QudaPrecision cuda_prec_precondition; /**< The precision used by the QUDA preconditioner */
QudaPrecision cuda_prec_eigensolver; /**< The precision used by the QUDA eigensolver */
QudaDiracFieldOrder dirac_order; /**< The order of the input and output fermion fields */
QudaGammaBasis gamma_basis; /**< Gamma basis of the input and output host fields */
QudaFieldLocation clover_location; /**< The location of the clover field */
QudaPrecision clover_cpu_prec; /**< The precision used for the input clover field */
QudaPrecision clover_cuda_prec; /**< The precision used for the clover field in the QUDA solver */
QudaPrecision clover_cuda_prec_sloppy; /**< The precision used for the clover field in the QUDA sloppy operator */
QudaPrecision clover_cuda_prec_refinement_sloppy; /**< The precision of the sloppy clover field for the refinement step in multishift */
QudaPrecision clover_cuda_prec_precondition; /**< The precision used for the clover field in the QUDA preconditioner */
QudaPrecision clover_cuda_prec_eigensolver; /**< The precision used for the clover field in the QUDA eigensolver */
QudaCloverFieldOrder clover_order; /**< The order of the input clover field */
QudaUseInitGuess use_init_guess; /**< Whether to use an initial guess in the solver or not */
double clover_csw; /**< Csw coefficient of the clover term */
double clover_coeff; /**< Coefficient of the clover term */
double clover_rho; /**< Real number added to the clover diagonal (not to inverse) */
int compute_clover_trlog; /**< Whether to compute the trace log of the clover term */
double trlogA[2]; /**< The trace log of the clover term (even/odd computed separately) */
int compute_clover; /**< Whether to compute the clover field */
int compute_clover_inverse; /**< Whether to compute the clover inverse field */
int return_clover; /**< Whether to copy back the clover matrix field */
int return_clover_inverse; /**< Whether to copy back the inverted clover matrix field */
QudaVerbosity verbosity; /**< The verbosity setting to use in the solver */
int iter; /**< The number of iterations performed by the solver */
double gflops; /**< The Gflops rate of the solver */
double secs; /**< The time taken by the solver */
double energy; /**< The energy consumed by the solver */
double power; /**< The mean power of the solver */
double temp; /**< The mean temperature of the device for the duration of the solve */
double clock; /**< The mean clock frequency of the device for the duration of the solve */
/** Number of steps in s-step algorithms */
int Nsteps;
/** Maximum size of Krylov space used by solver */
int gcrNkrylov;
/*
* The following parameters are related to the solver
* preconditioner, if enabled.
*/
/**
* The inner Krylov solver used in the preconditioner. Set to
* QUDA_INVALID_INVERTER to disable the preconditioner entirely.
*/
QudaInverterType inv_type_precondition;
/** Preconditioner instance, e.g., multigrid */
void *preconditioner;
/** Deflation instance */
void *deflation_op;
/** defines deflation */
void *eig_param;
/** If true, deflate the initial guess */
QudaBoolean deflate;
/** Dirac Dslash used in preconditioner */
QudaDslashType dslash_type_precondition;
/** Verbosity of the inner Krylov solver */
QudaVerbosity verbosity_precondition;
/** Tolerance in the inner solver */
double tol_precondition;
/** Maximum number of iterations allowed in the inner solver */
int maxiter_precondition;
/** Relaxation parameter used in GCR-DD (default = 1.0) */
double omega;
/** Basis for CA algorithms */
QudaCABasis ca_basis;
/** Minimum eigenvalue for Chebyshev CA basis */
double ca_lambda_min;
/** Maximum eigenvalue for Chebyshev CA basis */
double ca_lambda_max;
/** Basis for CA algorithms in a preconditioned solver */
QudaCABasis ca_basis_precondition;
/** Minimum eigenvalue for Chebyshev CA basis in a preconditioner solver */
double ca_lambda_min_precondition;
/** Maximum eigenvalue for Chebyshev CA basis in a preconditioner solver */
double ca_lambda_max_precondition;
/** Number of preconditioner cycles to perform per iteration */
int precondition_cycle;
/** Whether to use additive or multiplicative Schwarz preconditioning */
QudaSchwarzType schwarz_type;
/** The type of accelerator type to use for preconditioner */
QudaAcceleratorType accelerator_type_precondition;
/**
* The following parameters are the ones used to perform the adaptive MADWF in MSPCG
* See section 3.3 of [arXiv:2104.05615]
*/
/** The diagonal constant to suppress the low modes when performing 5D transfer */
double madwf_diagonal_suppressor;
/** The target MADWF Ls to be used in the accelerator */
int madwf_ls;
/** The minimum number of iterations after which to generate the null vectors for MADWF */
int madwf_null_miniter;
/** The maximum tolerance after which to generate the null vectors for MADWF */
double madwf_null_tol;
/** The maximum number of iterations for the training iterations */
int madwf_train_maxiter;
/** Whether to load the MADWF parameters from the file system */
QudaBoolean madwf_param_load;
/** Whether to save the MADWF parameters to the file system */
QudaBoolean madwf_param_save;
/** Path to load from the file system */
char madwf_param_infile[256];
/** Path to save to the file system */
char madwf_param_outfile[256];
/**
* Whether to use the L2 relative residual, Fermilab heavy-quark
* residual, or both to determine convergence. To require that both
* stopping conditions are satisfied, use a bitwise OR as follows:
*
* p.residual_type = (QudaResidualType) (QUDA_L2_RELATIVE_RESIDUAL
* | QUDA_HEAVY_QUARK_RESIDUAL);
*/
QudaResidualType residual_type;
/**Parameters for deflated solvers*/
/** The precision of the Ritz vectors */
QudaPrecision cuda_prec_ritz;
/** How many vectors to compute after one solve
* for eigCG recommended values 8 or 16
*/
int n_ev;
/** EeigCG : Search space dimension
* gmresdr : Krylov subspace dimension
*/
int max_search_dim;
/** For systems with many RHS: current RHS index */
int rhs_idx;
/** Specifies deflation space volume: total number of eigenvectors is n_ev*deflation_grid */
int deflation_grid;
/** eigCG: selection criterion for the reduced eigenvector set */
double eigenval_tol;
/** mixed precision eigCG tuning parameter: minimum search vector space restarts */
int eigcg_max_restarts;
/** initCG tuning parameter: maximum restarts */
int max_restart_num;
/** initCG tuning parameter: tolerance for cg refinement corrections in the deflation stage */
double inc_tol;
/** Whether to make the solution vector(s) after the solve */
int make_resident_solution;
/** Whether to use the resident solution vector(s) */
int use_resident_solution;
/** Whether to use the solution vector to augment the chronological basis */
int chrono_make_resident;
/** Whether the solution should replace the last entry in the chronology */
int chrono_replace_last;
/** Whether to use the resident chronological basis */
int chrono_use_resident;
/** The maximum length of the chronological history to store */
int chrono_max_dim;
/** The index to indicate which chrono history we are augmenting */
int chrono_index;
/** Precision to store the chronological basis in */
QudaPrecision chrono_precision;
/** Which external library to use in the linear solvers (Eigen) */
QudaExtLibType extlib_type;
/** Whether to use the platform native or generic BLAS / LAPACK */
QudaBoolean native_blas_lapack;
/** Whether to use fused kernels for mobius */
QudaBoolean use_mobius_fused_kernel;
/**
* Parameters for distance preconditioning algorithm proposed in arXiv:1006.4028,
* which is useful to solve a precise heavy quark propagator.
* alpha0 and t0 follow Eq.(9) in the article.
*/
/** The alpha0 parameter for distance preconditioning, related to the pseudoscalar meson mass */
double distance_pc_alpha0;
/** The t0 parameter for distance preconditioning, the timeslice where the source is located */
int distance_pc_t0;
} QudaInvertParam;
// Parameter set for solving eigenvalue problems.
typedef struct QudaEigParam_s {
/** Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */
size_t struct_size;
// EIGENSOLVER PARAMS
//-------------------------------------------------
/** Used to store information pertinent to the operator **/
QudaInvertParam *invert_param;
/** Type of eigensolver algorithm to employ **/
QudaEigType eig_type;
/** Use Polynomial Acceleration **/
QudaBoolean use_poly_acc;
/** Degree of the Chebysev polynomial **/
int poly_deg;
/** Range used in polynomial acceleration **/
double a_min;
double a_max;
/** Whether to preserve the deflation space between solves. If
true, the space will be stored in an instance of the
deflation_space struct, pointed to by preserve_deflation_space */
QudaBoolean preserve_deflation;
/** This is where we store the deflation space. This will point
to an instance of deflation_space. When a deflated solver is enabled, the deflation space will be obtained from this. */
void *preserve_deflation_space;
/** If we restore the deflation space, this boolean indicates
whether we are also preserving the evalues or recomputing
them. For example if a different mass shift is being used
than the one used to generate the space, then this should be
false, but preserve_deflation would be true */
QudaBoolean preserve_evals;
/** Whether to use the smeared gauge field for the Dirac operator
for whose eigenvalues are are computing. */
bool use_smeared_gauge;
/** What type of Dirac operator we are using **/
/** If !(use_norm_op) && !(use_dagger) use M. **/
/** If use_dagger, use Mdag **/
/** If use_norm_op, use MdagM **/
/** If use_norm_op && use_dagger use MMdag. **/
/** If use_pc for any, then use the even-odd pc version **/
QudaBoolean use_dagger;
QudaBoolean use_norm_op;
QudaBoolean use_pc;
/** Use Eigen routines to eigensolve the upper Hessenberg via QR **/
QudaBoolean use_eigen_qr;
/** Performs an MdagM solve, then constructs the left and right SVD. **/
QudaBoolean compute_svd;
/** Performs the \gamma_5 OP solve by Post multipling the eignvectors with
\gamma_5 before computing the eigenvalues */
QudaBoolean compute_gamma5;
/** If true, the solver will error out if the convergence criteria are not met **/
QudaBoolean require_convergence;
/** Which part of the spectrum to solve **/
QudaEigSpectrumType spectrum;
/** Size of the eigenvector search space **/
int n_ev;
/** Total size of Krylov space **/
int n_kr;
/** Max number of locked eigenpairs (deduced at runtime) **/
int nLockedMax;
/** Number of requested converged eigenvectors **/
int n_conv;
/** Number of requested converged eigenvectors to use in deflation **/
int n_ev_deflate;
/** Tolerance on the least well known eigenvalue's residual **/
double tol;
/** Tolerance on the QR iteration **/
double qr_tol;
/** For IRLM/IRAM, check every nth restart **/
int check_interval;
/** For IRLM/IRAM, quit after n restarts **/
int max_restarts;
/** For the Ritz rotation, the maximal number of extra vectors the solver may allocate **/
int batched_rotate;
/** For block method solvers, the block size **/
int block_size;
/** The batch size used when computing eigenvalues **/
int compute_evals_batch_size;
/** For block method solvers, quit after n attempts at block orthonormalisation **/
int max_ortho_attempts;
/** For hybrid modifeld Gram-Schmidt orthonormalisations **/
int ortho_block_size;
/** In the test function, cross check the device result against ARPACK **/
QudaBoolean arpack_check;
/** For Arpack cross check, name of the Arpack logfile **/
char arpack_logfile[512];
/** Name of the QUDA logfile (residua, upper Hessenberg/tridiag matrix updates) **/
char QUDA_logfile[512];
/** The orthogonal direction in the 3D eigensolver **/
int ortho_dim;
/** The size of the orthogonal direction in the 3D eigensolver, local **/
int ortho_dim_size_local;
//-------------------------------------------------
// EIG-CG PARAMS
//-------------------------------------------------
int nk;
int np;
/** Whether to load eigenvectors */
QudaBoolean import_vectors;
/** The precision of the Ritz vectors */
QudaPrecision cuda_prec_ritz;
/** The memory type used to keep the Ritz vectors */
QudaMemoryType mem_type_ritz;
/** Location where deflation should be done */
QudaFieldLocation location;
/** Whether to run the verification checks once set up is complete */
QudaBoolean run_verify;
/** Filename prefix where to load the null-space vectors */
char vec_infile[256];
/** Filename prefix for where to save the null-space vectors */
char vec_outfile[256];
/** The precision with which to save the vectors */
QudaPrecision save_prec;
/** Whether to inflate single-parity eigen-vector I/O to a full
field (e.g., enabling this is required for compatability with
MILC I/O) */
QudaBoolean io_parity_inflate;
/** Whether to save eigenvectors in QIO singlefile or partfile format */
QudaBoolean partfile;
/** Which external library to use in the deflation operations (Eigen) */
QudaExtLibType extlib_type;
//-------------------------------------------------
} QudaEigParam;
typedef struct QudaMultigridParam_s {
/** Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct size */
size_t struct_size;
QudaInvertParam *invert_param;
QudaEigParam *eig_param[QUDA_MAX_MG_LEVEL];
/** Number of multigrid levels */
int n_level;
/** Geometric block sizes to use on each level */
int geo_block_size[QUDA_MAX_MG_LEVEL][QUDA_MAX_DIM];
/** Spin block sizes to use on each level */
int spin_block_size[QUDA_MAX_MG_LEVEL];
/** Number of null-space vectors to use on each level */
int n_vec[QUDA_MAX_MG_LEVEL];
/** Precision to store the null-space vectors in (post block orthogonalization) */
QudaPrecision precision_null[QUDA_MAX_MG_LEVEL];
/** Number of times to repeat Gram-Schmidt in block orthogonalization */
int n_block_ortho[QUDA_MAX_MG_LEVEL];
/** Whether to do passes at block orthogonalize in fixed point for improved accuracy */
QudaBoolean block_ortho_two_pass[QUDA_MAX_MG_LEVEL];
/** Verbosity on each level of the multigrid */
QudaVerbosity verbosity[QUDA_MAX_MG_LEVEL];
/** Setup MMA usage on each level of the multigrid */
QudaBoolean setup_use_mma[QUDA_MAX_MG_LEVEL];
/** Dslash MMA usage on each level of the multigrid */
QudaBoolean dslash_use_mma[QUDA_MAX_MG_LEVEL];
/** Inverter to use in the setup phase */
QudaInverterType setup_inv_type[QUDA_MAX_MG_LEVEL];
/** Solver batch size to use in the setup phase */
int n_vec_batch[QUDA_MAX_MG_LEVEL];
/** Number of setup iterations */
int num_setup_iter[QUDA_MAX_MG_LEVEL];
/** Tolerance to use in the setup phase */
double setup_tol[QUDA_MAX_MG_LEVEL];
/** Maximum number of iterations for each setup solver */
int setup_maxiter[QUDA_MAX_MG_LEVEL];
/** Maximum number of iterations for refreshing the null-space vectors */
int setup_maxiter_refresh[QUDA_MAX_MG_LEVEL];
/** Basis to use for CA solver setup */
QudaCABasis setup_ca_basis[QUDA_MAX_MG_LEVEL];
/** Basis size for CA solver setup */
int setup_ca_basis_size[QUDA_MAX_MG_LEVEL];
/** Minimum eigenvalue for Chebyshev CA basis */
double setup_ca_lambda_min[QUDA_MAX_MG_LEVEL];
/** Maximum eigenvalue for Chebyshev CA basis */
double setup_ca_lambda_max[QUDA_MAX_MG_LEVEL];
/** Null-space type to use in the setup phase */
QudaSetupType setup_type;
/** Pre orthonormalize vectors in the setup phase */
QudaBoolean pre_orthonormalize;
/** Post orthonormalize vectors in the setup phase */
QudaBoolean post_orthonormalize;
/** The solver that wraps around the coarse grid correction and smoother */
QudaInverterType coarse_solver[QUDA_MAX_MG_LEVEL];
/** Tolerance for the solver that wraps around the coarse grid correction and smoother */
double coarse_solver_tol[QUDA_MAX_MG_LEVEL];
/** Maximum number of iterations for the solver that wraps around the coarse grid correction and smoother */
int coarse_solver_maxiter[QUDA_MAX_MG_LEVEL];
/** Basis to use for CA coarse solvers */
QudaCABasis coarse_solver_ca_basis[QUDA_MAX_MG_LEVEL];
/** Basis size for CA coarse solvers */
int coarse_solver_ca_basis_size[QUDA_MAX_MG_LEVEL];
/** Minimum eigenvalue for Chebyshev CA basis */
double coarse_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL];
/** Maximum eigenvalue for Chebyshev CA basis */
double coarse_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL];
/** Smoother to use on each level */
QudaInverterType smoother[QUDA_MAX_MG_LEVEL];
/** Tolerance to use for the smoother / solver on each level */
double smoother_tol[QUDA_MAX_MG_LEVEL];
/** Number of pre-smoother applications on each level */
int nu_pre[QUDA_MAX_MG_LEVEL];
/** Number of post-smoother applications on each level */
int nu_post[QUDA_MAX_MG_LEVEL];
/** Basis to use for CA smoother solvers */
QudaCABasis smoother_solver_ca_basis[QUDA_MAX_MG_LEVEL];
/** Minimum eigenvalue for Chebyshev CA smoother basis */
double smoother_solver_ca_lambda_min[QUDA_MAX_MG_LEVEL];
/** Maximum eigenvalue for Chebyshev CA smoother basis */
double smoother_solver_ca_lambda_max[QUDA_MAX_MG_LEVEL];
/** Over/under relaxation factor for the smoother at each level */
double omega[QUDA_MAX_MG_LEVEL];
/** Precision to use for halo communication in the smoother */
QudaPrecision smoother_halo_precision[QUDA_MAX_MG_LEVEL];
/** Whether to use additive or multiplicative Schwarz preconditioning in the smoother */
QudaSchwarzType smoother_schwarz_type[QUDA_MAX_MG_LEVEL];
/** Number of Schwarz cycles to apply */
int smoother_schwarz_cycle[QUDA_MAX_MG_LEVEL];
/** The type of residual to send to the next coarse grid, and thus the
type of solution to receive back from this coarse grid */
QudaSolutionType coarse_grid_solution_type[QUDA_MAX_MG_LEVEL];
/** The type of smoother solve to do on each grid (e/o preconditioning or not)*/
QudaSolveType smoother_solve_type[QUDA_MAX_MG_LEVEL];
/** The type of multigrid cycle to perform at each level */
QudaMultigridCycleType cycle_type[QUDA_MAX_MG_LEVEL];
/** Whether to use global reductions or not for the smoother / solver at each level */
QudaBoolean global_reduction[QUDA_MAX_MG_LEVEL];
/** Location where each level should be done */
QudaFieldLocation location[QUDA_MAX_MG_LEVEL];
/** Location where the coarse-operator construction will be computedn */
QudaFieldLocation setup_location[QUDA_MAX_MG_LEVEL];
/** Whether to use eigenvectors for the nullspace or, if the coarsest instance deflate*/
QudaBoolean use_eig_solver[QUDA_MAX_MG_LEVEL];
/** Minimize device memory allocations during the adaptive setup,
placing temporary fields in mapped memory instad of device
memory */
QudaBoolean setup_minimize_memory;
/** Whether to compute the null vectors or reload them */
QudaComputeNullVector compute_null_vector;
/** Whether to generate on all levels or just on level 0 */
QudaBoolean generate_all_levels;
/** Whether to run the verification checks once set up is complete */
QudaBoolean run_verify;
/** Whether to run null Vs eigen vector overlap checks once set up is complete */
QudaBoolean run_low_mode_check;
/** Whether to run null vector oblique checks once set up is complete */
QudaBoolean run_oblique_proj_check;
/** Whether to load the null-space vectors to disk (requires QIO) */
QudaBoolean vec_load[QUDA_MAX_MG_LEVEL];
/** Filename prefix where to load the null-space vectors */
char vec_infile[QUDA_MAX_MG_LEVEL][256];
/** Whether to store the null-space vectors to disk (requires QIO) */
QudaBoolean vec_store[QUDA_MAX_MG_LEVEL];
/** Filename prefix for where to save the null-space vectors */
char vec_outfile[QUDA_MAX_MG_LEVEL][256];
/** Whether to store the null-space vectors in singlefile or partfile format */
QudaBoolean mg_vec_partfile[QUDA_MAX_MG_LEVEL];
/** Whether to use and initial guess during coarse grid deflation */
QudaBoolean coarse_guess;
/** Whether to preserve the deflation space during MG update */
QudaBoolean preserve_deflation;
/** Multiplicative factor for the mu parameter */
double mu_factor[QUDA_MAX_MG_LEVEL];
/** Boolean for aggregation type, implies staggered or not */
QudaTransferType transfer_type[QUDA_MAX_MG_LEVEL];
/** Whether or not to let MG coarsening drop improvements, for ex dropping long links in small aggregation dimensions */
QudaBoolean allow_truncation;
/** Whether or not to use the dagger approximation for the KD preconditioned operator */
QudaBoolean staggered_kd_dagger_approximation;
/** Whether to do a full (false) or thin (true) update in the context of updateMultigridQuda */
QudaBoolean thin_update_only;
} QudaMultigridParam;
typedef struct QudaGaugeObservableParam_s {
size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct*/
QudaBoolean su_project; /**< Whether to project onto the manifold prior to measurement */
QudaBoolean compute_plaquette; /**< Whether to compute the plaquette */
double plaquette[3]; /**< Total, spatial and temporal field energies, respectively */
QudaBoolean compute_polyakov_loop; /**< Whether to compute the temporal Polyakov loop */
double ploop[2]; /**< Real and imaginary part of temporal Polyakov loop */
QudaBoolean compute_gauge_loop_trace; /**< Whether to compute gauge loop traces */
double_complex *traces; /**< Individual complex traces of each loop */
int **input_path_buff; /**< Array of paths */
int *path_length; /**< Length of each path */
double *loop_coeff; /**< Multiplicative factor for each loop */
int num_paths; /**< Total number of paths */
int max_length; /**< Maximum length of any path */
double factor; /**< Global multiplicative factor to apply to each loop trace */
QudaBoolean compute_qcharge; /**< Whether to compute the topological charge and field energy */
double qcharge; /**< Computed topological charge */
double energy[3]; /**< Total, spatial and temporal field energies, respectively */
QudaBoolean compute_qcharge_density; /**< Whether to compute the topological charge density */
void *qcharge_density; /**< Pointer to host array of length volume where the q-charge density will be copied */
QudaBoolean
remove_staggered_phase; /**< Whether or not the resident gauge field has staggered phases applied and if they should
be removed; this was needed for the Polyakov loop calculation when called through MILC,
with the underlying issue documented https://github.com/lattice/quda/issues/1315 */
} QudaGaugeObservableParam;
typedef struct QudaGaugeSmearParam_s {
size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct*/
unsigned int n_steps; /**< The total number of smearing steps to perform. */
double epsilon; /**< Serves as one of the coefficients in Over Improved Stout smearing, or as the step size in
Wilson/Symanzik flow */
double alpha; /**< The single coefficient used in APE smearing */
double rho; /**< Serves as one of the coefficients used in Over Improved Stout smearing, or as the single coefficient used in Stout */
double alpha1; /**< The coefficient used in HYP smearing step 3 (will not be used in 3D smearing)*/
double alpha2; /**< The coefficient used in HYP smearing step 2*/
double alpha3; /**< The coefficient used in HYP smearing step 1*/
unsigned int meas_interval; /**< Perform the requested measurements on the gauge field at this interval */
QudaGaugeSmearType smear_type; /**< The smearing type to perform */
QudaBoolean restart; /**< Used to restart the smearing from existing gaugeSmeared */
double t0; /**< Starting flow time for Wilson flow */
int dir_ignore; /**< The direction to be ignored by the smearing algorithm
A negative value means 3D for APE/STOUT and 4D for OVRIMP_STOUT/HYP */
} QudaGaugeSmearParam;
typedef struct QudaBLASParam_s {
size_t struct_size; /**< Size of this struct in bytes. Used to ensure that the host application and QUDA see the same struct*/
QudaBLASType blas_type; /**< Type of BLAS computation to perfrom */
// GEMM params
QudaBLASOperation trans_a; /**< operation op(A) that is non- or (conj.) transpose. */
QudaBLASOperation trans_b; /**< operation op(B) that is non- or (conj.) transpose. */
int m; /**< number of rows of matrix op(A) and C. */
int n; /**< number of columns of matrix op(B) and C. */
int k; /**< number of columns of op(A) and rows of op(B). */
int lda; /**< leading dimension of two-dimensional array used to store the matrix A. */
int ldb; /**< leading dimension of two-dimensional array used to store matrix B. */
int ldc; /**< leading dimension of two-dimensional array used to store matrix C. */
int a_offset; /**< position of the A array from which begin read/write. */
int b_offset; /**< position of the B array from which begin read/write. */
int c_offset; /**< position of the C array from which begin read/write. */
int a_stride; /**< stride of the A array in strided(batched) mode */
int b_stride; /**< stride of the B array in strided(batched) mode */
int c_stride; /**< stride of the C array in strided(batched) mode */
double_complex alpha; /**< scalar used for multiplication. */
double_complex beta; /**< scalar used for multiplication. If beta==0, C does not have to be a valid input. */
// LU inversion params
int inv_mat_size; /**< The rank of the square matrix in the LU inversion */
// Common params
int batch_count; /**< number of pointers contained in arrayA, arrayB and arrayC. */
QudaBLASDataType data_type; /**< Specifies if using S(C) or D(Z) BLAS type */
QudaBLASDataOrder data_order; /**< Specifies if using Row or Column major */
} QudaBLASParam;
/*
* Interface functions, found in interface_quda.cpp
*/
/**
* Set parameters related to status reporting.
*
* In typical usage, this function will be called once (or not at
* all) just before the call to initQuda(), but it's valid to call
* it any number of times at any point during execution. Prior to
* the first time it's called, the parameters take default values
* as indicated below.
*
* @param verbosity Default verbosity, ranging from QUDA_SILENT to
* QUDA_DEBUG_VERBOSE. Within a solver, this
* parameter is overridden by the "verbosity"
* member of QudaInvertParam. The default value
* is QUDA_SUMMARIZE.
*
* @param prefix String to prepend to all messages from QUDA. This
* defaults to the empty string (""), but you may
* wish to specify something like "QUDA: " to
* distinguish QUDA's output from that of your
* application.
*
* @param outfile File pointer (such as stdout, stderr, or a handle
* returned by fopen()) where messages should be
* printed. The default is stdout.
*/
void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[],
FILE *outfile);
/**
* initCommsGridQuda() takes an optional "rank_from_coords" argument that
* should be a pointer to a user-defined function with this prototype.
*
* @param coords Node coordinates
* @param fdata Any auxiliary data needed by the function
* @return MPI rank or QMP node ID cooresponding to the node coordinates
*
* @see initCommsGridQuda
*/
typedef int (*QudaCommsMap)(const int *coords, void *fdata);
/**
* @param mycomm User provided MPI communicator in place of MPI_COMM_WORLD
*/
void qudaSetCommHandle(void *mycomm);
/**
* Declare the grid mapping ("logical topology" in QMP parlance)
* used for communications in a multi-GPU grid. This function
* should be called prior to initQuda(). The only case in which
* it's optional is when QMP is used for communication and the
* logical topology has already been declared by the application.
*
* @param nDim Number of grid dimensions. "4" is the only supported
* value currently.
*
* @param dims Array of grid dimensions. dims[0]*dims[1]*dims[2]*dims[3]
* must equal the total number of MPI ranks or QMP nodes.
*
* @param func Pointer to a user-supplied function that maps coordinates
* in the communication grid to MPI ranks (or QMP node IDs).
* If the pointer is NULL, the default mapping depends on
* whether QMP or MPI is being used for communication. With
* QMP, the existing logical topology is used if it's been
* declared. With MPI or as a fallback with QMP, the default
* ordering is lexicographical with the fourth ("t") index
* varying fastest.
*
* @param fdata Pointer to any data required by "func" (may be NULL)
*
* @see QudaCommsMap
*/
void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata);
/**
* Initialize the library. This is a low-level interface that is
* called by initQuda. Calling initQudaDevice requires that the
* user also call initQudaMemory before using QUDA.
*
* @param device CUDA device number to use. In a multi-GPU build,
* this parameter may either be set explicitly on a
* per-process basis or set to -1 to enable a default
* allocation of devices to processes.