From 38ffaaeeff03eaa45a33acece5eda573a2cab1a7 Mon Sep 17 00:00:00 2001 From: M Clark Date: Thu, 28 May 2015 14:04:55 -0700 Subject: [PATCH] Added doxygen comments for all functions in the milc interface. Closes #233. --- include/quda.h | 10 +- include/quda_milc_interface.h | 551 +++++++++++++++++++++++++++------- 2 files changed, 452 insertions(+), 109 deletions(-) diff --git a/include/quda.h b/include/quda.h index d3b04ae250..7f8726633c 100644 --- a/include/quda.h +++ b/include/quda.h @@ -726,12 +726,20 @@ extern "C" { * * @param oprod The outer product to be computed. * @param quark The input fermion field. - * @param displacement The fermion-field displacement in the outer product. + * @param num The number of quark fields * @param coeff The coefficient multiplying the fermion fields in the outer product * @param param The parameters of the outer-product field. */ void computeStaggeredOprodQuda(void** oprod, void** quark, int num, double** coeff, QudaGaugeParam* param); + /** + * Compute the naive staggered force (experimental). All fields are + * QUDA device fields and must be in the same precision. + * + * @param mom Momentum field (QUDA device field) + * @param quark Quark field solution vectors + * @param coeff Step-size coefficient + */ void computeStaggeredForceQuda(void* mom, void* quark, double* coeff); /** diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h index 89294bace1..003592993e 100644 --- a/include/quda_milc_interface.h +++ b/include/quda_milc_interface.h @@ -4,94 +4,274 @@ #include #include +/** + * @file quda_milc_interface.h + * + * @section Description + * + * This header file defines the MILC interface to enable easy + * interfacing between QUDA and the MILC software package. + */ + #ifdef __cplusplus extern "C" { #endif + /** + * Parameters related to linear solvers. 
+ */ typedef struct { - int max_iter; - QudaParity evenodd; // options are QUDA_EVEN_PARITY, QUDA_ODD_PARITY, QUDA_INVALID_PARITY - int mixed_precision; - double boundary_phase[4]; + int max_iter; /**< Maximum number of iterations */ + QudaParity evenodd; /**< Which parity we are working on (options are QUDA_EVEN_PARITY, QUDA_ODD_PARITY, QUDA_INVALID_PARITY) */ + int mixed_precision; /**< Whether to use mixed precision or not (1 - yes, 0 - no) */ + double boundary_phase[4]; /**< Boundary conditions */ } QudaInvertArgs_t; + /** + * Parameters related to problem size and machine topology. + */ typedef struct { - const int* latsize; - const int* machsize; // grid size - int device; // device number + const int* latsize; /**< Local lattice dimensions */ + const int* machsize; /**< Machine grid size */ + int device; /**< GPU device number */ } QudaLayout_t; + /** + * Parameters used to create a QUDA context. + */ typedef struct { - QudaVerbosity verbosity; - QudaLayout_t layout; + QudaVerbosity verbosity; /**< How verbose QUDA should be (QUDA_SILENT, QUDA_VERBOSE or QUDA_SUMMARIZE) */ + QudaLayout_t layout; /**< Layout for QUDA to use */ } QudaInitArgs_t; // passed to the initialization struct + /** + * Parameters for defining HISQ calculations + */ typedef struct { - int reunit_allow_svd; - int reunit_svd_only; - double reunit_svd_abs_error; - double reunit_svd_rel_error; - double force_filter; + int reunit_allow_svd; /**< Allow SVD for reunitarization */ + int reunit_svd_only; /**< Force use of SVD for reunitarization */ + double reunit_svd_abs_error; /**< Absolute error bound for SVD to apply */ + double reunit_svd_rel_error; /**< Relative error bound for SVD to apply */ + double force_filter; /**< UV filter to apply to force */ } QudaHisqParams_t; + /** + * Parameters for defining fat-link calculations + */ typedef struct { - int su3_source; // is the incoming gauge field su3? - int use_pinned_memory; // use page-locked memory in Quda? 
+ int su3_source; /**< is the incoming gauge field SU(3) */ + int use_pinned_memory; /**< use page-locked memory in QUDA */ } QudaFatLinkArgs_t; - + /** + * Initialize the QUDA context. + * + * @param input Meta data for the QUDA context + */ void qudaInit(QudaInitArgs_t input); + /** + * Set the local dimensions and machine topology for QUDA to use + * + * @param layout Struct defining local dimensions and machine topology + */ void qudaSetLayout(QudaLayout_t layout); + /** + * Destroy the QUDA context. + */ void qudaFinalize(); - + /** + * Set the algorithms to use for HISQ fermion calculations, e.g., + * SVD parameters for reunitarization. + * + * @param hisq_params Meta data describing the algorithms to use for HISQ fermions + */ void qudaHisqParamsInit(QudaHisqParams_t hisq_params); + /** + * Compute the fat and long links using the input gauge field. All + * fields passed here are host fields, that must be preallocated. + * The precision of all fields must match. + * + * @param precision The precision of the fields + * @param fatlink_args Meta data for the algorithms to deploy + * @param act_path_coeff Array of coefficients for each path in the action + * @param inlink Host gauge field used for input + * @param fatlink Host fat-link field that is computed + * @param longlink Host long-link field that is computed + */ + void qudaLoadKSLink(int precision, + QudaFatLinkArgs_t fatlink_args, + const double act_path_coeff[6], + void* inlink, + void* fatlink, + void* longlink); - void qudaLoadKSLink(int precision, QudaFatLinkArgs_t fatlink_args, const double act_path_coeff[6], void* inlink, void* fatlink, void* longlink); - + /** + * Compute the fat links and unitarize using the input gauge field. + * All fields passed here are host fields, that must be + * preallocated. The precision of all fields must match. 
+ * + * @param precision The precision of the fields + * @param fatlink_args Meta data for the algorithms to deploy + * @param path_coeff Array of coefficients for each path in the action + * @param inlink Host gauge field used for input + * @param fatlink Host fat-link field that is computed + * @param ulink Host unitarized field that is computed + */ + void qudaLoadUnitarizedLink(int precision, + QudaFatLinkArgs_t fatlink_args, + const double path_coeff[6], + void* inlink, + void* fatlink, + void* ulink); - void qudaLoadUnitarizedLink(int precision, QudaFatLinkArgs_t fatlink_args, const double path_coeff[6], void* inlink, void* fatlink, void* ulink); + /** + * Solve Ax=b using an improved staggered operator with a + * domain-decomposition preconditioner. All fields passed and + * returned are host (CPU) fields in MILC order. This + * function requires that persistent gauge and clover fields have + * been created prior. This interface is experimental. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param mass Fermion mass parameter + * @param inv_args Struct setting some solver metadata + * @param target_residual Target residual + * @param target_fermilab_residual Target Fermilab residual + * @param domain_overlap Array specifying the overlap of the domains in each dimension + * @param fatlink Fat-link field on the host + * @param longlink Long-link field on the host + * @param source Right-hand side source field + * @param solution Solution spinor field + * @param final_residual True residual + * @param final_fermilab_residual True Fermilab residual + * @param num_iters Number of iterations taken + */ + void qudaDDInvert(int external_precision, + int quda_precision, + double mass, + QudaInvertArgs_t inv_args, + double target_residual, + double target_fermilab_residual, + const int * const domain_overlap, + const void* const 
fatlink, + const void* const longlink, + void* source, + void* solution, + double* const final_residual, + double* const final_fermilab_residual, + int* num_iters); + /** + * Solve Ax=b using an improved staggered operator with a + * domain-decomposition preconditioner. All fields passed and + * returned are host (CPU) fields in MILC order. This + * function requires that persistent gauge and clover fields have + * been created prior. This interface is experimental. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param mass Fermion mass parameter + * @param inv_args Struct setting some solver metadata + * @param target_residual Target residual + * @param target_fermilab_residual Target Fermilab residual + * @param domain_overlap Array specifying the overlap of the domains in each dimension + * @param milc_fatlink Fat-link field on the host + * @param milc_longlink Long-link field on the host + * @param tadpole Tadpole improvement factor + * @param source Right-hand side source field + * @param solution Solution spinor field + * @param final_resid True residual + * @param final_rel_resid True Fermilab residual + * @param num_iters Number of iterations taken + */ void qudaInvert(int external_precision, - int quda_precision, - double mass, - QudaInvertArgs_t inv_args, - double target_resid, - double target_relresid, - const void* const milc_fatlink, - const void* const milc_longlink, - const double tadpole, - void* source, - void* solution, - double* const final_resid, - double* const final_rel_resid, - int* num_iters); - - + int quda_precision, + double mass, + QudaInvertArgs_t inv_args, + double target_residual, + double target_fermilab_residual, + const void* const milc_fatlink, + const void* const milc_longlink, + const double tadpole, + void* source, + void* solution, + double* const final_resid, + double* const 
final_rel_resid, + int* num_iters); + + /** + * Solve Ax=b using an improved + * staggered operator with a domain-decomposition preconditioner. + * All fields passed and returned are host (CPU) fields in + * MILC order. This function requires that persistent gauge and + * clover fields have been created prior. When a pure + * double-precision solver is requested no reliable updates are + * used, else reliable updates are used with a reliable_delta + * parameter of 0.1. This interface is experimental. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param inv_args Struct setting some solver metadata + * @param target_residual Target residual + * @param target_fermilab_residual Target Fermilab residual + * @param domain_overlap Array specifying the overlap of the domains in each dimension + * @param fatlink Fat-link field on the host + * @param longlink Long-link field on the host + * @param source Right-hand side source field + * @param solution Solution spinor field + * @param final_residual True residual + * @param final_fermilab_residual True Fermilab residual + * @param num_iters Number of iterations taken + */ void qudaDDInvert(int external_precision, - int quda_precision, - double mass, - QudaInvertArgs_t inv_args, - double target_residual, - double target_fermilab_residual, - const int * const domain_overlap, - const void* const fatlink, - const void* const longlink, - void* source, - void* solution, - double* const final_residual, - double* const final_fermilab_residual, - int* num_iters); - + int quda_precision, + double mass, + QudaInvertArgs_t inv_args, + double target_residual, + double target_fermilab_residual, + const int * const domain_overlap, + const void* const fatlink, + const void* const longlink, + void* source, + void* solution, + double* const final_residual, + double* const final_fermilab_residual, + int* 
num_iters); + /** + * Solve for multiple shifts (e.g., masses) using an improved + * staggered operator. All fields passed and returned + * are host (CPU) fields in MILC order. This function requires that + * persistent gauge and clover fields have been created prior. When + * a pure double-precision solver is requested no reliable updates + * are used, else reliable updates are used with a reliable_delta + * parameter of 0.1. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param precision Precision for QUDA to use (2 - double, 1 - single) + * @param num_offsets Number of shifts to solve for + * @param offset Array of shift offset values + * @param inv_args Struct setting some solver metadata + * @param target_residual Array of target residuals per shift + * @param target_fermilab_residual Array of target Fermilab residuals per shift + * @param milc_fatlink Fat-link field on the host + * @param milc_longlink Long-link field on the host + * @param tadpole Tadpole improvement factor + * @param source Right-hand side source field + * @param solutionArray Array of solution spinor fields + * @param final_residual Array of true residuals + * @param final_fermilab_residual Array of true Fermilab residuals + * @param num_iters Number of iterations taken + */ void qudaMultishiftInvert( int external_precision, int precision, @@ -99,54 +279,132 @@ extern "C" { double* const offset, QudaInvertArgs_t inv_args, const double* target_residual, - const double* target_relative_residual, + const double* target_fermilab_residual, const void* const milc_fatlink, const void* const milc_longlink, const double tadpole, void* source, void** solutionArray, double* const final_residual, - double* const final_relative_residual, + double* const final_fermilab_residual, int* num_iters); - + /** + * Solve Ax=b using a Wilson-Clover operator. All fields + * passed and returned are host (CPU) fields in MILC order. 
This + * function creates the gauge and clover field from the host fields. + * Reliable updates are used with a reliable_delta parameter of 0.1. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param kappa Kappa value + * @param clover_coeff Clover coefficient + * @param inv_args Struct setting some solver metadata + * @param target_residual Target residual + * @param target_fermilab_residual Target Fermilab residual + * @param milc_link Gauge field on the host + * @param milc_clover Clover field on the host + * @param milc_clover_inv Inverse clover on the host + * @param source Right-hand side source field + * @param solution Solution spinor field + * @param final_residual True residual returned by the solver + * @param final_fermilab_residual True Fermilab residual returned by the solver + * @param num_iters Number of iterations taken + */ void qudaCloverInvert(int external_precision, - int quda_precision, - double kappa, - double clover_coeff, - QudaInvertArgs_t inv_args, - double target_residual, - double target_fermilab_residual, - const void* milc_link, - void* milc_clover, - void* milc_clover_inv, - void* source, - void* solution, - double* const final_residual, - double* const final_fermilab_residual, - int* num_iters - ); + int quda_precision, + double kappa, + double clover_coeff, + QudaInvertArgs_t inv_args, + double target_residual, + double target_fermilab_residual, + const void* milc_link, + void* milc_clover, + void* milc_clover_inv, + void* source, + void* solution, + double* const final_residual, + double* const final_fermilab_residual, + int* num_iters); + /** + * Load the gauge field from the host. 
+ * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param inv_args Meta data + * @param milc_link Base pointer to host gauge field (regardless of dimensionality) + */ void qudaLoadGaugeField(int external_precision, - int quda_precision, - QudaInvertArgs_t inv_args, - const void* milc_link) ; + int quda_precision, + QudaInvertArgs_t inv_args, + const void* milc_link) ; + /** + * Free the gauge field allocated in QUDA. + */ void qudaFreeGaugeField(); + /** + * Load the clover field and its inverse from the host. If null + * pointers are passed, the clover field and/or its inverse will + * be computed dynamically from the resident gauge field. + * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param inv_args Meta data + * @param milc_clover Pointer to host clover field. If 0 then the + * clover field is computed dynamically within QUDA. + * @param milc_clover_inv Pointer to host inverse clover field. If + * 0 then the inverse is computed dynamically within QUDA. 
+ * @param solution_type The type of solution required (mat, matpc) + * @param solve_type The solve type to use (normal/direct/preconditioning) + * @param clover_coeff Clover coefficient + * @param compute_trlog Whether to compute the trlog of the clover field when inverting + * @param trlog Array for storing the trlog (length two, one for each parity) + */ void qudaLoadCloverField(int external_precision, - int quda_precision, - QudaInvertArgs_t inv_args, - void* milc_clover, - void* milc_clover_inv, - QudaSolutionType solution_type, - QudaSolveType solve_type, - double clover_coeff, - int compute_trlog, - double *trlog) ; + int quda_precision, + QudaInvertArgs_t inv_args, + void* milc_clover, + void* milc_clover_inv, + QudaSolutionType solution_type, + QudaSolveType solve_type, + double clover_coeff, + int compute_trlog, + double *trlog) ; + /** + * Free the clover field allocated in QUDA. + */ void qudaFreeCloverField(); + /** + * Solve for multiple shifts (e.g., masses) using a Wilson-Clover + * operator with multi-shift CG. All fields passed and + * returned are host (CPU) fields in MILC order. This function + * requires that persistent gauge and clover fields have been + * created prior. When a pure double-precision solver is requested + * no reliable updates are used, else reliable updates are used with + * a reliable_delta parameter of 0.1. 
+ * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param num_offsets Number of shifts to solve for + * @param offset Array of shift offset values + * @param kappa Kappa value + * @param clover_coeff Clover coefficient + * @param inv_args Struct setting some solver metadata + * @param target_residual Array of target residuals per shift + * @param milc_link Ignored + * @param milc_clover Ignored + * @param milc_clover_inv Ignored + * @param clover_coeff Clover coefficient + * @param source Right-hand side source field + * @param solutionArray Array of solution spinor fields + * @param final_residual Array of true residuals + * @param num_iters Number of iterations taken + */ void qudaCloverMultishiftInvert(int external_precision, int quda_precision, int num_offsets, @@ -164,6 +422,37 @@ extern "C" { int* num_iters ); + /** + * Solve for multiple shifts (e.g., masses) using a Wilson-Clover + * operator with multi-shift CG. This is a special variant of the + * multi-shift solver where the additional vectors required for + * force computation are also returned. All fields + * passed and returned are host (CPU) fields in MILC order. This + * function requires that persistent gauge and clover fields have + * been created prior. When a pure double-precision solver is + * requested no reliable updates are used, else reliable updates are + * used with a reliable_delta parameter of 0.1. 
+ * + * @param external_precision Precision of host fields passed to QUDA (2 - double, 1 - single) + * @param quda_precision Precision for QUDA to use (2 - double, 1 - single) + * @param num_offsets Number of shifts to solve for + * @param offset Array of shift offset values + * @param kappa Kappa value + * @param clover_coeff Clover coefficient + * @param inv_args Struct setting some solver metadata + * @param target_residual Array of target residuals per shift + * @param milc_link Ignored + * @param milc_clover Ignored + * @param milc_clover_inv Ignored + * @param clover_coeff Clover coefficient + * @param source Right-hand side source field + * @param psiEven Array of solution spinor fields + * @param psiOdd Array of fields with A_oo^{-1} D_oe * x + * @param pEven Array of fields with M_ee * x + * @param pOdd Array of fields with A_oo^{-1} D_oe * M_ee * x + * @param final_residual Array of true residuals + * @param num_iters Number of iterations taken + */ void qudaCloverMultishiftMDInvert(int external_precision, int quda_precision, int num_offsets, @@ -184,41 +473,87 @@ extern "C" { int* num_iters ); - void qudaHisqForce( - int precision, - const double level2_coeff[6], - const double fat7_coeff[6], - const void* const staple_src[4], - const void* const one_link_src[4], - const void* const naik_src[4], - const void* const w_link, - const void* const v_link, - const void* const u_link, - void* const milc_momentum); + /** + * Compute the fermion force for the HISQ quark action. All fields + * are host fields in MILC order, and the precision of these fields + * must match. + * + * @param precision The precision of the fields + * @param level2_coeff The coefficients for the second level of smearing in the quark action. + * @param fat7_coeff The coefficients for the first level of smearing (fat7) in the quark action. + * @param staple_src Quark outer-product for the staple. + * @param one_link_src Quark outer-product for the one-link term in the action. 
+ * @param naik_src Quark outer-product for the three-hop term in the action. + * @param w_link Unitarized link variables obtained by applying fat7 smearing and unitarization to the original links. + * @param v_link Fat7 link variables. + * @param u_link SU(3) thin link variables. + * @param milc_momentum The momentum contribution from the quark action. + */ + void qudaHisqForce(int precision, + const double level2_coeff[6], + const double fat7_coeff[6], + const void* const staple_src[4], + const void* const one_link_src[4], + const void* const naik_src[4], + const void* const w_link, + const void* const v_link, + const void* const u_link, + void* const milc_momentum); - void qudaAsqtadForce( - int precision, - const double act_path_coeff[6], - const void* const one_link_src[4], - const void* const naik_src[4], - const void* const link, - void* const milc_momentum); + /** + * Compute the fermion force for the Asqtad quark action. All fields + * are host fields in MILC order, and the precision of these fields + * must match. + * + * @param precision The precision of the fields + * @param act_path_coeff The coefficients that define the asqtad action. + * @param one_link_src Quark outer-product for the one-link term in the action. + * @param naik_src Quark outer-product for the three-hop term in the action. + * @param link The gauge field + * @param milc_momentum The momentum contribution from the quark action. + */ + void qudaAsqtadForce(int precision, + const double act_path_coeff[6], + const void* const one_link_src[4], + const void* const naik_src[4], + const void* const link, + void* const milc_momentum); + /** + * Compute the gauge force and update the momentum field. All fields + * here are CPU fields in MILC order, and their precisions should + * match. 
+ * + * @param precision The precision of the field (2 - double, 1 - single) + * @param dummy Not presently used + * @param milc_loop_coeff Coefficients of the different loops in the Symanzik action + * @param eb3 The integration step size (for MILC this is dt*beta/3) + * @param milc_sitelink The gauge field from which we compute the force + * @param milc_momentum The momentum field to be updated + */ void qudaGaugeForce(int precision, - int num_loop_types, - double milc_loop_coeff[3], - double eb3, - void* milc_sitelink, - void* milc_momentum); - + int dummy, + double milc_loop_coeff[3], + double eb3, + void* milc_sitelink, + void* milc_momentum); + /** + * Compute the staggered quark-field outer product needed for gauge generation + * + * @param precision The precision of the field (2 - double, 1 - single) + * @param num_terms The number of quark fields + * @param coeff The coefficient multiplying the fermion fields in the outer product + * @param quark_field The input fermion field. + * @param oprod The outer product to be computed. + */ void qudaComputeOprod(int precision, - int num_terms, - double** coeff, - void** quark_field, - void* oprod[2]); + int num_terms, + double** coeff, + void** quark_field, + void* oprod[2]); /**