Skip to content

Commit

Permalink
Merge pull request #260 from lattice/hotfix/tifr
Browse files Browse the repository at this point in the history
Hotfix/tifr
  • Loading branch information
Mathias Wagner committed May 28, 2015
2 parents 20543fa + e338930 commit 77b896e
Show file tree
Hide file tree
Showing 7 changed files with 567 additions and 360 deletions.
30 changes: 28 additions & 2 deletions include/gauge_field_order.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,13 +444,15 @@ namespace quda {
#if __COMPUTE_CAPABILITY__ >= 200
const int hasPhase;
const size_t phaseOffset;
void *backup_h; //! host memory for backing up the field when tuning
size_t bytes;
#endif

FloatNOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0) :
reconstruct(u), volumeCB(u.VolumeCB()), stride(u.Stride()), geometry(u.Geometry())
#if __COMPUTE_CAPABILITY__ >= 200
, hasPhase((u.Reconstruct() == QUDA_RECONSTRUCT_9 || u.Reconstruct() == QUDA_RECONSTRUCT_13) ? 1 : 0),
phaseOffset(u.PhaseOffset())
phaseOffset(u.PhaseOffset()), backup_h(0), bytes(u.Bytes())
#endif
{
if (gauge_) { gauge[0] = gauge_; gauge[1] = (Float*)((char*)gauge_ + u.Bytes()/2);
Expand All @@ -467,7 +469,7 @@ namespace quda {
: reconstruct(order.reconstruct), volumeCB(order.volumeCB), stride(order.stride),
geometry(order.geometry)
#if __COMPUTE_CAPABILITY__ >= 200
, hasPhase(order.hasPhase), phaseOffset(order.phaseOffset)
, hasPhase(order.hasPhase), phaseOffset(order.phaseOffset), backup_h(0), bytes(order.bytes)
#endif
{
gauge[0] = order.gauge[0];
Expand Down Expand Up @@ -618,6 +620,30 @@ namespace quda {
}
}

/**
used to backup the field to the host when tuning
*/
void save() {
#if __COMPUTE_CAPABILITY__ >= 200
  // A non-null backup_h means an earlier save() has not yet been matched
  // by a load(); refuse to overwrite that backup.
  if (backup_h != 0) errorQuda("Already allocated host backup");

  // Allocate a host buffer of `bytes` bytes and fill it from gauge[0]
  // before publishing the pointer in the member.
  void *host_copy = safe_malloc(bytes);
  cudaMemcpy(host_copy, gauge[0], bytes, cudaMemcpyDeviceToHost);
  backup_h = host_copy;

  checkCudaError();
#endif
}

/**
restore the field from the host after tuning
*/
void load() {
#if __COMPUTE_CAPABILITY__ >= 200
  // load() is only meaningful after save(): without a backup buffer the
  // copy below would read from a null host pointer. This mirrors the
  // "already allocated" guard in save().
  if (!backup_h) errorQuda("No host backup allocated to restore from");

  // Copy the `bytes` bytes held in the host backup back into gauge[0].
  cudaMemcpy(gauge[0], backup_h, bytes, cudaMemcpyHostToDevice);

  // Check the copy before the backup is discarded.
  checkCudaError();

  // Release the backup and reset the pointer so that save() may be
  // called again.
  host_free(backup_h);
  backup_h = 0;
#endif
}

size_t Bytes() const {
  // Size in bytes of reconLen values of type Float.
  const size_t perValue = sizeof(Float);
  return reconLen * perValue;
}
};

Expand Down
88 changes: 74 additions & 14 deletions include/quda.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,31 +635,91 @@ extern "C" {


/**
* Take a gauge field on the host, extend it and load it onto the device.
* Return a pointer to the extended gauge field.
* Take a gauge field on the host, load it onto the device and extend it.
* Return a pointer to the extended gauge field object.
*
* @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param param The parameters of the external field and the field to be created
* @return Pointer to the gauge field (cast as a void*)
*/
void* createExtendedGaugeField(void* gauge, int geometry, QudaGaugeParam* param);

void* createGaugeField(void* gauge, int geometry, QudaGaugeParam* param);
void* createExtendedGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param);

void saveGaugeField(void* outGauge, void* inGauge, QudaGaugeParam* param);
/**
* Allocate a gauge (matrix) field on the device and optionally download a host gauge field.
*
* @param gauge The host gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param param The parameters of the external field and the field to be created
* @return Pointer to the gauge field (cast as a void*)
*/
void* createGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param);

void extendGaugeField(void* outGauge, void* inGauge);
/**
* Copy the QUDA gauge (matrix) field on the device to the CPU
*
* @param outGauge Pointer to the host gauge field
* @param inGauge Pointer to the device gauge field (QUDA device field)
* @param param The parameters of the host and device fields
*/
void saveGaugeFieldQuda(void* outGauge, void* inGauge, QudaGaugeParam* param);

/**
* Take a gauge field on the device and copy to the extended gauge
* field. The precisions and reconstruct types can differ between
* the input and output field, but they must be compatible (same volume, geometry).
*
* @param outGauge Pointer to the output extended device gauge field (QUDA extended device field)
* @param inGauge Pointer to the input device gauge field (QUDA gauge field)
*/
void extendGaugeFieldQuda(void* outGauge, void* inGauge);

/**
* Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
* Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
*
* @param gauge Gauge field to be freed
*/
void destroyQudaGaugeField(void* gauge);
void destroyGaugeFieldQuda(void* gauge);

/**
* Compute the clover field and its inverse from the resident gauge field.
*
* @param param The parameters of the clover field to create
*/
void createCloverQuda(QudaInvertParam* param);


void computeCloverTraceQuda(void* out, void* clover, int mu, int nu, int dim[4]);

/**
* Compute the sigma trace field (part of clover force computation).
* All the pointers here are for QUDA native device objects. The
* precisions of all fields must match. This function requires that
* there is a persistent clover field.
*
* @param out Sigma trace field (QUDA device field, geometry = 1)
* @param dummy (not used)
* @param mu mu direction
* @param nu nu direction
* @param dim array of local field dimensions
*/
void computeCloverTraceQuda(void* out, void* dummy, int mu, int nu, int dim[4]);

/**
* Compute the derivative of the clover term (part of clover force
* computation). All the pointers here are for QUDA native device
* objects. The precisions of all fields must match.
*
* @param out Clover derivative field (QUDA device field, geometry = 1)
* @param gauge Gauge field (extended QUDA device field, geometry = 4)
* @param oprod Matrix field (outer product) which is multiplied by the derivative
* @param mu mu direction
* @param nu nu direction
* @param coeff Coefficient of the clover derivative (including stepsize and clover coefficient)
* @param parity Parity for which we are computing
* @param param Gauge field meta data
* @param conjugate Whether to make the oprod field anti-hermitian prior to multiplication
*/
void computeCloverDerivativeQuda(void* out, void* gauge, void* oprod, int mu, int nu,
double coeff,
QudaParity parity, QudaGaugeParam* param, int conjugate);
double coeff,
QudaParity parity, QudaGaugeParam* param, int conjugate);

/**
* Compute the quark-field outer product needed for gauge generation
Expand Down
127 changes: 109 additions & 18 deletions include/quda_milc_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,27 +221,118 @@ extern "C" {
void* oprod[2]);


/**
* Evolve the gauge field by step size dt, using the momentum field
* I.e., Evaluate U(t+dt) = exp(dt pi) U(t). All fields are CPU fields in MILC order.
*
* @param precision Precision of the field (2 - double, 1 - single)
* @param eps The integration step size (dt in the expression above)
* @param momentum The momentum field
* @param link The gauge field to be updated
*/
void qudaUpdateU(int precision,
double eps,
void* momentum,
void* link);

void qudaCloverTrace(void* out, void* clover, int mu, int nu);


void qudaCloverDerivative(void* out, void* gauge, void* oprod,
int mu, int nu, double coeff, int precision, int parity, int conjugate);


void* qudaCreateExtendedGaugeField(void* gauge, int geometry, int precision);

void* qudaCreateGaugeField(void* gauge, int geometry, int precision);

void qudaSaveGaugeField(void* gauge, void* inGauge);

double eps,
void* momentum,
void* link);

/**
* Compute the sigma trace field (part of clover force computation).
* All the pointers here are for QUDA native device objects. The
* precisions of all fields must match. This function requires that
* there is a persistent clover field.
*
* @param out Sigma trace field (QUDA device field, geometry = 1)
* @param dummy (not used)
* @param mu mu direction
* @param nu nu direction
*/
void qudaCloverTrace(void* out,
void* dummy,
int mu,
int nu);


/**
* Compute the derivative of the clover term (part of clover force
* computation). All the pointers here are for QUDA native device
* objects. The precisions of all fields must match.
*
* @param out Clover derivative field (QUDA device field, geometry = 1)
* @param gauge Gauge field (extended QUDA device field, geometry = 4)
* @param oprod Matrix field (outer product) which is multiplied by the derivative
* @param mu mu direction
* @param nu nu direction
* @param coeff Coefficient of the clover derivative (including stepsize and clover coefficient)
* @param precision Precision of the fields (2 = double, 1 = single)
* @param parity Parity for which we are computing
* @param conjugate Whether to make the oprod field anti-hermitian prior to multiplication
*/
void qudaCloverDerivative(void* out,
void* gauge,
void* oprod,
int mu,
int nu,
double coeff,
int precision,
int parity,
int conjugate);


/**
* Take a gauge field on the host, load it onto the device and extend it.
* Return a pointer to the extended gauge field object.
*
* @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param precision The precision of the fields (2 - double, 1 - single)
* @return Pointer to the gauge field (cast as a void*)
*/
void* qudaCreateExtendedGaugeField(void* gauge,
int geometry,
int precision);

/**
* Take the QUDA resident gauge field and extend it.
* Return a pointer to the extended gauge field object.
*
* @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param precision The precision of the fields (2 - double, 1 - single)
* @return Pointer to the gauge field (cast as a void*)
*/
void* qudaResidentExtendedGaugeField(void* gauge,
int geometry,
int precision);

/**
* Allocate a gauge (matrix) field on the device and optionally download a host gauge field.
*
* @param gauge The host gauge field (optional - if set to 0 then the gauge field is zeroed)
* @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
* @param precision The precision of the field to be created (2 - double, 1 - single)
* @return Pointer to the gauge field (cast as a void*)
*/
void* qudaCreateGaugeField(void* gauge,
int geometry,
int precision);

/**
* Copy the QUDA gauge (matrix) field on the device to the CPU
*
* @param gauge Pointer to the host gauge field
* @param inGauge Pointer to the device gauge field (QUDA device field)
*/
void qudaSaveGaugeField(void* gauge,
void* inGauge);

/**
* Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
*
* @param gauge Gauge field to be freed
*/
void qudaDestroyGaugeField(void* gauge);


#ifdef __cplusplus
}
#endif
Expand Down
Loading

0 comments on commit 77b896e

Please sign in to comment.