Merge pull request #260 from lattice/hotfix/tifr

Hotfix/tifr
lattice · May 28, 2015 · 77b896e · 77b896e
2 parents 20543fa + e338930
commit 77b896e
Show file tree

Hide file tree

Showing 7 changed files with 567 additions and 360 deletions.
diff --git a/include/gauge_field_order.h b/include/gauge_field_order.h
@@ -444,13 +444,15 @@ namespace quda {
 #if __COMPUTE_CAPABILITY__ >= 200
       const int hasPhase; 
       const size_t phaseOffset;
+      void *backup_h; //! host memory for backing up the field when tuning
+      size_t bytes;
 #endif
 
     FloatNOrder(const GaugeField &u, Float *gauge_=0, Float **ghost_=0) : 
       reconstruct(u), volumeCB(u.VolumeCB()), stride(u.Stride()), geometry(u.Geometry())
 #if __COMPUTE_CAPABILITY__ >= 200
 	, hasPhase((u.Reconstruct() == QUDA_RECONSTRUCT_9 || u.Reconstruct() == QUDA_RECONSTRUCT_13) ? 1 : 0), 
-	phaseOffset(u.PhaseOffset())
+	phaseOffset(u.PhaseOffset()), backup_h(0), bytes(u.Bytes())
 #endif
       {
 	if (gauge_) { gauge[0] = gauge_; gauge[1] = (Float*)((char*)gauge_ + u.Bytes()/2);
@@ -467,7 +469,7 @@ namespace quda {
     : reconstruct(order.reconstruct), volumeCB(order.volumeCB), stride(order.stride), 
 	geometry(order.geometry) 
 #if __COMPUTE_CAPABILITY__ >= 200
-	, hasPhase(order.hasPhase), phaseOffset(order.phaseOffset) 
+	, hasPhase(order.hasPhase), phaseOffset(order.phaseOffset), backup_h(0), bytes(order.bytes)
 #endif
       {
 	gauge[0] = order.gauge[0];
@@ -618,6 +620,30 @@ namespace quda {
 	}
       }
 
+      /** Copy the device field into a newly allocated host buffer so that it
+	  can be put back with load() after tuning.  Errors if a backup exists. */
+      void save() {
+#if __COMPUTE_CAPABILITY__ >= 200
+	if (backup_h != 0) errorQuda("Already allocated host backup");
+	void *host_copy = safe_malloc(bytes);
+	cudaMemcpy(host_copy, gauge[0], bytes, cudaMemcpyDeviceToHost);
+	backup_h = host_copy;
+	checkCudaError();
+#endif
+      }
+
+      /** Restore the field from the host backup taken by save(), then release
+	  the backup.  Raises an error if no backup is currently held. */
+      void load() {
+#if __COMPUTE_CAPABILITY__ >= 200
+	if (!backup_h) errorQuda("No host backup to restore"); // load() without a matching save()
+	cudaMemcpy(gauge[0], backup_h, bytes, cudaMemcpyHostToDevice);
+	checkCudaError(); // report a failed copy before the backup is released
+	host_free(backup_h);
+	backup_h = 0; // allows a subsequent save()
+#endif
+      }
+
       size_t Bytes() const { return reconLen * sizeof(Float); }
     };
 

diff --git a/include/quda.h b/include/quda.h
@@ -635,31 +635,91 @@ extern "C" {
 
 
   /**
-   * Take a gauge field on the host, extend it and load it onto the device. 
-   * Return a pointer to the extended gauge field.
+   * Take a gauge field on the host, load it onto the device and extend it.
+   * Return a pointer to the extended gauge field object.
+   *
+   * @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
+   * @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
+   * @param param The parameters of the external field and the field to be created
+   * @return Pointer to the gauge field (cast as a void*)
    */
-  void* createExtendedGaugeField(void* gauge, int geometry, QudaGaugeParam* param);
-
-  void* createGaugeField(void* gauge, int geometry, QudaGaugeParam* param);
+  void* createExtendedGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param);
 
-  void  saveGaugeField(void* outGauge, void* inGauge, QudaGaugeParam* param);
+  /**
+   * Allocate a gauge (matrix) field on the device and optionally download a host gauge field.
+   *
+   * @param gauge The host gauge field (optional - if set to 0 then the gauge field is zeroed)
+   * @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
+   * @param param The parameters of the external field and the field to be created
+   * @return Pointer to the gauge field (cast as a void*)
+   */
+  void* createGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param);
 
-  void  extendGaugeField(void* outGauge, void* inGauge);
+  /**
+   * Copy the QUDA gauge (matrix) field on the device to the CPU
+   *
+   * @param outGauge Pointer to the host gauge field
+   * @param inGauge Pointer to the device gauge field (QUDA device field)
+   * @param param The parameters of the host and device fields
+   */
+  void  saveGaugeFieldQuda(void* outGauge, void* inGauge, QudaGaugeParam* param);
 
+  /**
+   * Take a gauge field on the device and copy to the extended gauge
+   * field.  The precisions and reconstruct types can differ between
+   * the input and output field, but they must be compatible (same volume, geometry).
+   *
+   * @param outGauge Pointer to the output extended device gauge field (QUDA extended device field)
+   * @param inGauge Pointer to the input device gauge field (QUDA gauge field)
+   */
+  void  extendGaugeFieldQuda(void* outGauge, void* inGauge);
 
   /**
-   *  Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
+   * Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
+   *
+   * @param gauge Gauge field to be freed
    */
-  void destroyQudaGaugeField(void* gauge);
+  void destroyGaugeFieldQuda(void* gauge);
 
+  /**
+   * Compute the clover field and its inverse from the resident gauge field.
+   *
+   * @param param The parameters of the clover field to create
+   */
   void createCloverQuda(QudaInvertParam* param);
 
-
-  void computeCloverTraceQuda(void* out, void* clover, int mu, int nu, int dim[4]);
-
+  /**
+   * Compute the sigma trace field (part of clover force computation).
+   * All the pointers here are for QUDA native device objects.  The
+   * precisions of all fields must match.  This function requires that
+   * there is a persistent clover field.
+   * 
+   * @param out Sigma trace field  (QUDA device field, geometry = 1)
+   * @param dummy (not used)
+   * @param mu mu direction
+   * @param nu nu direction
+   * @param dim array of local field dimensions
+   */
+  void computeCloverTraceQuda(void* out, void* dummy, int mu, int nu, int dim[4]);
+
+  /**
+   * Compute the derivative of the clover term (part of clover force
+   * computation).  All the pointers here are for QUDA native device
+   * objects.  The precisions of all fields must match.
+   * 
+   * @param out Clover derivative field (QUDA device field, geometry = 1)
+   * @param gauge Gauge field (extended QUDA device field, geometry = 4)
+   * @param oprod Matrix field (outer product) which is multiplied by the derivative
+   * @param mu mu direction
+   * @param nu nu direction
+   * @param coeff Coefficient of the clover derivative (including stepsize and clover coefficient)
+   * @param parity Parity for which we are computing
+   * @param param Gauge field meta data
+   * @param conjugate Whether to make the oprod field anti-hermitian prior to multiplication
+   */
   void computeCloverDerivativeQuda(void* out, void* gauge, void* oprod, int mu, int nu,
-      double coeff,
-      QudaParity parity, QudaGaugeParam* param, int conjugate);
+				   double coeff,
+				   QudaParity parity, QudaGaugeParam* param, int conjugate);
 
   /**
    * Compute the quark-field outer product needed for gauge generation

diff --git a/include/quda_milc_interface.h b/include/quda_milc_interface.h
@@ -221,27 +221,118 @@ extern "C" {
       void* oprod[2]);
 
 
+  /**
+   * Evolve the gauge field by step size dt, using the momentum field
+   * I.e., Evaluate U(t+dt) = e^(dt pi) U(t).  All fields are CPU fields in MILC order.
+   *
+   * @param precision Precision of the field (2 - double, 1 - single)
+   * @param eps The integration step size (dt)
+   * @param momentum The momentum field
+   * @param link The gauge field to be updated
+   */
   void qudaUpdateU(int precision, 
-      double eps,
-      void* momentum, 
-      void* link);
-
-  void qudaCloverTrace(void* out, void* clover, int mu, int nu);
-
-
-  void qudaCloverDerivative(void* out, void* gauge, void* oprod, 
-      int mu, int nu, double coeff, int precision, int parity, int conjugate);
-
-
-  void* qudaCreateExtendedGaugeField(void* gauge, int geometry, int precision);
-
-  void* qudaCreateGaugeField(void* gauge, int geometry, int precision);
-
-  void qudaSaveGaugeField(void* gauge, void* inGauge);
-
+		   double eps,
+		   void* momentum, 
+		   void* link);
+
+  /**
+   * Compute the sigma trace field (part of clover force computation).
+   * All the pointers here are for QUDA native device objects.  The
+   * precisions of all fields must match.  This function requires that
+   * there is a persistent clover field.
+   * 
+   * @param out Sigma trace field  (QUDA device field, geometry = 1)
+   * @param dummy (not used)
+   * @param mu mu direction
+   * @param nu nu direction
+   */
+  void qudaCloverTrace(void* out,
+		       void* dummy,
+		       int mu,
+		       int nu);
+
+
+  /**
+   * Compute the derivative of the clover term (part of clover force
+   * computation).  All the pointers here are for QUDA native device
+   * objects.  The precisions of all fields must match.
+   * 
+   * @param out Clover derivative field (QUDA device field, geometry = 1)
+   * @param gauge Gauge field (extended QUDA device field, geometry = 4)
+   * @param oprod Matrix field (outer product) which is multiplied by the derivative
+   * @param mu mu direction
+   * @param nu nu direction
+   * @param coeff Coefficient of the clover derivative (including stepsize and clover coefficient)
+   * @param precision Precision of the fields (2 = double, 1 = single)
+   * @param parity Parity for which we are computing
+   * @param conjugate Whether to make the oprod field anti-hermitian prior to multiplication
+   */
+  void qudaCloverDerivative(void* out,
+			    void* gauge,
+			    void* oprod, 
+			    int mu,
+			    int nu,
+			    double coeff,
+			    int precision,
+			    int parity,
+			    int conjugate);
+
+
+  /**
+   * Take a gauge field on the host, load it onto the device and extend it.
+   * Return a pointer to the extended gauge field object.
+   *
+   * @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
+   * @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
+   * @param precision The precision of the fields (2 - double, 1 - single)
+   * @return Pointer to the gauge field (cast as a void*)
+   */
+  void* qudaCreateExtendedGaugeField(void* gauge,
+				     int geometry,
+				     int precision);
+
+  /**
+   * Take the QUDA resident gauge field and extend it.
+   * Return a pointer to the extended gauge field object.
+   *
+   * @param gauge The CPU gauge field (optional - if set to 0 then the gauge field is zeroed)
+   * @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
+   * @param precision The precision of the fields (2 - double, 1 - single)
+   * @return Pointer to the gauge field (cast as a void*)
+   */
+  void* qudaResidentExtendedGaugeField(void* gauge,
+				       int geometry,
+				       int precision);
+
+  /**
+   * Allocate a gauge (matrix) field on the device and optionally download a host gauge field.
+   *
+   * @param gauge The host gauge field (optional - if set to 0 then the gauge field is zeroed)
+   * @param geometry The geometry of the matrix field to create (1 - scalar, 4 - vector, 6 - tensor)
+   * @param precision The precision of the field to be created (2 - double, 1 - single)
+   * @return Pointer to the gauge field (cast as a void*)
+   */
+  void* qudaCreateGaugeField(void* gauge,
+			     int geometry,
+			     int precision);
+
+  /**
+   * Copy the QUDA gauge (matrix) field on the device to the CPU
+   *
+   * @param gauge Pointer to the host gauge field
+   * @param inGauge Pointer to the device gauge field (QUDA device field)
+   */
+  void qudaSaveGaugeField(void* gauge,
+			  void* inGauge);
+
+  /**
+   * Reinterpret gauge as a pointer to cudaGaugeField and call destructor.
+   *
+   * @param gauge Gauge field to be freed
+   */
   void qudaDestroyGaugeField(void* gauge);
 
-
+  
 #ifdef __cplusplus
 }
 #endif