Skip to content

Commit

Permalink
Merge pull request lattice#2 from ETMC-QUDA/hotfix/loops
Browse files Browse the repository at this point in the history
Separate out loop calculation functions, update smethod default param…
  • Loading branch information
ckallidonis authored Mar 2, 2017
2 parents d6d9383 + 14ee9e2 commit 6aa1279
Show file tree
Hide file tree
Showing 5 changed files with 1,386 additions and 1,053 deletions.
5 changes: 3 additions & 2 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Dependencies:
In order to compile this code, one must also have the following installed:

QUDA specific
REQUIRED:
magma 1.7.0

ETMC-QUDA specific
Expand Down Expand Up @@ -47,7 +48,7 @@ to the interface_quda.cpp file of mainline QUDA. This is so we can use the
internal gauge field structures directly. The class definitions and header
files of auxiliary functions are contained in files prepended like so,

QKXTM_<some file>.suffix
qudaQKXTM_<some file>.suffix

There is a single .cu file for kernels, a 'code_pieces' directory in lib/,
a single qudaQKXTM_Kepler_utils.cpp and .h file for containers and general
Expand Down Expand Up @@ -75,7 +76,7 @@ of QUDA.

Authors:

The majority of this code base was authored by,
The majority of the QKXTM code base was authored by,

Kyriakos Hadjiyiannakou
Christos Kallidonis
Expand Down
228 changes: 0 additions & 228 deletions lib/qudaQKXTM_Deflation_Kepler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1907,231 +1907,3 @@ projectVector(QKXTM_Vector_Kepler<Float> &vec_defl,
printfQuda("projectVector: Deflation of the source vector completed succesfully\n");
}

//------------------------------------------------------------------//
//- C.K. Functions to perform and write the exact part of the loop -//
//------------------------------------------------------------------//

template<typename Float>
void QKXTM_Deflation_Kepler<Float>::
Loop_w_One_Der_FullOp_Exact(int n, QudaInvertParam *param,
void *gen_uloc,void *std_uloc,
void **gen_oneD,
void **std_oneD,
void **gen_csvC,
void **std_csvC){

if(!isFullOp) errorQuda("oneEndTrick_w_One_Der_FullOp_Exact: This function only works with the full operator\n");

void *h_ctrn, *ctrnS, *ctrnC;

double t1,t2;

if((cudaMallocHost(&h_ctrn, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3])) == cudaErrorMemoryAllocation)
errorQuda("oneEndTrick_w_One_Der_FullOp_Exact: Error allocating memory for contraction results in CPU.\n");
cudaMemset(h_ctrn, 0, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3]);

if((cudaMalloc(&ctrnS, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3])) == cudaErrorMemoryAllocation)
errorQuda("oneEndTrick_w_One_Der_FullOp_Exact: Error allocating memory for contraction results in GPU.\n");
cudaMemset(ctrnS, 0, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3]);

if((cudaMalloc(&ctrnC, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3])) == cudaErrorMemoryAllocation)
errorQuda("oneEndTrick_w_One_Der_FullOp_Exact: Error allocating memory for contraction results in GPU.\n");
cudaMemset(ctrnC, 0, sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3]);

checkCudaError();

//- Set the eigenvector into cudaColorSpinorField format and save to x
bool pc_solve = false;
cudaColorSpinorField *x1 = NULL;

double *eigVec = (double*) malloc(bytes_total_length_per_NeV);
memcpy(eigVec,&(h_elem[n*total_length_per_NeV]),bytes_total_length_per_NeV);

QKXTM_Vector_Kepler<double> *Kvec =
new QKXTM_Vector_Kepler<double>(BOTH,VECTOR);

ColorSpinorParam cpuParam((void*)eigVec,*param,GK_localL,pc_solve);
ColorSpinorParam cudaParam(cpuParam, *param);
cudaParam.create = QUDA_ZERO_FIELD_CREATE;
x1 = new cudaColorSpinorField(cudaParam);

Kvec->packVector(eigVec);
Kvec->loadVector();
Kvec->uploadToCuda(x1,pc_solve);

Float eVal = eigenValues[2*n+0];

cudaColorSpinorField *tmp1 = NULL;
cudaColorSpinorField *tmp2 = NULL;
cudaParam.create = QUDA_ZERO_FIELD_CREATE;
tmp1 = new cudaColorSpinorField(cudaParam);
tmp2 = new cudaColorSpinorField(cudaParam);
blas::zero(*tmp1);
blas::zero(*tmp2);

cudaColorSpinorField &tmp3 = *tmp1;
cudaColorSpinorField &tmp4 = *tmp2;
cudaColorSpinorField &x = *x1;
//------------------------------------------------------------------------

DiracParam dWParam;
dWParam.matpcType = QUDA_MATPC_EVEN_EVEN;
dWParam.dagger = QUDA_DAG_NO;
dWParam.gauge = gaugePrecise;
dWParam.kappa = param->kappa;
dWParam.mass = 1./(2.*param->kappa) - 4.;
dWParam.m5 = 0.;
dWParam.mu = 0.;
for(int i=0; i<4; i++)
dWParam.commDim[i] = 1;

if(param->dslash_type == QUDA_TWISTED_CLOVER_DSLASH){
dWParam.type = QUDA_CLOVER_DIRAC;
dWParam.clover = cloverPrecise;
DiracClover *dW = new DiracClover(dWParam);
dW->M(tmp4,x);
delete dW;
}
else if (param->dslash_type == QUDA_TWISTED_MASS_DSLASH){
dWParam.type = QUDA_WILSON_DIRAC;
DiracWilson *dW = new DiracWilson(dWParam);
dW->M(tmp4,x);
delete dW;
}
else{
errorQuda("oneEndTrick_w_One_Der_FullOp_Exact: One end trick works only for twisted mass fermions\n");
}
checkCudaError();

gamma5Cuda(static_cast<cudaColorSpinorField*>(&tmp3.Even()),
static_cast<cudaColorSpinorField*>(&tmp4.Even()));
gamma5Cuda(static_cast<cudaColorSpinorField*>(&tmp3.Odd()),
static_cast<cudaColorSpinorField*>(&tmp4.Odd()));

long int sizeBuffer;
sizeBuffer =
sizeof(Float)*32*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3];
CovD *cov = new CovD(gaugePrecise, profileCovDev);

int NN = 16*GK_localL[0]*GK_localL[1]*GK_localL[2]*GK_localL[3];
int incx = 1;
int incy = 1;
Float pceval[2] = {1.0/eVal,0.0};
Float mceval[2] = {-1.0/eVal,0.0};

// ULTRA-LOCAL Generalized one-end trick
contract(x, tmp3, ctrnS, QUDA_CONTRACT_GAMMA5);
cudaMemcpy(h_ctrn, ctrnS, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN,(void*)pceval,(void*)h_ctrn,incx,(void*)gen_uloc,incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN,(void*)pceval,(void*)h_ctrn,incx,(void*)gen_uloc,incy);
//------------------------------------------------

// ULTRA-LOCAL Standard one-end trick
contract(x, x, ctrnS, QUDA_CONTRACT_GAMMA5);
cudaMemcpy(h_ctrn, ctrnS, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN,(void*)mceval,(void*)h_ctrn,incx,(void*)std_uloc,incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN,(void*)mceval,(void*)h_ctrn,incx,(void*)std_uloc,incy);
//------------------------------------------------

cudaDeviceSynchronize();

// ONE-DERIVATIVE Generalized one-end trick
for(int mu=0; mu<4; mu++){
cov->M(tmp4,tmp3,mu);
// Term 0
contract(x, tmp4, ctrnS, QUDA_CONTRACT_GAMMA5);

cov->M (tmp4, x, mu+4);
// Term 0 + Term 3
contract(tmp4, tmp3, ctrnS, QUDA_CONTRACT_GAMMA5_PLUS);
cudaMemcpy(ctrnC, ctrnS, sizeBuffer, cudaMemcpyDeviceToDevice);

cov->M (tmp4, x, mu);
// Term 0 + Term 3 + Term 2 (C Sum)
contract(tmp4, tmp3, ctrnC, QUDA_CONTRACT_GAMMA5_PLUS);
// Term 0 + Term 3 - Term 2 (D Dif)
contract(tmp4, tmp3, ctrnS, QUDA_CONTRACT_GAMMA5_MINUS);

cov->M (tmp4, tmp3, mu+4);
// Term 0 + Term 3 + Term 2 + Term 1 (C Sum)
contract(x, tmp4, ctrnC, QUDA_CONTRACT_GAMMA5_PLUS);
// Term 0 + Term 3 - Term 2 - Term 1 (D Dif)
contract(x, tmp4, ctrnS, QUDA_CONTRACT_GAMMA5_MINUS);
cudaMemcpy(h_ctrn, ctrnS, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN, (void*) pceval, (void*) h_ctrn, incx,
(void*) gen_oneD[mu], incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN, (void*) pceval, (void*) h_ctrn, incx,
(void*) gen_oneD[mu], incy);

cudaMemcpy(h_ctrn, ctrnC, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN, (void*) pceval, (void*) h_ctrn, incx,
(void*) gen_csvC[mu], incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN, (void*) pceval, (void*) h_ctrn, incx,
(void*) gen_csvC[mu], incy);
}

//------------------------------------------------

// ONE-DERIVATIVE Standard one-end trick
for(int mu=0; mu<4; mu++){
cov->M (tmp4, x, mu);
cov->M (tmp3, x, mu+4);
// Term 0
contract(x, tmp4, ctrnS, QUDA_CONTRACT_GAMMA5);
// Term 0 + Term 3
contract(tmp3, x, ctrnS, QUDA_CONTRACT_GAMMA5_PLUS);
cudaMemcpy(ctrnC, ctrnS, sizeBuffer, cudaMemcpyDeviceToDevice);

// Term 0 + Term 3 + Term 2 (C Sum)
contract(tmp4, x, ctrnC, QUDA_CONTRACT_GAMMA5_PLUS);
// Term 0 + Term 3 - Term 2 (D Dif)
contract(tmp4, x, ctrnS, QUDA_CONTRACT_GAMMA5_MINUS);
// Term 0 + Term 3 + Term 2 + Term 1 (C Sum)
contract(x, tmp3, ctrnC, QUDA_CONTRACT_GAMMA5_PLUS);
// Term 0 + Term 3 - Term 2 - Term 1 (D Dif)
contract(x, tmp3, ctrnS, QUDA_CONTRACT_GAMMA5_MINUS);
cudaMemcpy(h_ctrn, ctrnS, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN, (void*) mceval, (void*) h_ctrn, incx,
(void*) std_oneD[mu], incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN, (void*) mceval, (void*) h_ctrn, incx,
(void*) std_oneD[mu], incy);

cudaMemcpy(h_ctrn, ctrnC, sizeBuffer, cudaMemcpyDeviceToHost);

if( typeid(Float) == typeid(float) )
cblas_caxpy(NN, (void*) mceval, (void*) h_ctrn, incx,
(void*) std_csvC[mu], incy);
else if( typeid(Float) == typeid(double) )
cblas_zaxpy(NN, (void*) mceval, (void*) h_ctrn, incx,
(void*) std_csvC[mu], incy);
}

//------------------------------------------------

delete Kvec;
delete x1;
delete tmp1;
delete tmp2;
free(eigVec);

delete cov;
cudaFreeHost(h_ctrn);
cudaFree(ctrnS);
cudaFree(ctrnC);
checkCudaError();
}
Loading

0 comments on commit 6aa1279

Please sign in to comment.