Skip to content

Commit

Permalink
Fixed a compilation bug in blas when only the staggered Dirac operator (GPU_STAGGERED_DIRAC) is enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
maddyscientist committed May 7, 2016
1 parent 2be9efe commit a242e6d
Showing 1 changed file with 26 additions and 27 deletions.
53 changes: 26 additions & 27 deletions lib/blas_core.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -275,40 +275,39 @@ template <template <typename Float, typename FloatN> class Functor,
errorQuda("blas has not been built for Nspin=%d fields", x.Nspin());
#endif
} else if (x.Precision() == QUDA_SINGLE_PRECISION) {
if (x.Nspin() == 4) {
#if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC)
const int M = 1;
#endif
if (x.Nspin() == 4) {
#if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC)
Spinor<float4,float4,float4,M,writeX,0> X(x);
Spinor<float4,float4,float4,M,writeY,1> Y(y);
Spinor<float4,float4,float4,M,writeZ,2> Z(z);
Spinor<float4,float4,float4,M,writeW,3> W(w);
Functor<float2, float4> f(make_float2(a.x, a.y), make_float2(b.x, b.y), make_float2(c.x, c.y));
BlasCuda<float4,M,
Spinor<float4,float4,float4,M,writeX,0>, Spinor<float4,float4,float4,M,writeY,1>,
Spinor<float4,float4,float4,M,writeZ,2>, Spinor<float4,float4,float4,M,writeW,3>,
Functor<float2, float4> > blas(X, Y, Z, W, f, x.Length()/(4*M), bytes, norm_bytes);
blas.apply(*blasStream);
Spinor<float4,float4,float4,M,writeX,0> X(x);
Spinor<float4,float4,float4,M,writeY,1> Y(y);
Spinor<float4,float4,float4,M,writeZ,2> Z(z);
Spinor<float4,float4,float4,M,writeW,3> W(w);
Functor<float2, float4> f(make_float2(a.x, a.y), make_float2(b.x, b.y), make_float2(c.x, c.y));
BlasCuda<float4,M,
Spinor<float4,float4,float4,M,writeX,0>, Spinor<float4,float4,float4,M,writeY,1>,
Spinor<float4,float4,float4,M,writeZ,2>, Spinor<float4,float4,float4,M,writeW,3>,
Functor<float2, float4> > blas(X, Y, Z, W, f, x.Length()/(4*M), bytes, norm_bytes);
blas.apply(*blasStream);
#else
errorQuda("blas has not been built for Nspin=%d fields", x.Nspin());
errorQuda("blas has not been built for Nspin=%d fields", x.Nspin());
#endif
} else if (x.Nspin()==2 || x.Nspin()==1) {
} else if (x.Nspin()==2 || x.Nspin()==1) {
#if defined(GPU_WILSON_DIRAC) || defined(GPU_DOMAIN_WALL_DIRAC) || defined(GPU_STAGGERED_DIRAC)
Spinor<float2,float2,float2,M,writeX,0> X(x);
Spinor<float2,float2,float2,M,writeY,1> Y(y);
Spinor<float2,float2,float2,M,writeZ,2> Z(z);
Spinor<float2,float2,float2,M,writeW,3> W(w);
Functor<float2, float2> f(make_float2(a.x, a.y), make_float2(b.x, b.y), make_float2(c.x, c.y));
BlasCuda<float2,M,
Spinor<float2,float2,float2,M,writeX,0>, Spinor<float2,float2,float2,M,writeY,1>,
Spinor<float2,float2,float2,M,writeZ,2>, Spinor<float2,float2,float2,M,writeW,3>,
Functor<float2, float2> > blas(X, Y, Z, W, f, x.Length()/(2*M), bytes, norm_bytes);
blas.apply(*blasStream);
const int M = 1;
Spinor<float2,float2,float2,M,writeX,0> X(x);
Spinor<float2,float2,float2,M,writeY,1> Y(y);
Spinor<float2,float2,float2,M,writeZ,2> Z(z);
Spinor<float2,float2,float2,M,writeW,3> W(w);
Functor<float2, float2> f(make_float2(a.x, a.y), make_float2(b.x, b.y), make_float2(c.x, c.y));
BlasCuda<float2,M,
Spinor<float2,float2,float2,M,writeX,0>, Spinor<float2,float2,float2,M,writeY,1>,
Spinor<float2,float2,float2,M,writeZ,2>, Spinor<float2,float2,float2,M,writeW,3>,
Functor<float2, float2> > blas(X, Y, Z, W, f, x.Length()/(2*M), bytes, norm_bytes);
blas.apply(*blasStream);
#else
errorQuda("blas has not been built for Nspin=%d fields", x.Nspin());
errorQuda("blas has not been built for Nspin=%d fields", x.Nspin());
#endif
} else { errorQuda("nSpin=%d is not supported\n", x.Nspin()); }
} else { errorQuda("nSpin=%d is not supported\n", x.Nspin()); }
} else {
if (x.Ncolor() != 3) { errorQuda("nColor = %d is not supported", x.Ncolor()); }
if (x.Nspin() == 4){ //wilson
Expand Down

0 comments on commit a242e6d

Please sign in to comment.