From 93836c0e3c9111eb57d539ad0ae9e160117742a8 Mon Sep 17 00:00:00 2001
From: mclark
Date: Thu, 16 Jul 2015 22:25:01 +0900
Subject: [PATCH] Fixed memory bandwidth computations for 4-d preconditioned
 5-d dslash operators (closes #315).

---
 lib/dslash_domain_wall_4d.cu | 33 +++++++++++++++++++++++++++++++++
 lib/dslash_mobius.cu         | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/lib/dslash_domain_wall_4d.cu b/lib/dslash_domain_wall_4d.cu
index 15730330be..5051392c36 100644
--- a/lib/dslash_domain_wall_4d.cu
+++ b/lib/dslash_domain_wall_4d.cu
@@ -215,6 +215,39 @@ namespace quda {
       }
       return flops;
     }
+
+    /**
+       Estimate the memory traffic (in bytes) of this kernel for the
+       bandwidth report.  DS_type 0 defers to DslashCuda::bytes(); the
+       other types are counted as a number of spinor streams, each of
+       spinor_bytes * in->VolumeCB() bytes, plus one more stream when x
+       is set (x is declared outside this hunk; presumably the optional
+       accumulate field -- confirm).
+       @return estimated bytes moved; 0 only if errorQuda() returns
+    */
+    long long bytes() const {
+      // 2-byte precision adds sizeof(float) per spinor (presumably the norm -- confirm)
+      const bool isHalf = in->Precision() == sizeof(short);
+      const int spinor_bytes = 2 * in->Ncolor() * in->Nspin() * in->Precision() + (isHalf ? sizeof(float) : 0);
+      const long long Ls = in->X(4);
+      // initialised so the default path never returns an indeterminate value
+      long long bytes_ = 0;
+
+      switch(DS_type){
+      case 0:
+        bytes_ = DslashCuda::bytes();
+        break;
+      case 1:
+        bytes_ = (x ? 5ll : 4ll) * spinor_bytes * in->VolumeCB();
+        break;
+      case 2:
+        bytes_ = (x ? Ls + 2 : Ls + 1) * spinor_bytes * in->VolumeCB();
+        break;
+      default:
+        errorQuda("invalid Dslash type");
+      }
+      return bytes_;
+    }
   };
 
 #endif // GPU_DOMAIN_WALL_DIRAC
diff --git a/lib/dslash_mobius.cu b/lib/dslash_mobius.cu
index 144f79b7b8..ed5dadd68f 100644
--- a/lib/dslash_mobius.cu
+++ b/lib/dslash_mobius.cu
@@ -228,6 +228,40 @@ namespace quda {
       }
       return flops;
     }
+
+    /**
+       Estimate the memory traffic (in bytes) of this kernel for the
+       bandwidth report.  DS_type 0 defers to DslashCuda::bytes();
+       DS_type 1 and 2 share a count of 4 spinor streams and DS_type 3
+       uses Ls + 1 streams, each of spinor_bytes * in->VolumeCB() bytes,
+       plus one more stream when x is set (x is declared outside this
+       hunk; presumably the optional accumulate field -- confirm).
+       @return estimated bytes moved; 0 only if errorQuda() returns
+    */
+    long long bytes() const {
+      // 2-byte precision adds sizeof(float) per spinor (presumably the norm -- confirm)
+      const bool isHalf = in->Precision() == sizeof(short);
+      const int spinor_bytes = 2 * in->Ncolor() * in->Nspin() * in->Precision() + (isHalf ? sizeof(float) : 0);
+      const long long Ls = in->X(4);
+      // initialised so the default path never returns an indeterminate value
+      long long bytes_ = 0;
+
+      switch(DS_type){
+      case 0:
+        bytes_ = DslashCuda::bytes();
+        break;
+      case 1:
+      case 2:
+        bytes_ = (x ? 5ll : 4ll) * spinor_bytes * in->VolumeCB();
+        break;
+      case 3:
+        bytes_ = (x ? Ls + 2 : Ls + 1) * spinor_bytes * in->VolumeCB();
+        break;
+      default:
+        errorQuda("invalid Dslash type");
+      }
+      return bytes_;
+    }
   };
 
 #endif // GPU_DOMAIN_WALL_DIRAC