-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #253 from lattice/feature/gauge-fix
Gauge fixing, pure gauge and optimized gauge I/O routines
- Loading branch information
Showing
67 changed files
with
9,253 additions
and
880 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#pragma once | ||
|
||
/** | ||
@file atomic.cuh | ||
@section Description | ||
Provides definitions of atomic functions that are not native to | ||
CUDA. These are intentionally not declared in the namespace to | ||
avoid confusion when resolving the native atomicAdd functions. | ||
*/ | ||
|
||
/**
   Implementation of double-precision atomic addition using compare
   and swap.

   The definition is only emitted where the CUDA toolkit does not
   already provide atomicAdd(double*, double): in device compilation
   passes targeting compute capability below 6.0, or when building
   against a CUDA runtime older than 8.0.  Everywhere else the native
   overload is used, which avoids a conflicting redefinition.

   @param addr Address that stores the atomic variable to be updated
   @param val Value to be added to the atomic
   @return Value stored at addr immediately before the addition
*/
#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)) || \
    (defined(CUDART_VERSION) && (CUDART_VERSION < 8000))
static inline __device__ double atomicAdd(double *addr, double val){
  unsigned long long int *addr_bits = reinterpret_cast<unsigned long long int*>(addr);

  // initial guess for the stored value; refined by each failed swap
  double old = *addr, assumed;
  do {
    assumed = old;
    // try to replace `assumed` with `assumed + val`; atomicCAS returns
    // the value actually found at the address
    old = __longlong_as_double( atomicCAS(addr_bits,
                                          __double_as_longlong(assumed),
                                          __double_as_longlong(val + assumed)) );
    // compare bit patterns rather than floating-point values so that
    // the loop also terminates when the stored value is NaN
  } while ( __double_as_longlong(assumed) != __double_as_longlong(old) );

  return old;
}
#endif
|
||
/** | ||
Implementation of double2 atomic addition using two | ||
double-precision additions. | ||
@param addr Address that stores the atomic variable to be updated | ||
@param val Value to be added to the atomic | ||
*/ | ||
static inline __device__ double2 atomicAdd(double2 *addr, double2 val){ | ||
double2 old = *addr; | ||
old.x = atomicAdd((double*)addr, val.x); | ||
old.y = atomicAdd((double*)addr + 1, val.y); | ||
return old; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#pragma once | ||
#include <cub/cub.cuh> | ||
|
||
/** | ||
@file cub_helper.cuh | ||
@section Description | ||
Provides helper functors for custom datatypes for cub algorithms. | ||
*/ | ||
|
||
namespace quda { | ||
|
||
/** | ||
Helper functor for generic addition reduction. | ||
*/ | ||
template <typename T> | ||
struct Summ { | ||
__host__ __device__ __forceinline__ T operator() (const T &a, const T &b){ | ||
return a + b; | ||
} | ||
}; | ||
|
||
/** | ||
Helper functor for double2 addition reduction. | ||
*/ | ||
template <> | ||
struct Summ<double2>{ | ||
__host__ __device__ __forceinline__ double2 operator() (const double2 &a, const double2 &b){ | ||
return make_double2(a.x + b.x, a.y + b.y); | ||
} | ||
}; | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.