Skip to content
This repository has been archived by the owner on Aug 3, 2021. It is now read-only.

Commit

Permalink
checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
borisfom committed Oct 14, 2016
1 parent 8a75e34 commit ac964ee
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 728 deletions.
103 changes: 0 additions & 103 deletions lib/THC/THCHalf.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,106 +29,3 @@ void THCHalf2Float(THCState *state, float *out, half *in, ptrdiff_t len) {
#endif
in, in + len, out, __half2floatOp());
}

float THC_half2float(half a)
{
unsigned int bits = a.x & 0x7fff;
unsigned int sign = a.x & 0x8000;
unsigned int exp = a.x & 0x7c00;

bits <<= 13;
sign <<= 16;

bits += 0x38000000U;

// flush denormals to 0
bits = (exp == 0 ? 0 : bits) | sign;

union {
float f;
unsigned int v;
} conv;
conv.v = bits;

return conv.f;
}

/*
Copyright (c) 2015, Norbert Juffa
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

half THC_float2half(float a)
{
uint32_t ia;
uint16_t ir;
memcpy(&ia, &a, sizeof(float));

ir = (ia >> 16) & 0x8000;
if ((ia & 0x7f800000) == 0x7f800000) {
if ((ia & 0x7fffffff) == 0x7f800000) {
ir |= 0x7c00; /* infinity */
} else {
ir = 0x7fff; /* canonical NaN */
}
} else if ((ia & 0x7f800000) >= 0x33000000) {
int shift = (int)((ia >> 23) & 0xff) - 127;
if (shift > 15) {
ir |= 0x7c00; /* infinity */
} else {
ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */
if (shift < -14) { /* denormal */
ir |= ia >> (-1 - shift);
ia = ia << (32 - (-1 - shift));
} else { /* normal */
ir |= ia >> (24 - 11);
ia = ia << (32 - (24 - 11));
ir = ir + ((14 + shift) << 10);
}
/* IEEE-754 round to nearest of even */
if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) {
ir++;
}
}
}

half ret;
memcpy(&ret, &ir, sizeof(half));
return ret;
}

THC_EXTERNC int THC_nativeHalfInstructions(THCState *state) {
#if CUDA_HALF_INSTRUCTIONS
cudaDeviceProp* prop =
THCState_getCurrentDeviceProperties(state);

// CC 5.3+
return (prop->major > 5 ||
(prop->major == 5 && prop->minor == 3));
#else
return false;
#endif
}
87 changes: 51 additions & 36 deletions lib/THC/THCHalf.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#ifndef THC_HALF_CONVERSION_INC
#define THC_HALF_CONVERSION_INC
# define THC_HALF_CONVERSION_INC

#include "THCGeneral.h"
# include "THCGeneral.h"

/* We compile with CudaHalfTensor support if we have this: */
#if CUDA_VERSION >= 7050 || CUDA_HAS_FP16
#define CUDA_HALF_TENSOR 1
#endif
# if CUDA_VERSION >= 7050 || CUDA_HAS_FP16
# define CUDA_HALF_TENSOR 1
# endif

/* This define forces use of 32-bit float math on 16-bit float type 'half' (a.k.a. "pseudo-fp16 mode")
even if native harware support is available.
Expand All @@ -16,30 +16,54 @@
(especially if the bulk of your code is in CUDNN and not Cutorch).
*/

#define FORCE_PSEUDO_FP16 1
# define FORCE_PSEUDO_FP16 1

#ifndef FORCE_PSEUDO_FP16
# ifndef FORCE_PSEUDO_FP16
/* Kernel side: Native fp16 ALU instructions are available if we have this: */
# if defined(CUDA_HALF_TENSOR) && (CUDA_VERSION >= 8000) && (__CUDA_ARCH__ >= 530)
# define CUDA_HALF_INSTRUCTIONS 1
# if defined(CUDA_HALF_TENSOR) && (CUDA_VERSION >= 8000) && (__CUDA_ARCH__ >= 530)
# define CUDA_HALF_INSTRUCTIONS 1
# endif
# endif
#endif

#ifdef CUDA_HALF_TENSOR
#include <cuda_fp16.h>
#include <stdint.h>
# ifdef CUDA_HALF_TENSOR

# include <cuda_fp16.h>
# include <stdint.h>

/* CPU emulation */
THC_EXTERNC half THC_float2half(float a);
/* CPU conversion methods, scalar */
THC_EXTERNC half THC_float2half(float a);
THC_EXTERNC float THC_half2float(half a);

#if defined (__CUDA_ARCH__)
# define THC_FLOAT_TO_HALF(x) __float2half((float)x)
# define THC_HALF_TO_FLOAT(x) __half2float((float)x)
#else
# define THC_FLOAT_TO_HALF(x) THC_float2half((float)x)
# define THC_HALF_TO_FLOAT(x) THC_half2float((float)x)
#endif
//
// Vector conversion routines, using Thrust
//
THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, long len);
THC_EXTERNC void THCHalf2Float(THCState *state, float *out, half *in, long len);

# if defined (__CUDA_ARCH__)
/* use instrintic functons defined for device only in cuda_fp16.h */
# define THC_FLOAT_TO_HALF(x) __float2half((float)x)
# define THC_HALF_TO_FLOAT(x) __half2float((float)x)
# else
/* use host conversion functions */
# define THC_FLOAT_TO_HALF(x) THC_float2half((float)x)
# define THC_HALF_TO_FLOAT(x) THC_half2float((float)x)
# endif

/* Basic wrapper for 'half */
struct Half: public half {
const half& val() const { return *this; }
half& val() { return *this; }
const half& operator half() const { return val(); }
Half(const half& v): half(v) {}
Half(const Half& v): half(v.val) {}
};


/* "pseudo-fp16" type: 16-bit storage, float math */
struct PseudoHalf: public Half {
PseudoHalf(const Half& v): Half(v.val) {}
};

/* Check for native fp16 support on the current device (CC 5.3+) */
THC_EXTERNC int THC_nativeHalfInstructions(THCState *state);
Expand All @@ -52,28 +76,19 @@ __host__ __device__ __forceinline__ bool operator!=(const half& a, const half& b
return a.x != b.x;
}

#endif /* CUDA_HALF_TENSOR */

#ifdef __CUDA_ARCH__
//
// host (CPU) routines
//
THC_EXTERNC void THCFloat2Half(THCState *state, half *out, float *in, long len);
THC_EXTERNC void THCHalf2Float(THCState *state, float *out, half *in, long len);

/// `half` has some type conversion issues associated with it, since it
/// is a struct without a constructor/implicit conversion constructor.
/// We use this to convert scalar values to the given type that the
/// tensor expects.

template <typename In, typename Out>
struct ScalarConvert {
static inline __host__ __device__ Out to(const In& v) { return Out(v); }
static inline __host__ __device__ const Out& to(const In& v) { return Out(v); }
};

template <typename Out>
struct ScalarConvert<half, Out> {
static __host__ __device__ __forceinline__ Out to(const half& v) {
static __host__ __device__ __forceinline__ const Out& to(const half& v) {
return (Out) THC_HALF_TO_FLOAT(v);
}
};
Expand All @@ -87,10 +102,10 @@ struct ScalarConvert<In, half> {

template <>
struct ScalarConvert<half, half> {
static __host__ __device__ __forceinline__ half to(const half& v) {
static __host__ __device__ __forceinline__ const half& to(const half& v) {
return v;
}
};
#endif /* CUDA */

#endif
# endif /* CUDA_HALF_TENSOR */
#endif /* THC_HALF_CONVERSION_INC */
Loading

0 comments on commit ac964ee

Please sign in to comment.