Merge branch 'cpu_half' into spec_refactor
borisfom committed Nov 1, 2016
2 parents 38882e6 + 10ef056 commit b572cf9
Showing 51 changed files with 315 additions and 313 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -11,7 +11,7 @@ IF (NOT WIN32)
SET(CMAKE_C_FLAGS "-std=c99 -Werror=implicit-function-declaration ${CMAKE_C_FLAGS}")
ENDIF (NOT WIN32)
IF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
SET(CMAKE_C_FLAGS "-DCUDA_HAS_FP16=1 ${CMAKE_C_FLAGS}")
SET(CMAKE_C_FLAGS "-DCUDA_HAS_FP16=1 -DTH_GENERIC_USE_HALF=1 ${CMAKE_C_FLAGS}")
ENDIF()

INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS})
1 change: 1 addition & 0 deletions FFI.lua
@@ -69,6 +69,7 @@ typedef struct THCStorage
THAllocator *allocator;
void *allocatorContext;
struct THCStorage *view;
int device;
} THCStorage;
typedef struct THCTensor
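The new `device` field records the GPU on which a storage was allocated; the `getDevice` binding touched further down in this commit reads it back. A minimal Lua sketch, assuming `getDevice` is exposed on CUDA storages the same way it is on CUDA tensors:

require 'cutorch'

-- Storages now track the GPU they were allocated on; getDevice reads
-- the new `device` field (device indices are 1-based on the Lua side).
local s = torch.CudaStorage(16)
print(s:getDevice())  -- 1 on a single-GPU machine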
9 changes: 2 additions & 7 deletions Tensor.lua
@@ -24,6 +24,7 @@ end

local TensorTypes = {
float = 'torch.FloatTensor',
half = 'torch.HalfTensor',
double = 'torch.DoubleTensor',
byte = 'torch.ByteTensor',
char = 'torch.CharTensor',
@@ -85,11 +86,5 @@ for ValueType, CudaTensorType in pairs(CudaTensorTypes) do
end

if cutorch.hasHalf then
do
local function Tensor__totable(self)
local host_tensor = self:float()
return self:float():totable()
end
rawset(torch.getmetatable('torch.CudaHalfTensor'), 'totable', Tensor__totable)
end
CudaTensorTypes.half = 'torch.CudaHalfTensor'
end
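With `torch.HalfTensor` registered as an ordinary CPU type, the hand-written `totable` override for `torch.CudaHalfTensor` is no longer needed; the generic conversion path covers it. A hedged usage sketch (requires a build where `cutorch.hasHalf` is true):

require 'cutorch'

if cutorch.hasHalf then
  -- copy() dispatches on the source type (see the CTensor.c changes below),
  -- and totable() now goes through the generic conversion path.
  local f = torch.FloatTensor(2, 2):fill(0.5)
  local h = torch.CudaHalfTensor(2, 2):copy(f)
  print(h:totable()[1][1])  -- 0.5 (exactly representable in fp16)
end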
26 changes: 2 additions & 24 deletions generic/CStorage.c
@@ -2,11 +2,8 @@
#define THC_GENERIC_FILE "generic/CStorage.c"
#else

#include "THCHalf.h"

/* everything is as the generic Storage.c, except few things (see below) */

#ifndef THC_REAL_IS_HALF
#define THFile_readRealRaw(file, data, size) \
{ \
real *fdata = (real*)THAlloc(sizeof(real)*size); \
@@ -22,23 +19,6 @@
TH_CONCAT_3(THFile_write,Real,Raw)(file, fdata, size); \
THFree(fdata); \
}
#else
#define THFile_readRealRaw(file, data, size) \
{ \
real *fdata = (real*)THAlloc(sizeof(real)*size); \
THFile_readCharRaw(file, (char *)fdata, sizeof(real) * size); \
THCudaCheck(cudaMemcpy(data, fdata, size * sizeof(real), cudaMemcpyHostToDevice)); \
THFree(fdata); \
}

#define THFile_writeRealRaw(file, data, size) \
{ \
real *fdata = (real*)THAlloc(sizeof(real)*size); \
THCudaCheck(cudaMemcpy(fdata, data, size * sizeof(real), cudaMemcpyDeviceToHost)); \
THFile_writeCharRaw(file, (char *)fdata, size * sizeof(real)); \
THFree(fdata); \
}
#endif

#define TH_GENERIC_FILE "generic/Storage.c"
#include "generic/Storage.c"
@@ -87,14 +67,15 @@ static int cutorch_Storage_(copy)(lua_State *L)
THCStorage_(copyFloat)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.DoubleStorage")) )
THCStorage_(copyDouble)(state, storage, src);
else if( (src = luaT_toudata(L, 2, "torch.HalfStorage")) )
THCStorage_(copyHalf)(state, storage, src);
else
luaL_typerror(L, 2, "torch.*Storage");

lua_settop(L, 1);
return 1;
}

#ifndef THC_REAL_IS_HALF
static int TH_CONCAT_3(cutorch_,Real,Storage_copy)(lua_State *L)
{
THStorage *storage = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Storage));
@@ -139,7 +120,6 @@ static int TH_CONCAT_3(cutorch_,Real,Storage_copy)(lua_State *L)
lua_settop(L, 1);
return 1;
}
#endif

static int cutorch_Storage_(getDevice)(lua_State *L) {
THCStorage *storage = luaT_checkudata(L, 1, torch_Storage);
@@ -154,12 +134,10 @@ void cutorch_Storage_(init)(lua_State* L)

// torch_Storage macro is defined in Storage.c produce the CudaTensor types
// so I have to construct the normal torch types by hand
#ifndef THC_REAL_IS_HALF
luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Storage));
lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Storage_copy));
lua_setfield(L, -2, "copy");
lua_pop(L, 1);
#endif

luaT_pushmetatable(L, torch_Storage);
lua_pushcfunction(L, cutorch_Storage_(copy));
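The `copy` binding now also dispatches on `torch.HalfStorage`, so host half storages copy straight into CUDA storages. A sketch, assuming the CPU-side half types are available (they are gated by the `TH_GENERIC_USE_HALF` flag added in CMakeLists.txt above):

require 'cutorch'

if cutorch.hasHalf then
  local host = torch.HalfStorage(4)
  local dev  = torch.CudaStorage(4)
  dev:copy(host)  -- routed through the new THCStorage_(copyHalf) branch
end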
13 changes: 0 additions & 13 deletions generic/CTensor.c
@@ -2,8 +2,6 @@
#define THC_GENERIC_FILE "generic/CTensor.c"
#else

#include "THCHalf.h"

/* everything is as the generic Storage.c, except few things (see below) */

#define TH_GENERIC_FILE "generic/Tensor.c"
@@ -56,7 +54,6 @@ static int cutorch_Tensor_(copy)(lua_State *L)
return 1;
}

#ifndef THC_REAL_IS_HALF
static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L)
{
#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
@@ -74,10 +71,7 @@ static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L)
return 1;
#undef STRINGIFY_TENSOR
}
#endif


#ifndef THC_REAL_IS_HALF
static int TH_CONCAT_3(cutorch_,Real,Tensor_copy)(lua_State *L)
{
THTensor *tensor = luaT_checkudata(L, 1, TH_CONCAT_STRING_3(torch.,Real,Tensor));
@@ -122,9 +116,7 @@ static int TH_CONCAT_3(cutorch_,Real,Tensor_copy)(lua_State *L)
lua_settop(L, 1);
return 1;
}
#endif

#ifndef THC_REAL_IS_HALF
static int TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda)(lua_State *L)
{
#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
@@ -139,9 +131,6 @@ static int TH_CONCAT_3(cutorch_,Real,Tensor_copyAsyncCuda)(lua_State *L)
return 1;
#undef STRINGIFY_TENSOR
}
#endif



#ifdef THC_REAL_IS_FLOAT
static void THFloatTensor_computesz(THFloatTensor *self, long **sz_, long **st_)
@@ -248,7 +237,6 @@ void cutorch_Tensor_(init)(lua_State* L)
lua_pop(L, 1);
#endif

#ifndef THC_REAL_IS_HALF
luaT_pushmetatable(L, TH_CONCAT_STRING_3(torch.,Real,Tensor));
lua_pushcfunction(L, TH_CONCAT_3(cutorch_,Real,Tensor_copy));
lua_setfield(L, -2, "copy");
@@ -264,7 +252,6 @@ void cutorch_Tensor_(init)(lua_State* L)
lua_pushcfunction(L, cutorch_Tensor_(copyAsyncCPU));
lua_setfield(L, -2, "copyAsync");
lua_pop(L, 1);
#endif

luaT_pushmetatable(L, torch_Tensor);
lua_pushcfunction(L, cutorch_Tensor_(copy));
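Dropping the `THC_REAL_IS_HALF` guards means `copyAsync` is registered for every tensor type, half included. A usage sketch; note that a truly asynchronous copy needs page-locked host memory (see the THCAllocator.c notes below):

require 'cutorch'

local src = torch.FloatTensor(1024):fill(1)
local dst = torch.CudaTensor(1024)
dst:copyAsync(src)      -- queued on the current stream
cutorch.synchronize()   -- wait before reading the result
print(dst[1])           -- 1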
5 changes: 2 additions & 3 deletions init.c
@@ -3,7 +3,6 @@
#include "THCGeneral.h"
#include "THCCachingAllocator.h"
#include "THCTensorRandom.h"
#include "THCHalf.h" // for CUDA_HALF_TENSOR

extern void cutorch_CudaByteStorage_init(lua_State* L);
extern void cutorch_CudaCharStorage_init(lua_State* L);
@@ -911,11 +910,11 @@ static int cutorch_hasHalfInstructions(lua_State *L) {

static int cutorch_hasFastHalfInstructions(lua_State *L) {
THCState *state = cutorch_getstate(L);
#ifdef CUDA_HALF_TENSOR
#ifdef CUDA_HALF_TENSOR
lua_pushboolean(L, THC_fastHalfInstructions(state));
#else
lua_pushboolean(L, 0);
#endif
#endif
return 1;
}

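`hasFastHalfInstructions` distinguishes devices that can merely store FP16 from those with native half arithmetic, letting callers choose a compute type. A hedged sketch of the Lua-side check:

require 'cutorch'

-- Store in half to save memory, but only compute in half when the
-- device has fast native fp16 instructions.
local halfMath = cutorch.hasHalf and cutorch.hasFastHalfInstructions()
print('fp16 storage available:', cutorch.hasHalf)
print('fast fp16 math:', halfMath)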
6 changes: 3 additions & 3 deletions lib/THC/CMakeLists.txt
@@ -113,6 +113,7 @@ ELSE()
SET(THC_INSTALL_CMAKE_SUBDIR ${Torch_INSTALL_CMAKE_SUBDIR})
ENDIF()

INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}")
CONFIGURE_FILE(THCGeneral.h.in "${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h")

@@ -123,10 +124,8 @@ ELSE()
ENDIF()

SET(src
THCAllocator.c
THCCachingAllocator.cpp
THCGeneral.c
THCStorage.c
THCStorageCopy.c
THCStream.c
THCTensor.c
@@ -176,7 +175,7 @@ IF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
LIST(APPEND src-cuda THCHalf.cu)
LIST(APPEND src-cuda THCHalf.c)
LIST(APPEND CUDA_NVCC_FLAGS "-DCUDA_HAS_FP16=1")
SET(CMAKE_C_FLAGS "-DCUDA_HAS_FP16=1 ${CMAKE_C_FLAGS}")
SET(CMAKE_C_FLAGS "-DCUDA_HAS_FP16=1 -DTH_GENERIC_USE_HALF=1 ${CMAKE_C_FLAGS}")
ELSE(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
MESSAGE(STATUS "Could not find CUDA with FP16 support, compiling without torch.CudaHalfTensor")
ENDIF(CUDA_HAS_FP16 OR NOT ${CUDA_VERSION} LESS 7.5)
@@ -203,6 +202,7 @@ INSTALL(FILES
THCStorage.h
THCStorageCopy.h
THCStream.h
THCThreadLocal.h
THCTensor.h
THCTensorCopy.h
THCTensorRandom.h
22 changes: 5 additions & 17 deletions lib/THC/THCAllocator.c
@@ -1,6 +1,6 @@
#include "THCAllocator.h"

static void *THCudaHostAllocator_alloc(void* ctx, ptrdiff_t size) {
static void *THCudaHostAllocator_malloc(void* ctx, ptrdiff_t size) {
void* ptr;

if (size < 0) THError("Invalid memory size: %ld", size);
@@ -18,20 +18,8 @@ static void THCudaHostAllocator_free(void* ctx, void* ptr) {
THCudaCheck(cudaFreeHost(ptr));
}

static void *THCudaHostAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {
if (size < 0) THError("Invalid memory size: %ld", size);

THCudaHostAllocator_free(ctx, ptr);

if (size == 0) return NULL;

THCudaCheck(cudaMallocHost(&ptr, size));

return ptr;
}

void THCAllocator_init(THAllocator *cudaHostAllocator) {
cudaHostAllocator->malloc = &THCudaHostAllocator_alloc;
cudaHostAllocator->realloc = &THCudaHostAllocator_realloc;
cudaHostAllocator->free = &THCudaHostAllocator_free;
void THCAllocator_init(THCState *state) {
state->cudaHostAllocator->malloc = &THCudaHostAllocator_malloc;
state->cudaHostAllocator->realloc = NULL;
state->cudaHostAllocator->free = &THCudaHostAllocator_free;
}
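`THCudaHostAllocator` backs page-locked host memory; with `realloc` now set to `NULL`, resizes fall back to a free-plus-malloc path. From Lua, pinned memory is typically obtained through `cutorch.createCudaHostTensor`; a sketch assuming that helper is present in this build:

require 'cutorch'

-- A float tensor backed by cudaMallocHost (page-locked memory), so
-- copyAsync can overlap the transfer with other work.
local pinned = cutorch.createCudaHostTensor(1024):fill(0.5)
local dev = torch.CudaTensor(1024)
dev:copyAsync(pinned)
cutorch.synchronize()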
2 changes: 1 addition & 1 deletion lib/THC/THCAllocator.h
@@ -3,6 +3,6 @@

#include "THCGeneral.h"

THC_API void THCAllocator_init(THAllocator *state);
THC_API void THCAllocator_init(THCState *state);

#endif
2 changes: 1 addition & 1 deletion lib/THC/THCAtomics.cuh
@@ -1,7 +1,7 @@
#ifndef THC_ATOMICS_INC
#define THC_ATOMICS_INC

#include "THCHalf.h"
#include "THCGeneral.h"

template <typename T, size_t n>
struct AtomicAddIntegerImpl;
2 changes: 0 additions & 2 deletions lib/THC/THCBlas.cu
@@ -1,6 +1,4 @@
#include "THCBlas.h"
#include "THCGeneral.h"
#include "THCHalf.h"

float THCudaBlas_Sdot(THCState *state, long n, float *x, long incx, float *y, long incy)
{
1 change: 0 additions & 1 deletion lib/THC/THCBlas.h
@@ -2,7 +2,6 @@
#define THC_BLAS_INC

#include "THCGeneral.h"
#include "THCHalf.h"

/* Level 1 */
THC_API float THCudaBlas_Sdot(THCState *state, long n, float *x, long incx, float *y, long incy);
3 changes: 2 additions & 1 deletion lib/THC/THCCachingAllocator.cpp
@@ -300,12 +300,13 @@ static cudaError_t THCCachingAllocator_emptyCache(void* ctx)
static THCCachingAllocator caching_allocator;
static THCDeviceAllocator device_allocator = {
&THCCachingAllocator_malloc,
NULL,
&THCCachingAllocator_free,
&THCCachingAllocator_emptyCache,
&caching_allocator
};

THC_API THCDeviceAllocator* THCCachingAllocator_get()
THC_API THCDeviceAllocator* THCCachingAllocator_get(void)
{
return &device_allocator;
}
2 changes: 1 addition & 1 deletion lib/THC/THCCachingAllocator.h
@@ -3,6 +3,6 @@

#include "THCGeneral.h"

THC_API THCDeviceAllocator* THCCachingAllocator_get();
THC_API THCDeviceAllocator* THCCachingAllocator_get(void);

#endif