Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Latest commit

 

History

History
459 lines (348 loc) · 12.5 KB

addCustomDevice.md

File metadata and controls

459 lines (348 loc) · 12.5 KB

如何支持一个新的设备

概览

添加一个新的设备需要以下3个步骤:

假设新设备的名称为TNEW, 以下将以这个设备名称进行演示。

CMakeList中添加设备的支持

  • 修改根目录CMakeList.txt
#select the platform to build
anakin_option(USE_GPU_PLACE "Select the build mode for GPU place." NO)
anakin_option(USE_X86_PLACE "Select the build mode for X86 place." NO)
anakin_option(USE_ARM_PLACE "Select the build mode for ARM place." NO)
anakin_option(USE_TNEW_PLACE "Select the build mode for TNEW place." YES)
  • 修改saber/CMakeList.txt

根据新增设备的目录完善saber目录下的CMakeList.txt

if(USE_TNEW_PLACE)
    anakin_fetch_files_with_suffix(${ANAKIN_SABER}/core/impl/tnew "cpp" ANAKIN_SABER_BASE_SRC)
    anakin_fetch_files_with_suffix(${ANAKIN_SABER}/funcs/impl/tnew "cpp" ANAKIN_SABER_BASE_SRC)
endif()
  • 修改test/CMakeList.txt

新增设备的单测文件放在test/saber/tnew目录下,修改test目录下的CMakeList.txt

if(USE_TNEW_PLACE)
    anakin_fetch_files_with_suffix(${ANAKIN_UNIT_TEST}/saber/tnew "cpp" ANAKIN_TEST_CASE_SRC)
endif()
  • 修改cmake/anakin_config.h.in
// platform to use
#cmakedefine USE_GPU_PLACE

#cmakedefine USE_X86_PLACE

#cmakedefine USE_ARM_PLACE

#cmakedefine USE_TNEW_PLACE
  • 其他依赖和编译选项
    修改cmake目录下的compiler_options.cmake和find_modules.cmake

saber中添加设备的实现

saber是Anakin的基础计算库,对外提供设备无关的统一的API,设备相关的实现都会封装到TargetWrapper中。

saber/saber_types.h中添加设备

enum TargetTypeEnum {
    eINVALID = -1,
    eNV = 1,
    eAMD = 2,
    eARM = 3,
    eX86 = 4,
    eNVHX86 = 5,
    eTNEW = 6
};

typedef TargetType<eNV> NV;
typedef TargetType<eARM> ARM;
typedef TargetType<eAMD> AMD;
typedef TargetType<eX86> X86;
typedef TargetType<eTNEW> TNEW;

saber/core中添加设备的实现

  1. target_traits.h中添加新设备
  • 增加设备类型
struct __cuda_device{};
struct __arm_device{};
struct __amd_device{};
struct __x86_device{};
struct __tnew_device{};
  • TargetTypeTraits模板具体化
template <>
struct TargetTypeTraits<TNEW> {
    typedef __xxx_target target_category;//根据实际设备是host端还是device端进行选择
    typedef __tnew_device target_type;
};
  2. data_traits.h中特化DataTrait模板类

如果设备需要特殊的数据类型,则特化出设备的DataTrait类的实现,例如opencl数据类型的实现如下:

#ifdef USE_OPENCL
struct ClMem{
    ClMem(){
        dmem = nullptr;
        offset = 0;
    }

    ClMem(cl_mem* mem_in, int offset_in = 0) {
        dmem = mem_in;
        offset = offset_in;
    }

    ClMem(ClMem& right) {
        dmem = right.dmem;
        offset = right.offset;
    }

    ClMem& operator=(ClMem& right) {
        this->dmem = right.dmem;
        this->offset = right.offset;
        return *this;
    }

    ClMem& operator+(int offset_in) {
        this->offset += offset_in;
        return *this;
    }

    int offset{0};
    cl_mem* dmem;
};

template <>
struct DataTrait<AMD, AK_FLOAT> {
    typedef ClMem Dtype;
    typedef float dtype;
};

template <>
struct DataTrait<AMD, AK_DOUBLE> {
    typedef ClMem Dtype;
    typedef double dtype;
};

template <>
struct DataTrait<AMD, AK_INT8> {
    typedef ClMem Dtype;
    typedef char dtype;
};
#endif //use_opencl
  3. target_wrapper.h中特化TargetWrapper模板类

特化TargetWrapper模板类,在target_wrapper.h中声明函数,具体如下:

template <>
struct TargetWrapper<TNEW, __xxx_target> { //根据TNEW的具体类型修改__xxx_target,__host_target或者__device_target

    typedef xxx_event event_t;          //根据设备实现xxx_event
    typedef xxx_stream stream_t;        //根据设备实现xxx_stream

    static void get_device_count(int& count);

    static void set_device(int id);

    //We should add strategy to avoid malloc directly
    static void mem_alloc(void** ptr, size_t n);

    static void mem_free(void* ptr);

    static void mem_set(void* ptr, int value, size_t n);

    static void create_event(event_t& event, bool flag = false);

    static void create_stream(stream_t& stream);

    static void create_stream_with_flag(stream_t& stream, unsigned int flag);

    static void create_stream_with_priority(stream_t& stream, unsigned int flag, int priority);

    static void destroy_stream(stream_t& stream);

    static void destroy_event(event_t& event);

    static void record_event(event_t& event, stream_t stream);

    static void query_event(event_t& event);

    static void sync_event(event_t& event);

    static void sync_stream(event_t& event, stream_t& stream);

    static void sync_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                            size_t count, __DtoD);

    static void async_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                             size_t count, stream_t& stream, __DtoD);

    static void sync_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                            size_t count, __HtoD);

    static void async_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                             size_t count, stream_t& stream, __HtoD);

    static void sync_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                            size_t count, __DtoH);

    static void async_memcpy(void* dst, int dst_id, const void* src, int src_id, \
                             size_t count, stream_t& stream, __DtoH);

    static void sync_memcpy_p2p(void* dst, int dst_dev, const void* src, \
                                int src_dev, size_t count);

    static void async_memcpy_p2p(void* dst, int dst_dev, const void* src, \
                                 int src_dev, size_t count, stream_t& stream);

    static int get_device_id();
};
  4. impl/目录下添加设备目录和实现

saber/core/impl目录下添加设备目录tnew

  • 实现TargetWrapper<TNEW, __xxx_target>结构体中各函数的定义。
    如果TargetWrapper<TNEW, __xxx_target>的实现与默认的模板类一致,则不用特化出该类。
typedef TargetWrapper<TNEW, __xxx_target> TNEW_API;
void TNEW_API::get_device_count(int &count) {
    // add implementation
}

void TNEW_API::set_device(int id){
    // add implementation
}
        
void TNEW_API::mem_alloc(void** ptr, size_t n){
    // add implementation
}
        
void TNEW_API::mem_free(void* ptr){
    if(ptr != nullptr){
        // add implementation
    }
}
...
  • 特化实现device.h中的Device<TNEW>
template <>
void Device<TNEW>::create_stream() {
    // add implementation
}

template <>
void Device<TNEW>::get_info() {

    // add implementation
}

saber/funcs中实现设备相关的op

参考如何增加新的Operator

framework中添加设备的具体化或实例化

framework/core

  • net.cpp中添加实例化
#ifdef USE_TNEW_PLACE
template class Net<TNEW, AK_FLOAT, Precision::FP32, OpRunType::ASYNC>;
template class Net<TNEW, AK_FLOAT, Precision::FP32, OpRunType::SYNC>;
#endif
  • operator_func.cpp中添加实例化
#ifdef USE_TNEW_PLACE
template class OperatorFunc<TNEW, AK_FLOAT, Precision::FP32>;
#endif
  • worker.cpp中添加实例化
#ifdef USE_TNEW_PLACE
template class Worker<TNEW, AK_FLOAT, Precision::FP32, OpRunType::ASYNC>;
template class Worker<TNEW, AK_FLOAT, Precision::FP32, OpRunType::SYNC>;
#endif
  • operator_attr.cpp中添加实例化
template
OpAttrWarpper& OpAttrWarpper::__alias__<TNEW, AK_FLOAT, Precision::FP32>(const std::string& op_name);
template
OpAttrWarpper& OpAttrWarpper::__alias__<TNEW, AK_FLOAT, Precision::FP16>(const std::string& op_name);
template
OpAttrWarpper& OpAttrWarpper::__alias__<TNEW, AK_FLOAT, Precision::INT8>(const std::string& op_name);
  • parameter.h中添加设备的实现
#ifdef USE_TNEW_PLACE
template<typename Dtype>
class PBlock<Dtype, TNEW> {
public:
	typedef Tensor4d<TNEW, DataTypeRecover<Dtype>::type> type;

	PBlock() {
		_inner_tensor = std::make_shared<type>(); 
	}
	...
}
#endif //TNEW
  • type_traits_extend.h中添加设备的实现
template<>
struct target_host<saber::TNEW> {
    typedef saber::X86 type; //根据TNEW选择正确的host type
};

framework/graph

  • graph.cpp中添加实例化
  #ifdef USE_TNEW_PLACE
  template class Graph<TNEW, AK_FLOAT, Precision::FP32>;
  template class Graph<TNEW, AK_FLOAT, Precision::FP16>;
  template class Graph<TNEW, AK_FLOAT, Precision::INT8>;
  #endif

framework/model_parser

  • parser.cpp中添加实例化
  #ifdef USE_TNEW_PLACE
  template
  Status load<TNEW, AK_FLOAT, Precision::FP32>(graph::Graph<TNEW, AK_FLOAT, Precision::FP32>* graph,
          const char* model_path);
  template
  Status load<TNEW, AK_FLOAT, Precision::FP16>(graph::Graph<TNEW, AK_FLOAT, Precision::FP16>* graph,
          const char* model_path);
  template
  Status load<TNEW, AK_FLOAT, Precision::INT8>(graph::Graph<TNEW, AK_FLOAT, Precision::INT8>* graph,
          const char* model_path);
  
  template
  Status save<TNEW, AK_FLOAT, Precision::FP32>(graph::Graph<TNEW, AK_FLOAT, Precision::FP32>* graph,
          std::string& model_path);
  template
  Status save<TNEW, AK_FLOAT, Precision::FP16>(graph::Graph<TNEW, AK_FLOAT, Precision::FP16>* graph,
          std::string& model_path);
  template
  Status save<TNEW, AK_FLOAT, Precision::INT8>(graph::Graph<TNEW, AK_FLOAT, Precision::INT8>* graph,
          std::string& model_path);
  
  template
  Status load<TNEW, AK_FLOAT, Precision::FP32>(graph::Graph<TNEW, AK_FLOAT, Precision::FP32>* graph,
          std::string& model_path);
  template
  Status load<TNEW, AK_FLOAT, Precision::FP16>(graph::Graph<TNEW, AK_FLOAT, Precision::FP16>* graph,
          std::string& model_path);
  template
  Status load<TNEW, AK_FLOAT, Precision::INT8>(graph::Graph<TNEW, AK_FLOAT, Precision::INT8>* graph,
          std::string& model_path);
  
  template
  Status save<TNEW, AK_FLOAT, Precision::FP32>(graph::Graph<TNEW, AK_FLOAT, Precision::FP32>* graph,
          const char* model_path);
  template
  Status save<TNEW, AK_FLOAT, Precision::FP16>(graph::Graph<TNEW, AK_FLOAT, Precision::FP16>* graph,
          const char* model_path);
  template
  Status save<TNEW, AK_FLOAT, Precision::INT8>(graph::Graph<TNEW, AK_FLOAT, Precision::INT8>* graph,
          const char* model_path);
  #endif
  • model_io.cpp中添加实例化
#ifdef USE_TNEW_PLACE
template class NodeIO<TNEW, AK_FLOAT, Precision::FP32>;
template class NodeIO<TNEW, AK_FLOAT, Precision::FP16>;
template class NodeIO<TNEW, AK_FLOAT, Precision::INT8>;
#endif

framework/operators

framework/operators目录下所有op添加实例化或具体化 以activation.cpp为例,实例化如下:

#ifdef USE_TNEW_PLACE
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::FP32);
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::FP16);
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::INT8);
template class ActivationHelper<TNEW, AK_FLOAT, Precision::FP32>;
ANAKIN_REGISTER_OP_HELPER(Activation, ActivationHelper, TNEW, AK_FLOAT, Precision::FP32);
#endif

如果TNEW设备函数的实现与现有模板实现不一致,可以特化实现如下(以Init()为例):

#ifdef USE_TNEW_PLACE
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::FP32);
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::FP16);
INSTANCE_ACTIVATION(TNEW, AK_FLOAT, Precision::INT8);
template <>
Status ActivationHelper<TNEW, AK_FLOAT, Precision::FP32>::Init(OpContext<TNEW> &ctx,\
        const std::vector<Tensor4dPtr<TNEW, AK_FLOAT> >& ins, \
                std::vector<Tensor4dPtr<TNEW, AK_FLOAT> >& outs) {
    SABER_CHECK(_funcs_activation.init(ins, outs, _param_activation, SPECIFY, SABER_IMPL, ctx)); //在这里选择实现方式
    return Status::OK();
}
ANAKIN_REGISTER_OP_HELPER(Activation, ActivationHelper, TNEW, AK_FLOAT, Precision::FP32);
#endif

ANAKIN_REGISTER_OP(Activation)中添加TNEW的注册

#ifdef USE_TNEW_PLACE
.__alias__<TNEW, AK_FLOAT, Precision::FP32>("activation")
#endif

注意事项

不要修改Tensor/Buffer/Env/Context这些类函数的接口和实现