/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#ifndef _JIT_UNI_REORDER_HPP
#define _JIT_UNI_REORDER_HPP
#include <assert.h>
#include "c_types_map.hpp"
#include "type_helpers.hpp"
#include "cpu_primitive.hpp"
#include "cpu_reorder_pd.hpp"
namespace mkldnn {
namespace impl {
namespace cpu {
namespace tr {
/** capacity of the prb_t::nodes array, i.e. the largest number of nodes a
 * problem can hold; taken from MKLDNN_MAX_NDIMS */
constexpr int max_ndims = MKLDNN_MAX_NDIMS;
/** one node (dimension) of a transposition problem: its size plus the strides
 * used along it for the input, the output and the scales.
 * NOTE(review): the stride units (elements vs. bytes) are not visible in this
 * header -- confirm against the implementation */
struct node_t {
size_t n; // node size (prb_node_split() splits it into n1 and n / n1)
ptrdiff_t is; // input stride
ptrdiff_t os; // output stride
ptrdiff_t ss; // scale stride
};
/** kind of scaling attached to a problem (stored in prb_t::scale_type).
 * NOTE(review): presumably NONE = no scaling, COMMON = a single scale shared
 * by all elements, MANY = several scales addressed through node_t::ss --
 * confirm against the implementation */
enum class scale_type_t { NONE, COMMON, MANY };
/** description of a transposition problem: the data types involved, the list
 * of nodes, and scaling information. Filled by prb_init(), reshaped by the
 * prb_* helpers and embedded into kernel_t::desc_t */
struct prb_t {
data_type_t itype; // input data type (i/o prefixes as in node_t::is / node_t::os)
data_type_t otype; // output data type
int ndims; // presumably the number of entries of nodes[] in use -- TODO confirm
node_t nodes[max_ndims]; // per-node sizes and strides
ptrdiff_t ioff; // presumably the initial input offset -- TODO confirm units
ptrdiff_t ooff; // presumably the initial output offset -- TODO confirm units
scale_type_t scale_type; // kind of scaling to apply
float beta; // NOTE(review): meaning not visible here; presumably the factor applied to existing output -- confirm
};
/** initializes the problem description
 * prb -- transposition problem (output)
 * imd -- input memory descriptor
 * omd -- output memory descriptor
 * attr -- primitive attributes
 * returns a status code; NOTE(review): which inputs are rejected is decided
 * by the implementation and cannot be told from this header */
status_t prb_init(prb_t &prb, const memory_desc_t &imd,
const memory_desc_t &omd, const primitive_attr_t *attr);
/** sorts the nodes of problem p in place so that the output strides
 * (node_t::os) come in ascending order */
void prb_normalize(prb_t &p);
/** folds nodes of problem p together, in place, where possible.
 * NOTE(review): the exact folding condition lives in the implementation */
void prb_simplify(prb_t &p);
/** splits node dim of problem p into two nodes of sizes n1 and n / n1,
 * where n is the current size of that node
 * @warning n must be a multiple of n1 */
void prb_node_split(prb_t &p, int dim, size_t n1);
/** swaps nodes d0 and d1 of problem p in place */
void prb_node_swap(prb_t &p, int d0, int d1);
/** moves node d0 of problem p to position d1.
 * nodes (d0, d1] are shifted to the left if d0 < d1 or
 * to the right if d0 > d1.
 * NOTE(review): for d0 > d1 the shifted range is presumably [d1, d0) --
 * confirm against the implementation */
void prb_node_move(prb_t &p, int d0, int d1);
/** dumps problem p to stdout; p is not modified (taken by const reference) */
void prb_dump(const prb_t &p);
/** arguments of a single kernel invocation; a pointer to this struct is what
 * kernel_t::operator() hands over to the kernel code */
struct call_param_t {
const void *in; // input data (read-only)
void *out; // output data
const float *scale; // scales (read-only); NOTE(review): presumably unused for scale_type_t::NONE -- confirm
};
/** base class of a transposition kernel: keeps its own copy of the kernel
 * descriptor and forwards invocations to the function pointer ker_ */
struct kernel_t {
    /** kernel descriptor, produced by desc_init() and consumed by create() */
    struct desc_t {
        int id; // NOTE(review): presumably identifies the kernel implementation -- confirm
        prb_t prb; // transposition problem the kernel is built for
    };

    /** stores a copy of desc and leaves ker_ unset (nullptr).
     * explicit, so that a desc_t is never silently converted to a kernel_t */
    explicit kernel_t(const desc_t &desc): desc_(desc), ker_(nullptr) {}

    /** runs the kernel with call parameters c; ker_ must be set (asserted) */
    void operator()(const call_param_t *c) const { assert(ker_); ker_(c); }

    virtual ~kernel_t() {}

    /** inits kernel descriptor:
     *      desc            -- kernel descriptor (output)
     *      prb             -- transposition problem (input)
     *      ndims_ker_max   -- limit the maximum number of dimensions kernel
     *                         will process (optional, 0 -- no limitation) */
    static status_t desc_init(desc_t &desc, const prb_t &prb,
            int ndims_ker_max = 0);

    /** creates kernel for the problem described in desc.
     * NOTE(review): returns a raw pointer; ownership presumably passes to the
     * caller -- confirm against the implementation */
    static kernel_t *create(const desc_t &desc);

protected:
    const desc_t desc_;
    /* shorthand for desc_.prb; desc_ must stay declared before this member.
     * The implicit copy constructor would bind this reference to the source
     * object's desc_.prb, so instances should not be copied */
    const prb_t &prb_ = desc_.prb;
    /* kernel entry point; NOTE(review): presumably assigned by derived
     * classes once the code is generated -- confirm */
    void (*ker_)(const call_param_t *);
};
/* TODO: add trans_t class */
}
/** entry point for the cpu reorder list
 * reorder_pd -- resulting reorder primitive descriptor (output)
 * engine -- engine the reorder is created for
 * attr -- primitive attributes
 * src_engine, src_md -- engine and memory descriptor of the source
 * dst_engine, dst_md -- engine and memory descriptor of the destination
 * returns a status code; NOTE(review): presumably *reorder_pd is written only
 * on success -- confirm against the implementation */
status_t jit_uni_reorder_create(reorder_pd_t **reorder_pd,
engine_t *engine, const primitive_attr_t *attr,
engine_t *src_engine, const memory_desc_t *src_md,
engine_t *dst_engine, const memory_desc_t *dst_md);
}
}
}
#endif