read_allocated_gpu_memory.cpp
/*
  This example shows how already allocated GPU memory can be reused as a tensor.
  This is a common case when the data has to be consumed in place on the GPU,
  without transferring it to the CPU and back.
*/
#include "ATen/ATen.h"
#include <cuda_runtime.h>
using namespace at; // assumed in the following
int main()
{
    int width = 300;
    int height = 300;

    // Dummy CPU image -- RGBA
    std::vector<unsigned char> image(4 * width * height);

    for (size_t y = 0; y < height; ++y) {
        for (size_t x = 0; x < width; ++x) {
            size_t idx = y * width + x;
            unsigned char value = (float) (y + 1) / height * 255;

            if (x < 0.03125*width) {
                image[idx * 4 + 0] = 255 - value;
                image[idx * 4 + 1] = 255 - value;
                image[idx * 4 + 2] = 255 - value;
                image[idx * 4 + 3] = 255;
            }
            else if (x < 0.34375*width) {
                image[idx * 4 + 0] = value;
                image[idx * 4 + 1] = 0;
                image[idx * 4 + 2] = 0;
                image[idx * 4 + 3] = 255;
            }
            else if (x < 0.65625*width) {
                image[idx * 4 + 0] = 0;
                image[idx * 4 + 1] = 255 - value;
                image[idx * 4 + 2] = 0;
                image[idx * 4 + 3] = 255;
            }
            else if (x < 0.96875*width) {
                image[idx * 4 + 0] = 0;
                image[idx * 4 + 1] = 0;
                image[idx * 4 + 2] = value;
                image[idx * 4 + 3] = 255;
            }
            else {
                image[idx * 4 + 0] = value;
                image[idx * 4 + 1] = value;
                image[idx * 4 + 2] = value;
                image[idx * 4 + 3] = 255;
            }
        }
    }
    // Load the dummy image to the GPU
    unsigned char * cuda_pointer;
    cudaMalloc(&cuda_pointer, 4 * width * height * sizeof(unsigned char));
    cudaMemcpy(cuda_pointer, image.data(), sizeof(unsigned char) * 4 * width * height, cudaMemcpyHostToDevice);
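    // In a real application the device buffer would typically already exist
    // (e.g. produced by some other CUDA code); the upload above is only here
    // so this self-contained demo has something on the GPU to wrap.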
    // Wrap the already allocated GPU memory in an ATen tensor so it can be used later on
    auto f = CUDA(kByte).tensorFromBlob(cuda_pointer, {4 * width * height});
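    // Note: tensorFromBlob wraps the existing device buffer without copying it and,
    // at least in this ATen version, does not take ownership -- cuda_pointer must stay
    // allocated for as long as f is used, and freeing it remains our job (hence the
    // cudaFree below).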
    // Copy the tensor to the CPU only to verify its contents
    auto new_one = f.toType(CPU(kByte));
    // Nicely print out the contents of the CPU copy
    std::cout << new_one << std::endl;

    cudaFree(cuda_pointer);

    return 0;
}
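/*
  A minimal optional sketch, not part of the original example and not called above:
  it re-wraps a device buffer the same way, modifies it through ATen, then reads it
  back with the raw CUDA API to show that the tensor and the buffer share the same
  memory -- tensorFromBlob made no copy. The helper name is hypothetical, and it
  assumes Tensor::fill_ is available for CUDA byte tensors in this ATen build.
*/
void zero_buffer_through_tensor(unsigned char * cuda_pointer, int numel)
{
    // Wrap the existing device buffer (no copy) and zero it through ATen
    auto view = CUDA(kByte).tensorFromBlob(cuda_pointer, {numel});
    view.fill_(0);

    // Read the raw buffer back with cudaMemcpy to confirm the in-place change
    std::vector<unsigned char> host(numel);
    cudaMemcpy(host.data(), cuda_pointer, numel * sizeof(unsigned char), cudaMemcpyDeviceToHost);
    std::cout << "first byte after fill_: " << (int) host[0] << std::endl; // expected: 0
}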