-
Notifications
You must be signed in to change notification settings - Fork 5
/
lesson0.cpp
89 lines (76 loc) · 3.52 KB
/
lesson0.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "bitreader.h"
#include "bitwriter.h"
#include "test_harness.h"
//////////////////
//// Lesson 0 - Basic Arithmetic Coding
//////////////////
/*
This lesson shows how an arithmetic coder works:
An arithmetic coder simply lets you skew how much disk space you spend for a 0 or a 1 bit
Respectively. If you are sure that the next bit will be zero, you want to set the probability
Of a zero very high, so that the weight will favor zeros over ones.
An article going in depth on how an arithmetic coder is implemented is here
http://www.drdobbs.com/cpp/data-compression-with-arithmetic-encodin/240169251
But for simplicity, we'll treat the VP8 bool_decoder as a black box.
Note that the algorithm will always produce correct results:
The ZERO data with incorrect probability example shows the calamity that happens
When the wrong probability is guessed for a stream. The result is simpy 7x bigger
The data is still recoverable, just extra disk space was wasted
Likewise you can see when the probability is guessed exactly, the compression factor is 100:1 here
The EXERCISE is as follows: given an array filled with mostly 1 bits, what should the probability
of a zero be passed in as. The current value, 255, is not beneficial and results in a 6.5x
bloat to the original file
*/
void encode_with_fixed_probability(uint8_t prob){
vpx_writer wri ={0};
vpx_start_encode(&wri, compressed); //setup the arithmetic coder
for (size_t i = 0; i < sizeof(uncompressed); ++i) { // go through each byte in the buffer
for(int bit = 1; bit < 256; bit <<= 1) { // for each bit in each byte
vpx_write(&wri, !!(uncompressed[i] & bit), prob); // serialize out the bit
// use the specified prob
// to skew the cost of 1 or 0
}
}
vpx_stop_encode(&wri);
printf("Buffer encoded with prob(0) = %.2f results in %d size (%.2f%%)\n",
prob / 255.,
wri.pos,
100 * wri.pos / float(sizeof(uncompressed)));
vpx_reader rea={0};
vpx_reader_init(&rea,
wri.buffer,
wri.pos); // setup the decoder
memset(roundtrip, 0, sizeof(roundtrip)); // zero out the roundtrip array
for (size_t i = 0; i < sizeof(roundtrip); ++i) { // go through each byte in the output
for(int bit = 1; bit < 256; bit <<= 1) { // for each bit in the output
if (vpx_read(&rea, prob)) { // read the bit in the input, weighted by prob
roundtrip[i] |= bit; // if it's true, set the bit in the output
}
}
}
assert(vpx_reader_has_error(&rea) == 0); //make sure the reader has no error
assert(memcmp(uncompressed, roundtrip, sizeof(uncompressed)) == 0); // make sure input = output
}
int main () {
printf("Random data\n");
populate_random_data();
encode_with_fixed_probability(127);
printf("ASCII data\n");
populate_ascii_data();
encode_with_fixed_probability(145);
printf("ZERO data\n");
populate_zero_data();
encode_with_fixed_probability(255);
printf("ZERO data with incorrect probability\n");
populate_zero_data();
encode_with_fixed_probability(0);
printf("----------------\nEXERCISE\n");
unsigned int prob = 255; // <-- FILL ME IN WITH A BETTER VALUE
exercise_mostly_one_data();
encode_with_fixed_probability(prob);
return roundtrip[0];
}