-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLZW15V.C
273 lines (243 loc) · 9.42 KB
/
LZW15V.C
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
/************************** Start of LZW15V.C *************************
*
* This is the LZW module which implements a more powerful version
* of the algorithm. This version of the program has three major
* improvements over LZW12.C. First, it expands the maximum code size
* to 15 bits. Second, it starts encoding with 9 bit codes, working
* its way up in bit size only as necessary. Finally, it flushes the
* dictionary when done.
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include "errhand.h"
#include "bitio.h"
/*
* Constants used throughout the program. BITS defines the maximum
* number of bits that can be used in the output code. TABLE_SIZE defines
* the size of the dictionary table. TABLE_BANKS are the number of
* 256 element dictionary pages needed. The code defines should be
* self-explanatory.
*/
#define BITS 15
#define MAX_CODE ( ( 1 << BITS ) - 1 )
#define TABLE_SIZE 35023L
#define TABLE_BANKS ( ( TABLE_SIZE >> 8 ) + 1 )
#define END_OF_STREAM 256
#define BUMP_CODE 257
#define FLUSH_CODE 258
#define FIRST_CODE 259
#define UNUSED -1
/*
* Local prototypes.
*/
unsigned int find_child_node( int parent_code, int child_character );
unsigned int decode_string( unsigned int offset, unsigned int code );
//char *CompressionName = "LZW 15 Bit Variable Rate Encoder";
//char *Usage = "in-file out-file\n\n";
/*
* This data structure defines the dictionary. Each entry in the dictionary
* has a code value. This is the code emitted by the compressor. Each
* code is actually made up of two pieces: a parent_code, and a
* character. Code values of less than 256 are actually plain
* text codes.
*
* Note that in order to handle 16 bit segmented compilers, such as most
* of the MS-DOS compilers, it was necessary to break up the dictionary
* into a table of smaller dictionary pointers. Every reference to the
* dictionary was replaced by a macro that did a pointer dereference first.
* By breaking up the index along byte boundaries we should be as efficient
* as possible.
*/
struct dictionary {
int code_value;
int parent_code;
char character;
} *dict[ TABLE_BANKS ];
#define DICT( i ) dict[ i >> 8 ][ i & 0xff ]
/*
* Other global data structures. The decode_stack is used to reverse
* strings that come out of the tree during decoding. next_code is the
* next code to be added to the dictionary, both during compression and
* decompression. current_code_bits defines how many bits are currently
* being used for output, and next_bump_code defines the code that will
* trigger the next jump in word size.
*/
static char decode_stack[ TABLE_SIZE ];
unsigned int next_code;
int current_code_bits;
unsigned int next_bump_code;
/*
* This routine is used to initialize the dictionary, both when the
* compressor or decompressor first starts up, and also when a flush
* code comes in. Note that even thought the decompressor sets all
* the code_value elements to UNUSED, it doesn't really need to.
*/
void InitializeDictionary()
{
unsigned int i;
for ( i = 0 ; i < TABLE_SIZE ; i++ )
DICT( i ).code_value = UNUSED;
next_code = FIRST_CODE;
current_code_bits = 9;
next_bump_code = 511;
}
/*
* This routine allocates the dictionary. Since the total size of the
* dictionary is much larger than 64K, it can't be allocated as a single
* object. Instead, it is allocated as a set of pointers to smaller
* dictionary objects. The special DICT() macro is used to translate
* indices into pairs of references.
*/
void InitializeStorage()
{
int i;
for ( i = 0 ; i < TABLE_BANKS ; i++ ) {
dict[ i ] = (struct dictionary *) calloc( 256, sizeof ( struct dictionary ) );
if ( dict[ i ] == NULL )
fatal_error( "Error allocating dictionary space" );
}
}
/*
* The compressor is short and simple. It reads in new symbols one
* at a time from the input file. It then checks to see if the
* combination of the current symbol and the current code are already
* defined in the dictionary. If they are not, they are added to the
* dictionary, and we start over with a new one symbol code. If they
* are, the code for the combination of the code and character becomes
* our new code. Note that in this enhanced version of LZW, the
* encoder needs to check the codes for boundary conditions.
*/
int CompressFile_lzw_15_bit_variable_rate_encoder(FILE *input,BIT_FILE *output,int argc,char *argv[])
{
int character;
int string_code;
unsigned int index;
DWORD milliseconds = GetTickCount();
InitializeStorage();
InitializeDictionary();
if ( ( string_code = getc( input ) ) == EOF )
string_code = END_OF_STREAM;
while ( ( character = getc( input ) ) != EOF ) {
index = find_child_node( string_code, character );
if ( DICT( index ).code_value != - 1 )
string_code = DICT( index ).code_value;
else {
DICT( index ).code_value = next_code++;
DICT( index ).parent_code = string_code;
DICT( index ).character = (char) character;
OutputBits( output, (unsigned long) string_code, current_code_bits );
string_code = character;
if ( next_code > MAX_CODE ) {
OutputBits( output, (unsigned long) FLUSH_CODE, current_code_bits );
InitializeDictionary();
} else if ( next_code > next_bump_code ) {
OutputBits( output, (unsigned long) BUMP_CODE, current_code_bits );
current_code_bits++;
next_bump_code <<= 1;
next_bump_code |= 1;
//putc( 'B', stdout );
}
}
}
OutputBits( output, (unsigned long) string_code, current_code_bits );
OutputBits( output, (unsigned long) END_OF_STREAM, current_code_bits);
return (GetTickCount() - milliseconds);
}
/*
* The file expander operates much like the encoder. It has to
* read in codes, the convert the codes to a string of characters.
* The only catch in the whole operation occurs when the encoder
* encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this
* occurs, the encoder outputs a code that is not presently defined
* in the table. This is handled as an exception. All of the special
* input codes are handled in various ways.
*/
int ExpandFile_lzw_15_bit_variable_rate_encoder(BIT_FILE *input, FILE *output, int argc, char *argv[])
{
unsigned int new_code;
unsigned int old_code;
int character;
unsigned int count;
DWORD milliseconds = GetTickCount();
InitializeStorage();
for ( ; ; ) {
InitializeDictionary();
old_code = (unsigned int) InputBits( input, current_code_bits );
if ( old_code == END_OF_STREAM )
return (GetTickCount() - milliseconds);
character = old_code;
putc( old_code, output );
for ( ; ; ) {
new_code = (unsigned int) InputBits( input, current_code_bits );
if ( new_code == END_OF_STREAM )
return (GetTickCount() - milliseconds);
if ( new_code == FLUSH_CODE )
break;
if ( new_code == BUMP_CODE ) {
current_code_bits++;
continue;
}
if ( new_code >= next_code ) {
decode_stack[ 0 ] = (char) character;
count = decode_string( 1, old_code );
}
else
count = decode_string( 0, new_code );
character = decode_stack[ count - 1 ];
while ( count > 0 )
putc( decode_stack[ --count ], output );
DICT( next_code ).parent_code = old_code;
DICT( next_code ).character = (char) character;
next_code++;
old_code = new_code;
}
}
return (GetTickCount() - milliseconds);
}
/*
* This hashing routine is responsible for finding the table location
* for a string/character combination. The table index is created
* by using an exclusive OR combination of the prefix and character.
* This code also has to check for collisions, and handles them by
* jumping around in the table.
*/
static unsigned int find_child_node(int parent_code,int child_character)
{
unsigned int index;
unsigned int offset;
index = ( child_character << ( BITS - 8 ) ) ^ parent_code;
if ( index == 0 )
offset = 1;
else
offset = TABLE_SIZE - index;
for ( ; ; ) {
if ( DICT( index ).code_value == UNUSED )
return( (unsigned int) index );
if ( DICT( index ).parent_code == parent_code &&
DICT( index ).character == (char) child_character )
return( index );
if ( (int) index >= offset )
index -= offset;
else
index += TABLE_SIZE - offset;
}
}
/*
* This routine decodes a string from the dictionary, and stores it
* in the decode_stack data structure. It returns a count to the
* calling program of how many characters were placed in the stack.
*/
static unsigned int decode_string(unsigned int count, unsigned int code)
{
while ( code > 255 ) {
decode_stack[ count++ ] = DICT( code ).character;
code = DICT( code ).parent_code;
}
decode_stack[ count++ ] = (char) code;
return( count );
}
/************************** End of LZW15V.C *************************/