forked from kgober/FSX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCompress.cs
237 lines (223 loc) · 10.5 KB
/
Compress.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
// Compress.cs
// Copyright © 2019-2020 Kenneth Gober
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// .Z compressed file format:
// header: bytes 0-2
// 0 magic = 0x1f
// 1 magic = 0x9d
// 2 format
// max_bits = format & 0x1f
// max_bits is the maximum code size, starting code size is 9 bits.
// block_mode = format & 0x80
// in block mode, code 256 is used to clear the dictionary and reset code size to 9
// compressed data: bytes 3+
// a number of 9-byte blocks each containing 8 packed 9-bit LZW code words
// a number of 10-byte blocks each containing 8 packed 10-bit LZW code words
// a number of 11-byte blocks each containing 8 packed 11-bit LZW code words
// ...
//
// Final block of code words ends at the earliest 1-byte boundary (there is no EOF code word).
// When code size resets or changes, remaining code words in the current block are discarded.
// Within blocks, bits are packed into bytes least-significant-bit first (right to left).
//
// LZW compression/decompression algorithm:
// https://marknelson.us/posts/1989/10/01/lzw-data-compression.html
// https://marknelson.us/posts/2011/11/08/lzw-revisited.html
//
// Example: compression of ABABCCC
// in add out residual
// +A -> A A
// A+B -> AB 257 A B
// B+A -> BA 258 B A
// A+B -> AB AB (257)
// AB+C -> ABC 259 257 C
// C+C -> CC 260 C C
// C+C -> CC CC (260)
// CC 260
//
// Example: decompression of A B 257 C 260
// in out add
// A A
// B B 257 = AB (A + B[0])
// 257 AB 258 = BA (B + AB[0])
// C C 259 = ABC (AB + C[0])
// 260 CC 260 = CC (C + C[0])
//
// Reference Implementations:
// net.sources 1984-07-05 Subject: File compression utility
// net.sources 1984-08-30 Subject: File compression program (compress) release 2.0
// mod.sources 1985-01-03 Subject: Compress release 3.0
// mod.sources 1985-08-01 Subject: Compress 4.0 (part [012] of 2)
// 3.0 was also posted to comp.sources.unix volume1 as "compress"
// 4.0 was also posted to comp.sources.unix volume2 as "compress4.0"
using System;
namespace FSX
{
class Compress
{
public static Boolean HasHeader(Byte[] data)
{
if (data.Length < 3) return false;
if (data[0] != 0x1f) return false;
if (data[1] != 0x9d) return false;
return true;
}
public class Decompressor
{
private Byte[] mData; // compressed data
private Int32 mSize; // uncompressed size
private Int32[] mLength; // lengths of each dictionary entry
private Int32[] mPrefixCode; // prefix of each dictionary entry is another dictionary entry
private Byte[] mSuffixByte; // suffix of each dictionary entry is a byte to be appended to prefix
public Decompressor(Byte[] data)
{
mData = data;
mSize = -2;
}
public Int32 GetByteCount()
{
if (mSize != -2) return mSize;
if (!HasHeader(mData)) return (mSize = -1);
Int32 MAX_BITS = mData[2] & 0x1f;
if (MAX_BITS > 24) return (mSize = -1); // current BitReader only handles up to 24-bit code words
if ((mData[2] & 0x60) != 0) return (mSize = -1); // unsupported flag bits
Boolean BLOCK_MODE = ((mData[2] & 0x80) != 0);
Int32 n = 1 << MAX_BITS;
mLength = new Int32[n];
for (Int32 i = 0; i < 256; i++) mLength[i] = 1;
Int32 block_offset = 3;
BitReaderL R = new BitReaderL(mData, block_offset);
Int32 code_size = 9;
Int32 code_max = (1 << code_size) - 1;
Int32 next_free = (BLOCK_MODE) ? 257 : 256;
Int32 code = R.Next(code_size);
if (code == -1) return (mSize = 0); // valid 0-byte output
n = 1; // first code is always for 1 byte, so start count at 1
Int32 prev_code = code;
while ((code = R.Next(code_size)) != -1)
{
if ((code == 256) && BLOCK_MODE)
{
// start reading a new block, with code size reset to 9
R = new BitReaderL(mData, block_offset = NextBlockOffset(block_offset, R.Offset, code_size));
code_max = (1 << (code_size = 9)) - 1;
next_free = 256; // use 256 instead of 257 so next code isn't (usably) added to dictionary
continue;
}
if (code > next_free) return (mSize = -1); // invalid or corrupt input
if (next_free <= code_max)
{
// new dictionary entries are always 1 byte longer than the previously received one.
mLength[next_free] = mLength[prev_code] + 1;
if ((next_free == code_max) && (code_size < MAX_BITS))
{
// start reading a new block, with code size increased by 1
R = new BitReaderL(mData, block_offset = NextBlockOffset(block_offset, R.Offset, code_size));
code_max = (1 << ++code_size) - 1;
}
next_free++;
}
n += mLength[code];
prev_code = code;
}
return (mSize = n);
}
public Byte[] GetBytes()
{
Int32 n = GetByteCount();
if (n == -1) return null;
Byte[] buf = new Byte[n];
if (n == 0) return buf;
Int32 MAX_BITS = mData[2] & 0x1f;
Boolean BLOCK_MODE = ((mData[2] & 0x80) != 0);
n = 1 << MAX_BITS;
mPrefixCode = new Int32[n];
mSuffixByte = new Byte[n];
for (Int32 i = 0; i < 256; i++)
{
mPrefixCode[i] = -1;
mSuffixByte[i] = (Byte)i;
}
Int32 block_offset = 3;
BitReaderL R = new BitReaderL(mData, block_offset);
Int32 code_size = 9;
Int32 code_max = (1 << code_size) - 1;
Int32 next_free = (BLOCK_MODE) ? 257 : 256;
Int32 code = R.Next(code_size);
n = 0;
Byte b = Put(buf, ref n, code); // first code gets output without adding anything to dictionary
Int32 prev_code = code;
while ((code = R.Next(code_size)) != -1)
{
if ((code == 256) && BLOCK_MODE)
{
// start reading a new block, with code size reset to 9
R = new BitReaderL(mData, block_offset = NextBlockOffset(block_offset, R.Offset, code_size));
code_max = (1 << (code_size = 9)) - 1;
next_free = 256; // use 256 instead of 257 so next code isn't (usably) added to dictionary
continue;
}
if (code == next_free)
{
// special case: the last byte of this code is the same as the first byte of the previous code
mLength[code] = mLength[prev_code] + 1;
mPrefixCode[code] = prev_code;
mSuffixByte[code] = b;
}
b = Put(buf, ref n, code);
if (next_free <= code_max)
{
// add a new code to dictionary for prev_code plus the first byte of current code
mLength[next_free] = mLength[prev_code] + 1;
mPrefixCode[next_free] = prev_code;
mSuffixByte[next_free] = b;
if ((next_free == code_max) && (code_size < MAX_BITS))
{
// start reading a new block, with code size increased by 1
R = new BitReaderL(mData, block_offset = NextBlockOffset(block_offset, R.Offset, code_size));
code_max = (1 << ++code_size) - 1;
}
next_free++;
}
prev_code = code;
}
return buf;
}
// write the bytes encoded by a given code word, and return the first byte
private Byte Put(Byte[] buffer, ref Int32 offset, Int32 code)
{
offset += mLength[code];
Int32 p = offset;
while (code != -1)
{
buffer[--p] = mSuffixByte[code];
code = mPrefixCode[code];
}
return buffer[p];
}
// find the starting offset of the next block of code words
private Int32 NextBlockOffset(Int32 startOffset, Int32 currentOffset, Int32 blockSize)
{
Int32 n = (currentOffset - startOffset) % blockSize;
return (n == 0) ? currentOffset : currentOffset + blockSize - n;
}
}
}
}