root / lab4 / .minix-src / include / lzma / lzma12.h @ 14
History | View | Annotate | Download (14.4 KB)
1 |
/**
|
---|---|
2 |
* \file lzma/lzma12.h
|
3 |
* \brief LZMA1 and LZMA2 filters
|
4 |
*/
|
5 |
|
6 |
/*
|
7 |
* Author: Lasse Collin
|
8 |
*
|
9 |
* This file has been put into the public domain.
|
10 |
* You can do whatever you want with this file.
|
11 |
*
|
12 |
* See ../lzma.h for information about liblzma as a whole.
|
13 |
*/
|
14 |
|
15 |
#ifndef LZMA_H_INTERNAL
|
16 |
# error Never include this file directly. Use <lzma.h> instead.
|
17 |
#endif
|
18 |
|
19 |
|
20 |
/**
|
21 |
* \brief LZMA1 Filter ID
|
22 |
*
|
23 |
* LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
|
24 |
* 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
|
25 |
* accidentally using LZMA when they actually want LZMA2.
|
26 |
*
|
27 |
* LZMA1 shouldn't be used for new applications unless you _really_ know
|
28 |
* what you are doing. LZMA2 is almost always a better choice.
|
29 |
*/
|
30 |
#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) |
31 |
|
32 |
/**
|
33 |
* \brief LZMA2 Filter ID
|
34 |
*
|
35 |
* Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
|
36 |
* support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion
|
37 |
* when trying to compress uncompressible data), possibility to change
|
38 |
* lc/lp/pb in the middle of encoding, and some other internal improvements.
|
39 |
*/
|
40 |
#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) |
41 |
|
42 |
|
43 |
/**
|
44 |
* \brief Match finders
|
45 |
*
|
46 |
* Match finder has major effect on both speed and compression ratio.
|
47 |
* Usually hash chains are faster than binary trees.
|
48 |
*
|
49 |
* If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better
|
50 |
* choice, because binary trees get much higher compression ratio penalty
|
51 |
* with LZMA_SYNC_FLUSH.
|
52 |
*
|
53 |
* The memory usage formulas are only rough estimates, which are closest to
|
54 |
* reality when dict_size is a power of two. The formulas are more complex
|
55 |
* in reality, and can also change a little between liblzma versions. Use
|
56 |
* lzma_raw_encoder_memusage() to get more accurate estimate of memory usage.
|
57 |
*/
|
58 |
typedef enum { |
59 |
LZMA_MF_HC3 = 0x03,
|
60 |
/**<
|
61 |
* \brief Hash Chain with 2- and 3-byte hashing
|
62 |
*
|
63 |
* Minimum nice_len: 3
|
64 |
*
|
65 |
* Memory usage:
|
66 |
* - dict_size <= 16 MiB: dict_size * 7.5
|
67 |
* - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB
|
68 |
*/
|
69 |
|
70 |
LZMA_MF_HC4 = 0x04,
|
71 |
/**<
|
72 |
* \brief Hash Chain with 2-, 3-, and 4-byte hashing
|
73 |
*
|
74 |
* Minimum nice_len: 4
|
75 |
*
|
76 |
* Memory usage:
|
77 |
* - dict_size <= 32 MiB: dict_size * 7.5
|
78 |
* - dict_size > 32 MiB: dict_size * 6.5
|
79 |
*/
|
80 |
|
81 |
LZMA_MF_BT2 = 0x12,
|
82 |
/**<
|
83 |
* \brief Binary Tree with 2-byte hashing
|
84 |
*
|
85 |
* Minimum nice_len: 2
|
86 |
*
|
87 |
* Memory usage: dict_size * 9.5
|
88 |
*/
|
89 |
|
90 |
LZMA_MF_BT3 = 0x13,
|
91 |
/**<
|
92 |
* \brief Binary Tree with 2- and 3-byte hashing
|
93 |
*
|
94 |
* Minimum nice_len: 3
|
95 |
*
|
96 |
* Memory usage:
|
97 |
* - dict_size <= 16 MiB: dict_size * 11.5
|
98 |
* - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB
|
99 |
*/
|
100 |
|
101 |
LZMA_MF_BT4 = 0x14
|
102 |
/**<
|
103 |
* \brief Binary Tree with 2-, 3-, and 4-byte hashing
|
104 |
*
|
105 |
* Minimum nice_len: 4
|
106 |
*
|
107 |
* Memory usage:
|
108 |
* - dict_size <= 32 MiB: dict_size * 11.5
|
109 |
* - dict_size > 32 MiB: dict_size * 10.5
|
110 |
*/
|
111 |
} lzma_match_finder; |
112 |
|
113 |
|
114 |
/**
|
115 |
* \brief Test if given match finder is supported
|
116 |
*
|
117 |
* Return true if the given match finder is supported by this liblzma build.
|
118 |
* Otherwise false is returned. It is safe to call this with a value that
|
119 |
* isn't listed in lzma_match_finder enumeration; the return value will be
|
120 |
* false.
|
121 |
*
|
122 |
* There is no way to list which match finders are available in this
|
123 |
* particular liblzma version and build. It would be useless, because
|
124 |
* a new match finder, which the application developer wasn't aware of,
|
125 |
* could require giving additional options to the encoder that the older
|
126 |
* match finders don't need.
|
127 |
*/
|
128 |
extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder)
|
129 |
lzma_nothrow lzma_attr_const; |
130 |
|
131 |
|
132 |
/**
|
133 |
* \brief Compression modes
|
134 |
*
|
135 |
* This selects the function used to analyze the data produced by the match
|
136 |
* finder.
|
137 |
*/
|
138 |
typedef enum { |
139 |
LZMA_MODE_FAST = 1,
|
140 |
/**<
|
141 |
* \brief Fast compression
|
142 |
*
|
143 |
* Fast mode is usually at its best when combined with
|
144 |
* a hash chain match finder.
|
145 |
*/
|
146 |
|
147 |
LZMA_MODE_NORMAL = 2
|
148 |
/**<
|
149 |
* \brief Normal compression
|
150 |
*
|
151 |
* This is usually notably slower than fast mode. Use this
|
152 |
* together with binary tree match finders to expose the
|
153 |
* full potential of the LZMA1 or LZMA2 encoder.
|
154 |
*/
|
155 |
} lzma_mode; |
156 |
|
157 |
|
158 |
/**
|
159 |
* \brief Test if given compression mode is supported
|
160 |
*
|
161 |
* Return true if the given compression mode is supported by this liblzma
|
162 |
* build. Otherwise false is returned. It is safe to call this with a value
|
163 |
* that isn't listed in lzma_mode enumeration; the return value will be false.
|
164 |
*
|
165 |
* There is no way to list which modes are available in this particular
|
166 |
* liblzma version and build. It would be useless, because a new compression
|
167 |
* mode, which the application developer wasn't aware of, could require giving
|
168 |
* additional options to the encoder that the older modes don't need.
|
169 |
*/
|
170 |
extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode)
|
171 |
lzma_nothrow lzma_attr_const; |
172 |
|
173 |
|
174 |
/**
|
175 |
* \brief Options specific to the LZMA1 and LZMA2 filters
|
176 |
*
|
177 |
* Since LZMA1 and LZMA2 share most of the code, it's simplest to share
|
178 |
* the options structure too. For encoding, all but the reserved variables
|
179 |
* need to be initialized unless specifically mentioned otherwise.
|
180 |
* lzma_lzma_preset() can be used to get a good starting point.
|
181 |
*
|
182 |
* For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and
|
183 |
* preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb.
|
184 |
*/
|
185 |
typedef struct { |
186 |
/**
|
187 |
* \brief Dictionary size in bytes
|
188 |
*
|
189 |
* Dictionary size indicates how many bytes of the recently processed
|
190 |
* uncompressed data are kept in memory. One method to reduce size of
|
191 |
* the uncompressed data is to store distance-length pairs, which
|
192 |
* indicate what data to repeat from the dictionary buffer. Thus,
|
193 |
* the bigger the dictionary, the better the compression ratio
|
194 |
* usually is.
|
195 |
*
|
196 |
* Maximum size of the dictionary depends on multiple things:
|
197 |
* - Memory usage limit
|
198 |
* - Available address space (not a problem on 64-bit systems)
|
199 |
* - Selected match finder (encoder only)
|
200 |
*
|
201 |
* Currently the maximum dictionary size for encoding is 1.5 GiB
|
202 |
* (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit
|
203 |
* systems for certain match finder implementation reasons. In the
|
204 |
* future, there may be match finders that support bigger
|
205 |
* dictionaries.
|
206 |
*
|
207 |
* Decoder already supports dictionaries up to 4 GiB - 1 B (i.e.
|
208 |
* UINT32_MAX), so increasing the maximum dictionary size of the
|
209 |
* encoder won't cause problems for old decoders.
|
210 |
*
|
211 |
* Because extremely small dictionary sizes would have unneeded
|
212 |
* overhead in the decoder, the minimum dictionary size is 4096 bytes.
|
213 |
*
|
214 |
* \note When decoding, too big dictionary does no other harm
|
215 |
* than wasting memory.
|
216 |
*/
|
217 |
uint32_t dict_size; |
218 |
# define LZMA_DICT_SIZE_MIN UINT32_C(4096) |
219 |
# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) |
220 |
|
221 |
/**
|
222 |
* \brief Pointer to an initial dictionary
|
223 |
*
|
224 |
* It is possible to initialize the LZ77 history window using
|
225 |
* a preset dictionary. It is useful when compressing many
|
226 |
* similar, relatively small chunks of data independently from
|
227 |
* each other. The preset dictionary should contain typical
|
228 |
* strings that occur in the files being compressed. The most
|
229 |
* probable strings should be near the end of the preset dictionary.
|
230 |
*
|
231 |
* This feature should be used only in special situations. For
|
232 |
* now, it works correctly only with raw encoding and decoding.
|
233 |
* Currently none of the container formats supported by
|
234 |
* liblzma allow preset dictionary when decoding, thus if
|
235 |
* you create a .xz or .lzma file with preset dictionary, it
|
236 |
* cannot be decoded with the regular decoder functions. In the
|
237 |
* future, the .xz format will likely get support for preset
|
238 |
* dictionary though.
|
239 |
*/
|
240 |
const uint8_t *preset_dict;
|
241 |
|
242 |
/**
|
243 |
* \brief Size of the preset dictionary
|
244 |
*
|
245 |
* Specifies the size of the preset dictionary. If the size is
|
246 |
* bigger than dict_size, only the last dict_size bytes are
|
247 |
* processed.
|
248 |
*
|
249 |
* This variable is read only when preset_dict is not NULL.
|
250 |
* If preset_dict is not NULL but preset_dict_size is zero,
|
251 |
* no preset dictionary is used (identical to only setting
|
252 |
* preset_dict to NULL).
|
253 |
*/
|
254 |
uint32_t preset_dict_size; |
255 |
|
256 |
/**
|
257 |
* \brief Number of literal context bits
|
258 |
*
|
259 |
* How many of the highest bits of the previous uncompressed
|
260 |
* eight-bit byte (also known as `literal') are taken into
|
261 |
* account when predicting the bits of the next literal.
|
262 |
*
|
263 |
* E.g. in typical English text, an upper-case letter is
|
264 |
* often followed by a lower-case letter, and a lower-case
|
265 |
* letter is usually followed by another lower-case letter.
|
266 |
* In the US-ASCII character set, the highest three bits are 010
|
267 |
* for upper-case letters and 011 for lower-case letters.
|
268 |
* When lc is at least 3, the literal coding can take advantage of
|
269 |
* this property in the uncompressed data.
|
270 |
*
|
271 |
* There is a limit that applies to literal context bits and literal
|
272 |
* position bits together: lc + lp <= 4. Without this limit the
|
273 |
* decoding could become very slow, which could have security related
|
274 |
* results in some cases like email servers doing virus scanning.
|
275 |
* This limit also simplifies the internal implementation in liblzma.
|
276 |
*
|
277 |
* There may be LZMA1 streams that have lc + lp > 4 (maximum possible
|
278 |
* lc would be 8). It is not possible to decode such streams with
|
279 |
* liblzma.
|
280 |
*/
|
281 |
uint32_t lc; |
282 |
# define LZMA_LCLP_MIN 0 |
283 |
# define LZMA_LCLP_MAX 4 |
284 |
# define LZMA_LC_DEFAULT 3 |
285 |
|
286 |
/**
|
287 |
* \brief Number of literal position bits
|
288 |
*
|
289 |
* lp affects what kind of alignment in the uncompressed data is
|
290 |
* assumed when encoding literals. A literal is a single 8-bit byte.
|
291 |
* See pb below for more information about alignment.
|
292 |
*/
|
293 |
uint32_t lp; |
294 |
# define LZMA_LP_DEFAULT 0 |
295 |
|
296 |
/**
|
297 |
* \brief Number of position bits
|
298 |
*
|
299 |
* pb affects what kind of alignment in the uncompressed data is
|
300 |
* assumed in general. The default means four-byte alignment
|
301 |
* (2^pb = 2^2 = 4), which is often a good choice when there's
|
302 |
* no better guess.
|
303 |
*
|
304 |
* When the alignment is known, setting pb accordingly may reduce
|
305 |
* the file size a little. E.g. with text files having one-byte
|
306 |
* alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can
|
307 |
* improve compression slightly. For UTF-16 text, pb=1 is a good
|
308 |
* choice. If the alignment is an odd number like 3 bytes, pb=0
|
309 |
* might be the best choice.
|
310 |
*
|
311 |
* Even though the assumed alignment can be adjusted with pb and
|
312 |
* lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment.
|
313 |
* It might be worth taking into account when designing file formats
|
314 |
* that are likely to be often compressed with LZMA1 or LZMA2.
|
315 |
*/
|
316 |
uint32_t pb; |
317 |
# define LZMA_PB_MIN 0 |
318 |
# define LZMA_PB_MAX 4 |
319 |
# define LZMA_PB_DEFAULT 2 |
320 |
|
321 |
/** Compression mode */
|
322 |
lzma_mode mode; |
323 |
|
324 |
/**
|
325 |
* \brief Nice length of a match
|
326 |
*
|
327 |
* This determines how many bytes the encoder compares from the match
|
328 |
* candidates when looking for the best match. Once a match of at
|
329 |
* least nice_len bytes long is found, the encoder stops looking for
|
330 |
* better candidates and encodes the match. (Naturally, if the found
|
331 |
* match is actually longer than nice_len, the actual length is
|
332 |
* encoded; it's not truncated to nice_len.)
|
333 |
*
|
334 |
* Bigger values usually increase the compression ratio and
|
335 |
* compression time. For most files, 32 to 128 is a good value,
|
336 |
* which gives very good compression ratio at good speed.
|
337 |
*
|
338 |
* The exact minimum value depends on the match finder. The maximum
|
339 |
* is 273, which is the maximum length of a match that LZMA1 and
|
340 |
* LZMA2 can encode.
|
341 |
*/
|
342 |
uint32_t nice_len; |
343 |
|
344 |
/** Match finder ID */
|
345 |
lzma_match_finder mf; |
346 |
|
347 |
/**
|
348 |
* \brief Maximum search depth in the match finder
|
349 |
*
|
350 |
* For every input byte, match finder searches through the hash chain
|
351 |
* or binary tree in a loop, each iteration going one step deeper in
|
352 |
* the chain or tree. The searching stops if
|
353 |
* - a match of at least nice_len bytes long is found;
|
354 |
* - all match candidates from the hash chain or binary tree have
|
355 |
* been checked; or
|
356 |
* - maximum search depth is reached.
|
357 |
*
|
358 |
* Maximum search depth is needed to prevent the match finder from
|
359 |
* wasting too much time in case there are lots of short match
|
360 |
* candidates. On the other hand, stopping the search before all
|
361 |
* candidates have been checked can reduce compression ratio.
|
362 |
*
|
363 |
* Setting depth to zero tells liblzma to use an automatic default
|
364 |
* value, that depends on the selected match finder and nice_len.
|
365 |
* The default is in the range [4, 200] or so (it may vary between
|
366 |
* liblzma versions).
|
367 |
*
|
368 |
* Using a bigger depth value than the default can increase
|
369 |
* compression ratio in some cases. There is no strict maximum value,
|
370 |
* but high values (thousands or millions) should be used with care:
|
371 |
* the encoder could remain fast enough with typical input, but
|
372 |
* malicious input could cause the match finder to slow down
|
373 |
* dramatically, possibly creating a denial of service attack.
|
374 |
*/
|
375 |
uint32_t depth; |
376 |
|
377 |
/*
|
378 |
* Reserved space to allow possible future extensions without
|
379 |
* breaking the ABI. You should not touch these, because the names
|
380 |
* of these variables may change. These are not and will never be used
|
381 |
* with the currently supported options, so it is safe to leave these
|
382 |
* uninitialized.
|
383 |
*/
|
384 |
uint32_t reserved_int1; |
385 |
uint32_t reserved_int2; |
386 |
uint32_t reserved_int3; |
387 |
uint32_t reserved_int4; |
388 |
uint32_t reserved_int5; |
389 |
uint32_t reserved_int6; |
390 |
uint32_t reserved_int7; |
391 |
uint32_t reserved_int8; |
392 |
lzma_reserved_enum reserved_enum1; |
393 |
lzma_reserved_enum reserved_enum2; |
394 |
lzma_reserved_enum reserved_enum3; |
395 |
lzma_reserved_enum reserved_enum4; |
396 |
void *reserved_ptr1;
|
397 |
void *reserved_ptr2;
|
398 |
|
399 |
} lzma_options_lzma; |
400 |
|
401 |
|
402 |
/**
|
403 |
* \brief Set a compression preset to lzma_options_lzma structure
|
404 |
*
|
405 |
* 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
|
406 |
* of the xz command line tool. In addition, it is possible to bitwise-or
|
407 |
* flags to the preset. Currently only LZMA_PRESET_EXTREME is supported.
|
408 |
* The flags are defined in container.h, because the flags are used also
|
409 |
* with lzma_easy_encoder().
|
410 |
*
|
411 |
* The preset values are subject to changes between liblzma versions.
|
412 |
*
|
413 |
* This function is available only if LZMA1 or LZMA2 encoder has been enabled
|
414 |
* when building liblzma.
|
415 |
*
|
416 |
* \return On success, false is returned. If the preset is not
|
417 |
* supported, true is returned.
|
418 |
*/
|
419 |
extern LZMA_API(lzma_bool) lzma_lzma_preset(
|
420 |
lzma_options_lzma *options, uint32_t preset) lzma_nothrow; |