root / lab4 / .minix-src / include / lzma / lzma12.h @ 13
History | View | Annotate | Download (14.4 KB)
1 | 13 | up20180614 | /**
|
---|---|---|---|
2 | * \file lzma/lzma12.h
|
||
3 | * \brief LZMA1 and LZMA2 filters
|
||
4 | */
|
||
5 | |||
6 | /*
|
||
7 | * Author: Lasse Collin
|
||
8 | *
|
||
9 | * This file has been put into the public domain.
|
||
10 | * You can do whatever you want with this file.
|
||
11 | *
|
||
12 | * See ../lzma.h for information about liblzma as a whole.
|
||
13 | */
|
||
14 | |||
15 | #ifndef LZMA_H_INTERNAL
|
||
16 | # error Never include this file directly. Use <lzma.h> instead.
|
||
17 | #endif
|
||
18 | |||
19 | |||
20 | /**
|
||
21 | * \brief LZMA1 Filter ID
|
||
22 | *
|
||
23 | * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils,
|
||
24 | * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from
|
||
25 | * accidentally using LZMA when they actually want LZMA2.
|
||
26 | *
|
||
27 | * LZMA1 shouldn't be used for new applications unless you _really_ know
|
||
28 | * what you are doing. LZMA2 is almost always a better choice.
|
||
29 | */
|
||
30 | #define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) |
||
31 | |||
32 | /**
|
||
33 | * \brief LZMA2 Filter ID
|
||
34 | *
|
||
35 | * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds
|
||
36 | * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion
|
||
37 | * when trying to compress incompressible data), the possibility to change
|
||
38 | * lc/lp/pb in the middle of encoding, and some other internal improvements.
|
||
39 | */
|
||
40 | #define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) |
||
41 | |||
42 | |||
43 | /**
|
||
44 | * \brief Match finders
|
||
45 | *
|
||
46 | * The match finder has a major effect on both speed and compression ratio.
|
||
47 | * Usually hash chains are faster than binary trees.
|
||
48 | *
|
||
49 | * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better
|
||
50 | * choice, because binary trees get much higher compression ratio penalty
|
||
51 | * with LZMA_SYNC_FLUSH.
|
||
52 | *
|
||
53 | * The memory usage formulas are only rough estimates, which are closest to
|
||
54 | * reality when dict_size is a power of two. The formulas are more complex
|
||
55 | * in reality, and can also change a little between liblzma versions. Use
|
||
56 | * lzma_raw_encoder_memusage() to get a more accurate estimate of memory usage.
|
||
57 | */
|
||
58 | typedef enum { |
||
59 | LZMA_MF_HC3 = 0x03,
|
||
60 | /**<
|
||
61 | * \brief Hash Chain with 2- and 3-byte hashing
|
||
62 | *
|
||
63 | * Minimum nice_len: 3
|
||
64 | *
|
||
65 | * Memory usage:
|
||
66 | * - dict_size <= 16 MiB: dict_size * 7.5
|
||
67 | * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB
|
||
68 | */
|
||
69 | |||
70 | LZMA_MF_HC4 = 0x04,
|
||
71 | /**<
|
||
72 | * \brief Hash Chain with 2-, 3-, and 4-byte hashing
|
||
73 | *
|
||
74 | * Minimum nice_len: 4
|
||
75 | *
|
||
76 | * Memory usage:
|
||
77 | * - dict_size <= 32 MiB: dict_size * 7.5
|
||
78 | * - dict_size > 32 MiB: dict_size * 6.5
|
||
79 | */
|
||
80 | |||
81 | LZMA_MF_BT2 = 0x12,
|
||
82 | /**<
|
||
83 | * \brief Binary Tree with 2-byte hashing
|
||
84 | *
|
||
85 | * Minimum nice_len: 2
|
||
86 | *
|
||
87 | * Memory usage: dict_size * 9.5
|
||
88 | */
|
||
89 | |||
90 | LZMA_MF_BT3 = 0x13,
|
||
91 | /**<
|
||
92 | * \brief Binary Tree with 2- and 3-byte hashing
|
||
93 | *
|
||
94 | * Minimum nice_len: 3
|
||
95 | *
|
||
96 | * Memory usage:
|
||
97 | * - dict_size <= 16 MiB: dict_size * 11.5
|
||
98 | * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB
|
||
99 | */
|
||
100 | |||
101 | LZMA_MF_BT4 = 0x14
|
||
102 | /**<
|
||
103 | * \brief Binary Tree with 2-, 3-, and 4-byte hashing
|
||
104 | *
|
||
105 | * Minimum nice_len: 4
|
||
106 | *
|
||
107 | * Memory usage:
|
||
108 | * - dict_size <= 32 MiB: dict_size * 11.5
|
||
109 | * - dict_size > 32 MiB: dict_size * 10.5
|
||
110 | */
|
||
111 | } lzma_match_finder; |
||
112 | |||
113 | |||
114 | /**
|
||
115 | * \brief Test if given match finder is supported
|
||
116 | *
|
||
117 | * Return true if the given match finder is supported by this liblzma build.
|
||
118 | * Otherwise false is returned. It is safe to call this with a value that
|
||
119 | * isn't listed in lzma_match_finder enumeration; the return value will be
|
||
120 | * false.
|
||
121 | *
|
||
122 | * There is no way to list which match finders are available in this
|
||
123 | * particular liblzma version and build. It would be useless, because
|
||
124 | * a new match finder, which the application developer wasn't aware of,
|
||
125 | * could require giving additional options to the encoder that the older
|
||
126 | * match finders don't need.
|
||
127 | */
|
||
128 | extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder)
|
||
129 | lzma_nothrow lzma_attr_const; |
||
130 | |||
131 | |||
132 | /**
|
||
133 | * \brief Compression modes
|
||
134 | *
|
||
135 | * This selects the function used to analyze the data produced by the match
|
||
136 | * finder.
|
||
137 | */
|
||
138 | typedef enum { |
||
139 | LZMA_MODE_FAST = 1,
|
||
140 | /**<
|
||
141 | * \brief Fast compression
|
||
142 | *
|
||
143 | * Fast mode is usually at its best when combined with
|
||
144 | * a hash chain match finder.
|
||
145 | */
|
||
146 | |||
147 | LZMA_MODE_NORMAL = 2
|
||
148 | /**<
|
||
149 | * \brief Normal compression
|
||
150 | *
|
||
151 | * This is usually notably slower than fast mode. Use this
|
||
152 | * together with binary tree match finders to expose the
|
||
153 | * full potential of the LZMA1 or LZMA2 encoder.
|
||
154 | */
|
||
155 | } lzma_mode; |
||
156 | |||
157 | |||
158 | /**
|
||
159 | * \brief Test if given compression mode is supported
|
||
160 | *
|
||
161 | * Return true if the given compression mode is supported by this liblzma
|
||
162 | * build. Otherwise false is returned. It is safe to call this with a value
|
||
163 | * that isn't listed in lzma_mode enumeration; the return value will be false.
|
||
164 | *
|
||
165 | * There is no way to list which modes are available in this particular
|
||
166 | * liblzma version and build. It would be useless, because a new compression
|
||
167 | * mode, which the application developer wasn't aware of, could require giving
|
||
168 | * additional options to the encoder that the older modes don't need.
|
||
169 | */
|
||
170 | extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode)
|
||
171 | lzma_nothrow lzma_attr_const; |
||
172 | |||
173 | |||
174 | /**
|
||
175 | * \brief Options specific to the LZMA1 and LZMA2 filters
|
||
176 | *
|
||
177 | * Since LZMA1 and LZMA2 share most of the code, it's simplest to share
|
||
178 | * the options structure too. For encoding, all but the reserved variables
|
||
179 | * need to be initialized unless specifically mentioned otherwise.
|
||
180 | * lzma_lzma_preset() can be used to get a good starting point.
|
||
181 | *
|
||
182 | * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and
|
||
183 | * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb.
|
||
184 | */
|
||
185 | typedef struct { |
||
186 | /**
|
||
187 | * \brief Dictionary size in bytes
|
||
188 | *
|
||
189 | * Dictionary size indicates how many bytes of the recently processed
|
||
190 | * uncompressed data is kept in memory. One method to reduce size of
|
||
191 | * the uncompressed data is to store distance-length pairs, which
|
||
192 | * indicate what data to repeat from the dictionary buffer. Thus,
|
||
193 | * the bigger the dictionary, the better the compression ratio
|
||
194 | * usually is.
|
||
195 | *
|
||
196 | * Maximum size of the dictionary depends on multiple things:
|
||
197 | * - Memory usage limit
|
||
198 | * - Available address space (not a problem on 64-bit systems)
|
||
199 | * - Selected match finder (encoder only)
|
||
200 | *
|
||
201 | * Currently the maximum dictionary size for encoding is 1.5 GiB
|
||
202 | * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit
|
||
203 | * systems for certain match finder implementation reasons. In the
|
||
204 | * future, there may be match finders that support bigger
|
||
205 | * dictionaries.
|
||
206 | *
|
||
207 | * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e.
|
||
208 | * UINT32_MAX), so increasing the maximum dictionary size of the
|
||
209 | * encoder won't cause problems for old decoders.
|
||
210 | *
|
||
211 | * Because extremely small dictionary sizes would have unneeded
|
||
212 | * overhead in the decoder, the minimum dictionary size is 4096 bytes.
|
||
213 | *
|
||
214 | * \note When decoding, a too big dictionary does no other harm
|
||
215 | * than wasting memory.
|
||
216 | */
|
||
217 | uint32_t dict_size; |
||
218 | # define LZMA_DICT_SIZE_MIN UINT32_C(4096) |
||
219 | # define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) |
||
220 | |||
221 | /**
|
||
222 | * \brief Pointer to an initial dictionary
|
||
223 | *
|
||
224 | * It is possible to initialize the LZ77 history window using
|
||
225 | * a preset dictionary. It is useful when compressing many
|
||
226 | * similar, relatively small chunks of data independently from
|
||
227 | * each other. The preset dictionary should contain typical
|
||
228 | * strings that occur in the files being compressed. The most
|
||
229 | * probable strings should be near the end of the preset dictionary.
|
||
230 | *
|
||
231 | * This feature should be used only in special situations. For
|
||
232 | * now, it works correctly only with raw encoding and decoding.
|
||
233 | * Currently none of the container formats supported by
|
||
234 | * liblzma allow preset dictionary when decoding, thus if
|
||
235 | * you create a .xz or .lzma file with preset dictionary, it
|
||
236 | * cannot be decoded with the regular decoder functions. In the
|
||
237 | * future, the .xz format will likely get support for preset
|
||
238 | * dictionary though.
|
||
239 | */
|
||
240 | const uint8_t *preset_dict;
|
||
241 | |||
242 | /**
|
||
243 | * \brief Size of the preset dictionary
|
||
244 | *
|
||
245 | * Specifies the size of the preset dictionary. If the size is
|
||
246 | * bigger than dict_size, only the last dict_size bytes are
|
||
247 | * processed.
|
||
248 | *
|
||
249 | * This variable is read only when preset_dict is not NULL.
|
||
250 | * If preset_dict is not NULL but preset_dict_size is zero,
|
||
251 | * no preset dictionary is used (identical to only setting
|
||
252 | * preset_dict to NULL).
|
||
253 | */
|
||
254 | uint32_t preset_dict_size; |
||
255 | |||
256 | /**
|
||
257 | * \brief Number of literal context bits
|
||
258 | *
|
||
259 | * How many of the highest bits of the previous uncompressed
|
||
260 | * eight-bit byte (also known as `literal') are taken into
|
||
261 | * account when predicting the bits of the next literal.
|
||
262 | *
|
||
263 | * E.g. in typical English text, an upper-case letter is
|
||
264 | * often followed by a lower-case letter, and a lower-case
|
||
265 | * letter is usually followed by another lower-case letter.
|
||
266 | * In the US-ASCII character set, the highest three bits are 010
|
||
267 | * for upper-case letters and 011 for lower-case letters.
|
||
268 | * When lc is at least 3, the literal coding can take advantage of
|
||
269 | * this property in the uncompressed data.
|
||
270 | *
|
||
271 | * There is a limit that applies to literal context bits and literal
|
||
272 | * position bits together: lc + lp <= 4. Without this limit the
|
||
273 | * decoding could become very slow, which could have security related
|
||
274 | * results in some cases like email servers doing virus scanning.
|
||
275 | * This limit also simplifies the internal implementation in liblzma.
|
||
276 | *
|
||
277 | * There may be LZMA1 streams that have lc + lp > 4 (maximum possible
|
||
278 | * lc would be 8). It is not possible to decode such streams with
|
||
279 | * liblzma.
|
||
280 | */
|
||
281 | uint32_t lc; |
||
282 | # define LZMA_LCLP_MIN 0 |
||
283 | # define LZMA_LCLP_MAX 4 |
||
284 | # define LZMA_LC_DEFAULT 3 |
||
285 | |||
286 | /**
|
||
287 | * \brief Number of literal position bits
|
||
288 | *
|
||
289 | * lp affects what kind of alignment in the uncompressed data is
|
||
290 | * assumed when encoding literals. A literal is a single 8-bit byte.
|
||
291 | * See pb below for more information about alignment.
|
||
292 | */
|
||
293 | uint32_t lp; |
||
294 | # define LZMA_LP_DEFAULT 0 |
||
295 | |||
296 | /**
|
||
297 | * \brief Number of position bits
|
||
298 | *
|
||
299 | * pb affects what kind of alignment in the uncompressed data is
|
||
300 | * assumed in general. The default means four-byte alignment
|
||
301 | * (2^pb = 2^2 = 4), which is often a good choice when there's
|
||
302 | * no better guess.
|
||
303 | *
|
||
304 | * When the alignment is known, setting pb accordingly may reduce
|
||
305 | * the file size a little. E.g. with text files having one-byte
|
||
306 | * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can
|
||
307 | * improve compression slightly. For UTF-16 text, pb=1 is a good
|
||
308 | * choice. If the alignment is an odd number like 3 bytes, pb=0
|
||
309 | * might be the best choice.
|
||
310 | *
|
||
311 | * Even though the assumed alignment can be adjusted with pb and
|
||
312 | * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment.
|
||
313 | * It might be worth taking into account when designing file formats
|
||
314 | * that are likely to be often compressed with LZMA1 or LZMA2.
|
||
315 | */
|
||
316 | uint32_t pb; |
||
317 | # define LZMA_PB_MIN 0 |
||
318 | # define LZMA_PB_MAX 4 |
||
319 | # define LZMA_PB_DEFAULT 2 |
||
320 | |||
321 | /** Compression mode */
|
||
322 | lzma_mode mode; |
||
323 | |||
324 | /**
|
||
325 | * \brief Nice length of a match
|
||
326 | *
|
||
327 | * This determines how many bytes the encoder compares from the match
|
||
328 | * candidates when looking for the best match. Once a match of at
|
||
329 | * least nice_len bytes long is found, the encoder stops looking for
|
||
330 | * better candidates and encodes the match. (Naturally, if the found
|
||
331 | * match is actually longer than nice_len, the actual length is
|
||
332 | * encoded; it's not truncated to nice_len.)
|
||
333 | *
|
||
334 | * Bigger values usually increase the compression ratio and
|
||
335 | * compression time. For most files, 32 to 128 is a good value,
|
||
336 | * which gives very good compression ratio at good speed.
|
||
337 | *
|
||
338 | * The exact minimum value depends on the match finder. The maximum
|
||
339 | * is 273, which is the maximum length of a match that LZMA1 and
|
||
340 | * LZMA2 can encode.
|
||
341 | */
|
||
342 | uint32_t nice_len; |
||
343 | |||
344 | /** Match finder ID */
|
||
345 | lzma_match_finder mf; |
||
346 | |||
347 | /**
|
||
348 | * \brief Maximum search depth in the match finder
|
||
349 | *
|
||
350 | * For every input byte, match finder searches through the hash chain
|
||
351 | * or binary tree in a loop, each iteration going one step deeper in
|
||
352 | * the chain or tree. The searching stops if
|
||
353 | * - a match of at least nice_len bytes long is found;
|
||
354 | * - all match candidates from the hash chain or binary tree have
|
||
355 | * been checked; or
|
||
356 | * - maximum search depth is reached.
|
||
357 | *
|
||
358 | * Maximum search depth is needed to prevent the match finder from
|
||
359 | * wasting too much time in case there are lots of short match
|
||
360 | * candidates. On the other hand, stopping the search before all
|
||
361 | * candidates have been checked can reduce compression ratio.
|
||
362 | *
|
||
363 | * Setting depth to zero tells liblzma to use an automatic default
|
||
364 | * value, that depends on the selected match finder and nice_len.
|
||
365 | * The default is in the range [4, 200] or so (it may vary between
|
||
366 | * liblzma versions).
|
||
367 | *
|
||
368 | * Using a bigger depth value than the default can increase
|
||
369 | * compression ratio in some cases. There is no strict maximum value,
|
||
370 | * but high values (thousands or millions) should be used with care:
|
||
371 | * the encoder could remain fast enough with typical input, but
|
||
372 | * malicious input could cause the match finder to slow down
|
||
373 | * dramatically, possibly creating a denial of service attack.
|
||
374 | */
|
||
375 | uint32_t depth; |
||
376 | |||
377 | /*
|
||
378 | * Reserved space to allow possible future extensions without
|
||
379 | * breaking the ABI. You should not touch these, because the names
|
||
380 | * of these variables may change. These are not and will never be used
|
||
381 | * with the currently supported options, so it is safe to leave these
|
||
382 | * uninitialized.
|
||
383 | */
|
||
384 | uint32_t reserved_int1; |
||
385 | uint32_t reserved_int2; |
||
386 | uint32_t reserved_int3; |
||
387 | uint32_t reserved_int4; |
||
388 | uint32_t reserved_int5; |
||
389 | uint32_t reserved_int6; |
||
390 | uint32_t reserved_int7; |
||
391 | uint32_t reserved_int8; |
||
392 | lzma_reserved_enum reserved_enum1; |
||
393 | lzma_reserved_enum reserved_enum2; |
||
394 | lzma_reserved_enum reserved_enum3; |
||
395 | lzma_reserved_enum reserved_enum4; |
||
396 | void *reserved_ptr1;
|
||
397 | void *reserved_ptr2;
|
||
398 | |||
399 | } lzma_options_lzma; |
||
400 | |||
401 | |||
402 | /**
|
||
403 | * \brief Set a compression preset to lzma_options_lzma structure
|
||
404 | *
|
||
405 | * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9
|
||
406 | * of the xz command line tool. In addition, it is possible to bitwise-or
|
||
407 | * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported.
|
||
408 | * The flags are defined in container.h, because the flags are used also
|
||
409 | * with lzma_easy_encoder().
|
||
410 | *
|
||
411 | * The preset values are subject to changes between liblzma versions.
|
||
412 | *
|
||
413 | * This function is available only if LZMA1 or LZMA2 encoder has been enabled
|
||
414 | * when building liblzma.
|
||
415 | *
|
||
416 | * \return On success, false is returned. If the preset is not
|
||
417 | * supported, true is returned.
|
||
418 | */
|
||
419 | extern LZMA_API(lzma_bool) lzma_lzma_preset(
|
||
420 | lzma_options_lzma *options, uint32_t preset) lzma_nothrow; |