1/////////////////////////////////////////////////////////////////////////////// 2// 3/// \file 01_compress_easy.c 4/// \brief Compress from stdin to stdout in multi-call mode 5/// 6/// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE 7/// 8/// Example: ./01_compress_easy 6 < foo > foo.xz 9// 10// Author: Lasse Collin 11// 12// This file has been put into the public domain. 13// You can do whatever you want with this file. 14// 15/////////////////////////////////////////////////////////////////////////////// 16 17#include <stdbool.h> 18#include <stdlib.h> 19#include <stdio.h> 20#include <string.h> 21#include <errno.h> 22#include <lzma.h> 23 24 25static void 26show_usage_and_exit(const char *argv0) 27{ 28 fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n" 29 "PRESET is a number 0-9 and can optionally be " 30 "followed by `e' to indicate extreme preset\n", 31 argv0); 32 exit(EXIT_FAILURE); 33} 34 35 36static uint32_t 37get_preset(int argc, char **argv) 38{ 39 // One argument whose first char must be 0-9. 40 if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9') 41 show_usage_and_exit(argv[0]); 42 43 // Calculate the preste level 0-9. 44 uint32_t preset = argv[1][0] - '0'; 45 46 // If there is a second char, it must be 'e'. It will set 47 // the LZMA_PRESET_EXTREME flag. 48 if (argv[1][1] != '\0') { 49 if (argv[1][1] != 'e' || argv[1][2] != '\0') 50 show_usage_and_exit(argv[0]); 51 52 preset |= LZMA_PRESET_EXTREME; 53 } 54 55 return preset; 56} 57 58 59static bool 60init_encoder(lzma_stream *strm, uint32_t preset) 61{ 62 // Initialize the encoder using a preset. Set the integrity to check 63 // to CRC64, which is the default in the xz command line tool. If 64 // the .xz file needs to be decompressed with XZ Embedded, use 65 // LZMA_CHECK_CRC32 instead. 66 lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64); 67 68 // Return successfully if the initialization went fine. 69 if (ret == LZMA_OK) 70 return true; 71 72 // Something went wrong. The possible errors are documented in 73 // lzma/container.h (src/liblzma/api/lzma/container.h in the source 74 // package or e.g. /usr/include/lzma/container.h depending on the 75 // install prefix). 76 const char *msg; 77 switch (ret) { 78 case LZMA_MEM_ERROR: 79 msg = "Memory allocation failed"; 80 break; 81 82 case LZMA_OPTIONS_ERROR: 83 msg = "Specified preset is not supported"; 84 break; 85 86 case LZMA_UNSUPPORTED_CHECK: 87 msg = "Specified integrity check is not supported"; 88 break; 89 90 default: 91 // This is most likely LZMA_PROG_ERROR indicating a bug in 92 // this program or in liblzma. It is inconvenient to have a 93 // separate error message for errors that should be impossible 94 // to occur, but knowing the error code is important for 95 // debugging. That's why it is good to print the error code 96 // at least when there is no good error message to show. 97 msg = "Unknown error, possibly a bug"; 98 break; 99 } 100 101 fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n", 102 msg, ret); 103 return false; 104} 105 106 107static bool 108compress(lzma_stream *strm, FILE *infile, FILE *outfile) 109{ 110 // This will be LZMA_RUN until the end of the input file is reached. 111 // This tells lzma_code() when there will be no more input. 112 lzma_action action = LZMA_RUN; 113 114 // Buffers to temporarily hold uncompressed input 115 // and compressed output. 116 uint8_t inbuf[BUFSIZ]; 117 uint8_t outbuf[BUFSIZ]; 118 119 // Initialize the input and output pointers. Initializing next_in 120 // and avail_in isn't really necessary when we are going to encode 121 // just one file since LZMA_STREAM_INIT takes care of initializing 122 // those already. But it doesn't hurt much and it will be needed 123 // if encoding more than one file like we will in 02_decompress.c. 124 // 125 // While we don't care about strm->total_in or strm->total_out in this 126 // example, it is worth noting that initializing the encoder will 127 // always reset total_in and total_out to zero. But the encoder 128 // initialization doesn't touch next_in, avail_in, next_out, or 129 // avail_out. 130 strm->next_in = NULL; 131 strm->avail_in = 0; 132 strm->next_out = outbuf; 133 strm->avail_out = sizeof(outbuf); 134 135 // Loop until the file has been successfully compressed or until 136 // an error occurs. 137 while (true) { 138 // Fill the input buffer if it is empty. 139 if (strm->avail_in == 0 && !feof(infile)) { 140 strm->next_in = inbuf; 141 strm->avail_in = fread(inbuf, 1, sizeof(inbuf), 142 infile); 143 144 if (ferror(infile)) { 145 fprintf(stderr, "Read error: %s\n", 146 strerror(errno)); 147 return false; 148 } 149 150 // Once the end of the input file has been reached, 151 // we need to tell lzma_code() that no more input 152 // will be coming and that it should finish the 153 // encoding. 154 if (feof(infile)) 155 action = LZMA_FINISH; 156 } 157 158 // Tell liblzma do the actual encoding. 159 // 160 // This reads up to strm->avail_in bytes of input starting 161 // from strm->next_in. avail_in will be decremented and 162 // next_in incremented by an equal amount to match the 163 // number of input bytes consumed. 164 // 165 // Up to strm->avail_out bytes of compressed output will be 166 // written starting from strm->next_out. avail_out and next_out 167 // will be incremented by an equal amount to match the number 168 // of output bytes written. 169 // 170 // The encoder has to do internal buffering, which means that 171 // it may take quite a bit of input before the same data is 172 // available in compressed form in the output buffer. 173 lzma_ret ret = lzma_code(strm, action); 174 175 // If the output buffer is full or if the compression finished 176 // successfully, write the data from the output bufffer to 177 // the output file. 178 if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { 179 // When lzma_code() has returned LZMA_STREAM_END, 180 // the output buffer is likely to be only partially 181 // full. Calculate how much new data there is to 182 // be written to the output file. 183 size_t write_size = sizeof(outbuf) - strm->avail_out; 184 185 if (fwrite(outbuf, 1, write_size, outfile) 186 != write_size) { 187 fprintf(stderr, "Write error: %s\n", 188 strerror(errno)); 189 return false; 190 } 191 192 // Reset next_out and avail_out. 193 strm->next_out = outbuf; 194 strm->avail_out = sizeof(outbuf); 195 } 196 197 // Normally the return value of lzma_code() will be LZMA_OK 198 // until everything has been encoded. 199 if (ret != LZMA_OK) { 200 // Once everything has been encoded successfully, the 201 // return value of lzma_code() will be LZMA_STREAM_END. 202 // 203 // It is important to check for LZMA_STREAM_END. Do not 204 // assume that getting ret != LZMA_OK would mean that 205 // everything has gone well. 206 if (ret == LZMA_STREAM_END) 207 return true; 208 209 // It's not LZMA_OK nor LZMA_STREAM_END, 210 // so it must be an error code. See lzma/base.h 211 // (src/liblzma/api/lzma/base.h in the source package 212 // or e.g. /usr/include/lzma/base.h depending on the 213 // install prefix) for the list and documentation of 214 // possible values. Most values listen in lzma_ret 215 // enumeration aren't possible in this example. 216 const char *msg; 217 switch (ret) { 218 case LZMA_MEM_ERROR: 219 msg = "Memory allocation failed"; 220 break; 221 222 case LZMA_DATA_ERROR: 223 // This error is returned if the compressed 224 // or uncompressed size get near 8 EiB 225 // (2^63 bytes) because that's where the .xz 226 // file format size limits currently are. 227 // That is, the possibility of this error 228 // is mostly theoretical unless you are doing 229 // something very unusual. 230 // 231 // Note that strm->total_in and strm->total_out 232 // have nothing to do with this error. Changing 233 // those variables won't increase or decrease 234 // the chance of getting this error. 235 msg = "File size limits exceeded"; 236 break; 237 238 default: 239 // This is most likely LZMA_PROG_ERROR, but 240 // if this program is buggy (or liblzma has 241 // a bug), it may be e.g. LZMA_BUF_ERROR or 242 // LZMA_OPTIONS_ERROR too. 243 // 244 // It is inconvenient to have a separate 245 // error message for errors that should be 246 // impossible to occur, but knowing the error 247 // code is important for debugging. That's why 248 // it is good to print the error code at least 249 // when there is no good error message to show. 250 msg = "Unknown error, possibly a bug"; 251 break; 252 } 253 254 fprintf(stderr, "Encoder error: %s (error code %u)\n", 255 msg, ret); 256 return false; 257 } 258 } 259} 260 261 262extern int 263main(int argc, char **argv) 264{ 265 // Get the preset number from the command line. 266 uint32_t preset = get_preset(argc, argv); 267 268 // Initialize a lzma_stream structure. When it is allocated on stack, 269 // it is simplest to use LZMA_STREAM_INIT macro like below. When it 270 // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr)) 271 // works (as long as NULL pointers are represented with zero bits 272 // as they are on practically all computers today). 273 lzma_stream strm = LZMA_STREAM_INIT; 274 275 // Initialize the encoder. If it succeeds, compress from 276 // stdin to stdout. 277 bool success = init_encoder(&strm, preset); 278 if (success) 279 success = compress(&strm, stdin, stdout); 280 281 // Free the memory allocated for the encoder. If we were encoding 282 // multiple files, this would only need to be done after the last 283 // file. See 02_decompress.c for handling of multiple files. 284 // 285 // It is OK to call lzma_end() multiple times or when it hasn't been 286 // actually used except initialized with LZMA_STREAM_INIT. 287 lzma_end(&strm); 288 289 // Close stdout to catch possible write errors that can occur 290 // when pending data is flushed from the stdio buffers. 291 if (fclose(stdout)) { 292 fprintf(stderr, "Write error: %s\n", strerror(errno)); 293 success = false; 294 } 295 296 return success ? EXIT_SUCCESS : EXIT_FAILURE; 297} 298