1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file file_io.c 4207753Smm/// \brief File opening, unlinking, and closing 5207753Smm// 6207753Smm// Author: Lasse Collin 7207753Smm// 8207753Smm// This file has been put into the public domain. 9207753Smm// You can do whatever you want with this file. 10207753Smm// 11207753Smm/////////////////////////////////////////////////////////////////////////////// 12207753Smm 13207753Smm#include "private.h" 14207753Smm 15207753Smm#include <fcntl.h> 16207753Smm 17207753Smm#ifdef TUKLIB_DOSLIKE 18207753Smm# include <io.h> 19207753Smm#else 20207753Smmstatic bool warn_fchown; 21207753Smm#endif 22207753Smm 23207753Smm#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 24207753Smm# include <sys/time.h> 25207753Smm#elif defined(HAVE_UTIME) 26207753Smm# include <utime.h> 27207753Smm#endif 28207753Smm 29207753Smm#include "tuklib_open_stdxxx.h" 30207753Smm 31207753Smm#ifndef O_BINARY 32207753Smm# define O_BINARY 0 33207753Smm#endif 34207753Smm 35207753Smm#ifndef O_NOCTTY 36207753Smm# define O_NOCTTY 0 37207753Smm#endif 38207753Smm 39207753Smm 40207753Smm/// If true, try to create sparse files when decompressing. 41207753Smmstatic bool try_sparse = true; 42207753Smm 43207753Smm#ifndef TUKLIB_DOSLIKE 44263286Sdelphij/// Original file status flags of standard output. This is used by 45263286Sdelphij/// io_open_dest() and io_close_dest() to save and restore the flags. 46263286Sdelphijstatic int stdout_flags; 47263286Sdelphijstatic bool restore_stdout_flags = false; 48207753Smm#endif 49207753Smm 50207753Smm 51207753Smmstatic bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 52207753Smm 53207753Smm 54207753Smmextern void 55207753Smmio_init(void) 56207753Smm{ 57223935Smm // Make sure that stdin, stdout, and stderr are connected to 58207753Smm // a valid file descriptor. Exit immediately with exit code ERROR 59207753Smm // if we cannot make the file descriptors valid. Maybe we should 60207753Smm // print an error message, but our stderr could be screwed anyway. 61207753Smm tuklib_open_stdxxx(E_ERROR); 62207753Smm 63207753Smm#ifndef TUKLIB_DOSLIKE 64207753Smm // If fchown() fails setting the owner, we warn about it only if 65207753Smm // we are root. 66207753Smm warn_fchown = geteuid() == 0; 67207753Smm#endif 68207753Smm 69207753Smm#ifdef __DJGPP__ 70207753Smm // Avoid doing useless things when statting files. 71207753Smm // This isn't important but doesn't hurt. 72207753Smm _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT 73207753Smm | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 74207753Smm#endif 75207753Smm 76207753Smm return; 77207753Smm} 78207753Smm 79207753Smm 80207753Smmextern void 81207753Smmio_no_sparse(void) 82207753Smm{ 83207753Smm try_sparse = false; 84207753Smm return; 85207753Smm} 86207753Smm 87207753Smm 88207753Smm/// \brief Unlink a file 89207753Smm/// 90207753Smm/// This tries to verify that the file being unlinked really is the file that 91207753Smm/// we want to unlink by verifying device and inode numbers. There's still 92207753Smm/// a small unavoidable race, but this is much better than nothing (the file 93207753Smm/// could have been moved/replaced even hours earlier). 94207753Smmstatic void 95207753Smmio_unlink(const char *name, const struct stat *known_st) 96207753Smm{ 97207753Smm#if defined(TUKLIB_DOSLIKE) 98207753Smm // On DOS-like systems, st_ino is meaningless, so don't bother 99207753Smm // testing it. Just silence a compiler warning. 100207753Smm (void)known_st; 101207753Smm#else 102207753Smm struct stat new_st; 103207753Smm 104207753Smm // If --force was used, use stat() instead of lstat(). This way 105207753Smm // (de)compressing symlinks works correctly. However, it also means 106207753Smm // that xz cannot detect if a regular file foo is renamed to bar 107207753Smm // and then a symlink foo -> bar is created. Because of stat() 108207753Smm // instead of lstat(), xz will think that foo hasn't been replaced 109207753Smm // with another file. Thus, xz will remove foo even though it no 110207753Smm // longer is the same file that xz used when it started compressing. 111207753Smm // Probably it's not too bad though, so this doesn't need a more 112207753Smm // complex fix. 113207753Smm const int stat_ret = opt_force 114207753Smm ? stat(name, &new_st) : lstat(name, &new_st); 115207753Smm 116207753Smm if (stat_ret 117207753Smm# ifdef __VMS 118207753Smm // st_ino is an array, and we don't want to 119207753Smm // compare st_dev at all. 120207753Smm || memcmp(&new_st.st_ino, &known_st->st_ino, 121207753Smm sizeof(new_st.st_ino)) != 0 122207753Smm# else 123207753Smm // Typical POSIX-like system 124207753Smm || new_st.st_dev != known_st->st_dev 125207753Smm || new_st.st_ino != known_st->st_ino 126207753Smm# endif 127207753Smm ) 128207753Smm // TRANSLATORS: When compression or decompression finishes, 129207753Smm // and xz is going to remove the source file, xz first checks 130207753Smm // if the source file still exists, and if it does, does its 131207753Smm // device and inode numbers match what xz saw when it opened 132207753Smm // the source file. If these checks fail, this message is 133207753Smm // shown, %s being the filename, and the file is not deleted. 134207753Smm // The check for device and inode numbers is there, because 135207753Smm // it is possible that the user has put a new file in place 136207753Smm // of the original file, and in that case it obviously 137207753Smm // shouldn't be removed. 138207753Smm message_error(_("%s: File seems to have been moved, " 139207753Smm "not removing"), name); 140207753Smm else 141207753Smm#endif 142207753Smm // There's a race condition between lstat() and unlink() 143207753Smm // but at least we have tried to avoid removing wrong file. 144207753Smm if (unlink(name)) 145207753Smm message_error(_("%s: Cannot remove: %s"), 146207753Smm name, strerror(errno)); 147207753Smm 148207753Smm return; 149207753Smm} 150207753Smm 151207753Smm 152207753Smm/// \brief Copies owner/group and permissions 153207753Smm/// 154207753Smm/// \todo ACL and EA support 155207753Smm/// 156207753Smmstatic void 157207753Smmio_copy_attrs(const file_pair *pair) 158207753Smm{ 159207753Smm // Skip chown and chmod on Windows. 160207753Smm#ifndef TUKLIB_DOSLIKE 161207753Smm // This function is more tricky than you may think at first. 162207753Smm // Blindly copying permissions may permit users to access the 163207753Smm // destination file who didn't have permission to access the 164207753Smm // source file. 165207753Smm 166207753Smm // Try changing the owner of the file. If we aren't root or the owner 167207753Smm // isn't already us, fchown() probably doesn't succeed. We warn 168207753Smm // about failing fchown() only if we are root. 169207753Smm if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) 170207753Smm message_warning(_("%s: Cannot set the file owner: %s"), 171207753Smm pair->dest_name, strerror(errno)); 172207753Smm 173207753Smm mode_t mode; 174207753Smm 175207753Smm if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { 176207753Smm message_warning(_("%s: Cannot set the file group: %s"), 177207753Smm pair->dest_name, strerror(errno)); 178207753Smm // We can still safely copy some additional permissions: 179207753Smm // `group' must be at least as strict as `other' and 180207753Smm // also vice versa. 181207753Smm // 182207753Smm // NOTE: After this, the owner of the source file may 183207753Smm // get additional permissions. This shouldn't be too bad, 184207753Smm // because the owner would have had permission to chmod 185207753Smm // the original file anyway. 186207753Smm mode = ((pair->src_st.st_mode & 0070) >> 3) 187207753Smm & (pair->src_st.st_mode & 0007); 188207753Smm mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 189207753Smm } else { 190207753Smm // Drop the setuid, setgid, and sticky bits. 191207753Smm mode = pair->src_st.st_mode & 0777; 192207753Smm } 193207753Smm 194207753Smm if (fchmod(pair->dest_fd, mode)) 195207753Smm message_warning(_("%s: Cannot set the file permissions: %s"), 196207753Smm pair->dest_name, strerror(errno)); 197207753Smm#endif 198207753Smm 199207753Smm // Copy the timestamps. We have several possible ways to do this, of 200207753Smm // which some are better in both security and precision. 201207753Smm // 202207753Smm // First, get the nanosecond part of the timestamps. As of writing, 203207753Smm // it's not standardized by POSIX, and there are several names for 204207753Smm // the same thing in struct stat. 205207753Smm long atime_nsec; 206207753Smm long mtime_nsec; 207207753Smm 208207753Smm# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 209207753Smm // GNU and Solaris 210207753Smm atime_nsec = pair->src_st.st_atim.tv_nsec; 211207753Smm mtime_nsec = pair->src_st.st_mtim.tv_nsec; 212207753Smm 213207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 214207753Smm // BSD 215207753Smm atime_nsec = pair->src_st.st_atimespec.tv_nsec; 216207753Smm mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 217207753Smm 218207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 219207753Smm // GNU and BSD without extensions 220207753Smm atime_nsec = pair->src_st.st_atimensec; 221207753Smm mtime_nsec = pair->src_st.st_mtimensec; 222207753Smm 223207753Smm# elif defined(HAVE_STRUCT_STAT_ST_UATIME) 224207753Smm // Tru64 225207753Smm atime_nsec = pair->src_st.st_uatime * 1000; 226207753Smm mtime_nsec = pair->src_st.st_umtime * 1000; 227207753Smm 228207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 229207753Smm // UnixWare 230207753Smm atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 231207753Smm mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 232207753Smm 233207753Smm# else 234207753Smm // Safe fallback 235207753Smm atime_nsec = 0; 236207753Smm mtime_nsec = 0; 237207753Smm# endif 238207753Smm 239207753Smm // Construct a structure to hold the timestamps and call appropriate 240207753Smm // function to set the timestamps. 241207753Smm#if defined(HAVE_FUTIMENS) 242207753Smm // Use nanosecond precision. 243207753Smm struct timespec tv[2]; 244207753Smm tv[0].tv_sec = pair->src_st.st_atime; 245207753Smm tv[0].tv_nsec = atime_nsec; 246207753Smm tv[1].tv_sec = pair->src_st.st_mtime; 247207753Smm tv[1].tv_nsec = mtime_nsec; 248207753Smm 249207753Smm (void)futimens(pair->dest_fd, tv); 250207753Smm 251207753Smm#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 252207753Smm // Use microsecond precision. 253207753Smm struct timeval tv[2]; 254207753Smm tv[0].tv_sec = pair->src_st.st_atime; 255207753Smm tv[0].tv_usec = atime_nsec / 1000; 256207753Smm tv[1].tv_sec = pair->src_st.st_mtime; 257207753Smm tv[1].tv_usec = mtime_nsec / 1000; 258207753Smm 259207753Smm# if defined(HAVE_FUTIMES) 260207753Smm (void)futimes(pair->dest_fd, tv); 261207753Smm# elif defined(HAVE_FUTIMESAT) 262207753Smm (void)futimesat(pair->dest_fd, NULL, tv); 263207753Smm# else 264207753Smm // Argh, no function to use a file descriptor to set the timestamp. 265207753Smm (void)utimes(pair->dest_name, tv); 266207753Smm# endif 267207753Smm 268207753Smm#elif defined(HAVE_UTIME) 269207753Smm // Use one-second precision. utime() doesn't support using file 270207753Smm // descriptor either. Some systems have broken utime() prototype 271207753Smm // so don't make this const. 272207753Smm struct utimbuf buf = { 273207753Smm .actime = pair->src_st.st_atime, 274207753Smm .modtime = pair->src_st.st_mtime, 275207753Smm }; 276207753Smm 277207753Smm // Avoid warnings. 278207753Smm (void)atime_nsec; 279207753Smm (void)mtime_nsec; 280207753Smm 281207753Smm (void)utime(pair->dest_name, &buf); 282207753Smm#endif 283207753Smm 284207753Smm return; 285207753Smm} 286207753Smm 287207753Smm 288207753Smm/// Opens the source file. Returns false on success, true on error. 289207753Smmstatic bool 290207753Smmio_open_src_real(file_pair *pair) 291207753Smm{ 292207753Smm // There's nothing to open when reading from stdin. 293207753Smm if (pair->src_name == stdin_filename) { 294207753Smm pair->src_fd = STDIN_FILENO; 295207753Smm#ifdef TUKLIB_DOSLIKE 296207753Smm setmode(STDIN_FILENO, O_BINARY); 297207753Smm#endif 298207753Smm return false; 299207753Smm } 300207753Smm 301207753Smm // Symlinks are not followed unless writing to stdout or --force 302207753Smm // was used. 303207753Smm const bool follow_symlinks = opt_stdout || opt_force; 304207753Smm 305207753Smm // We accept only regular files if we are writing the output 306207753Smm // to disk too. bzip2 allows overriding this with --force but 307207753Smm // gzip and xz don't. 308207753Smm const bool reg_files_only = !opt_stdout; 309207753Smm 310207753Smm // Flags for open() 311207753Smm int flags = O_RDONLY | O_BINARY | O_NOCTTY; 312207753Smm 313207753Smm#ifndef TUKLIB_DOSLIKE 314207753Smm // If we accept only regular files, we need to be careful to avoid 315207753Smm // problems with special files like devices and FIFOs. O_NONBLOCK 316207753Smm // prevents blocking when opening such files. When we want to accept 317207753Smm // special files, we must not use O_NONBLOCK, or otherwise we won't 318207753Smm // block waiting e.g. FIFOs to become readable. 319207753Smm if (reg_files_only) 320207753Smm flags |= O_NONBLOCK; 321207753Smm#endif 322207753Smm 323207753Smm#if defined(O_NOFOLLOW) 324207753Smm if (!follow_symlinks) 325207753Smm flags |= O_NOFOLLOW; 326207753Smm#elif !defined(TUKLIB_DOSLIKE) 327207753Smm // Some POSIX-like systems lack O_NOFOLLOW (it's not required 328207753Smm // by POSIX). Check for symlinks with a separate lstat() on 329207753Smm // these systems. 330207753Smm if (!follow_symlinks) { 331207753Smm struct stat st; 332207753Smm if (lstat(pair->src_name, &st)) { 333207753Smm message_error("%s: %s", pair->src_name, 334207753Smm strerror(errno)); 335207753Smm return true; 336207753Smm 337207753Smm } else if (S_ISLNK(st.st_mode)) { 338207753Smm message_warning(_("%s: Is a symbolic link, " 339207753Smm "skipping"), pair->src_name); 340207753Smm return true; 341207753Smm } 342207753Smm } 343207753Smm#else 344207753Smm // Avoid warnings. 345207753Smm (void)follow_symlinks; 346207753Smm#endif 347207753Smm 348207753Smm // Try to open the file. If we are accepting non-regular files, 349207753Smm // unblock the caught signals so that open() can be interrupted 350207753Smm // if it blocks e.g. due to a FIFO file. 351207753Smm if (!reg_files_only) 352207753Smm signals_unblock(); 353207753Smm 354207753Smm // Maybe this wouldn't need a loop, since all the signal handlers for 355207753Smm // which we don't use SA_RESTART set user_abort to true. But it 356207753Smm // doesn't hurt to have it just in case. 357207753Smm do { 358207753Smm pair->src_fd = open(pair->src_name, flags); 359207753Smm } while (pair->src_fd == -1 && errno == EINTR && !user_abort); 360207753Smm 361207753Smm if (!reg_files_only) 362207753Smm signals_block(); 363207753Smm 364207753Smm if (pair->src_fd == -1) { 365207753Smm // If we were interrupted, don't display any error message. 366207753Smm if (errno == EINTR) { 367207753Smm // All the signals that don't have SA_RESTART 368207753Smm // set user_abort. 369207753Smm assert(user_abort); 370207753Smm return true; 371207753Smm } 372207753Smm 373207753Smm#ifdef O_NOFOLLOW 374213700Smm // Give an understandable error message if the reason 375207753Smm // for failing was that the file was a symbolic link. 376207753Smm // 377207753Smm // Note that at least Linux, OpenBSD, Solaris, and Darwin 378213700Smm // use ELOOP to indicate that O_NOFOLLOW was the reason 379207753Smm // that open() failed. Because there may be 380207753Smm // directories in the pathname, ELOOP may occur also 381207753Smm // because of a symlink loop in the directory part. 382213700Smm // So ELOOP doesn't tell us what actually went wrong, 383213700Smm // and this stupidity went into POSIX-1.2008 too. 384207753Smm // 385207753Smm // FreeBSD associates EMLINK with O_NOFOLLOW and 386207753Smm // Tru64 uses ENOTSUP. We use these directly here 387207753Smm // and skip the lstat() call and the associated race. 388207753Smm // I want to hear if there are other kernels that 389207753Smm // fail with something else than ELOOP with O_NOFOLLOW. 390207753Smm bool was_symlink = false; 391207753Smm 392207753Smm# if defined(__FreeBSD__) || defined(__DragonFly__) 393207753Smm if (errno == EMLINK) 394207753Smm was_symlink = true; 395207753Smm 396207753Smm# elif defined(__digital__) && defined(__unix__) 397207753Smm if (errno == ENOTSUP) 398207753Smm was_symlink = true; 399207753Smm 400207753Smm# elif defined(__NetBSD__) 401207753Smm if (errno == EFTYPE) 402207753Smm was_symlink = true; 403207753Smm 404207753Smm# else 405207753Smm if (errno == ELOOP && !follow_symlinks) { 406207753Smm const int saved_errno = errno; 407207753Smm struct stat st; 408207753Smm if (lstat(pair->src_name, &st) == 0 409207753Smm && S_ISLNK(st.st_mode)) 410207753Smm was_symlink = true; 411207753Smm 412207753Smm errno = saved_errno; 413207753Smm } 414207753Smm# endif 415207753Smm 416207753Smm if (was_symlink) 417207753Smm message_warning(_("%s: Is a symbolic link, " 418207753Smm "skipping"), pair->src_name); 419207753Smm else 420207753Smm#endif 421207753Smm // Something else than O_NOFOLLOW failing 422207753Smm // (assuming that the race conditions didn't 423207753Smm // confuse us). 424207753Smm message_error("%s: %s", pair->src_name, 425207753Smm strerror(errno)); 426207753Smm 427207753Smm return true; 428207753Smm } 429207753Smm 430207753Smm#ifndef TUKLIB_DOSLIKE 431207753Smm // Drop O_NONBLOCK, which is used only when we are accepting only 432207753Smm // regular files. After the open() call, we want things to block 433207753Smm // instead of giving EAGAIN. 434207753Smm if (reg_files_only) { 435207753Smm flags = fcntl(pair->src_fd, F_GETFL); 436207753Smm if (flags == -1) 437207753Smm goto error_msg; 438207753Smm 439207753Smm flags &= ~O_NONBLOCK; 440207753Smm 441263286Sdelphij if (fcntl(pair->src_fd, F_SETFL, flags) == -1) 442207753Smm goto error_msg; 443207753Smm } 444207753Smm#endif 445207753Smm 446207753Smm // Stat the source file. We need the result also when we copy 447207753Smm // the permissions, and when unlinking. 448207753Smm if (fstat(pair->src_fd, &pair->src_st)) 449207753Smm goto error_msg; 450207753Smm 451207753Smm if (S_ISDIR(pair->src_st.st_mode)) { 452207753Smm message_warning(_("%s: Is a directory, skipping"), 453207753Smm pair->src_name); 454207753Smm goto error; 455207753Smm } 456207753Smm 457219001Smm if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 458219001Smm message_warning(_("%s: Not a regular file, skipping"), 459219001Smm pair->src_name); 460219001Smm goto error; 461219001Smm } 462207753Smm 463207753Smm#ifndef TUKLIB_DOSLIKE 464219001Smm if (reg_files_only && !opt_force) { 465207753Smm if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 466207753Smm // gzip rejects setuid and setgid files even 467207753Smm // when --force was used. bzip2 doesn't check 468207753Smm // for them, but calls fchown() after fchmod(), 469207753Smm // and many systems automatically drop setuid 470207753Smm // and setgid bits there. 471207753Smm // 472207753Smm // We accept setuid and setgid files if 473207753Smm // --force was used. We drop these bits 474207753Smm // explicitly in io_copy_attr(). 475207753Smm message_warning(_("%s: File has setuid or " 476207753Smm "setgid bit set, skipping"), 477207753Smm pair->src_name); 478207753Smm goto error; 479207753Smm } 480207753Smm 481207753Smm if (pair->src_st.st_mode & S_ISVTX) { 482207753Smm message_warning(_("%s: File has sticky bit " 483207753Smm "set, skipping"), 484207753Smm pair->src_name); 485207753Smm goto error; 486207753Smm } 487207753Smm 488207753Smm if (pair->src_st.st_nlink > 1) { 489207753Smm message_warning(_("%s: Input file has more " 490207753Smm "than one hard link, " 491207753Smm "skipping"), pair->src_name); 492207753Smm goto error; 493207753Smm } 494219001Smm } 495207753Smm#endif 496207753Smm 497207753Smm return false; 498207753Smm 499207753Smmerror_msg: 500207753Smm message_error("%s: %s", pair->src_name, strerror(errno)); 501207753Smmerror: 502207753Smm (void)close(pair->src_fd); 503207753Smm return true; 504207753Smm} 505207753Smm 506207753Smm 507207753Smmextern file_pair * 508207753Smmio_open_src(const char *src_name) 509207753Smm{ 510207753Smm if (is_empty_filename(src_name)) 511207753Smm return NULL; 512207753Smm 513207753Smm // Since we have only one file open at a time, we can use 514207753Smm // a statically allocated structure. 515207753Smm static file_pair pair; 516207753Smm 517207753Smm pair = (file_pair){ 518207753Smm .src_name = src_name, 519207753Smm .dest_name = NULL, 520207753Smm .src_fd = -1, 521207753Smm .dest_fd = -1, 522207753Smm .src_eof = false, 523207753Smm .dest_try_sparse = false, 524207753Smm .dest_pending_sparse = 0, 525207753Smm }; 526207753Smm 527207753Smm // Block the signals, for which we have a custom signal handler, so 528207753Smm // that we don't need to worry about EINTR. 529207753Smm signals_block(); 530207753Smm const bool error = io_open_src_real(&pair); 531207753Smm signals_unblock(); 532207753Smm 533207753Smm return error ? NULL : &pair; 534207753Smm} 535207753Smm 536207753Smm 537207753Smm/// \brief Closes source file of the file_pair structure 538207753Smm/// 539207753Smm/// \param pair File whose src_fd should be closed 540207753Smm/// \param success If true, the file will be removed from the disk if 541207753Smm/// closing succeeds and --keep hasn't been used. 542207753Smmstatic void 543207753Smmio_close_src(file_pair *pair, bool success) 544207753Smm{ 545207753Smm if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 546207753Smm#ifdef TUKLIB_DOSLIKE 547207753Smm (void)close(pair->src_fd); 548207753Smm#endif 549207753Smm 550207753Smm // If we are going to unlink(), do it before closing the file. 551207753Smm // This way there's no risk that someone replaces the file and 552207753Smm // happens to get same inode number, which would make us 553207753Smm // unlink() wrong file. 554207753Smm // 555207753Smm // NOTE: DOS-like systems are an exception to this, because 556207753Smm // they don't allow unlinking files that are open. *sigh* 557207753Smm if (success && !opt_keep_original) 558207753Smm io_unlink(pair->src_name, &pair->src_st); 559207753Smm 560207753Smm#ifndef TUKLIB_DOSLIKE 561207753Smm (void)close(pair->src_fd); 562207753Smm#endif 563207753Smm } 564207753Smm 565207753Smm return; 566207753Smm} 567207753Smm 568207753Smm 569207753Smmstatic bool 570207753Smmio_open_dest_real(file_pair *pair) 571207753Smm{ 572207753Smm if (opt_stdout || pair->src_fd == STDIN_FILENO) { 573207753Smm // We don't modify or free() this. 574207753Smm pair->dest_name = (char *)"(stdout)"; 575207753Smm pair->dest_fd = STDOUT_FILENO; 576207753Smm#ifdef TUKLIB_DOSLIKE 577207753Smm setmode(STDOUT_FILENO, O_BINARY); 578207753Smm#endif 579207753Smm } else { 580207753Smm pair->dest_name = suffix_get_dest_name(pair->src_name); 581207753Smm if (pair->dest_name == NULL) 582207753Smm return true; 583207753Smm 584207753Smm // If --force was used, unlink the target file first. 585207753Smm if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 586207753Smm message_error(_("%s: Cannot remove: %s"), 587207753Smm pair->dest_name, strerror(errno)); 588207753Smm free(pair->dest_name); 589207753Smm return true; 590207753Smm } 591207753Smm 592207753Smm // Open the file. 593207753Smm const int flags = O_WRONLY | O_BINARY | O_NOCTTY 594207753Smm | O_CREAT | O_EXCL; 595207753Smm const mode_t mode = S_IRUSR | S_IWUSR; 596207753Smm pair->dest_fd = open(pair->dest_name, flags, mode); 597207753Smm 598207753Smm if (pair->dest_fd == -1) { 599207753Smm message_error("%s: %s", pair->dest_name, 600207753Smm strerror(errno)); 601207753Smm free(pair->dest_name); 602207753Smm return true; 603207753Smm } 604207753Smm } 605207753Smm 606207753Smm // If this really fails... well, we have a safe fallback. 607207753Smm if (fstat(pair->dest_fd, &pair->dest_st)) { 608207753Smm#if defined(__VMS) 609207753Smm pair->dest_st.st_ino[0] = 0; 610207753Smm pair->dest_st.st_ino[1] = 0; 611207753Smm pair->dest_st.st_ino[2] = 0; 612207753Smm#elif !defined(TUKLIB_DOSLIKE) 613207753Smm pair->dest_st.st_dev = 0; 614207753Smm pair->dest_st.st_ino = 0; 615207753Smm#endif 616207753Smm#ifndef TUKLIB_DOSLIKE 617207753Smm } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 618207753Smm // When writing to standard output, we need to be extra 619207753Smm // careful: 620207753Smm // - It may be connected to something else than 621207753Smm // a regular file. 622207753Smm // - We aren't necessarily writing to a new empty file 623207753Smm // or to the end of an existing file. 624207753Smm // - O_APPEND may be active. 625207753Smm // 626207753Smm // TODO: I'm keeping this disabled for DOS-like systems 627207753Smm // for now. FAT doesn't support sparse files, but NTFS 628207753Smm // does, so maybe this should be enabled on Windows after 629207753Smm // some testing. 630207753Smm if (pair->dest_fd == STDOUT_FILENO) { 631207753Smm if (!S_ISREG(pair->dest_st.st_mode)) 632207753Smm return false; 633207753Smm 634263286Sdelphij stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 635263286Sdelphij if (stdout_flags == -1) 636207753Smm return false; 637207753Smm 638263286Sdelphij if (stdout_flags & O_APPEND) { 639207753Smm // Creating a sparse file is not possible 640207753Smm // when O_APPEND is active (it's used by 641207753Smm // shell's >> redirection). As I understand 642207753Smm // it, it is safe to temporarily disable 643207753Smm // O_APPEND in xz, because if someone 644207753Smm // happened to write to the same file at the 645207753Smm // same time, results would be bad anyway 646207753Smm // (users shouldn't assume that xz uses any 647207753Smm // specific block size when writing data). 648207753Smm // 649207753Smm // The write position may be something else 650207753Smm // than the end of the file, so we must fix 651207753Smm // it to start writing at the end of the file 652207753Smm // to imitate O_APPEND. 653207753Smm if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 654207753Smm return false; 655207753Smm 656207753Smm if (fcntl(STDOUT_FILENO, F_SETFL, 657263286Sdelphij stdout_flags & ~O_APPEND) 658263286Sdelphij == -1) 659207753Smm return false; 660207753Smm 661263286Sdelphij // Disabling O_APPEND succeeded. Mark 662263286Sdelphij // that the flags should be restored 663263286Sdelphij // in io_close_dest(). 664263286Sdelphij restore_stdout_flags = true; 665207753Smm 666207753Smm } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 667207753Smm != pair->dest_st.st_size) { 668207753Smm // Writing won't start exactly at the end 669207753Smm // of the file. We cannot use sparse output, 670207753Smm // because it would probably corrupt the file. 671207753Smm return false; 672207753Smm } 673207753Smm } 674207753Smm 675207753Smm pair->dest_try_sparse = true; 676207753Smm#endif 677207753Smm } 678207753Smm 679207753Smm return false; 680207753Smm} 681207753Smm 682207753Smm 683207753Smmextern bool 684207753Smmio_open_dest(file_pair *pair) 685207753Smm{ 686207753Smm signals_block(); 687207753Smm const bool ret = io_open_dest_real(pair); 688207753Smm signals_unblock(); 689207753Smm return ret; 690207753Smm} 691207753Smm 692207753Smm 693207753Smm/// \brief Closes destination file of the file_pair structure 694207753Smm/// 695207753Smm/// \param pair File whose dest_fd should be closed 696207753Smm/// \param success If false, the file will be removed from the disk. 697207753Smm/// 698207753Smm/// \return Zero if closing succeeds. On error, -1 is returned and 699207753Smm/// error message printed. 700207753Smmstatic bool 701207753Smmio_close_dest(file_pair *pair, bool success) 702207753Smm{ 703207753Smm#ifndef TUKLIB_DOSLIKE 704207753Smm // If io_open_dest() has disabled O_APPEND, restore it here. 705263286Sdelphij if (restore_stdout_flags) { 706207753Smm assert(pair->dest_fd == STDOUT_FILENO); 707207753Smm 708263286Sdelphij restore_stdout_flags = false; 709207753Smm 710263286Sdelphij if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { 711207753Smm message_error(_("Error restoring the O_APPEND flag " 712207753Smm "to standard output: %s"), 713207753Smm strerror(errno)); 714207753Smm return true; 715207753Smm } 716207753Smm } 717207753Smm#endif 718207753Smm 719207753Smm if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) 720207753Smm return false; 721207753Smm 722207753Smm if (close(pair->dest_fd)) { 723207753Smm message_error(_("%s: Closing the file failed: %s"), 724207753Smm pair->dest_name, strerror(errno)); 725207753Smm 726207753Smm // Closing destination file failed, so we cannot trust its 727207753Smm // contents. Get rid of junk: 728207753Smm io_unlink(pair->dest_name, &pair->dest_st); 729207753Smm free(pair->dest_name); 730207753Smm return true; 731207753Smm } 732207753Smm 733207753Smm // If the operation using this file wasn't successful, we git rid 734207753Smm // of the junk file. 735207753Smm if (!success) 736207753Smm io_unlink(pair->dest_name, &pair->dest_st); 737207753Smm 738207753Smm free(pair->dest_name); 739207753Smm 740207753Smm return false; 741207753Smm} 742207753Smm 743207753Smm 744207753Smmextern void 745207753Smmio_close(file_pair *pair, bool success) 746207753Smm{ 747207753Smm // Take care of sparseness at the end of the output file. 748207753Smm if (success && pair->dest_try_sparse 749207753Smm && pair->dest_pending_sparse > 0) { 750207753Smm // Seek forward one byte less than the size of the pending 751207753Smm // hole, then write one zero-byte. This way the file grows 752207753Smm // to its correct size. An alternative would be to use 753207753Smm // ftruncate() but that isn't portable enough (e.g. it 754207753Smm // doesn't work with FAT on Linux; FAT isn't that important 755207753Smm // since it doesn't support sparse files anyway, but we don't 756207753Smm // want to create corrupt files on it). 757207753Smm if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, 758207753Smm SEEK_CUR) == -1) { 759207753Smm message_error(_("%s: Seeking failed when trying " 760207753Smm "to create a sparse file: %s"), 761207753Smm pair->dest_name, strerror(errno)); 762207753Smm success = false; 763207753Smm } else { 764207753Smm const uint8_t zero[1] = { '\0' }; 765207753Smm if (io_write_buf(pair, zero, 1)) 766207753Smm success = false; 767207753Smm } 768207753Smm } 769207753Smm 770207753Smm signals_block(); 771207753Smm 772207753Smm // Copy the file attributes. We need to skip this if destination 773207753Smm // file isn't open or it is standard output. 774207753Smm if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) 775207753Smm io_copy_attrs(pair); 776207753Smm 777207753Smm // Close the destination first. If it fails, we must not remove 778207753Smm // the source file! 779207753Smm if (io_close_dest(pair, success)) 780207753Smm success = false; 781207753Smm 782207753Smm // Close the source file, and unlink it if the operation using this 783207753Smm // file pair was successful and we haven't requested to keep the 784207753Smm // source file. 785207753Smm io_close_src(pair, success); 786207753Smm 787207753Smm signals_unblock(); 788207753Smm 789207753Smm return; 790207753Smm} 791207753Smm 792207753Smm 793207753Smmextern size_t 794207753Smmio_read(file_pair *pair, io_buf *buf_union, size_t size) 795207753Smm{ 796207753Smm // We use small buffers here. 797207753Smm assert(size < SSIZE_MAX); 798207753Smm 799207753Smm uint8_t *buf = buf_union->u8; 800207753Smm size_t left = size; 801207753Smm 802207753Smm while (left > 0) { 803207753Smm const ssize_t amount = read(pair->src_fd, buf, left); 804207753Smm 805207753Smm if (amount == 0) { 806207753Smm pair->src_eof = true; 807207753Smm break; 808207753Smm } 809207753Smm 810207753Smm if (amount == -1) { 811207753Smm if (errno == EINTR) { 812207753Smm if (user_abort) 813207753Smm return SIZE_MAX; 814207753Smm 815207753Smm continue; 816207753Smm } 817207753Smm 818207753Smm message_error(_("%s: Read error: %s"), 819207753Smm pair->src_name, strerror(errno)); 820207753Smm 821207753Smm // FIXME Is this needed? 822207753Smm pair->src_eof = true; 823207753Smm 824207753Smm return SIZE_MAX; 825207753Smm } 826207753Smm 827207753Smm buf += (size_t)(amount); 828207753Smm left -= (size_t)(amount); 829207753Smm } 830207753Smm 831207753Smm return size - left; 832207753Smm} 833207753Smm 834207753Smm 835207753Smmextern bool 836207753Smmio_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) 837207753Smm{ 838207753Smm // Using lseek() and read() is more portable than pread() and 839207753Smm // for us it is as good as real pread(). 840207753Smm if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 841207753Smm message_error(_("%s: Error seeking the file: %s"), 842207753Smm pair->src_name, strerror(errno)); 843207753Smm return true; 844207753Smm } 845207753Smm 846207753Smm const size_t amount = io_read(pair, buf, size); 847207753Smm if (amount == SIZE_MAX) 848207753Smm return true; 849207753Smm 850207753Smm if (amount != size) { 851207753Smm message_error(_("%s: Unexpected end of file"), 852207753Smm pair->src_name); 853207753Smm return true; 854207753Smm } 855207753Smm 856207753Smm return false; 857207753Smm} 858207753Smm 859207753Smm 860207753Smmstatic bool 861207753Smmis_sparse(const io_buf *buf) 862207753Smm{ 863207753Smm assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 864207753Smm 865207753Smm for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 866207753Smm if (buf->u64[i] != 0) 867207753Smm return false; 868207753Smm 869207753Smm return true; 870207753Smm} 871207753Smm 872207753Smm 873207753Smmstatic bool 874207753Smmio_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 875207753Smm{ 876207753Smm assert(size < SSIZE_MAX); 877207753Smm 878207753Smm while (size > 0) { 879207753Smm const ssize_t amount = write(pair->dest_fd, buf, size); 880207753Smm if (amount == -1) { 881207753Smm if (errno == EINTR) { 882207753Smm if (user_abort) 883263286Sdelphij return true; 884207753Smm 885207753Smm continue; 886207753Smm } 887207753Smm 888207753Smm // Handle broken pipe specially. gzip and bzip2 889207753Smm // don't print anything on SIGPIPE. In addition, 890207753Smm // gzip --quiet uses exit status 2 (warning) on 891207753Smm // broken pipe instead of whatever raise(SIGPIPE) 892207753Smm // would make it return. It is there to hide "Broken 893207753Smm // pipe" message on some old shells (probably old 894207753Smm // GNU bash). 895207753Smm // 896207753Smm // We don't do anything special with --quiet, which 897207753Smm // is what bzip2 does too. If we get SIGPIPE, we 898207753Smm // will handle it like other signals by setting 899207753Smm // user_abort, and get EPIPE here. 900207753Smm if (errno != EPIPE) 901207753Smm message_error(_("%s: Write error: %s"), 902207753Smm pair->dest_name, strerror(errno)); 903207753Smm 904207753Smm return true; 905207753Smm } 906207753Smm 907207753Smm buf += (size_t)(amount); 908207753Smm size -= (size_t)(amount); 909207753Smm } 910207753Smm 911207753Smm return false; 912207753Smm} 913207753Smm 914207753Smm 915207753Smmextern bool 916207753Smmio_write(file_pair *pair, const io_buf *buf, size_t size) 917207753Smm{ 918207753Smm assert(size <= IO_BUFFER_SIZE); 919207753Smm 920207753Smm if (pair->dest_try_sparse) { 921207753Smm // Check if the block is sparse (contains only zeros). If it 922207753Smm // sparse, we just store the amount and return. We will take 923207753Smm // care of actually skipping over the hole when we hit the 924207753Smm // next data block or close the file. 925207753Smm // 926207753Smm // Since io_close() requires that dest_pending_sparse > 0 927207753Smm // if the file ends with sparse block, we must also return 928207753Smm // if size == 0 to avoid doing the lseek(). 929207753Smm if (size == IO_BUFFER_SIZE) { 930207753Smm if (is_sparse(buf)) { 931207753Smm pair->dest_pending_sparse += size; 932207753Smm return false; 933207753Smm } 934207753Smm } else if (size == 0) { 935207753Smm return false; 936207753Smm } 937207753Smm 938207753Smm // This is not a sparse block. If we have a pending hole, 939207753Smm // skip it now. 940207753Smm if (pair->dest_pending_sparse > 0) { 941207753Smm if (lseek(pair->dest_fd, pair->dest_pending_sparse, 942207753Smm SEEK_CUR) == -1) { 943207753Smm message_error(_("%s: Seeking failed when " 944207753Smm "trying to create a sparse " 945207753Smm "file: %s"), pair->dest_name, 946207753Smm strerror(errno)); 947207753Smm return true; 948207753Smm } 949207753Smm 950207753Smm pair->dest_pending_sparse = 0; 951207753Smm } 952207753Smm } 953207753Smm 954207753Smm return io_write_buf(pair, buf->u8, size); 955207753Smm} 956