1207753Smm/////////////////////////////////////////////////////////////////////////////// 2207753Smm// 3207753Smm/// \file file_io.c 4207753Smm/// \brief File opening, unlinking, and closing 5207753Smm// 6207753Smm// Author: Lasse Collin 7207753Smm// 8207753Smm// This file has been put into the public domain. 9207753Smm// You can do whatever you want with this file. 10207753Smm// 11207753Smm/////////////////////////////////////////////////////////////////////////////// 12207753Smm 13207753Smm#include "private.h" 14207753Smm 15207753Smm#include <fcntl.h> 16207753Smm 17207753Smm#ifdef TUKLIB_DOSLIKE 18207753Smm# include <io.h> 19207753Smm#else 20207753Smmstatic bool warn_fchown; 21207753Smm#endif 22207753Smm 23207753Smm#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 24207753Smm# include <sys/time.h> 25207753Smm#elif defined(HAVE_UTIME) 26207753Smm# include <utime.h> 27207753Smm#endif 28207753Smm 29207753Smm#include "tuklib_open_stdxxx.h" 30207753Smm 31207753Smm#ifndef O_BINARY 32207753Smm# define O_BINARY 0 33207753Smm#endif 34207753Smm 35207753Smm#ifndef O_NOCTTY 36207753Smm# define O_NOCTTY 0 37207753Smm#endif 38207753Smm 39207753Smm 40207753Smm/// If true, try to create sparse files when decompressing. 41207753Smmstatic bool try_sparse = true; 42207753Smm 43207753Smm#ifndef TUKLIB_DOSLIKE 44207753Smm/// File status flags of standard output. This is used by io_open_dest() 45207753Smm/// and io_close_dest(). 46207753Smmstatic int stdout_flags = 0; 47207753Smm#endif 48207753Smm 49207753Smm 50207753Smmstatic bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 51207753Smm 52207753Smm 53207753Smmextern void 54207753Smmio_init(void) 55207753Smm{ 56223935Smm // Make sure that stdin, stdout, and stderr are connected to 57207753Smm // a valid file descriptor. Exit immediately with exit code ERROR 58207753Smm // if we cannot make the file descriptors valid. Maybe we should 59207753Smm // print an error message, but our stderr could be screwed anyway. 60207753Smm tuklib_open_stdxxx(E_ERROR); 61207753Smm 62207753Smm#ifndef TUKLIB_DOSLIKE 63207753Smm // If fchown() fails setting the owner, we warn about it only if 64207753Smm // we are root. 65207753Smm warn_fchown = geteuid() == 0; 66207753Smm#endif 67207753Smm 68207753Smm#ifdef __DJGPP__ 69207753Smm // Avoid doing useless things when statting files. 70207753Smm // This isn't important but doesn't hurt. 71207753Smm _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT 72207753Smm | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 73207753Smm#endif 74207753Smm 75207753Smm return; 76207753Smm} 77207753Smm 78207753Smm 79207753Smmextern void 80207753Smmio_no_sparse(void) 81207753Smm{ 82207753Smm try_sparse = false; 83207753Smm return; 84207753Smm} 85207753Smm 86207753Smm 87207753Smm/// \brief Unlink a file 88207753Smm/// 89207753Smm/// This tries to verify that the file being unlinked really is the file that 90207753Smm/// we want to unlink by verifying device and inode numbers. There's still 91207753Smm/// a small unavoidable race, but this is much better than nothing (the file 92207753Smm/// could have been moved/replaced even hours earlier). 93207753Smmstatic void 94207753Smmio_unlink(const char *name, const struct stat *known_st) 95207753Smm{ 96207753Smm#if defined(TUKLIB_DOSLIKE) 97207753Smm // On DOS-like systems, st_ino is meaningless, so don't bother 98207753Smm // testing it. Just silence a compiler warning. 99207753Smm (void)known_st; 100207753Smm#else 101207753Smm struct stat new_st; 102207753Smm 103207753Smm // If --force was used, use stat() instead of lstat(). This way 104207753Smm // (de)compressing symlinks works correctly. However, it also means 105207753Smm // that xz cannot detect if a regular file foo is renamed to bar 106207753Smm // and then a symlink foo -> bar is created. Because of stat() 107207753Smm // instead of lstat(), xz will think that foo hasn't been replaced 108207753Smm // with another file. Thus, xz will remove foo even though it no 109207753Smm // longer is the same file that xz used when it started compressing. 110207753Smm // Probably it's not too bad though, so this doesn't need a more 111207753Smm // complex fix. 112207753Smm const int stat_ret = opt_force 113207753Smm ? stat(name, &new_st) : lstat(name, &new_st); 114207753Smm 115207753Smm if (stat_ret 116207753Smm# ifdef __VMS 117207753Smm // st_ino is an array, and we don't want to 118207753Smm // compare st_dev at all. 119207753Smm || memcmp(&new_st.st_ino, &known_st->st_ino, 120207753Smm sizeof(new_st.st_ino)) != 0 121207753Smm# else 122207753Smm // Typical POSIX-like system 123207753Smm || new_st.st_dev != known_st->st_dev 124207753Smm || new_st.st_ino != known_st->st_ino 125207753Smm# endif 126207753Smm ) 127207753Smm // TRANSLATORS: When compression or decompression finishes, 128207753Smm // and xz is going to remove the source file, xz first checks 129207753Smm // if the source file still exists, and if it does, does its 130207753Smm // device and inode numbers match what xz saw when it opened 131207753Smm // the source file. If these checks fail, this message is 132207753Smm // shown, %s being the filename, and the file is not deleted. 133207753Smm // The check for device and inode numbers is there, because 134207753Smm // it is possible that the user has put a new file in place 135207753Smm // of the original file, and in that case it obviously 136207753Smm // shouldn't be removed. 137207753Smm message_error(_("%s: File seems to have been moved, " 138207753Smm "not removing"), name); 139207753Smm else 140207753Smm#endif 141207753Smm // There's a race condition between lstat() and unlink() 142207753Smm // but at least we have tried to avoid removing wrong file. 143207753Smm if (unlink(name)) 144207753Smm message_error(_("%s: Cannot remove: %s"), 145207753Smm name, strerror(errno)); 146207753Smm 147207753Smm return; 148207753Smm} 149207753Smm 150207753Smm 151207753Smm/// \brief Copies owner/group and permissions 152207753Smm/// 153207753Smm/// \todo ACL and EA support 154207753Smm/// 155207753Smmstatic void 156207753Smmio_copy_attrs(const file_pair *pair) 157207753Smm{ 158207753Smm // Skip chown and chmod on Windows. 159207753Smm#ifndef TUKLIB_DOSLIKE 160207753Smm // This function is more tricky than you may think at first. 161207753Smm // Blindly copying permissions may permit users to access the 162207753Smm // destination file who didn't have permission to access the 163207753Smm // source file. 164207753Smm 165207753Smm // Try changing the owner of the file. If we aren't root or the owner 166207753Smm // isn't already us, fchown() probably doesn't succeed. We warn 167207753Smm // about failing fchown() only if we are root. 168207753Smm if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) 169207753Smm message_warning(_("%s: Cannot set the file owner: %s"), 170207753Smm pair->dest_name, strerror(errno)); 171207753Smm 172207753Smm mode_t mode; 173207753Smm 174207753Smm if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { 175207753Smm message_warning(_("%s: Cannot set the file group: %s"), 176207753Smm pair->dest_name, strerror(errno)); 177207753Smm // We can still safely copy some additional permissions: 178207753Smm // `group' must be at least as strict as `other' and 179207753Smm // also vice versa. 180207753Smm // 181207753Smm // NOTE: After this, the owner of the source file may 182207753Smm // get additional permissions. This shouldn't be too bad, 183207753Smm // because the owner would have had permission to chmod 184207753Smm // the original file anyway. 185207753Smm mode = ((pair->src_st.st_mode & 0070) >> 3) 186207753Smm & (pair->src_st.st_mode & 0007); 187207753Smm mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 188207753Smm } else { 189207753Smm // Drop the setuid, setgid, and sticky bits. 190207753Smm mode = pair->src_st.st_mode & 0777; 191207753Smm } 192207753Smm 193207753Smm if (fchmod(pair->dest_fd, mode)) 194207753Smm message_warning(_("%s: Cannot set the file permissions: %s"), 195207753Smm pair->dest_name, strerror(errno)); 196207753Smm#endif 197207753Smm 198207753Smm // Copy the timestamps. We have several possible ways to do this, of 199207753Smm // which some are better in both security and precision. 200207753Smm // 201207753Smm // First, get the nanosecond part of the timestamps. As of writing, 202207753Smm // it's not standardized by POSIX, and there are several names for 203207753Smm // the same thing in struct stat. 204207753Smm long atime_nsec; 205207753Smm long mtime_nsec; 206207753Smm 207207753Smm# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 208207753Smm // GNU and Solaris 209207753Smm atime_nsec = pair->src_st.st_atim.tv_nsec; 210207753Smm mtime_nsec = pair->src_st.st_mtim.tv_nsec; 211207753Smm 212207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 213207753Smm // BSD 214207753Smm atime_nsec = pair->src_st.st_atimespec.tv_nsec; 215207753Smm mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 216207753Smm 217207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 218207753Smm // GNU and BSD without extensions 219207753Smm atime_nsec = pair->src_st.st_atimensec; 220207753Smm mtime_nsec = pair->src_st.st_mtimensec; 221207753Smm 222207753Smm# elif defined(HAVE_STRUCT_STAT_ST_UATIME) 223207753Smm // Tru64 224207753Smm atime_nsec = pair->src_st.st_uatime * 1000; 225207753Smm mtime_nsec = pair->src_st.st_umtime * 1000; 226207753Smm 227207753Smm# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 228207753Smm // UnixWare 229207753Smm atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 230207753Smm mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 231207753Smm 232207753Smm# else 233207753Smm // Safe fallback 234207753Smm atime_nsec = 0; 235207753Smm mtime_nsec = 0; 236207753Smm# endif 237207753Smm 238207753Smm // Construct a structure to hold the timestamps and call appropriate 239207753Smm // function to set the timestamps. 240207753Smm#if defined(HAVE_FUTIMENS) 241207753Smm // Use nanosecond precision. 242207753Smm struct timespec tv[2]; 243207753Smm tv[0].tv_sec = pair->src_st.st_atime; 244207753Smm tv[0].tv_nsec = atime_nsec; 245207753Smm tv[1].tv_sec = pair->src_st.st_mtime; 246207753Smm tv[1].tv_nsec = mtime_nsec; 247207753Smm 248207753Smm (void)futimens(pair->dest_fd, tv); 249207753Smm 250207753Smm#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 251207753Smm // Use microsecond precision. 252207753Smm struct timeval tv[2]; 253207753Smm tv[0].tv_sec = pair->src_st.st_atime; 254207753Smm tv[0].tv_usec = atime_nsec / 1000; 255207753Smm tv[1].tv_sec = pair->src_st.st_mtime; 256207753Smm tv[1].tv_usec = mtime_nsec / 1000; 257207753Smm 258207753Smm# if defined(HAVE_FUTIMES) 259207753Smm (void)futimes(pair->dest_fd, tv); 260207753Smm# elif defined(HAVE_FUTIMESAT) 261207753Smm (void)futimesat(pair->dest_fd, NULL, tv); 262207753Smm# else 263207753Smm // Argh, no function to use a file descriptor to set the timestamp. 264207753Smm (void)utimes(pair->dest_name, tv); 265207753Smm# endif 266207753Smm 267207753Smm#elif defined(HAVE_UTIME) 268207753Smm // Use one-second precision. utime() doesn't support using file 269207753Smm // descriptor either. Some systems have broken utime() prototype 270207753Smm // so don't make this const. 271207753Smm struct utimbuf buf = { 272207753Smm .actime = pair->src_st.st_atime, 273207753Smm .modtime = pair->src_st.st_mtime, 274207753Smm }; 275207753Smm 276207753Smm // Avoid warnings. 277207753Smm (void)atime_nsec; 278207753Smm (void)mtime_nsec; 279207753Smm 280207753Smm (void)utime(pair->dest_name, &buf); 281207753Smm#endif 282207753Smm 283207753Smm return; 284207753Smm} 285207753Smm 286207753Smm 287207753Smm/// Opens the source file. Returns false on success, true on error. 288207753Smmstatic bool 289207753Smmio_open_src_real(file_pair *pair) 290207753Smm{ 291207753Smm // There's nothing to open when reading from stdin. 292207753Smm if (pair->src_name == stdin_filename) { 293207753Smm pair->src_fd = STDIN_FILENO; 294207753Smm#ifdef TUKLIB_DOSLIKE 295207753Smm setmode(STDIN_FILENO, O_BINARY); 296207753Smm#endif 297207753Smm return false; 298207753Smm } 299207753Smm 300207753Smm // Symlinks are not followed unless writing to stdout or --force 301207753Smm // was used. 302207753Smm const bool follow_symlinks = opt_stdout || opt_force; 303207753Smm 304207753Smm // We accept only regular files if we are writing the output 305207753Smm // to disk too. bzip2 allows overriding this with --force but 306207753Smm // gzip and xz don't. 307207753Smm const bool reg_files_only = !opt_stdout; 308207753Smm 309207753Smm // Flags for open() 310207753Smm int flags = O_RDONLY | O_BINARY | O_NOCTTY; 311207753Smm 312207753Smm#ifndef TUKLIB_DOSLIKE 313207753Smm // If we accept only regular files, we need to be careful to avoid 314207753Smm // problems with special files like devices and FIFOs. O_NONBLOCK 315207753Smm // prevents blocking when opening such files. When we want to accept 316207753Smm // special files, we must not use O_NONBLOCK, or otherwise we won't 317207753Smm // block waiting e.g. FIFOs to become readable. 318207753Smm if (reg_files_only) 319207753Smm flags |= O_NONBLOCK; 320207753Smm#endif 321207753Smm 322207753Smm#if defined(O_NOFOLLOW) 323207753Smm if (!follow_symlinks) 324207753Smm flags |= O_NOFOLLOW; 325207753Smm#elif !defined(TUKLIB_DOSLIKE) 326207753Smm // Some POSIX-like systems lack O_NOFOLLOW (it's not required 327207753Smm // by POSIX). Check for symlinks with a separate lstat() on 328207753Smm // these systems. 329207753Smm if (!follow_symlinks) { 330207753Smm struct stat st; 331207753Smm if (lstat(pair->src_name, &st)) { 332207753Smm message_error("%s: %s", pair->src_name, 333207753Smm strerror(errno)); 334207753Smm return true; 335207753Smm 336207753Smm } else if (S_ISLNK(st.st_mode)) { 337207753Smm message_warning(_("%s: Is a symbolic link, " 338207753Smm "skipping"), pair->src_name); 339207753Smm return true; 340207753Smm } 341207753Smm } 342207753Smm#else 343207753Smm // Avoid warnings. 344207753Smm (void)follow_symlinks; 345207753Smm#endif 346207753Smm 347207753Smm // Try to open the file. If we are accepting non-regular files, 348207753Smm // unblock the caught signals so that open() can be interrupted 349207753Smm // if it blocks e.g. due to a FIFO file. 350207753Smm if (!reg_files_only) 351207753Smm signals_unblock(); 352207753Smm 353207753Smm // Maybe this wouldn't need a loop, since all the signal handlers for 354207753Smm // which we don't use SA_RESTART set user_abort to true. But it 355207753Smm // doesn't hurt to have it just in case. 356207753Smm do { 357207753Smm pair->src_fd = open(pair->src_name, flags); 358207753Smm } while (pair->src_fd == -1 && errno == EINTR && !user_abort); 359207753Smm 360207753Smm if (!reg_files_only) 361207753Smm signals_block(); 362207753Smm 363207753Smm if (pair->src_fd == -1) { 364207753Smm // If we were interrupted, don't display any error message. 365207753Smm if (errno == EINTR) { 366207753Smm // All the signals that don't have SA_RESTART 367207753Smm // set user_abort. 368207753Smm assert(user_abort); 369207753Smm return true; 370207753Smm } 371207753Smm 372207753Smm#ifdef O_NOFOLLOW 373213700Smm // Give an understandable error message if the reason 374207753Smm // for failing was that the file was a symbolic link. 375207753Smm // 376207753Smm // Note that at least Linux, OpenBSD, Solaris, and Darwin 377213700Smm // use ELOOP to indicate that O_NOFOLLOW was the reason 378207753Smm // that open() failed. Because there may be 379207753Smm // directories in the pathname, ELOOP may occur also 380207753Smm // because of a symlink loop in the directory part. 381213700Smm // So ELOOP doesn't tell us what actually went wrong, 382213700Smm // and this stupidity went into POSIX-1.2008 too. 383207753Smm // 384207753Smm // FreeBSD associates EMLINK with O_NOFOLLOW and 385207753Smm // Tru64 uses ENOTSUP. We use these directly here 386207753Smm // and skip the lstat() call and the associated race. 387207753Smm // I want to hear if there are other kernels that 388207753Smm // fail with something else than ELOOP with O_NOFOLLOW. 389207753Smm bool was_symlink = false; 390207753Smm 391207753Smm# if defined(__FreeBSD__) || defined(__DragonFly__) 392207753Smm if (errno == EMLINK) 393207753Smm was_symlink = true; 394207753Smm 395207753Smm# elif defined(__digital__) && defined(__unix__) 396207753Smm if (errno == ENOTSUP) 397207753Smm was_symlink = true; 398207753Smm 399207753Smm# elif defined(__NetBSD__) 400213700Smm // As of 2010-09-05, NetBSD doesn't document what errno is 401213700Smm // used with O_NOFOLLOW. It is EFTYPE though, and I 402213700Smm // understood that is very unlikely to change even though 403213700Smm // it is undocumented. 404207753Smm if (errno == EFTYPE) 405207753Smm was_symlink = true; 406207753Smm 407207753Smm# else 408207753Smm if (errno == ELOOP && !follow_symlinks) { 409207753Smm const int saved_errno = errno; 410207753Smm struct stat st; 411207753Smm if (lstat(pair->src_name, &st) == 0 412207753Smm && S_ISLNK(st.st_mode)) 413207753Smm was_symlink = true; 414207753Smm 415207753Smm errno = saved_errno; 416207753Smm } 417207753Smm# endif 418207753Smm 419207753Smm if (was_symlink) 420207753Smm message_warning(_("%s: Is a symbolic link, " 421207753Smm "skipping"), pair->src_name); 422207753Smm else 423207753Smm#endif 424207753Smm // Something else than O_NOFOLLOW failing 425207753Smm // (assuming that the race conditions didn't 426207753Smm // confuse us). 427207753Smm message_error("%s: %s", pair->src_name, 428207753Smm strerror(errno)); 429207753Smm 430207753Smm return true; 431207753Smm } 432207753Smm 433207753Smm#ifndef TUKLIB_DOSLIKE 434207753Smm // Drop O_NONBLOCK, which is used only when we are accepting only 435207753Smm // regular files. After the open() call, we want things to block 436207753Smm // instead of giving EAGAIN. 437207753Smm if (reg_files_only) { 438207753Smm flags = fcntl(pair->src_fd, F_GETFL); 439207753Smm if (flags == -1) 440207753Smm goto error_msg; 441207753Smm 442207753Smm flags &= ~O_NONBLOCK; 443207753Smm 444207753Smm if (fcntl(pair->src_fd, F_SETFL, flags)) 445207753Smm goto error_msg; 446207753Smm } 447207753Smm#endif 448207753Smm 449207753Smm // Stat the source file. We need the result also when we copy 450207753Smm // the permissions, and when unlinking. 451207753Smm if (fstat(pair->src_fd, &pair->src_st)) 452207753Smm goto error_msg; 453207753Smm 454207753Smm if (S_ISDIR(pair->src_st.st_mode)) { 455207753Smm message_warning(_("%s: Is a directory, skipping"), 456207753Smm pair->src_name); 457207753Smm goto error; 458207753Smm } 459207753Smm 460219001Smm if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 461219001Smm message_warning(_("%s: Not a regular file, skipping"), 462219001Smm pair->src_name); 463219001Smm goto error; 464219001Smm } 465207753Smm 466207753Smm#ifndef TUKLIB_DOSLIKE 467219001Smm if (reg_files_only && !opt_force) { 468207753Smm if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 469207753Smm // gzip rejects setuid and setgid files even 470207753Smm // when --force was used. bzip2 doesn't check 471207753Smm // for them, but calls fchown() after fchmod(), 472207753Smm // and many systems automatically drop setuid 473207753Smm // and setgid bits there. 474207753Smm // 475207753Smm // We accept setuid and setgid files if 476207753Smm // --force was used. We drop these bits 477207753Smm // explicitly in io_copy_attr(). 478207753Smm message_warning(_("%s: File has setuid or " 479207753Smm "setgid bit set, skipping"), 480207753Smm pair->src_name); 481207753Smm goto error; 482207753Smm } 483207753Smm 484207753Smm if (pair->src_st.st_mode & S_ISVTX) { 485207753Smm message_warning(_("%s: File has sticky bit " 486207753Smm "set, skipping"), 487207753Smm pair->src_name); 488207753Smm goto error; 489207753Smm } 490207753Smm 491207753Smm if (pair->src_st.st_nlink > 1) { 492207753Smm message_warning(_("%s: Input file has more " 493207753Smm "than one hard link, " 494207753Smm "skipping"), pair->src_name); 495207753Smm goto error; 496207753Smm } 497219001Smm } 498207753Smm#endif 499207753Smm 500207753Smm return false; 501207753Smm 502207753Smmerror_msg: 503207753Smm message_error("%s: %s", pair->src_name, strerror(errno)); 504207753Smmerror: 505207753Smm (void)close(pair->src_fd); 506207753Smm return true; 507207753Smm} 508207753Smm 509207753Smm 510207753Smmextern file_pair * 511207753Smmio_open_src(const char *src_name) 512207753Smm{ 513207753Smm if (is_empty_filename(src_name)) 514207753Smm return NULL; 515207753Smm 516207753Smm // Since we have only one file open at a time, we can use 517207753Smm // a statically allocated structure. 518207753Smm static file_pair pair; 519207753Smm 520207753Smm pair = (file_pair){ 521207753Smm .src_name = src_name, 522207753Smm .dest_name = NULL, 523207753Smm .src_fd = -1, 524207753Smm .dest_fd = -1, 525207753Smm .src_eof = false, 526207753Smm .dest_try_sparse = false, 527207753Smm .dest_pending_sparse = 0, 528207753Smm }; 529207753Smm 530207753Smm // Block the signals, for which we have a custom signal handler, so 531207753Smm // that we don't need to worry about EINTR. 532207753Smm signals_block(); 533207753Smm const bool error = io_open_src_real(&pair); 534207753Smm signals_unblock(); 535207753Smm 536207753Smm return error ? NULL : &pair; 537207753Smm} 538207753Smm 539207753Smm 540207753Smm/// \brief Closes source file of the file_pair structure 541207753Smm/// 542207753Smm/// \param pair File whose src_fd should be closed 543207753Smm/// \param success If true, the file will be removed from the disk if 544207753Smm/// closing succeeds and --keep hasn't been used. 545207753Smmstatic void 546207753Smmio_close_src(file_pair *pair, bool success) 547207753Smm{ 548207753Smm if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 549207753Smm#ifdef TUKLIB_DOSLIKE 550207753Smm (void)close(pair->src_fd); 551207753Smm#endif 552207753Smm 553207753Smm // If we are going to unlink(), do it before closing the file. 554207753Smm // This way there's no risk that someone replaces the file and 555207753Smm // happens to get same inode number, which would make us 556207753Smm // unlink() wrong file. 557207753Smm // 558207753Smm // NOTE: DOS-like systems are an exception to this, because 559207753Smm // they don't allow unlinking files that are open. *sigh* 560207753Smm if (success && !opt_keep_original) 561207753Smm io_unlink(pair->src_name, &pair->src_st); 562207753Smm 563207753Smm#ifndef TUKLIB_DOSLIKE 564207753Smm (void)close(pair->src_fd); 565207753Smm#endif 566207753Smm } 567207753Smm 568207753Smm return; 569207753Smm} 570207753Smm 571207753Smm 572207753Smmstatic bool 573207753Smmio_open_dest_real(file_pair *pair) 574207753Smm{ 575207753Smm if (opt_stdout || pair->src_fd == STDIN_FILENO) { 576207753Smm // We don't modify or free() this. 577207753Smm pair->dest_name = (char *)"(stdout)"; 578207753Smm pair->dest_fd = STDOUT_FILENO; 579207753Smm#ifdef TUKLIB_DOSLIKE 580207753Smm setmode(STDOUT_FILENO, O_BINARY); 581207753Smm#endif 582207753Smm } else { 583207753Smm pair->dest_name = suffix_get_dest_name(pair->src_name); 584207753Smm if (pair->dest_name == NULL) 585207753Smm return true; 586207753Smm 587207753Smm // If --force was used, unlink the target file first. 588207753Smm if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 589207753Smm message_error(_("%s: Cannot remove: %s"), 590207753Smm pair->dest_name, strerror(errno)); 591207753Smm free(pair->dest_name); 592207753Smm return true; 593207753Smm } 594207753Smm 595207753Smm // Open the file. 596207753Smm const int flags = O_WRONLY | O_BINARY | O_NOCTTY 597207753Smm | O_CREAT | O_EXCL; 598207753Smm const mode_t mode = S_IRUSR | S_IWUSR; 599207753Smm pair->dest_fd = open(pair->dest_name, flags, mode); 600207753Smm 601207753Smm if (pair->dest_fd == -1) { 602207753Smm message_error("%s: %s", pair->dest_name, 603207753Smm strerror(errno)); 604207753Smm free(pair->dest_name); 605207753Smm return true; 606207753Smm } 607207753Smm } 608207753Smm 609207753Smm // If this really fails... well, we have a safe fallback. 610207753Smm if (fstat(pair->dest_fd, &pair->dest_st)) { 611207753Smm#if defined(__VMS) 612207753Smm pair->dest_st.st_ino[0] = 0; 613207753Smm pair->dest_st.st_ino[1] = 0; 614207753Smm pair->dest_st.st_ino[2] = 0; 615207753Smm#elif !defined(TUKLIB_DOSLIKE) 616207753Smm pair->dest_st.st_dev = 0; 617207753Smm pair->dest_st.st_ino = 0; 618207753Smm#endif 619207753Smm#ifndef TUKLIB_DOSLIKE 620207753Smm } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 621207753Smm // When writing to standard output, we need to be extra 622207753Smm // careful: 623207753Smm // - It may be connected to something else than 624207753Smm // a regular file. 625207753Smm // - We aren't necessarily writing to a new empty file 626207753Smm // or to the end of an existing file. 627207753Smm // - O_APPEND may be active. 628207753Smm // 629207753Smm // TODO: I'm keeping this disabled for DOS-like systems 630207753Smm // for now. FAT doesn't support sparse files, but NTFS 631207753Smm // does, so maybe this should be enabled on Windows after 632207753Smm // some testing. 633207753Smm if (pair->dest_fd == STDOUT_FILENO) { 634207753Smm if (!S_ISREG(pair->dest_st.st_mode)) 635207753Smm return false; 636207753Smm 637207753Smm const int flags = fcntl(STDOUT_FILENO, F_GETFL); 638207753Smm if (flags == -1) 639207753Smm return false; 640207753Smm 641207753Smm if (flags & O_APPEND) { 642207753Smm // Creating a sparse file is not possible 643207753Smm // when O_APPEND is active (it's used by 644207753Smm // shell's >> redirection). As I understand 645207753Smm // it, it is safe to temporarily disable 646207753Smm // O_APPEND in xz, because if someone 647207753Smm // happened to write to the same file at the 648207753Smm // same time, results would be bad anyway 649207753Smm // (users shouldn't assume that xz uses any 650207753Smm // specific block size when writing data). 651207753Smm // 652207753Smm // The write position may be something else 653207753Smm // than the end of the file, so we must fix 654207753Smm // it to start writing at the end of the file 655207753Smm // to imitate O_APPEND. 656207753Smm if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 657207753Smm return false; 658207753Smm 659207753Smm if (fcntl(STDOUT_FILENO, F_SETFL, 660207753Smm stdout_flags & ~O_APPEND)) 661207753Smm return false; 662207753Smm 663207753Smm // Remember the flags so that io_close_dest() 664207753Smm // can restore them. 665207753Smm stdout_flags = flags; 666207753Smm 667207753Smm } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 668207753Smm != pair->dest_st.st_size) { 669207753Smm // Writing won't start exactly at the end 670207753Smm // of the file. We cannot use sparse output, 671207753Smm // because it would probably corrupt the file. 672207753Smm return false; 673207753Smm } 674207753Smm } 675207753Smm 676207753Smm pair->dest_try_sparse = true; 677207753Smm#endif 678207753Smm } 679207753Smm 680207753Smm return false; 681207753Smm} 682207753Smm 683207753Smm 684207753Smmextern bool 685207753Smmio_open_dest(file_pair *pair) 686207753Smm{ 687207753Smm signals_block(); 688207753Smm const bool ret = io_open_dest_real(pair); 689207753Smm signals_unblock(); 690207753Smm return ret; 691207753Smm} 692207753Smm 693207753Smm 694207753Smm/// \brief Closes destination file of the file_pair structure 695207753Smm/// 696207753Smm/// \param pair File whose dest_fd should be closed 697207753Smm/// \param success If false, the file will be removed from the disk. 698207753Smm/// 699207753Smm/// \return Zero if closing succeeds. On error, -1 is returned and 700207753Smm/// error message printed. 701207753Smmstatic bool 702207753Smmio_close_dest(file_pair *pair, bool success) 703207753Smm{ 704207753Smm#ifndef TUKLIB_DOSLIKE 705207753Smm // If io_open_dest() has disabled O_APPEND, restore it here. 706207753Smm if (stdout_flags != 0) { 707207753Smm assert(pair->dest_fd == STDOUT_FILENO); 708207753Smm 709207753Smm const int fail = fcntl(STDOUT_FILENO, F_SETFL, stdout_flags); 710207753Smm stdout_flags = 0; 711207753Smm 712207753Smm if (fail) { 713207753Smm message_error(_("Error restoring the O_APPEND flag " 714207753Smm "to standard output: %s"), 715207753Smm strerror(errno)); 716207753Smm return true; 717207753Smm } 718207753Smm } 719207753Smm#endif 720207753Smm 721207753Smm if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) 722207753Smm return false; 723207753Smm 724207753Smm if (close(pair->dest_fd)) { 725207753Smm message_error(_("%s: Closing the file failed: %s"), 726207753Smm pair->dest_name, strerror(errno)); 727207753Smm 728207753Smm // Closing destination file failed, so we cannot trust its 729207753Smm // contents. Get rid of junk: 730207753Smm io_unlink(pair->dest_name, &pair->dest_st); 731207753Smm free(pair->dest_name); 732207753Smm return true; 733207753Smm } 734207753Smm 735207753Smm // If the operation using this file wasn't successful, we git rid 736207753Smm // of the junk file. 737207753Smm if (!success) 738207753Smm io_unlink(pair->dest_name, &pair->dest_st); 739207753Smm 740207753Smm free(pair->dest_name); 741207753Smm 742207753Smm return false; 743207753Smm} 744207753Smm 745207753Smm 746207753Smmextern void 747207753Smmio_close(file_pair *pair, bool success) 748207753Smm{ 749207753Smm // Take care of sparseness at the end of the output file. 750207753Smm if (success && pair->dest_try_sparse 751207753Smm && pair->dest_pending_sparse > 0) { 752207753Smm // Seek forward one byte less than the size of the pending 753207753Smm // hole, then write one zero-byte. This way the file grows 754207753Smm // to its correct size. An alternative would be to use 755207753Smm // ftruncate() but that isn't portable enough (e.g. it 756207753Smm // doesn't work with FAT on Linux; FAT isn't that important 757207753Smm // since it doesn't support sparse files anyway, but we don't 758207753Smm // want to create corrupt files on it). 759207753Smm if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, 760207753Smm SEEK_CUR) == -1) { 761207753Smm message_error(_("%s: Seeking failed when trying " 762207753Smm "to create a sparse file: %s"), 763207753Smm pair->dest_name, strerror(errno)); 764207753Smm success = false; 765207753Smm } else { 766207753Smm const uint8_t zero[1] = { '\0' }; 767207753Smm if (io_write_buf(pair, zero, 1)) 768207753Smm success = false; 769207753Smm } 770207753Smm } 771207753Smm 772207753Smm signals_block(); 773207753Smm 774207753Smm // Copy the file attributes. We need to skip this if destination 775207753Smm // file isn't open or it is standard output. 776207753Smm if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) 777207753Smm io_copy_attrs(pair); 778207753Smm 779207753Smm // Close the destination first. If it fails, we must not remove 780207753Smm // the source file! 781207753Smm if (io_close_dest(pair, success)) 782207753Smm success = false; 783207753Smm 784207753Smm // Close the source file, and unlink it if the operation using this 785207753Smm // file pair was successful and we haven't requested to keep the 786207753Smm // source file. 787207753Smm io_close_src(pair, success); 788207753Smm 789207753Smm signals_unblock(); 790207753Smm 791207753Smm return; 792207753Smm} 793207753Smm 794207753Smm 795207753Smmextern size_t 796207753Smmio_read(file_pair *pair, io_buf *buf_union, size_t size) 797207753Smm{ 798207753Smm // We use small buffers here. 799207753Smm assert(size < SSIZE_MAX); 800207753Smm 801207753Smm uint8_t *buf = buf_union->u8; 802207753Smm size_t left = size; 803207753Smm 804207753Smm while (left > 0) { 805207753Smm const ssize_t amount = read(pair->src_fd, buf, left); 806207753Smm 807207753Smm if (amount == 0) { 808207753Smm pair->src_eof = true; 809207753Smm break; 810207753Smm } 811207753Smm 812207753Smm if (amount == -1) { 813207753Smm if (errno == EINTR) { 814207753Smm if (user_abort) 815207753Smm return SIZE_MAX; 816207753Smm 817207753Smm continue; 818207753Smm } 819207753Smm 820207753Smm message_error(_("%s: Read error: %s"), 821207753Smm pair->src_name, strerror(errno)); 822207753Smm 823207753Smm // FIXME Is this needed? 824207753Smm pair->src_eof = true; 825207753Smm 826207753Smm return SIZE_MAX; 827207753Smm } 828207753Smm 829207753Smm buf += (size_t)(amount); 830207753Smm left -= (size_t)(amount); 831207753Smm } 832207753Smm 833207753Smm return size - left; 834207753Smm} 835207753Smm 836207753Smm 837207753Smmextern bool 838207753Smmio_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) 839207753Smm{ 840207753Smm // Using lseek() and read() is more portable than pread() and 841207753Smm // for us it is as good as real pread(). 842207753Smm if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 843207753Smm message_error(_("%s: Error seeking the file: %s"), 844207753Smm pair->src_name, strerror(errno)); 845207753Smm return true; 846207753Smm } 847207753Smm 848207753Smm const size_t amount = io_read(pair, buf, size); 849207753Smm if (amount == SIZE_MAX) 850207753Smm return true; 851207753Smm 852207753Smm if (amount != size) { 853207753Smm message_error(_("%s: Unexpected end of file"), 854207753Smm pair->src_name); 855207753Smm return true; 856207753Smm } 857207753Smm 858207753Smm return false; 859207753Smm} 860207753Smm 861207753Smm 862207753Smmstatic bool 863207753Smmis_sparse(const io_buf *buf) 864207753Smm{ 865207753Smm assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 866207753Smm 867207753Smm for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 868207753Smm if (buf->u64[i] != 0) 869207753Smm return false; 870207753Smm 871207753Smm return true; 872207753Smm} 873207753Smm 874207753Smm 875207753Smmstatic bool 876207753Smmio_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 877207753Smm{ 878207753Smm assert(size < SSIZE_MAX); 879207753Smm 880207753Smm while (size > 0) { 881207753Smm const ssize_t amount = write(pair->dest_fd, buf, size); 882207753Smm if (amount == -1) { 883207753Smm if (errno == EINTR) { 884207753Smm if (user_abort) 885207753Smm return -1; 886207753Smm 887207753Smm continue; 888207753Smm } 889207753Smm 890207753Smm // Handle broken pipe specially. gzip and bzip2 891207753Smm // don't print anything on SIGPIPE. In addition, 892207753Smm // gzip --quiet uses exit status 2 (warning) on 893207753Smm // broken pipe instead of whatever raise(SIGPIPE) 894207753Smm // would make it return. It is there to hide "Broken 895207753Smm // pipe" message on some old shells (probably old 896207753Smm // GNU bash). 897207753Smm // 898207753Smm // We don't do anything special with --quiet, which 899207753Smm // is what bzip2 does too. If we get SIGPIPE, we 900207753Smm // will handle it like other signals by setting 901207753Smm // user_abort, and get EPIPE here. 902207753Smm if (errno != EPIPE) 903207753Smm message_error(_("%s: Write error: %s"), 904207753Smm pair->dest_name, strerror(errno)); 905207753Smm 906207753Smm return true; 907207753Smm } 908207753Smm 909207753Smm buf += (size_t)(amount); 910207753Smm size -= (size_t)(amount); 911207753Smm } 912207753Smm 913207753Smm return false; 914207753Smm} 915207753Smm 916207753Smm 917207753Smmextern bool 918207753Smmio_write(file_pair *pair, const io_buf *buf, size_t size) 919207753Smm{ 920207753Smm assert(size <= IO_BUFFER_SIZE); 921207753Smm 922207753Smm if (pair->dest_try_sparse) { 923207753Smm // Check if the block is sparse (contains only zeros). If it 924207753Smm // sparse, we just store the amount and return. We will take 925207753Smm // care of actually skipping over the hole when we hit the 926207753Smm // next data block or close the file. 927207753Smm // 928207753Smm // Since io_close() requires that dest_pending_sparse > 0 929207753Smm // if the file ends with sparse block, we must also return 930207753Smm // if size == 0 to avoid doing the lseek(). 931207753Smm if (size == IO_BUFFER_SIZE) { 932207753Smm if (is_sparse(buf)) { 933207753Smm pair->dest_pending_sparse += size; 934207753Smm return false; 935207753Smm } 936207753Smm } else if (size == 0) { 937207753Smm return false; 938207753Smm } 939207753Smm 940207753Smm // This is not a sparse block. If we have a pending hole, 941207753Smm // skip it now. 942207753Smm if (pair->dest_pending_sparse > 0) { 943207753Smm if (lseek(pair->dest_fd, pair->dest_pending_sparse, 944207753Smm SEEK_CUR) == -1) { 945207753Smm message_error(_("%s: Seeking failed when " 946207753Smm "trying to create a sparse " 947207753Smm "file: %s"), pair->dest_name, 948207753Smm strerror(errno)); 949207753Smm return true; 950207753Smm } 951207753Smm 952207753Smm pair->dest_pending_sparse = 0; 953207753Smm } 954207753Smm } 955207753Smm 956207753Smm return io_write_buf(pair, buf->u8, size); 957207753Smm} 958