1/* 2 * This file Copyright (C) Mnemosyne LLC 3 * 4 * This file is licensed by the GPL version 2. Works owned by the 5 * Transmission project are granted a special exemption to clause 2(b) 6 * so that the bulk of its code can remain under the MIT license. 7 * This exemption does not extend to derived works not owned by 8 * the Transmission project. 9 * 10 * $Id: fdlimit.c 13110 2011-12-14 05:42:15Z jordan $ 11 */ 12 13#ifdef HAVE_POSIX_FADVISE 14 #ifdef _XOPEN_SOURCE 15 #undef _XOPEN_SOURCE 16 #endif 17 #define _XOPEN_SOURCE 600 18#endif 19 20#include <assert.h> 21#include <errno.h> 22#include <inttypes.h> 23#include <string.h> 24#ifdef SYS_DARWIN 25 #include <fcntl.h> 26#endif 27 28#ifdef HAVE_FALLOCATE64 29 /* FIXME can't find the right #include voodoo to pick up the declaration.. */ 30 extern int fallocate64( int fd, int mode, uint64_t offset, uint64_t len ); 31#endif 32 33#ifdef HAVE_XFS_XFS_H 34 #include <xfs/xfs.h> 35#endif 36 37#include <sys/types.h> 38#include <sys/stat.h> 39#include <sys/time.h> /* getrlimit */ 40#include <sys/resource.h> /* getrlimit */ 41#include <fcntl.h> /* O_LARGEFILE posix_fadvise */ 42#include <unistd.h> /* lseek(), write(), ftruncate(), pread(), pwrite(), etc */ 43 44#include "transmission.h" 45#include "fdlimit.h" 46#include "net.h" 47#include "session.h" 48#include "torrent.h" /* tr_isTorrent() */ 49 50#define dbgmsg( ... ) \ 51 do { \ 52 if( tr_deepLoggingIsActive( ) ) \ 53 tr_deepLog( __FILE__, __LINE__, NULL, __VA_ARGS__ ); \ 54 } while( 0 ) 55 56/*** 57**** 58**** Local Files 59**** 60***/ 61 62#ifndef O_LARGEFILE 63 #define O_LARGEFILE 0 64#endif 65 66#ifndef O_BINARY 67 #define O_BINARY 0 68#endif 69 70#ifndef O_SEQUENTIAL 71 #define O_SEQUENTIAL 0 72#endif 73 74 75static bool 76preallocate_file_sparse( int fd, uint64_t length ) 77{ 78 const char zero = '\0'; 79 bool success = 0; 80 81 if( !length ) 82 success = true; 83 84#ifdef HAVE_FALLOCATE64 85 if( !success ) /* fallocate64 is always preferred, so try it first */ 86 success = !fallocate64( fd, 0, 0, length ); 87#endif 88 89 if( !success ) /* fallback: the old-style seek-and-write */ 90 success = ( lseek( fd, length-1, SEEK_SET ) != -1 ) 91 && ( write( fd, &zero, 1 ) != -1 ) 92 && ( ftruncate( fd, length ) != -1 ); 93 94 return success; 95} 96 97static bool 98preallocate_file_full( const char * filename, uint64_t length ) 99{ 100 bool success = 0; 101 102#ifdef WIN32 103 104 HANDLE hFile = CreateFile( filename, GENERIC_WRITE, 0, 0, CREATE_NEW, FILE_FLAG_RANDOM_ACCESS, 0 ); 105 if( hFile != INVALID_HANDLE_VALUE ) 106 { 107 LARGE_INTEGER li; 108 li.QuadPart = length; 109 success = SetFilePointerEx( hFile, li, NULL, FILE_BEGIN ) && SetEndOfFile( hFile ); 110 CloseHandle( hFile ); 111 } 112 113#else 114 115 int flags = O_RDWR | O_CREAT | O_LARGEFILE; 116 int fd = open( filename, flags, 0666 ); 117 if( fd >= 0 ) 118 { 119# ifdef HAVE_FALLOCATE64 120 if( !success ) 121 { 122 success = !fallocate64( fd, 0, 0, length ); 123 } 124# endif 125# ifdef HAVE_XFS_XFS_H 126 if( !success && platform_test_xfs_fd( fd ) ) 127 { 128 xfs_flock64_t fl; 129 fl.l_whence = 0; 130 fl.l_start = 0; 131 fl.l_len = length; 132 success = !xfsctl( NULL, fd, XFS_IOC_RESVSP64, &fl ); 133 } 134# endif 135# ifdef SYS_DARWIN 136 if( !success ) 137 { 138 fstore_t fst; 139 fst.fst_flags = F_ALLOCATECONTIG; 140 fst.fst_posmode = F_PEOFPOSMODE; 141 fst.fst_offset = 0; 142 fst.fst_length = length; 143 fst.fst_bytesalloc = 0; 144 success = !fcntl( fd, F_PREALLOCATE, &fst ); 145 } 146# endif 147# ifdef HAVE_POSIX_FALLOCATE 148 if( !success ) 149 { 150 success = !posix_fallocate( fd, 0, length ); 151 } 152# endif 153 154 if( !success ) /* if nothing else works, do it the old-fashioned way */ 155 { 156 uint8_t buf[ 4096 ]; 157 memset( buf, 0, sizeof( buf ) ); 158 success = true; 159 while ( success && ( length > 0 ) ) 160 { 161 const int thisPass = MIN( length, sizeof( buf ) ); 162 success = write( fd, buf, thisPass ) == thisPass; 163 length -= thisPass; 164 } 165 } 166 167 close( fd ); 168 } 169 170#endif 171 172 return success; 173} 174 175 176/* portability wrapper for fsync(). */ 177int 178tr_fsync( int fd ) 179{ 180#ifdef WIN32 181 return _commit( fd ); 182#else 183 return fsync( fd ); 184#endif 185} 186 187 188/* Like pread and pwrite, except that the position is undefined afterwards. 189 And of course they are not thread-safe. */ 190 191/* don't use pread/pwrite on old versions of uClibc because they're buggy. 192 * https://trac.transmissionbt.com/ticket/3826 */ 193#ifdef __UCLIBC__ 194#define TR_UCLIBC_CHECK_VERSION(major,minor,micro) \ 195 (__UCLIBC_MAJOR__ > (major) || \ 196 (__UCLIBC_MAJOR__ == (major) && __UCLIBC_MINOR__ > (minor)) || \ 197 (__UCLIBC_MAJOR__ == (major) && __UCLIBC_MINOR__ == (minor) && \ 198 __UCLIBC_SUBLEVEL__ >= (micro))) 199#if !TR_UCLIBC_CHECK_VERSION(0,9,28) 200 #undef HAVE_PREAD 201 #undef HAVE_PWRITE 202#endif 203#endif 204 205#ifdef SYS_DARWIN 206 #define HAVE_PREAD 207 #define HAVE_PWRITE 208#endif 209 210ssize_t 211tr_pread( int fd, void *buf, size_t count, off_t offset ) 212{ 213#ifdef HAVE_PREAD 214 return pread( fd, buf, count, offset ); 215#else 216 const off_t lrc = lseek( fd, offset, SEEK_SET ); 217 if( lrc < 0 ) 218 return -1; 219 return read( fd, buf, count ); 220#endif 221} 222 223ssize_t 224tr_pwrite( int fd, const void *buf, size_t count, off_t offset ) 225{ 226#ifdef HAVE_PWRITE 227 return pwrite( fd, buf, count, offset ); 228#else 229 const off_t lrc = lseek( fd, offset, SEEK_SET ); 230 if( lrc < 0 ) 231 return -1; 232 return write( fd, buf, count ); 233#endif 234} 235 236int 237tr_prefetch( int fd UNUSED, off_t offset UNUSED, size_t count UNUSED ) 238{ 239#ifdef HAVE_POSIX_FADVISE 240 return posix_fadvise( fd, offset, count, POSIX_FADV_WILLNEED ); 241#elif defined(SYS_DARWIN) 242 struct radvisory radv; 243 radv.ra_offset = offset; 244 radv.ra_count = count; 245 return fcntl( fd, F_RDADVISE, &radv ); 246#else 247 return 0; 248#endif 249} 250 251void 252tr_set_file_for_single_pass( int fd ) 253{ 254 if( fd >= 0 ) 255 { 256 /* Set hints about the lookahead buffer and caching. It's okay 257 for these to fail silently, so don't let them affect errno */ 258 const int err = errno; 259#ifdef HAVE_POSIX_FADVISE 260 posix_fadvise( fd, 0, 0, POSIX_FADV_SEQUENTIAL ); 261#endif 262#ifdef SYS_DARWIN 263 fcntl( fd, F_RDAHEAD, 1 ); 264 fcntl( fd, F_NOCACHE, 1 ); 265#endif 266 errno = err; 267 } 268} 269 270static int 271open_local_file( const char * filename, int flags ) 272{ 273 const int fd = open( filename, flags, 0666 ); 274 tr_set_file_for_single_pass( fd ); 275 return fd; 276} 277int 278tr_open_file_for_writing( const char * filename ) 279{ 280 return open_local_file( filename, O_LARGEFILE|O_BINARY|O_CREAT|O_WRONLY ); 281} 282int 283tr_open_file_for_scanning( const char * filename ) 284{ 285 return open_local_file( filename, O_LARGEFILE|O_BINARY|O_SEQUENTIAL|O_RDONLY ); 286} 287 288void 289tr_close_file( int fd ) 290{ 291#if defined(HAVE_POSIX_FADVISE) 292 /* Set hint about not caching this file. 293 It's okay for this to fail silently, so don't let it affect errno */ 294 const int err = errno; 295 posix_fadvise( fd, 0, 0, POSIX_FADV_DONTNEED ); 296 errno = err; 297#endif 298#ifdef SYS_DARWIN 299 /* it's unclear to me from the man pages if this actually flushes out the cache, 300 * but it couldn't hurt... */ 301 fcntl( fd, F_NOCACHE, 1 ); 302#endif 303 close( fd ); 304} 305 306/***** 307****** 308****** 309****** 310*****/ 311 312struct tr_cached_file 313{ 314 bool is_writable; 315 int fd; 316 int torrent_id; 317 tr_file_index_t file_index; 318 time_t used_at; 319}; 320 321static inline bool 322cached_file_is_open( const struct tr_cached_file * o ) 323{ 324 assert( o != NULL ); 325 326 return o->fd >= 0; 327} 328 329static void 330cached_file_close( struct tr_cached_file * o ) 331{ 332 assert( cached_file_is_open( o ) ); 333 334 tr_close_file( o->fd ); 335 o->fd = -1; 336} 337 338/** 339 * returns 0 on success, or an errno value on failure. 340 * errno values include ENOENT if the parent folder doesn't exist, 341 * plus the errno values set by tr_mkdirp() and open(). 342 */ 343static int 344cached_file_open( struct tr_cached_file * o, 345 const char * filename, 346 bool writable, 347 tr_preallocation_mode allocation, 348 uint64_t file_size ) 349{ 350 int flags; 351 struct stat sb; 352 bool alreadyExisted; 353 354 /* create subfolders, if any */ 355 if( writable ) 356 { 357 char * dir = tr_dirname( filename ); 358 const int err = tr_mkdirp( dir, 0777 ) ? errno : 0; 359 if( err ) { 360 tr_err( _( "Couldn't create \"%1$s\": %2$s" ), dir, tr_strerror( err ) ); 361 tr_free( dir ); 362 return err; 363 } 364 tr_free( dir ); 365 } 366 367 alreadyExisted = !stat( filename, &sb ) && S_ISREG( sb.st_mode ); 368 369 if( writable && !alreadyExisted && ( allocation == TR_PREALLOCATE_FULL ) ) 370 if( preallocate_file_full( filename, file_size ) ) 371 tr_dbg( "Preallocated file \"%s\"", filename ); 372 373 /* open the file */ 374 flags = writable ? ( O_RDWR | O_CREAT ) : O_RDONLY; 375 flags |= O_LARGEFILE | O_BINARY | O_SEQUENTIAL; 376 o->fd = open( filename, flags, 0666 ); 377 378 if( o->fd == -1 ) 379 { 380 const int err = errno; 381 tr_err( _( "Couldn't open \"%1$s\": %2$s" ), filename, tr_strerror( err ) ); 382 return err; 383 } 384 385 /* If the file already exists and it's too large, truncate it. 386 * This is a fringe case that happens if a torrent's been updated 387 * and one of the updated torrent's files is smaller. 388 * http://trac.transmissionbt.com/ticket/2228 389 * https://bugs.launchpad.net/ubuntu/+source/transmission/+bug/318249 390 */ 391 if( alreadyExisted && ( file_size < (uint64_t)sb.st_size ) ) 392 { 393 if( ftruncate( o->fd, file_size ) == -1 ) 394 { 395 const int err = errno; 396 tr_err( _( "Couldn't truncate \"%1$s\": %2$s" ), filename, tr_strerror( err ) ); 397 return err; 398 } 399 } 400 401 if( writable && !alreadyExisted && ( allocation == TR_PREALLOCATE_SPARSE ) ) 402 preallocate_file_sparse( o->fd, file_size ); 403 404 /* Many (most?) clients request blocks in ascending order, 405 * so increase the readahead buffer. 406 * Also, disable OS-level caching because "inactive memory" angers users. */ 407 tr_set_file_for_single_pass( o->fd ); 408 409 return 0; 410} 411 412/*** 413**** 414***/ 415 416struct tr_fileset 417{ 418 struct tr_cached_file * begin; 419 const struct tr_cached_file * end; 420}; 421 422static void 423fileset_construct( struct tr_fileset * set, int n ) 424{ 425 struct tr_cached_file * o; 426 const struct tr_cached_file TR_CACHED_FILE_INIT = { 0, -1, 0, 0, 0 }; 427 428 set->begin = tr_new( struct tr_cached_file, n ); 429 set->end = set->begin + n; 430 431 for( o=set->begin; o!=set->end; ++o ) 432 *o = TR_CACHED_FILE_INIT; 433} 434 435static void 436fileset_close_all( struct tr_fileset * set ) 437{ 438 struct tr_cached_file * o; 439 440 if( set != NULL ) 441 for( o=set->begin; o!=set->end; ++o ) 442 if( cached_file_is_open( o ) ) 443 cached_file_close( o ); 444} 445 446static void 447fileset_destruct( struct tr_fileset * set ) 448{ 449 fileset_close_all( set ); 450 tr_free( set->begin ); 451 set->end = set->begin = NULL; 452} 453 454static void 455fileset_close_torrent( struct tr_fileset * set, int torrent_id ) 456{ 457 struct tr_cached_file * o; 458 459 if( set != NULL ) 460 for( o=set->begin; o!=set->end; ++o ) 461 if( ( o->torrent_id == torrent_id ) && cached_file_is_open( o ) ) 462 cached_file_close( o ); 463} 464 465static struct tr_cached_file * 466fileset_lookup( struct tr_fileset * set, int torrent_id, tr_file_index_t i ) 467{ 468 struct tr_cached_file * o; 469 470 if( set != NULL ) 471 for( o=set->begin; o!=set->end; ++o ) 472 if( ( torrent_id == o->torrent_id ) && ( i == o->file_index ) && cached_file_is_open( o ) ) 473 return o; 474 475 return NULL; 476} 477 478static struct tr_cached_file * 479fileset_get_empty_slot( struct tr_fileset * set ) 480{ 481 struct tr_cached_file * cull = NULL; 482 483 if( set->begin != NULL ) 484 { 485 struct tr_cached_file * o; 486 487 /* try to find an unused slot */ 488 for( o=set->begin; o!=set->end; ++o ) 489 if( !cached_file_is_open( o ) ) 490 return o; 491 492 /* all slots are full... recycle the least recently used */ 493 for( cull=NULL, o=set->begin; o!=set->end; ++o ) 494 if( !cull || o->used_at < cull->used_at ) 495 cull = o; 496 497 cached_file_close( cull ); 498 } 499 500 return cull; 501} 502 503/*** 504**** 505**** Startup / Shutdown 506**** 507***/ 508 509struct tr_fdInfo 510{ 511 int peerCount; 512 struct tr_fileset fileset; 513}; 514 515static void 516ensureSessionFdInfoExists( tr_session * session ) 517{ 518 assert( tr_isSession( session ) ); 519 520 if( session->fdInfo == NULL ) 521 { 522 struct rlimit limit; 523 struct tr_fdInfo * i; 524 const int FILE_CACHE_SIZE = 32; 525 526 /* Create the local file cache */ 527 i = tr_new0( struct tr_fdInfo, 1 ); 528 fileset_construct( &i->fileset, FILE_CACHE_SIZE ); 529 session->fdInfo = i; 530 531 /* set the open-file limit to the largest safe size wrt FD_SETSIZE */ 532 if( !getrlimit( RLIMIT_NOFILE, &limit ) ) 533 { 534 const int old_limit = (int) limit.rlim_cur; 535 const int new_limit = MIN( limit.rlim_max, FD_SETSIZE ); 536 if( new_limit != old_limit ) 537 { 538 limit.rlim_cur = new_limit; 539 setrlimit( RLIMIT_NOFILE, &limit ); 540 getrlimit( RLIMIT_NOFILE, &limit ); 541 tr_inf( "Changed open file limit from %d to %d", old_limit, (int)limit.rlim_cur ); 542 } 543 } 544 } 545} 546 547void 548tr_fdClose( tr_session * session ) 549{ 550 if( session && session->fdInfo ) 551 { 552 struct tr_fdInfo * i = session->fdInfo; 553 fileset_destruct( &i->fileset ); 554 tr_free( i ); 555 session->fdInfo = NULL; 556 } 557} 558 559/*** 560**** 561***/ 562 563static struct tr_fileset* 564get_fileset( tr_session * session ) 565{ 566 if( !session ) 567 return NULL; 568 569 ensureSessionFdInfoExists( session ); 570 return &session->fdInfo->fileset; 571} 572 573void 574tr_fdFileClose( tr_session * s, const tr_torrent * tor, tr_file_index_t i ) 575{ 576 struct tr_cached_file * o; 577 578 if(( o = fileset_lookup( get_fileset( s ), tr_torrentId( tor ), i ))) 579 { 580 /* flush writable files so that their mtimes will be 581 * up-to-date when this function returns to the caller... */ 582 if( o->is_writable ) 583 tr_fsync( o->fd ); 584 585 cached_file_close( o ); 586 } 587} 588 589int 590tr_fdFileGetCached( tr_session * s, int torrent_id, tr_file_index_t i, bool writable ) 591{ 592 struct tr_cached_file * o = fileset_lookup( get_fileset( s ), torrent_id, i ); 593 594 if( !o || ( writable && !o->is_writable ) ) 595 return -1; 596 597 o->used_at = tr_time( ); 598 return o->fd; 599} 600 601#ifdef SYS_DARWIN 602 #define TR_STAT_MTIME(sb) ((sb).st_mtimespec.tv_sec) 603#else 604 #define TR_STAT_MTIME(sb) ((sb).st_mtime) 605#endif 606 607bool 608tr_fdFileGetCachedMTime( tr_session * s, int torrent_id, tr_file_index_t i, time_t * mtime ) 609{ 610 bool success; 611 struct stat sb; 612 struct tr_cached_file * o = fileset_lookup( get_fileset( s ), torrent_id, i ); 613 614 if(( success = ( o != NULL ) && !fstat( o->fd, &sb ))) 615 *mtime = TR_STAT_MTIME( sb ); 616 617 return success; 618} 619 620void 621tr_fdTorrentClose( tr_session * session, int torrent_id ) 622{ 623 fileset_close_torrent( get_fileset( session ), torrent_id ); 624} 625 626/* returns an fd on success, or a -1 on failure and sets errno */ 627int 628tr_fdFileCheckout( tr_session * session, 629 int torrent_id, 630 tr_file_index_t i, 631 const char * filename, 632 bool writable, 633 tr_preallocation_mode allocation, 634 uint64_t file_size ) 635{ 636 struct tr_fileset * set = get_fileset( session ); 637 struct tr_cached_file * o = fileset_lookup( set, torrent_id, i ); 638 639 if( o && writable && !o->is_writable ) 640 cached_file_close( o ); /* close it so we can reopen in rw mode */ 641 else if( !o ) 642 o = fileset_get_empty_slot( set ); 643 644 if( !cached_file_is_open( o ) ) 645 { 646 const int err = cached_file_open( o, filename, writable, allocation, file_size ); 647 if( err ) { 648 errno = err; 649 return -1; 650 } 651 652 dbgmsg( "opened '%s' writable %c", filename, writable?'y':'n' ); 653 o->is_writable = writable; 654 } 655 656 dbgmsg( "checking out '%s'", filename ); 657 o->torrent_id = torrent_id; 658 o->file_index = i; 659 o->used_at = tr_time( ); 660 return o->fd; 661} 662 663/*** 664**** 665**** Sockets 666**** 667***/ 668 669int 670tr_fdSocketCreate( tr_session * session, int domain, int type ) 671{ 672 int s = -1; 673 struct tr_fdInfo * gFd; 674 assert( tr_isSession( session ) ); 675 676 ensureSessionFdInfoExists( session ); 677 gFd = session->fdInfo; 678 679 if( gFd->peerCount < session->peerLimit ) 680 if(( s = socket( domain, type, 0 )) < 0 ) 681 if( sockerrno != EAFNOSUPPORT ) 682 tr_err( _( "Couldn't create socket: %s" ), tr_strerror( sockerrno ) ); 683 684 if( s > -1 ) 685 ++gFd->peerCount; 686 687 assert( gFd->peerCount >= 0 ); 688 689 if( s >= 0 ) 690 { 691 static bool buf_logged = false; 692 if( !buf_logged ) 693 { 694 int i; 695 socklen_t size = sizeof( int ); 696 buf_logged = true; 697 getsockopt( s, SOL_SOCKET, SO_SNDBUF, &i, &size ); 698 tr_dbg( "SO_SNDBUF size is %d", i ); 699 getsockopt( s, SOL_SOCKET, SO_RCVBUF, &i, &size ); 700 tr_dbg( "SO_RCVBUF size is %d", i ); 701 } 702 } 703 704 return s; 705} 706 707int 708tr_fdSocketAccept( tr_session * s, int sockfd, tr_address * addr, tr_port * port ) 709{ 710 int fd; 711 unsigned int len; 712 struct tr_fdInfo * gFd; 713 struct sockaddr_storage sock; 714 715 assert( tr_isSession( s ) ); 716 assert( addr ); 717 assert( port ); 718 719 ensureSessionFdInfoExists( s ); 720 gFd = s->fdInfo; 721 722 len = sizeof( struct sockaddr_storage ); 723 fd = accept( sockfd, (struct sockaddr *) &sock, &len ); 724 725 if( fd >= 0 ) 726 { 727 if( ( gFd->peerCount < s->peerLimit ) 728 && tr_address_from_sockaddr_storage( addr, port, &sock ) ) 729 { 730 ++gFd->peerCount; 731 } 732 else 733 { 734 tr_netCloseSocket( fd ); 735 fd = -1; 736 } 737 } 738 739 return fd; 740} 741 742void 743tr_fdSocketClose( tr_session * session, int fd ) 744{ 745 assert( tr_isSession( session ) ); 746 747 if( session->fdInfo != NULL ) 748 { 749 struct tr_fdInfo * gFd = session->fdInfo; 750 751 if( fd >= 0 ) 752 { 753 tr_netCloseSocket( fd ); 754 --gFd->peerCount; 755 } 756 757 assert( gFd->peerCount >= 0 ); 758 } 759} 760