1/* Parsing FTP `ls' output. 2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. 4 5This file is part of GNU Wget. 6 7GNU Wget is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3 of the License, or 10(at your option) any later version. 11 12GNU Wget is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with Wget. If not, see <http://www.gnu.org/licenses/>. 19 20Additional permission under GNU GPL version 3 section 7 21 22If you modify this program, or any covered work, by linking or 23combining it with the OpenSSL project's OpenSSL library (or a 24modified version of that library), containing parts covered by the 25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation 26grants you additional permission to convey the resulting work. 27Corresponding Source for a non-source form of such a combination 28shall include the source code for the parts of OpenSSL used as well 29as that of the covered work. */ 30 31#include "wget.h" 32 33#include <stdio.h> 34#include <stdlib.h> 35#include <string.h> 36#ifdef HAVE_UNISTD_H 37# include <unistd.h> 38#endif 39#include <errno.h> 40#include <time.h> 41#include "utils.h" 42#include "ftp.h" 43#include "url.h" 44#include "convert.h" /* for html_quote_string prototype */ 45#include "retr.h" /* for output_stream */ 46 47/* Converts symbolic permissions to number-style ones, e.g. string 48 rwxr-xr-x to 755. For now, it knows nothing of 49 setuid/setgid/sticky. ACLs are ignored. */ 50static int 51symperms (const char *s) 52{ 53 int perms = 0, i; 54 55 if (strlen (s) < 9) 56 return 0; 57 for (i = 0; i < 3; i++, s += 3) 58 { 59 perms <<= 3; 60 perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) + 61 (s[2] == 'x' || s[2] == 's')); 62 } 63 return perms; 64} 65 66 67/* Cleans a line of text so that it can be consistently parsed. Destroys 68 <CR> and <LF> in case that thay occur at the end of the line and 69 replaces all <TAB> character with <SPACE>. Returns the length of the 70 modified line. */ 71static int 72clean_line(char *line) 73{ 74 int len = strlen (line); 75 if (!len) return 0; 76 if (line[len - 1] == '\n') 77 line[--len] = '\0'; 78 if (!len) return 0; 79 if (line[len - 1] == '\r') 80 line[--len] = '\0'; 81 for ( ; *line ; line++ ) if (*line == '\t') *line = ' '; 82 return len; 83} 84 85/* Convert the Un*x-ish style directory listing stored in FILE to a 86 linked list of fileinfo (system-independent) entries. The contents 87 of FILE are considered to be produced by the standard Unix `ls -la' 88 output (whatever that might be). BSD (no group) and SYSV (with 89 group) listings are handled. 90 91 The time stamps are stored in a separate variable, time_t 92 compatible (I hope). The timezones are ignored. */ 93static struct fileinfo * 94ftp_parse_unix_ls (const char *file, int ignore_perms) 95{ 96 FILE *fp; 97 static const char *months[] = { 98 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 99 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 100 }; 101 int next, len, i, error, ignore; 102 int year, month, day; /* for time analysis */ 103 int hour, min, sec; 104 struct tm timestruct, *tnow; 105 time_t timenow; 106 107 char *line, *tok, *ptok; /* tokenizer */ 108 struct fileinfo *dir, *l, cur; /* list creation */ 109 110 fp = fopen (file, "rb"); 111 if (!fp) 112 { 113 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno)); 114 return NULL; 115 } 116 dir = l = NULL; 117 118 /* Line loop to end of file: */ 119 while ((line = read_whole_line (fp)) != NULL) 120 { 121 len = clean_line (line); 122 /* Skip if total... */ 123 if (!strncasecmp (line, "total", 5)) 124 { 125 xfree (line); 126 continue; 127 } 128 /* Get the first token (permissions). */ 129 tok = strtok (line, " "); 130 if (!tok) 131 { 132 xfree (line); 133 continue; 134 } 135 136 cur.name = NULL; 137 cur.linkto = NULL; 138 139 /* Decide whether we deal with a file or a directory. */ 140 switch (*tok) 141 { 142 case '-': 143 cur.type = FT_PLAINFILE; 144 DEBUGP (("PLAINFILE; ")); 145 break; 146 case 'd': 147 cur.type = FT_DIRECTORY; 148 DEBUGP (("DIRECTORY; ")); 149 break; 150 case 'l': 151 cur.type = FT_SYMLINK; 152 DEBUGP (("SYMLINK; ")); 153 break; 154 default: 155 cur.type = FT_UNKNOWN; 156 DEBUGP (("UNKNOWN; ")); 157 break; 158 } 159 160 if (ignore_perms) 161 { 162 switch (cur.type) 163 { 164 case FT_PLAINFILE: 165 cur.perms = 0644; 166 break; 167 case FT_DIRECTORY: 168 cur.perms = 0755; 169 break; 170 default: 171 /*cur.perms = 1023;*/ /* #### What is this? --hniksic */ 172 cur.perms = 0644; 173 } 174 DEBUGP (("implicit perms %0o; ", cur.perms)); 175 } 176 else 177 { 178 cur.perms = symperms (tok + 1); 179 DEBUGP (("perms %0o; ", cur.perms)); 180 } 181 182 error = ignore = 0; /* Erroneous and ignoring entries are 183 treated equally for now. */ 184 year = hour = min = sec = 0; /* Silence the compiler. */ 185 month = day = 0; 186 next = -1; 187 /* While there are tokens on the line, parse them. Next is the 188 number of tokens left until the filename. 189 190 Use the month-name token as the "anchor" (the place where the 191 position wrt the file name is "known"). When a month name is 192 encountered, `next' is set to 5. Also, the preceding 193 characters are parsed to get the file size. 194 195 This tactic is quite dubious when it comes to 196 internationalization issues (non-English month names), but it 197 works for now. */ 198 tok = line; 199 while (ptok = tok, 200 (tok = strtok (NULL, " ")) != NULL) 201 { 202 --next; 203 if (next < 0) /* a month name was not encountered */ 204 { 205 for (i = 0; i < 12; i++) 206 if (!strcmp (tok, months[i])) 207 break; 208 /* If we got a month, it means the token before it is the 209 size, and the filename is three tokens away. */ 210 if (i != 12) 211 { 212 wgint size; 213 214 /* Parse the previous token with str_to_wgint. */ 215 if (ptok == line) 216 { 217 /* Something has gone wrong during parsing. */ 218 error = 1; 219 break; 220 } 221 errno = 0; 222 size = str_to_wgint (ptok, NULL, 10); 223 if (size == WGINT_MAX && errno == ERANGE) 224 /* Out of range -- ignore the size. #### Should 225 we refuse to start the download. */ 226 cur.size = 0; 227 else 228 cur.size = size; 229 DEBUGP (("size: %s; ", number_to_static_string(cur.size))); 230 231 month = i; 232 next = 5; 233 DEBUGP (("month: %s; ", months[month])); 234 } 235 } 236 else if (next == 4) /* days */ 237 { 238 if (tok[1]) /* two-digit... */ 239 day = 10 * (*tok - '0') + tok[1] - '0'; 240 else /* ...or one-digit */ 241 day = *tok - '0'; 242 DEBUGP (("day: %d; ", day)); 243 } 244 else if (next == 3) 245 { 246 /* This ought to be either the time, or the year. Let's 247 be flexible! 248 249 If we have a number x, it's a year. If we have x:y, 250 it's hours and minutes. If we have x:y:z, z are 251 seconds. */ 252 year = 0; 253 min = hour = sec = 0; 254 /* We must deal with digits. */ 255 if (c_isdigit (*tok)) 256 { 257 /* Suppose it's year. */ 258 for (; c_isdigit (*tok); tok++) 259 year = (*tok - '0') + 10 * year; 260 if (*tok == ':') 261 { 262 /* This means these were hours! */ 263 hour = year; 264 year = 0; 265 ++tok; 266 /* Get the minutes... */ 267 for (; c_isdigit (*tok); tok++) 268 min = (*tok - '0') + 10 * min; 269 if (*tok == ':') 270 { 271 /* ...and the seconds. */ 272 ++tok; 273 for (; c_isdigit (*tok); tok++) 274 sec = (*tok - '0') + 10 * sec; 275 } 276 } 277 } 278 if (year) 279 DEBUGP (("year: %d (no tm); ", year)); 280 else 281 DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec)); 282 } 283 else if (next == 2) /* The file name */ 284 { 285 int fnlen; 286 char *p; 287 288 /* Since the file name may contain a SPC, it is possible 289 for strtok to handle it wrong. */ 290 fnlen = strlen (tok); 291 if (fnlen < len - (tok - line)) 292 { 293 /* So we have a SPC in the file name. Restore the 294 original. */ 295 tok[fnlen] = ' '; 296 /* If the file is a symbolic link, it should have a 297 ` -> ' somewhere. */ 298 if (cur.type == FT_SYMLINK) 299 { 300 p = strstr (tok, " -> "); 301 if (!p) 302 { 303 error = 1; 304 break; 305 } 306 cur.linkto = xstrdup (p + 4); 307 DEBUGP (("link to: %s\n", cur.linkto)); 308 /* And separate it from the file name. */ 309 *p = '\0'; 310 } 311 } 312 /* If we have the filename, add it to the list of files or 313 directories. */ 314 /* "." and ".." are an exception! */ 315 if (!strcmp (tok, ".") || !strcmp (tok, "..")) 316 { 317 DEBUGP (("\nIgnoring `.' and `..'; ")); 318 ignore = 1; 319 break; 320 } 321 /* Some FTP sites choose to have ls -F as their default 322 LIST output, which marks the symlinks with a trailing 323 `@', directory names with a trailing `/' and 324 executables with a trailing `*'. This is no problem 325 unless encountering a symbolic link ending with `@', 326 or an executable ending with `*' on a server without 327 default -F output. I believe these cases are very 328 rare. */ 329 fnlen = strlen (tok); /* re-calculate `fnlen' */ 330 cur.name = xmalloc (fnlen + 1); 331 memcpy (cur.name, tok, fnlen + 1); 332 if (fnlen) 333 { 334 if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/') 335 { 336 cur.name[fnlen - 1] = '\0'; 337 DEBUGP (("trailing `/' on dir.\n")); 338 } 339 else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@') 340 { 341 cur.name[fnlen - 1] = '\0'; 342 DEBUGP (("trailing `@' on link.\n")); 343 } 344 else if (cur.type == FT_PLAINFILE 345 && (cur.perms & 0111) 346 && cur.name[fnlen - 1] == '*') 347 { 348 cur.name[fnlen - 1] = '\0'; 349 DEBUGP (("trailing `*' on exec.\n")); 350 } 351 } /* if (fnlen) */ 352 else 353 error = 1; 354 break; 355 } 356 else 357 abort (); 358 } /* while */ 359 360 if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto)) 361 error = 1; 362 363 DEBUGP (("%s\n", cur.name ? cur.name : "")); 364 365 if (error || ignore) 366 { 367 DEBUGP (("Skipping.\n")); 368 xfree_null (cur.name); 369 xfree_null (cur.linkto); 370 xfree (line); 371 continue; 372 } 373 374 if (!dir) 375 { 376 l = dir = xnew (struct fileinfo); 377 memcpy (l, &cur, sizeof (cur)); 378 l->prev = l->next = NULL; 379 } 380 else 381 { 382 cur.prev = l; 383 l->next = xnew (struct fileinfo); 384 l = l->next; 385 memcpy (l, &cur, sizeof (cur)); 386 l->next = NULL; 387 } 388 /* Get the current time. */ 389 timenow = time (NULL); 390 tnow = localtime (&timenow); 391 /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr). */ 392 timestruct.tm_sec = sec; 393 timestruct.tm_min = min; 394 timestruct.tm_hour = hour; 395 timestruct.tm_mday = day; 396 timestruct.tm_mon = month; 397 if (year == 0) 398 { 399 /* Some listings will not specify the year if it is "obvious" 400 that the file was from the previous year. E.g. if today 401 is 97-01-12, and you see a file of Dec 15th, its year is 402 1996, not 1997. Thanks to Vladimir Volovich for 403 mentioning this! */ 404 if (month > tnow->tm_mon) 405 timestruct.tm_year = tnow->tm_year - 1; 406 else 407 timestruct.tm_year = tnow->tm_year; 408 } 409 else 410 timestruct.tm_year = year; 411 if (timestruct.tm_year >= 1900) 412 timestruct.tm_year -= 1900; 413 timestruct.tm_wday = 0; 414 timestruct.tm_yday = 0; 415 timestruct.tm_isdst = -1; 416 l->tstamp = mktime (×truct); /* store the time-stamp */ 417 418 xfree (line); 419 } 420 421 fclose (fp); 422 return dir; 423} 424 425static struct fileinfo * 426ftp_parse_winnt_ls (const char *file) 427{ 428 FILE *fp; 429 int len; 430 int year, month, day; /* for time analysis */ 431 int hour, min; 432 struct tm timestruct; 433 434 char *line, *tok; /* tokenizer */ 435 struct fileinfo *dir, *l, cur; /* list creation */ 436 437 fp = fopen (file, "rb"); 438 if (!fp) 439 { 440 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno)); 441 return NULL; 442 } 443 dir = l = NULL; 444 445 /* Line loop to end of file: */ 446 while ((line = read_whole_line (fp)) != NULL) 447 { 448 len = clean_line (line); 449 450 /* Extracting name is a bit of black magic and we have to do it 451 before `strtok' inserted extra \0 characters in the line 452 string. For the moment let us just suppose that the name starts at 453 column 39 of the listing. This way we could also recognize 454 filenames that begin with a series of space characters (but who 455 really wants to use such filenames anyway?). */ 456 if (len < 40) continue; 457 tok = line + 39; 458 cur.name = xstrdup(tok); 459 DEBUGP(("Name: '%s'\n", cur.name)); 460 461 /* First column: mm-dd-yy. Should atoi() on the month fail, january 462 will be assumed. */ 463 tok = strtok(line, "-"); 464 if (tok == NULL) continue; 465 month = atoi(tok) - 1; 466 if (month < 0) month = 0; 467 tok = strtok(NULL, "-"); 468 if (tok == NULL) continue; 469 day = atoi(tok); 470 tok = strtok(NULL, " "); 471 if (tok == NULL) continue; 472 year = atoi(tok); 473 /* Assuming the epoch starting at 1.1.1970 */ 474 if (year <= 70) year += 100; 475 476 /* Second column: hh:mm[AP]M, listing does not contain value for 477 seconds */ 478 tok = strtok(NULL, ":"); 479 if (tok == NULL) continue; 480 hour = atoi(tok); 481 tok = strtok(NULL, "M"); 482 if (tok == NULL) continue; 483 min = atoi(tok); 484 /* Adjust hour from AM/PM. Just for the record, the sequence goes 485 11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */ 486 tok+=2; 487 if (hour == 12) hour = 0; 488 if (*tok == 'P') hour += 12; 489 490 DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n", 491 year+1900, month, day, hour, min)); 492 493 /* Build the time-stamp (copy & paste from above) */ 494 timestruct.tm_sec = 0; 495 timestruct.tm_min = min; 496 timestruct.tm_hour = hour; 497 timestruct.tm_mday = day; 498 timestruct.tm_mon = month; 499 timestruct.tm_year = year; 500 timestruct.tm_wday = 0; 501 timestruct.tm_yday = 0; 502 timestruct.tm_isdst = -1; 503 cur.tstamp = mktime (×truct); /* store the time-stamp */ 504 505 DEBUGP(("Timestamp: %ld\n", cur.tstamp)); 506 507 /* Third column: Either file length, or <DIR>. We also set the 508 permissions (guessed as 0644 for plain files and 0755 for 509 directories as the listing does not give us a clue) and filetype 510 here. */ 511 tok = strtok(NULL, " "); 512 if (tok == NULL) continue; 513 while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " "); 514 if (tok == NULL) continue; 515 if (*tok == '<') 516 { 517 cur.type = FT_DIRECTORY; 518 cur.size = 0; 519 cur.perms = 0755; 520 DEBUGP(("Directory\n")); 521 } 522 else 523 { 524 wgint size; 525 cur.type = FT_PLAINFILE; 526 errno = 0; 527 size = str_to_wgint (tok, NULL, 10); 528 if (size == WGINT_MAX && errno == ERANGE) 529 cur.size = 0; /* overflow */ 530 else 531 cur.size = size; 532 cur.perms = 0644; 533 DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size))); 534 } 535 536 cur.linkto = NULL; 537 538 /* And put everything into the linked list */ 539 if (!dir) 540 { 541 l = dir = xnew (struct fileinfo); 542 memcpy (l, &cur, sizeof (cur)); 543 l->prev = l->next = NULL; 544 } 545 else 546 { 547 cur.prev = l; 548 l->next = xnew (struct fileinfo); 549 l = l->next; 550 memcpy (l, &cur, sizeof (cur)); 551 l->next = NULL; 552 } 553 554 xfree (line); 555 } 556 557 fclose(fp); 558 return dir; 559} 560 561 562 563/* Convert the VMS-style directory listing stored in "file" to a 564 linked list of fileinfo (system-independent) entries. The contents 565 of FILE are considered to be produced by the standard VMS 566 "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command, 567 more or less. (Different VMS FTP servers may have different headers, 568 and may not supply the same data, but all should be subsets of this.) 569 570 VMS normally provides local (server) time and date information. 571 Define the logical name or environment variable 572 "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local 573 times if different from the remote local times. 574 575 2005-02-23 SMS. 576 Added code to eliminate "^" escape characters from ODS5 extended file 577 names. The TCPIP FTP server (V5.4) seems to prefer requests which do 578 not use the escaped names which it provides. 579*/ 580 581#define VMS_DEFAULT_PROT_FILE 0644 582#define VMS_DEFAULT_PROT_DIR 0755 583 584/* 2005-02-23 SMS. 585 eat_carets(). 586 587 Delete ODS5 extended file name escape characters ("^") in the 588 original buffer. 589 Note that the current scheme does not handle all EFN cases, but it 590 could be made more complicated. 591*/ 592 593static void eat_carets( char *str) 594/* char *str; Source pointer. */ 595{ 596 char *strd; /* Destination pointer. */ 597 char hdgt; 598 unsigned char uchr; 599 unsigned char prop; 600 601 /* Skip ahead to the first "^", if any. */ 602 while ((*str != '\0') && (*str != '^')) 603 str++; 604 605 /* If no caret was found, quit early. */ 606 if (*str != '\0') 607 { 608 /* Shift characters leftward as carets are found. */ 609 strd = str; 610 while (*str != '\0') 611 { 612 uchr = *str; 613 if (uchr == '^') 614 { 615 /* Found a caret. Skip it, and check the next character. */ 616 uchr = *(++str); 617 prop = char_prop[ uchr]; 618 if (prop& 64) 619 { 620 /* Hex digit. Get char code from this and next hex digit. */ 621 if (uchr <= '9') 622 { 623 hdgt = uchr- '0'; /* '0' - '9' -> 0 - 9. */ 624 } 625 else 626 { 627 hdgt = ((uchr- 'A')& 7)+ 10; /* [Aa] - [Ff] -> 10 - 15. */ 628 } 629 hdgt <<= 4; /* X16. */ 630 uchr = *(++str); /* Next char must be hex digit. */ 631 if (uchr <= '9') 632 { 633 uchr = hdgt+ uchr- '0'; 634 } 635 else 636 { 637 uchr = hdgt+ ((uchr- 'A')& 15)+ 10; 638 } 639 } 640 else if (uchr == '_') 641 { 642 /* Convert escaped "_" to " ". */ 643 uchr = ' '; 644 } 645 else if (uchr == '/') 646 { 647 /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */ 648 /* Note that this is a left-over from Info-ZIP code, and is 649 probably of little value here, except perhaps to avoid 650 directory confusion which an unconverted slash might cause. 651 */ 652 uchr = '?'; 653 } 654 /* Else, not a hex digit. Must be a simple escaped character 655 (or Unicode, which is not yet handled here). 656 */ 657 } 658 /* Else, not a caret. Use as-is. */ 659 *strd = uchr; 660 661 /* Advance destination and source pointers. */ 662 strd++; 663 str++; 664 } 665 /* Terminate the destination string. */ 666 *strd = '\0'; 667 } 668} 669 670 671static struct fileinfo * 672ftp_parse_vms_ls (const char *file) 673{ 674 FILE *fp; 675 int dt, i, j, len; 676 int perms; 677 time_t timenow; 678 struct tm *timestruct; 679 char date_str[ 32]; 680 681 char *line, *tok; /* tokenizer */ 682 struct fileinfo *dir, *l, cur; /* list creation */ 683 684 fp = fopen (file, "r"); 685 if (!fp) 686 { 687 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno)); 688 return NULL; 689 } 690 dir = l = NULL; 691 692 /* Skip blank lines, Directory heading, and more blank lines. */ 693 694 j = 0; /* Expecting initial blank line(s). */ 695 while (1) 696 { 697 line = read_whole_line (fp); 698 if (line == NULL) 699 { 700 break; 701 } 702 else 703 { 704 i = clean_line (line); 705 if (i <= 0) 706 { 707 xfree (line); /* Free useless line storage. */ 708 continue; /* Blank line. Keep looking. */ 709 } 710 else 711 { 712 if ((j == 0) && (line[ i- 1] == ']')) 713 { 714 /* Found Directory heading line. Next non-blank line 715 is significant. 716 */ 717 j = 1; 718 } 719 else if (!strncmp (line, "Total of ", 9)) 720 { 721 /* Found "Total of ..." footing line. No valid data 722 will follow (empty directory). 723 */ 724 xfree (line); /* Free useless line storage. */ 725 line = NULL; /* Arrange for early exit. */ 726 break; 727 } 728 else 729 { 730 break; /* Must be significant data. */ 731 } 732 } 733 xfree (line); /* Free useless line storage. */ 734 } 735 } 736 737 /* Read remainder of file until the next blank line or EOF. */ 738 739 while (line != NULL) 740 { 741 char *p; 742 743 /* The first token is the file name. After a long name, other 744 data may be on the following line. A valid directory name ends 745 in ".DIR;1" (any case), although some VMS FTP servers may omit 746 the version number (";1"). 747 */ 748 749 tok = strtok(line, " "); 750 if (tok == NULL) tok = line; 751 DEBUGP(("file name: '%s'\n", tok)); 752 753 /* Stripping the version number on a VMS system would be wrong. 754 It may be foolish on a non-VMS system, too, but that's someone 755 else's problem. (Define PRESERVE_VMS_VERSIONS for proper 756 operation on other operating systems.) 757 758 2005-02-23 SMS. 759 ODS5 extended file names may contain escaped semi-colons, so 760 the version number is identified as right-side decimal digits 761 led by a non-escaped semi-colon. It may be absent. 762 */ 763 764#if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) 765 for (p = tok+ strlen( tok); (--p > tok) && c_isdigit( *p); ); 766 if ((*p == ';') && (*(p- 1) != '^')) 767 { 768 *p = '\0'; 769 } 770#endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */ 771 772 /* 2005-02-23 SMS. 773 Eliminate "^" escape characters from ODS5 extended file name. 774 (A caret is invalid in an ODS2 name, so this is always safe.) 775 */ 776 eat_carets( tok); 777 DEBUGP(("file name-^: '%s'\n", tok)); 778 779 /* Differentiate between a directory and any other file. A VMS 780 listing may not include file protections (permissions). Set a 781 default permissions value (according to the file type), which 782 may be overwritten later. Store directory names without the 783 ".DIR;1" file type and version number, as the plain name is 784 what will work in a CWD command. 785 */ 786 len = strlen( tok); 787 if (!strncasecmp( (tok+ (len- 4)), ".DIR", 4)) 788 { 789 *(tok+ (len -= 4)) = '\0'; /* Discard ".DIR". */ 790 cur.type = FT_DIRECTORY; 791 cur.perms = VMS_DEFAULT_PROT_DIR; 792 DEBUGP(("Directory (nv)\n")); 793 } 794 else if (!strncasecmp( (tok+ (len- 6)), ".DIR;1", 6)) 795 { 796 *(tok+ (len -= 6)) = '\0'; /* Discard ".DIR;1". */ 797 cur.type = FT_DIRECTORY; 798 cur.perms = VMS_DEFAULT_PROT_DIR; 799 DEBUGP(("Directory (v)\n")); 800 } 801 else 802 { 803 cur.type = FT_PLAINFILE; 804 cur.perms = VMS_DEFAULT_PROT_FILE; 805 DEBUGP(("File\n")); 806 } 807 cur.name = xstrdup(tok); 808 DEBUGP(("Name: '%s'\n", cur.name)); 809 810 /* Null the date and time string. */ 811 *date_str = '\0'; 812 813 /* VMS lacks symbolic links. */ 814 cur.linkto = NULL; 815 816 /* VMS reports file sizes in (512-byte) disk blocks, not bytes, 817 hence useless for an integrity check based on byte-count. 818 Set size to unknown. 819 */ 820 cur.size = 0; 821 822 /* Get token 2, if any. A long name may force all other data onto 823 a second line. If needed, read the second line. 824 */ 825 826 tok = strtok(NULL, " "); 827 if (tok == NULL) 828 { 829 DEBUGP(("Getting additional line.\n")); 830 xfree (line); 831 line = read_whole_line (fp); 832 if (!line) 833 { 834 DEBUGP(("EOF. Leaving listing parser.\n")); 835 break; 836 } 837 838 /* Second line must begin with " ". Otherwise, it's a first 839 line (and we may be confused). 840 */ 841 if (i <= 0) 842 { 843 /* Blank line. End of significant file listing. */ 844 DEBUGP(("Blank line. Leaving listing parser.\n")); 845 xfree (line); /* Free useless line storage. */ 846 break; 847 } 848 else if (line[ 0] != ' ') 849 { 850 DEBUGP(("Non-blank in column 1. Must be a new file name?\n")); 851 continue; 852 } 853 else 854 { 855 tok = strtok (line, " "); 856 if (tok == NULL) 857 { 858 /* Unexpected non-empty but apparently blank line. */ 859 DEBUGP(("Null token. Leaving listing parser.\n")); 860 xfree (line); /* Free useless line storage. */ 861 break; 862 } 863 } 864 } 865 866 /* Analyze tokens. (Order is not significant, except date must 867 precede time.) 868 869 Size: ddd or ddd/ddd (where "ddd" is a decimal number) 870 Date: DD-MMM-YYYY 871 Time: HH:MM or HH:MM:SS or HH:MM:SS.CC 872 Owner: [user] or [user,group] 873 Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some 874 subset thereof, for System, Owner, Group, World. 875 876 If permission is lacking, info may be replaced by the string: 877 "No privilege for attempted operation". 878 */ 879 while (tok != NULL) 880 { 881 DEBUGP (("Token: >%s<: ", tok)); 882 883 if ((strlen( tok) < 12) && (strchr( tok, '-') != NULL)) 884 { 885 /* Date. */ 886 DEBUGP (("Date.\n")); 887 strcpy( date_str, tok); 888 strcat( date_str, " "); 889 } 890 else if ((strlen( tok) < 12) && (strchr( tok, ':') != NULL)) 891 { 892 /* Time. */ 893 DEBUGP (("Time. ")); 894 strncat( date_str, 895 tok, 896 (sizeof( date_str)- strlen( date_str)- 1)); 897 DEBUGP (("Date time: >%s<\n", date_str)); 898 } 899 else if (strchr( tok, '[') != NULL) 900 { 901 /* Owner. (Ignore.) */ 902 DEBUGP (("Owner.\n")); 903 } 904 else if (strchr( tok, '(') != NULL) 905 { 906 /* Protections (permissions). */ 907 perms = 0; 908 j = 0; 909 for (i = 0; i < strlen( tok); i++) 910 { 911 switch (tok[ i]) 912 { 913 case '(': 914 break; 915 case ')': 916 break; 917 case ',': 918 if (j == 0) 919 { 920 perms = 0; 921 j = 1; 922 } 923 else 924 { 925 perms <<= 3; 926 } 927 break; 928 case 'R': 929 perms |= 4; 930 break; 931 case 'W': 932 perms |= 2; 933 break; 934 case 'E': 935 perms |= 1; 936 break; 937 case 'D': 938 perms |= 2; 939 break; 940 } 941 } 942 cur.perms = perms; 943 DEBUGP (("Prot. perms = %0o.\n", cur.perms)); 944 } 945 else 946 { 947 /* Nondescript. Probably size(s), probably in blocks. 948 Could be "No privilege ..." message. (Ignore.) 949 */ 950 DEBUGP (("Ignored (size?).\n")); 951 } 952 953 tok = strtok (NULL, " "); 954 } 955 956 /* Tokens exhausted. Interpret the data, and fill in the 957 structure. 958 */ 959 /* Fill tm timestruct according to date-time string. Fractional 960 seconds are ignored. Default to current time, if conversion 961 fails. 962 */ 963 timenow = time( NULL); 964 timestruct = localtime( &timenow ); 965 strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct); 966 967 /* Convert struct tm local time to time_t local time. */ 968 timenow = mktime (timestruct); 969 /* Offset local time according to environment variable (seconds). */ 970 if ((tok = getenv( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL) 971 { 972 dt = atoi( tok); 973 DEBUGP (("Time differential = %d.\n", dt)); 974 } 975 else 976 { 977 dt = 0; 978 } 979 980 if (dt >= 0) 981 { 982 timenow += dt; 983 } 984 else 985 { 986 timenow -= (-dt); 987 } 988 cur.tstamp = timenow; /* Store the time-stamp. */ 989 DEBUGP(("Timestamp: %ld\n", cur.tstamp)); 990 991 /* Add the data for this item to the linked list, */ 992 if (!dir) 993 { 994 l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo)); 995 memcpy (l, &cur, sizeof (cur)); 996 l->prev = l->next = NULL; 997 } 998 else 999 { 1000 cur.prev = l; 1001 l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo)); 1002 l = l->next; 1003 memcpy (l, &cur, sizeof (cur)); 1004 l->next = NULL; 1005 } 1006 1007 /* Free old line storage. Read a new line. */ 1008 xfree (line); 1009 line = read_whole_line (fp); 1010 if (line != NULL) 1011 { 1012 i = clean_line (line); 1013 if (i <= 0) 1014 { 1015 /* Blank line. End of significant file listing. */ 1016 xfree (line); /* Free useless line storage. */ 1017 break; 1018 } 1019 } 1020 } 1021 1022 fclose (fp); 1023 return dir; 1024} 1025 1026 1027/* This function switches between the correct parsing routine depending on 1028 the SYSTEM_TYPE. The system type should be based on the result of the 1029 "SYST" response of the FTP server. According to this repsonse we will 1030 use on of the three different listing parsers that cover the most of FTP 1031 servers used nowadays. */ 1032 1033struct fileinfo * 1034ftp_parse_ls (const char *file, const enum stype system_type) 1035{ 1036 switch (system_type) 1037 { 1038 case ST_UNIX: 1039 return ftp_parse_unix_ls (file, 0); 1040 case ST_WINNT: 1041 { 1042 /* Detect whether the listing is simulating the UNIX format */ 1043 FILE *fp; 1044 int c; 1045 fp = fopen (file, "rb"); 1046 if (!fp) 1047 { 1048 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno)); 1049 return NULL; 1050 } 1051 c = fgetc(fp); 1052 fclose(fp); 1053 /* If the first character of the file is '0'-'9', it's WINNT 1054 format. */ 1055 if (c >= '0' && c <='9') 1056 return ftp_parse_winnt_ls (file); 1057 else 1058 return ftp_parse_unix_ls (file, 1); 1059 } 1060 case ST_VMS: 1061 return ftp_parse_vms_ls (file); 1062 case ST_MACOS: 1063 return ftp_parse_unix_ls (file, 1); 1064 default: 1065 logprintf (LOG_NOTQUIET, _("\ 1066Unsupported listing type, trying Unix listing parser.\n")); 1067 return ftp_parse_unix_ls (file, 0); 1068 } 1069} 1070 1071/* Stuff for creating FTP index. */ 1072 1073/* The function creates an HTML index containing references to given 1074 directories and files on the appropriate host. The references are 1075 FTP. */ 1076uerr_t 1077ftp_index (const char *file, struct url *u, struct fileinfo *f) 1078{ 1079 FILE *fp; 1080 char *upwd; 1081 char *htcldir; /* HTML-clean dir name */ 1082 char *htclfile; /* HTML-clean file name */ 1083 char *urlclfile; /* URL-clean file name */ 1084 1085 if (!output_stream) 1086 { 1087 fp = fopen (file, "wb"); 1088 if (!fp) 1089 { 1090 logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno)); 1091 return FOPENERR; 1092 } 1093 } 1094 else 1095 fp = output_stream; 1096 if (u->user) 1097 { 1098 char *tmpu, *tmpp; /* temporary, clean user and passwd */ 1099 1100 tmpu = url_escape (u->user); 1101 tmpp = u->passwd ? url_escape (u->passwd) : NULL; 1102 if (tmpp) 1103 upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0); 1104 else 1105 upwd = concat_strings (tmpu, "@", (char *) 0); 1106 xfree (tmpu); 1107 xfree_null (tmpp); 1108 } 1109 else 1110 upwd = xstrdup (""); 1111 1112 htcldir = html_quote_string (u->dir); 1113 1114 fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"); 1115 fprintf (fp, "<html>\n<head>\n<title>"); 1116 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port); 1117 fprintf (fp, "</title>\n</head>\n<body>\n<h1>"); 1118 fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port); 1119 fprintf (fp, "</h1>\n<hr>\n<pre>\n"); 1120 1121 while (f) 1122 { 1123 fprintf (fp, " "); 1124 if (f->tstamp != -1) 1125 { 1126 /* #### Should we translate the months? Or, even better, use 1127 ISO 8601 dates? */ 1128 static const char *months[] = { 1129 "Jan", "Feb", "Mar", "Apr", "May", "Jun", 1130 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 1131 }; 1132 time_t tstamp = f->tstamp; 1133 struct tm *ptm = localtime (&tstamp); 1134 1135 fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon], 1136 ptm->tm_mday); 1137 if (ptm->tm_hour) 1138 fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min); 1139 else 1140 fprintf (fp, " "); 1141 } 1142 else 1143 fprintf (fp, _("time unknown ")); 1144 switch (f->type) 1145 { 1146 case FT_PLAINFILE: 1147 fprintf (fp, _("File ")); 1148 break; 1149 case FT_DIRECTORY: 1150 fprintf (fp, _("Directory ")); 1151 break; 1152 case FT_SYMLINK: 1153 fprintf (fp, _("Link ")); 1154 break; 1155 default: 1156 fprintf (fp, _("Not sure ")); 1157 break; 1158 } 1159 htclfile = html_quote_string (f->name); 1160 urlclfile = url_escape_unsafe_and_reserved (f->name); 1161 fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port); 1162 if (*u->dir != '/') 1163 putc ('/', fp); 1164 /* XXX: Should probably URL-escape dir components here, rather 1165 * than just HTML-escape, for consistency with the next bit where 1166 * we use urlclfile for the file component. Anyway, this is safer 1167 * than what we had... */ 1168 fprintf (fp, "%s", htcldir); 1169 if (*u->dir) 1170 putc ('/', fp); 1171 fprintf (fp, "%s", urlclfile); 1172 if (f->type == FT_DIRECTORY) 1173 putc ('/', fp); 1174 fprintf (fp, "\">%s", htclfile); 1175 if (f->type == FT_DIRECTORY) 1176 putc ('/', fp); 1177 fprintf (fp, "</a> "); 1178 if (f->type == FT_PLAINFILE) 1179 fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size)); 1180 else if (f->type == FT_SYMLINK) 1181 fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)"); 1182 putc ('\n', fp); 1183 xfree (htclfile); 1184 xfree (urlclfile); 1185 f = f->next; 1186 } 1187 fprintf (fp, "</pre>\n</body>\n</html>\n"); 1188 xfree (htcldir); 1189 xfree (upwd); 1190 if (!output_stream) 1191 fclose (fp); 1192 else 1193 fflush (fp); 1194 return FTPOK; 1195} 1196