1/* Parsing FTP `ls' output.
2   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3   2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4
5This file is part of GNU Wget.
6
7GNU Wget is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or
10(at your option) any later version.
11
12GNU Wget is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20Additional permission under GNU GPL version 3 section 7
21
22If you modify this program, or any covered work, by linking or
23combining it with the OpenSSL project's OpenSSL library (or a
24modified version of that library), containing parts covered by the
25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26grants you additional permission to convey the resulting work.
27Corresponding Source for a non-source form of such a combination
28shall include the source code for the parts of OpenSSL used as well
29as that of the covered work.  */
30
31#include "wget.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#ifdef HAVE_UNISTD_H
37# include <unistd.h>
38#endif
39#include <errno.h>
40#include <time.h>
41#include "utils.h"
42#include "ftp.h"
43#include "url.h"
44#include "convert.h"            /* for html_quote_string prototype */
45#include "retr.h"               /* for output_stream */
46
47/* Converts symbolic permissions to number-style ones, e.g. string
48   rwxr-xr-x to 755.  For now, it knows nothing of
49   setuid/setgid/sticky.  ACLs are ignored.  */
50static int
51symperms (const char *s)
52{
53  int perms = 0, i;
54
55  if (strlen (s) < 9)
56    return 0;
57  for (i = 0; i < 3; i++, s += 3)
58    {
59      perms <<= 3;
60      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
61                (s[2] == 'x' || s[2] == 's'));
62    }
63  return perms;
64}
65
66
/* Normalize LINE in place so that it can be parsed consistently.

   At most one trailing <LF> is removed, and then at most one trailing
   <CR> (so a "\r\n" ending disappears entirely).  Every <TAB> left in
   the line is replaced with a <SPACE>.

   Returns the length of the line after the trailing characters have
   been removed.  */
static int
clean_line(char *line)
{
  char *tab;
  int len = strlen (line);

  /* Strip the line terminator: first the LF, then the CR before it.  */
  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';

  /* Turn each TAB into a SPC so that a single delimiter suffices.  */
  for (tab = line; (tab = strchr (tab, '\t')) != NULL; )
    *tab = ' ';

  return len;
}
84
/* Convert the Un*x-ish style directory listing stored in FILE to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard Unix `ls -la'
   output (whatever that might be).  BSD (no group) and SYSV (with
   group) listings are handled.

   The time stamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored.

   FILE is the path of the file holding the listing.  If IGNORE_PERMS
   is non-zero, the permissions column is not interpreted; 0755 is
   used for directories and 0644 for everything else.

   Returns the head of a doubly linked list of entries, in listing
   order, or NULL if FILE cannot be opened (the error is logged) or if
   no line produced a usable entry.  Lines starting with "total", the
   entries `.' and `..', and lines that cannot be parsed are skipped.  */
static struct fileinfo *
ftp_parse_unix_ls (const char *file, int ignore_perms)
{
  FILE *fp;
  static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  int next, len, i, error, ignore;
  int year, month, day;         /* for time analysis */
  int hour, min, sec;
  struct tm timestruct, *tnow;
  time_t timenow;

  char *line, *tok, *ptok;      /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  /* DIR is the head of the list being built, L its last element.  */
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((line = read_whole_line (fp)) != NULL)
    {
      /* LEN is the length of the whole cleaned line; it is needed
         later to detect whether strtok cut the file name short.  */
      len = clean_line (line);
      /* Skip if total...  */
      if (!strncasecmp (line, "total", 5))
        {
          xfree (line);
          continue;
        }
      /* Get the first token (permissions).  */
      tok = strtok (line, " ");
      if (!tok)
        {
          xfree (line);
          continue;
        }

      /* Both stay NULL until the file name token is reached; the
         checks after the token loop rely on that.  */
      cur.name = NULL;
      cur.linkto = NULL;

      /* Decide whether we deal with a file or a directory.  */
      switch (*tok)
        {
        case '-':
          cur.type = FT_PLAINFILE;
          DEBUGP (("PLAINFILE; "));
          break;
        case 'd':
          cur.type = FT_DIRECTORY;
          DEBUGP (("DIRECTORY; "));
          break;
        case 'l':
          cur.type = FT_SYMLINK;
          DEBUGP (("SYMLINK; "));
          break;
        default:
          cur.type = FT_UNKNOWN;
          DEBUGP (("UNKNOWN; "));
          break;
        }

      if (ignore_perms)
        {
          /* Do not trust the listing; pick defaults by file type.  */
          switch (cur.type)
            {
            case FT_PLAINFILE:
              cur.perms = 0644;
              break;
            case FT_DIRECTORY:
              cur.perms = 0755;
              break;
            default:
              /*cur.perms = 1023;*/     /* #### What is this?  --hniksic */
              cur.perms = 0644;
            }
          DEBUGP (("implicit perms %0o; ", cur.perms));
        }
       else
         {
           /* Skip the type character and decode the "rwxrwxrwx" part.  */
           cur.perms = symperms (tok + 1);
           DEBUGP (("perms %0o; ", cur.perms));
         }

      error = ignore = 0;       /* Erroneous and ignoring entries are
                                   treated equally for now.  */
      year = hour = min = sec = 0; /* Silence the compiler.  */
      month = day = 0;
      next = -1;
      /* While there are tokens on the line, parse them.  Next is the
         number of tokens left until the filename.

         Use the month-name token as the "anchor" (the place where the
         position wrt the file name is "known").  When a month name is
         encountered, `next' is set to 5.  Also, the preceding
         characters are parsed to get the file size.

         This tactic is quite dubious when it comes to
         internationalization issues (non-English month names), but it
         works for now.  */
      /* Seed TOK with LINE so that, on the first iteration, PTOK ==
         LINE means "there is no token between the permissions and
         this one".  */
      tok = line;
      while (ptok = tok,
             (tok = strtok (NULL, " ")) != NULL)
        {
          --next;
          if (next < 0)         /* a month name was not encountered */
            {
              for (i = 0; i < 12; i++)
                if (!strcmp (tok, months[i]))
                  break;
              /* If we got a month, it means the token before it is the
                 size, and the filename is three tokens away.  */
              if (i != 12)
                {
                  wgint size;

                  /* Parse the previous token with str_to_wgint.  */
                  if (ptok == line)
                    {
                      /* Something has gone wrong during parsing. */
                      error = 1;
                      break;
                    }
                  errno = 0;
                  size = str_to_wgint (ptok, NULL, 10);
                  if (size == WGINT_MAX && errno == ERANGE)
                    /* Out of range -- ignore the size.  #### Should
                       we refuse to start the download.  */
                    cur.size = 0;
                  else
                    cur.size = size;
                  DEBUGP (("size: %s; ", number_to_static_string(cur.size)));

                  month = i;
                  /* The decrement at the top of the loop turns this
                     into 4 (day), 3 (time or year), 2 (file name) for
                     the following tokens.  */
                  next = 5;
                  DEBUGP (("month: %s; ", months[month]));
                }
            }
          else if (next == 4)   /* days */
            {
              /* The token is assumed to consist of one or two decimal
                 digits; no validation is done.  */
              if (tok[1])       /* two-digit... */
                day = 10 * (*tok - '0') + tok[1] - '0';
              else              /* ...or one-digit */
                day = *tok - '0';
              DEBUGP (("day: %d; ", day));
            }
          else if (next == 3)
            {
              /* This ought to be either the time, or the year.  Let's
                 be flexible!

                 If we have a number x, it's a year.  If we have x:y,
                 it's hours and minutes.  If we have x:y:z, z are
                 seconds.  */
              year = 0;
              min = hour = sec = 0;
              /* We must deal with digits.  */
              if (c_isdigit (*tok))
                {
                  /* Suppose it's year.  */
                  for (; c_isdigit (*tok); tok++)
                    year = (*tok - '0') + 10 * year;
                  if (*tok == ':')
                    {
                      /* This means these were hours!  */
                      hour = year;
                      year = 0;
                      ++tok;
                      /* Get the minutes...  */
                      for (; c_isdigit (*tok); tok++)
                        min = (*tok - '0') + 10 * min;
                      if (*tok == ':')
                        {
                          /* ...and the seconds.  */
                          ++tok;
                          for (; c_isdigit (*tok); tok++)
                            sec = (*tok - '0') + 10 * sec;
                        }
                    }
                }
              if (year)
                DEBUGP (("year: %d (no tm); ", year));
              else
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
            }
          else if (next == 2)    /* The file name */
            {
              int fnlen;
              char *p;

              /* Since the file name may contain a SPC, it is possible
                 for strtok to handle it wrong.  */
              fnlen = strlen (tok);
              /* The token ends before the end of the line, so more
                 text follows it.  */
              if (fnlen < len - (tok - line))
                {
                  /* So we have a SPC in the file name.  Restore the
                     original.  */
                  tok[fnlen] = ' ';
                  /* If the file is a symbolic link, it should have a
                     ` -> ' somewhere.  */
                  if (cur.type == FT_SYMLINK)
                    {
                      p = strstr (tok, " -> ");
                      if (!p)
                        {
                          error = 1;
                          break;
                        }
                      cur.linkto = xstrdup (p + 4);
                      DEBUGP (("link to: %s\n", cur.linkto));
                      /* And separate it from the file name.  */
                      *p = '\0';
                    }
                }
              /* If we have the filename, add it to the list of files or
                 directories.  */
              /* "." and ".." are an exception!  */
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
                {
                  DEBUGP (("\nIgnoring `.' and `..'; "));
                  ignore = 1;
                  break;
                }
              /* Some FTP sites choose to have ls -F as their default
                 LIST output, which marks the symlinks with a trailing
                 `@', directory names with a trailing `/' and
                 executables with a trailing `*'.  This is no problem
                 unless encountering a symbolic link ending with `@',
                 or an executable ending with `*' on a server without
                 default -F output.  I believe these cases are very
                 rare.  */
              fnlen = strlen (tok); /* re-calculate `fnlen' */
              cur.name = xmalloc (fnlen + 1);
              memcpy (cur.name, tok, fnlen + 1);
              if (fnlen)
                {
                  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `/' on dir.\n"));
                    }
                  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `@' on link.\n"));
                    }
                  else if (cur.type == FT_PLAINFILE
                           && (cur.perms & 0111)
                           && cur.name[fnlen - 1] == '*')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `*' on exec.\n"));
                    }
                } /* if (fnlen) */
              else
                error = 1;
              /* The file name is the last thing wanted from the line.  */
              break;
            }
          else
            /* Not expected to be reached: once a month has been seen,
               NEXT takes the values 4, 3 and 2 only, and the file name
               branch leaves the loop.  */
            abort ();
        } /* while */

      /* No file name was found (e.g. no month token on the line), or
         a symlink without a target: treat the line as an error.  */
      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
        error = 1;

      DEBUGP (("%s\n", cur.name ? cur.name : ""));

      if (error || ignore)
        {
          DEBUGP (("Skipping.\n"));
          xfree_null (cur.name);
          xfree_null (cur.linkto);
          xfree (line);
          continue;
        }

      /* Append a copy of CUR to the list; the copy takes over
         cur.name and cur.linkto.  */
      if (!dir)
        {
          l = dir = xnew (struct fileinfo);
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
          cur.prev = l;
          l->next = xnew (struct fileinfo);
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
          l->next = NULL;
        }
      /* Get the current time.  */
      timenow = time (NULL);
      tnow = localtime (&timenow);
      /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr).  */
      timestruct.tm_sec   = sec;
      timestruct.tm_min   = min;
      timestruct.tm_hour  = hour;
      timestruct.tm_mday  = day;
      timestruct.tm_mon   = month;
      if (year == 0)
        {
          /* Some listings will not specify the year if it is "obvious"
             that the file was from the previous year.  E.g. if today
             is 97-01-12, and you see a file of Dec 15th, its year is
             1996, not 1997.  Thanks to Vladimir Volovich for
             mentioning this!  */
          if (month > tnow->tm_mon)
            timestruct.tm_year = tnow->tm_year - 1;
          else
            timestruct.tm_year = tnow->tm_year;
        }
      else
        timestruct.tm_year = year;
      /* A year taken from the listing is a calendar year, whereas
         struct tm counts years from 1900; values taken from TNOW are
         already in that form and are left alone by this test.  */
      if (timestruct.tm_year >= 1900)
        timestruct.tm_year -= 1900;
      timestruct.tm_wday  = 0;
      timestruct.tm_yday  = 0;
      timestruct.tm_isdst = -1;
      l->tstamp = mktime (&timestruct); /* store the time-stamp */

      xfree (line);
    }

  fclose (fp);
  return dir;
}
424
425static struct fileinfo *
426ftp_parse_winnt_ls (const char *file)
427{
428  FILE *fp;
429  int len;
430  int year, month, day;         /* for time analysis */
431  int hour, min;
432  struct tm timestruct;
433
434  char *line, *tok;             /* tokenizer */
435  struct fileinfo *dir, *l, cur; /* list creation */
436
437  fp = fopen (file, "rb");
438  if (!fp)
439    {
440      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
441      return NULL;
442    }
443  dir = l = NULL;
444
445  /* Line loop to end of file: */
446  while ((line = read_whole_line (fp)) != NULL)
447    {
448      len = clean_line (line);
449
450      /* Extracting name is a bit of black magic and we have to do it
451         before `strtok' inserted extra \0 characters in the line
452         string. For the moment let us just suppose that the name starts at
453         column 39 of the listing. This way we could also recognize
454         filenames that begin with a series of space characters (but who
455         really wants to use such filenames anyway?). */
456      if (len < 40) continue;
457      tok = line + 39;
458      cur.name = xstrdup(tok);
459      DEBUGP(("Name: '%s'\n", cur.name));
460
461      /* First column: mm-dd-yy. Should atoi() on the month fail, january
462         will be assumed.  */
463      tok = strtok(line, "-");
464      if (tok == NULL) continue;
465      month = atoi(tok) - 1;
466      if (month < 0) month = 0;
467      tok = strtok(NULL, "-");
468      if (tok == NULL) continue;
469      day = atoi(tok);
470      tok = strtok(NULL, " ");
471      if (tok == NULL) continue;
472      year = atoi(tok);
473      /* Assuming the epoch starting at 1.1.1970 */
474      if (year <= 70) year += 100;
475
476      /* Second column: hh:mm[AP]M, listing does not contain value for
477         seconds */
478      tok = strtok(NULL,  ":");
479      if (tok == NULL) continue;
480      hour = atoi(tok);
481      tok = strtok(NULL,  "M");
482      if (tok == NULL) continue;
483      min = atoi(tok);
484      /* Adjust hour from AM/PM. Just for the record, the sequence goes
485         11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM . */
486      tok+=2;
487      if (hour == 12)  hour  = 0;
488      if (*tok == 'P') hour += 12;
489
490      DEBUGP(("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
491              year+1900, month, day, hour, min));
492
493      /* Build the time-stamp (copy & paste from above) */
494      timestruct.tm_sec   = 0;
495      timestruct.tm_min   = min;
496      timestruct.tm_hour  = hour;
497      timestruct.tm_mday  = day;
498      timestruct.tm_mon   = month;
499      timestruct.tm_year  = year;
500      timestruct.tm_wday  = 0;
501      timestruct.tm_yday  = 0;
502      timestruct.tm_isdst = -1;
503      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
504
505      DEBUGP(("Timestamp: %ld\n", cur.tstamp));
506
507      /* Third column: Either file length, or <DIR>. We also set the
508         permissions (guessed as 0644 for plain files and 0755 for
509         directories as the listing does not give us a clue) and filetype
510         here. */
511      tok = strtok(NULL, " ");
512      if (tok == NULL) continue;
513      while ((tok != NULL) && (*tok == '\0'))  tok = strtok(NULL, " ");
514      if (tok == NULL) continue;
515      if (*tok == '<')
516        {
517          cur.type  = FT_DIRECTORY;
518          cur.size  = 0;
519          cur.perms = 0755;
520          DEBUGP(("Directory\n"));
521        }
522      else
523        {
524          wgint size;
525          cur.type  = FT_PLAINFILE;
526          errno = 0;
527          size = str_to_wgint (tok, NULL, 10);
528          if (size == WGINT_MAX && errno == ERANGE)
529            cur.size = 0;       /* overflow */
530          else
531            cur.size = size;
532          cur.perms = 0644;
533          DEBUGP(("File, size %s bytes\n", number_to_static_string (cur.size)));
534        }
535
536      cur.linkto = NULL;
537
538      /* And put everything into the linked list */
539      if (!dir)
540        {
541          l = dir = xnew (struct fileinfo);
542          memcpy (l, &cur, sizeof (cur));
543          l->prev = l->next = NULL;
544        }
545      else
546        {
547          cur.prev = l;
548          l->next = xnew (struct fileinfo);
549          l = l->next;
550          memcpy (l, &cur, sizeof (cur));
551          l->next = NULL;
552        }
553
554      xfree (line);
555    }
556
557  fclose(fp);
558  return dir;
559}
560
561
562
563/* Convert the VMS-style directory listing stored in "file" to a
564   linked list of fileinfo (system-independent) entries.  The contents
565   of FILE are considered to be produced by the standard VMS
566   "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
567   more or less.  (Different VMS FTP servers may have different headers,
568   and may not supply the same data, but all should be subsets of this.)
569
570   VMS normally provides local (server) time and date information.
571   Define the logical name or environment variable
572   "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
573   times if different from the remote local times.
574
575   2005-02-23 SMS.
576   Added code to eliminate "^" escape characters from ODS5 extended file
577   names.  The TCPIP FTP server (V5.4) seems to prefer requests which do
578   not use the escaped names which it provides.
579*/
580
/* Permissions given to entries of a VMS listing until (and unless) a
   protection field is found for them: one value for plain files, one
   for directories.  */
#define VMS_DEFAULT_PROT_FILE 0644
#define VMS_DEFAULT_PROT_DIR 0755
583
/* 2005-02-23 SMS.
   eat_carets().

   Delete ODS5 extended file name escape characters ("^") in the
   original buffer.
   Note that the current scheme does not handle all EFN cases, but it
   could be made more complicated.

   STR is rewritten in place and never grows.  For each caret:
     "^" followed by two hex digits  -> the character with that code;
     "^_"                            -> " ";
     "^/"                            -> "?";
     "^" followed by anything else   -> that character, unchanged
                                        (this includes a single hex
                                        digit not followed by another);
     "^" as the very last character  -> dropped.
   Nothing at or beyond the string's terminating NUL is read as input
   or modified.
*/

/* Return the value (0 - 15) of the hex digit C, or -1 if C is not one
   of [0-9A-Fa-f]. */
static int
caret_hex_value (unsigned char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  if (c >= 'a' && c <= 'f')
    return c - 'a' + 10;
  return -1;
}

static void eat_carets( char *str)
/* char *str;      Source pointer. */
{
  char *strd;   /* Destination pointer. */
  unsigned char uchr;
  int hi, lo;   /* Values of the two hex digits of a "^XX" escape. */

  /* Skip ahead to the first "^", if any. */
  while ((*str != '\0') && (*str != '^'))
     str++;

  /* If no caret was found, quit early. */
  if (*str == '\0')
    return;

  /* Shift characters leftward as carets are found. */
  strd = str;
  while (*str != '\0')
  {
    uchr = *str;
    if (uchr == '^')
    {
      /* Found a caret.  Skip it, and check the next character. */
      uchr = *(++str);
      if (uchr == '\0')
      {
        /* Dangling caret at the end of the name.  Drop it and stop
           here, so that the source pointer is never moved past the
           terminator.
        */
        break;
      }

      /* A hex escape needs two hex digits.  str[1] may be looked at
         safely because str[0] is known not to be the terminator.
      */
      hi = caret_hex_value( uchr);
      lo = (hi >= 0) ? caret_hex_value( (unsigned char) str[ 1]) : -1;
      if (lo >= 0)
      {
        /* Two hex digits.  Get char code from them, and consume the
           second one.
        */
        uchr = (unsigned char) ((hi << 4) | lo);
        str++;
      }
      else if (uchr == '_')
      {
        /* Convert escaped "_" to " ". */
        uchr = ' ';
      }
      else if (uchr == '/')
      {
        /* Convert escaped "/" (invalid Zip) to "?" (invalid VMS). */
        /* Note that this is a left-over from Info-ZIP code, and is
           probably of little value here, except perhaps to avoid
           directory confusion which an unconverted slash might cause.
        */
        uchr = '?';
      }
      /* Else, not a hex escape.  Must be a simple escaped character
         (or Unicode, which is not yet handled here).
      */
    }
    /* Else, not a caret.  Use as-is. */
    *strd = uchr;

    /* Advance destination and source pointers. */
    strd++;
    str++;
  }
  /* Terminate the destination string. */
  *strd = '\0';
}
669
670
671static struct fileinfo *
672ftp_parse_vms_ls (const char *file)
673{
674  FILE *fp;
675  int dt, i, j, len;
676  int perms;
677  time_t timenow;
678  struct tm *timestruct;
679  char date_str[ 32];
680
681  char *line, *tok;		 /* tokenizer */
682  struct fileinfo *dir, *l, cur; /* list creation */
683
684  fp = fopen (file, "r");
685  if (!fp)
686    {
687      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
688      return NULL;
689    }
690  dir = l = NULL;
691
692  /* Skip blank lines, Directory heading, and more blank lines. */
693
694  j = 0; /* Expecting initial blank line(s). */
695  while (1)
696    {
697      line = read_whole_line (fp);
698      if (line == NULL)
699        {
700        break;
701        }
702      else
703        {
704          i = clean_line (line);
705          if (i <= 0)
706            {
707              xfree (line); /* Free useless line storage. */
708              continue; /* Blank line.  Keep looking. */
709            }
710          else
711            {
712              if ((j == 0) && (line[ i- 1] == ']'))
713                {
714                  /* Found Directory heading line.  Next non-blank line
715                  is significant.
716                  */
717                  j = 1;
718                }
719              else if (!strncmp (line, "Total of ", 9))
720                {
721                  /* Found "Total of ..." footing line.  No valid data
722                     will follow (empty directory).
723                  */
724                  xfree (line); /* Free useless line storage. */
725                  line = NULL; /* Arrange for early exit. */
726                  break;
727                }
728              else
729                {
730                  break; /* Must be significant data. */
731                }
732            }
733          xfree (line); /* Free useless line storage. */
734        }
735    }
736
737  /* Read remainder of file until the next blank line or EOF. */
738
739  while (line != NULL)
740    {
741      char *p;
742
743      /* The first token is the file name.  After a long name, other
744         data may be on the following line.  A valid directory name ends
745         in ".DIR;1" (any case), although some VMS FTP servers may omit
746         the version number (";1").
747      */
748
749      tok = strtok(line, " ");
750      if (tok == NULL) tok = line;
751      DEBUGP(("file name:   '%s'\n", tok));
752
753      /* Stripping the version number on a VMS system would be wrong.
754         It may be foolish on a non-VMS system, too, but that's someone
755         else's problem.  (Define PRESERVE_VMS_VERSIONS for proper
756         operation on other operating systems.)
757
758         2005-02-23 SMS.
759         ODS5 extended file names may contain escaped semi-colons, so
760         the version number is identified as right-side decimal digits
761         led by a non-escaped semi-colon.  It may be absent.
762      */
763
764#if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
765      for (p = tok+ strlen( tok); (--p > tok) && c_isdigit( *p); );
766      if ((*p == ';') && (*(p- 1) != '^'))
767        {
768          *p = '\0';
769        }
770#endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */
771
772      /* 2005-02-23 SMS.
773         Eliminate "^" escape characters from ODS5 extended file name.
774         (A caret is invalid in an ODS2 name, so this is always safe.)
775      */
776      eat_carets( tok);
777      DEBUGP(("file name-^: '%s'\n", tok));
778
779      /* Differentiate between a directory and any other file.  A VMS
780         listing may not include file protections (permissions).  Set a
781         default permissions value (according to the file type), which
782         may be overwritten later.  Store directory names without the
783         ".DIR;1" file type and version number, as the plain name is
784         what will work in a CWD command.
785      */
786      len = strlen( tok);
787      if (!strncasecmp( (tok+ (len- 4)), ".DIR", 4))
788        {
789          *(tok+ (len -= 4)) = '\0'; /* Discard ".DIR". */
790          cur.type  = FT_DIRECTORY;
791          cur.perms = VMS_DEFAULT_PROT_DIR;
792          DEBUGP(("Directory (nv)\n"));
793        }
794      else if (!strncasecmp( (tok+ (len- 6)), ".DIR;1", 6))
795        {
796          *(tok+ (len -= 6)) = '\0'; /* Discard ".DIR;1". */
797          cur.type  = FT_DIRECTORY;
798          cur.perms = VMS_DEFAULT_PROT_DIR;
799          DEBUGP(("Directory (v)\n"));
800        }
801      else
802        {
803          cur.type  = FT_PLAINFILE;
804          cur.perms = VMS_DEFAULT_PROT_FILE;
805          DEBUGP(("File\n"));
806        }
807      cur.name = xstrdup(tok);
808      DEBUGP(("Name: '%s'\n", cur.name));
809
810      /* Null the date and time string. */
811      *date_str = '\0';
812
813      /* VMS lacks symbolic links. */
814      cur.linkto = NULL;
815
816      /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
817         hence useless for an integrity check based on byte-count.
818         Set size to unknown.
819      */
820      cur.size  = 0;
821
822      /* Get token 2, if any.  A long name may force all other data onto
823         a second line.  If needed, read the second line.
824      */
825
826      tok = strtok(NULL, " ");
827      if (tok == NULL)
828        {
829          DEBUGP(("Getting additional line.\n"));
830          xfree (line);
831          line = read_whole_line (fp);
832          if (!line)
833            {
834              DEBUGP(("EOF.  Leaving listing parser.\n"));
835              break;
836            }
837
838          /* Second line must begin with " ".  Otherwise, it's a first
839             line (and we may be confused).
840          */
841          if (i <= 0)
842	    {
843              /* Blank line.  End of significant file listing. */
844              DEBUGP(("Blank line.  Leaving listing parser.\n"));
845              xfree (line); /* Free useless line storage. */
846	      break;
847	    }
848          else if (line[ 0] != ' ')
849            {
850              DEBUGP(("Non-blank in column 1.  Must be a new file name?\n"));
851              continue;
852            }
853          else
854            {
855              tok = strtok (line, " ");
856              if (tok == NULL)
857                {
858                  /* Unexpected non-empty but apparently blank line. */
859                  DEBUGP(("Null token.  Leaving listing parser.\n"));
860                  xfree (line); /* Free useless line storage. */
861                  break;
862                }
863            }
864        }
865
866      /* Analyze tokens.  (Order is not significant, except date must
867         precede time.)
868
869         Size:       ddd or ddd/ddd (where "ddd" is a decimal number)
870         Date:       DD-MMM-YYYY
871         Time:       HH:MM or HH:MM:SS or HH:MM:SS.CC
872         Owner:      [user] or [user,group]
873         Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
874                     subset thereof, for System, Owner, Group, World.
875
876         If permission is lacking, info may be replaced by the string:
877         "No privilege for attempted operation".
878      */
879      while (tok != NULL)
880	{
881	  DEBUGP (("Token: >%s<: ", tok));
882
883	  if ((strlen( tok) < 12) && (strchr( tok, '-') != NULL))
884	    {
885	      /* Date. */
886	      DEBUGP (("Date.\n"));
887	      strcpy( date_str, tok);
888	      strcat( date_str, " ");
889	    }
890	  else if ((strlen( tok) < 12) && (strchr( tok, ':') != NULL))
891	    {
892	      /* Time. */
893	      DEBUGP (("Time. "));
894	      strncat( date_str,
895	       tok,
896	       (sizeof( date_str)- strlen( date_str)- 1));
897	      DEBUGP (("Date time: >%s<\n", date_str));
898	    }
899	  else if (strchr( tok, '[') != NULL)
900	    {
901	      /* Owner.  (Ignore.) */
902	      DEBUGP (("Owner.\n"));
903	    }
904	  else if (strchr( tok, '(') != NULL)
905	    {
906	      /* Protections (permissions). */
907	      perms = 0;
908	      j = 0;
909	      for (i = 0; i < strlen( tok); i++)
910		{
911		  switch (tok[ i])
912		    {
913		      case '(':
914		        break;
915		      case ')':
916		        break;
917		      case ',':
918		        if (j == 0)
919		          {
920		            perms = 0;
921		            j = 1;
922		          }
923		        else
924		          {
925		            perms <<= 3;
926		          }
927		        break;
928		    case 'R':
929		      perms |= 4;
930		      break;
931		    case 'W':
932		      perms |= 2;
933		      break;
934		    case 'E':
935		      perms |= 1;
936		      break;
937		    case 'D':
938		      perms |= 2;
939		      break;
940		    }
941		}
942	      cur.perms = perms;
943	      DEBUGP (("Prot.  perms = %0o.\n", cur.perms));
944	    }
945	  else
946	    {
947	      /* Nondescript.  Probably size(s), probably in blocks.
948                 Could be "No privilege ..." message.  (Ignore.)
949              */
950	      DEBUGP (("Ignored (size?).\n"));
951	    }
952
953	  tok = strtok (NULL, " ");
954	}
955
956      /* Tokens exhausted.  Interpret the data, and fill in the
957         structure.
958      */
959      /* Fill tm timestruct according to date-time string.  Fractional
960         seconds are ignored.  Default to current time, if conversion
961         fails.
962      */
963      timenow = time( NULL);
964      timestruct = localtime( &timenow );
965      strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
966
967      /* Convert struct tm local time to time_t local time. */
968      timenow = mktime (timestruct);
969      /* Offset local time according to environment variable (seconds). */
970      if ((tok = getenv( "WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
971        {
972          dt = atoi( tok);
973          DEBUGP (("Time differential = %d.\n", dt));
974        }
975      else
976        {
977          dt = 0;
978        }
979
980      if (dt >= 0)
981        {
982          timenow += dt;
983        }
984      else
985        {
986          timenow -= (-dt);
987        }
988      cur.tstamp = timenow; /* Store the time-stamp. */
989      DEBUGP(("Timestamp: %ld\n", cur.tstamp));
990
991      /* Add the data for this item to the linked list, */
992      if (!dir)
993        {
994          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
995          memcpy (l, &cur, sizeof (cur));
996          l->prev = l->next = NULL;
997        }
998      else
999        {
1000          cur.prev = l;
1001          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
1002          l = l->next;
1003          memcpy (l, &cur, sizeof (cur));
1004          l->next = NULL;
1005        }
1006
1007      /* Free old line storage.  Read a new line. */
1008      xfree (line);
1009      line = read_whole_line (fp);
1010      if (line != NULL)
1011        {
1012          i = clean_line (line);
1013          if (i <= 0)
1014	    {
1015              /* Blank line.  End of significant file listing. */
1016              xfree (line); /* Free useless line storage. */
1017	      break;
1018	    }
1019        }
1020    }
1021
1022  fclose (fp);
1023  return dir;
1024}
1025
1026
1027/* This function switches between the correct parsing routine depending on
1028   the SYSTEM_TYPE. The system type should be based on the result of the
1029   "SYST" response of the FTP server. According to this repsonse we will
1030   use on of the three different listing parsers that cover the most of FTP
1031   servers used nowadays.  */
1032
1033struct fileinfo *
1034ftp_parse_ls (const char *file, const enum stype system_type)
1035{
1036  switch (system_type)
1037    {
1038    case ST_UNIX:
1039      return ftp_parse_unix_ls (file, 0);
1040    case ST_WINNT:
1041      {
1042        /* Detect whether the listing is simulating the UNIX format */
1043        FILE *fp;
1044        int   c;
1045        fp = fopen (file, "rb");
1046        if (!fp)
1047        {
1048          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1049          return NULL;
1050        }
1051        c = fgetc(fp);
1052        fclose(fp);
1053        /* If the first character of the file is '0'-'9', it's WINNT
1054           format. */
1055        if (c >= '0' && c <='9')
1056          return ftp_parse_winnt_ls (file);
1057        else
1058          return ftp_parse_unix_ls (file, 1);
1059      }
1060    case ST_VMS:
1061      return ftp_parse_vms_ls (file);
1062    case ST_MACOS:
1063      return ftp_parse_unix_ls (file, 1);
1064    default:
1065      logprintf (LOG_NOTQUIET, _("\
1066Unsupported listing type, trying Unix listing parser.\n"));
1067      return ftp_parse_unix_ls (file, 0);
1068    }
1069}
1070
1071/* Stuff for creating FTP index. */
1072
1073/* The function creates an HTML index containing references to given
1074   directories and files on the appropriate host.  The references are
1075   FTP.  */
1076uerr_t
1077ftp_index (const char *file, struct url *u, struct fileinfo *f)
1078{
1079  FILE *fp;
1080  char *upwd;
1081  char *htcldir;                /* HTML-clean dir name */
1082  char *htclfile;               /* HTML-clean file name */
1083  char *urlclfile;              /* URL-clean file name */
1084
1085  if (!output_stream)
1086    {
1087      fp = fopen (file, "wb");
1088      if (!fp)
1089        {
1090          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
1091          return FOPENERR;
1092        }
1093    }
1094  else
1095    fp = output_stream;
1096  if (u->user)
1097    {
1098      char *tmpu, *tmpp;        /* temporary, clean user and passwd */
1099
1100      tmpu = url_escape (u->user);
1101      tmpp = u->passwd ? url_escape (u->passwd) : NULL;
1102      if (tmpp)
1103        upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
1104      else
1105        upwd = concat_strings (tmpu, "@", (char *) 0);
1106      xfree (tmpu);
1107      xfree_null (tmpp);
1108    }
1109  else
1110    upwd = xstrdup ("");
1111
1112  htcldir = html_quote_string (u->dir);
1113
1114  fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
1115  fprintf (fp, "<html>\n<head>\n<title>");
1116  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1117  fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
1118  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
1119  fprintf (fp, "</h1>\n<hr>\n<pre>\n");
1120
1121  while (f)
1122    {
1123      fprintf (fp, "  ");
1124      if (f->tstamp != -1)
1125        {
1126          /* #### Should we translate the months?  Or, even better, use
1127             ISO 8601 dates?  */
1128          static const char *months[] = {
1129            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
1130            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
1131          };
1132          time_t tstamp = f->tstamp;
1133          struct tm *ptm = localtime (&tstamp);
1134
1135          fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
1136                  ptm->tm_mday);
1137          if (ptm->tm_hour)
1138            fprintf (fp, "%02d:%02d  ", ptm->tm_hour, ptm->tm_min);
1139          else
1140            fprintf (fp, "       ");
1141        }
1142      else
1143        fprintf (fp, _("time unknown       "));
1144      switch (f->type)
1145        {
1146        case FT_PLAINFILE:
1147          fprintf (fp, _("File        "));
1148          break;
1149        case FT_DIRECTORY:
1150          fprintf (fp, _("Directory   "));
1151          break;
1152        case FT_SYMLINK:
1153          fprintf (fp, _("Link        "));
1154          break;
1155        default:
1156          fprintf (fp, _("Not sure    "));
1157          break;
1158        }
1159      htclfile = html_quote_string (f->name);
1160      urlclfile = url_escape_unsafe_and_reserved (f->name);
1161      fprintf (fp, "<a href=\"ftp://%s%s:%d", upwd, u->host, u->port);
1162      if (*u->dir != '/')
1163        putc ('/', fp);
1164      /* XXX: Should probably URL-escape dir components here, rather
1165       * than just HTML-escape, for consistency with the next bit where
1166       * we use urlclfile for the file component. Anyway, this is safer
1167       * than what we had... */
1168      fprintf (fp, "%s", htcldir);
1169      if (*u->dir)
1170        putc ('/', fp);
1171      fprintf (fp, "%s", urlclfile);
1172      if (f->type == FT_DIRECTORY)
1173        putc ('/', fp);
1174      fprintf (fp, "\">%s", htclfile);
1175      if (f->type == FT_DIRECTORY)
1176        putc ('/', fp);
1177      fprintf (fp, "</a> ");
1178      if (f->type == FT_PLAINFILE)
1179        fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
1180      else if (f->type == FT_SYMLINK)
1181        fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
1182      putc ('\n', fp);
1183      xfree (htclfile);
1184      xfree (urlclfile);
1185      f = f->next;
1186    }
1187  fprintf (fp, "</pre>\n</body>\n</html>\n");
1188  xfree (htcldir);
1189  xfree (upwd);
1190  if (!output_stream)
1191    fclose (fp);
1192  else
1193    fflush (fp);
1194  return FTPOK;
1195}
1196