srvr_nfs.c revision 310490
1/*
2 * Copyright (c) 1997-2014 Erez Zadok
3 * Copyright (c) 1990 Jan-Simon Pendry
4 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
5 * Copyright (c) 1990 The Regents of the University of California.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry at Imperial College, London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 *
36 * File: am-utils/amd/srvr_nfs.c
37 *
38 */
39
40/*
41 * NFS server modeling
42 */
43
44#ifdef HAVE_CONFIG_H
45# include <config.h>
46#endif /* HAVE_CONFIG_H */
47#include <am_defs.h>
48#include <amd.h>
49
/*
 * How many pings may go unanswered before the host is declared down
 * - three-fifths of the allowed mount time...
 */
#define	MAX_ALLOWED_PINGS	(3 /* pings */ + 1 /* for luck ... */)

/*
 * Ping interval, in seconds, used when starting up a new server
 */
#define	FAST_NFS_PING		3
60
61#if (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME
62# error: sanity check failed in srvr_nfs.c
63/*
64 * you cannot do things this way...
65 * sufficient fast pings must be given the chance to fail
66 * within the allowed mount time
67 */
68#endif /* (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME */
69
70/* structures and typedefs */
71typedef struct nfs_private {
72  u_short np_mountd;		/* Mount daemon port number */
73  char np_mountd_inval;		/* Port *may* be invalid */
74  				/* 'Y' invalid, 'N' valid, 'P' permanent */
75  int np_ping;			/* Number of failed ping attempts */
76  time_t np_ttl;		/* Time when server is thought dead */
77  int np_xid;			/* RPC transaction id for pings */
78  int np_error;			/* Error during portmap request */
79} nfs_private;
80
81/* globals */
82qelem nfs_srvr_list = {&nfs_srvr_list, &nfs_srvr_list};
83
84/* statics */
85static int global_xid;		/* For NFS pings */
86#define	XID_ALLOC()		(++global_xid)
87
88#if defined(HAVE_FS_NFS4)
89# define NUM_NFS_VERS 3
90#elif defined(HAVE_FS_NFS3)
91# define NUM_NFS_VERS 2
92#else  /* not HAVE_FS_NFS3 */
93# define NUM_NFS_VERS 1
94#endif /* not HAVE_FS_NFS3 */
95static int ping_len[NUM_NFS_VERS];
96static char ping_buf[NUM_NFS_VERS][sizeof(struct rpc_msg) + 32];
97
#if defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3)
/*
 * Transport protocols we know about, listed in order of preference
 * and terminated by NULL.
 *
 * Solaris 8 and newer NetBSD systems are switching to UDP first, so
 * this order may have to be adjusted for Amd in the future once more
 * vendors make that change. -Erez 11/24/2000
 *
 * Or we might simply make this a platform-specific order. -Ion 09/13/2003
 */
static char *protocols[] = {
  "tcp",
  "udp",
  NULL
};
#endif /* defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3) */
110
111/* forward definitions */
112static void nfs_keepalive(voidp);
113
114
115/*
116 * Flush cached data for an fserver (or for all, if fs==NULL)
117 */
118void
119flush_srvr_nfs_cache(fserver *fs)
120{
121  fserver *fs2 = NULL;
122
123  ITER(fs2, fserver, &nfs_srvr_list) {
124    if (fs == NULL || fs == fs2) {
125      nfs_private *np = (nfs_private *) fs2->fs_private;
126      if (np && np->np_mountd_inval != 'P') {
127	np->np_mountd_inval = 'Y';
128	np->np_error = -1;
129      }
130    }
131  }
132}
133
134
135/*
136 * Startup the NFS ping for a particular version.
137 */
138static void
139create_ping_payload(u_long nfs_version)
140{
141  XDR ping_xdr;
142  struct rpc_msg ping_msg;
143
144  /*
145   * Non nfs mounts like /afs/glue.umd.edu have ended up here.
146   */
147  if (nfs_version == 0) {
148    nfs_version = NFS_VERSION;
149    plog(XLOG_WARNING, "%s: nfs_version = 0, changed to 2", __func__);
150  } else
151    plog(XLOG_INFO, "%s: nfs_version: %d", __func__, (int) nfs_version);
152
153  rpc_msg_init(&ping_msg, NFS_PROGRAM, nfs_version, NFSPROC_NULL);
154
155  /*
156   * Create an XDR endpoint
157   */
158  xdrmem_create(&ping_xdr, ping_buf[nfs_version - NFS_VERSION], sizeof(ping_buf[0]), XDR_ENCODE);
159
160  /*
161   * Create the NFS ping message
162   */
163  if (!xdr_callmsg(&ping_xdr, &ping_msg)) {
164    plog(XLOG_ERROR, "Couldn't create ping RPC message");
165    going_down(3);
166    return;
167  }
168  /*
169   * Find out how long it is
170   */
171  ping_len[nfs_version - NFS_VERSION] = xdr_getpos(&ping_xdr);
172
173  /*
174   * Destroy the XDR endpoint - we don't need it anymore
175   */
176  xdr_destroy(&ping_xdr);
177}
178
179
180/*
181 * Called when a portmap reply arrives
182 */
183static void
184got_portmap(voidp pkt, int len, struct sockaddr_in *sa, struct sockaddr_in *ia, voidp idv, int done)
185{
186  fserver *fs2 = (fserver *) idv;
187  fserver *fs = NULL;
188
189  /*
190   * Find which fileserver we are talking about
191   */
192  ITER(fs, fserver, &nfs_srvr_list)
193    if (fs == fs2)
194      break;
195
196  if (fs == fs2) {
197    u_long port = 0;	/* XXX - should be short but protocol is naff */
198    int error = done ? pickup_rpc_reply(pkt, len, (voidp) &port, (XDRPROC_T_TYPE) xdr_u_long) : -1;
199    nfs_private *np = (nfs_private *) fs->fs_private;
200
201    if (!error && port) {
202      dlog("got port (%d) for mountd on %s", (int) port, fs->fs_host);
203      /*
204       * Grab the port number.  Portmap sends back
205       * an u_long in native ordering, so it
206       * needs converting to a u_short in
207       * network ordering.
208       */
209      np->np_mountd = htons((u_short) port);
210      np->np_mountd_inval = 'N';
211      np->np_error = 0;
212    } else {
213      dlog("Error fetching port for mountd on %s", fs->fs_host);
214      dlog("\t error=%d, port=%d", error, (int) port);
215      /*
216       * Almost certainly no mountd running on remote host
217       */
218      np->np_error = error ? error : ETIMEDOUT;
219    }
220
221    if (fs->fs_flags & FSF_WANT)
222      wakeup_srvr(fs);
223  } else if (done) {
224    dlog("Got portmap for old port request");
225  } else {
226    dlog("portmap request timed out");
227  }
228}
229
230
231/*
232 * Obtain portmap information
233 */
234static int
235call_portmap(fserver *fs, AUTH *auth, u_long prog, u_long vers, u_long prot)
236{
237  struct rpc_msg pmap_msg;
238  int len;
239  char iobuf[UDPMSGSIZE];
240  int error;
241  struct pmap pmap;
242
243  rpc_msg_init(&pmap_msg, PMAPPROG, PMAPVERS, PMAPPROC_NULL);
244  pmap.pm_prog = prog;
245  pmap.pm_vers = vers;
246  pmap.pm_prot = prot;
247  pmap.pm_port = 0;
248  len = make_rpc_packet(iobuf,
249			sizeof(iobuf),
250			PMAPPROC_GETPORT,
251			&pmap_msg,
252			(voidp) &pmap,
253			(XDRPROC_T_TYPE) xdr_pmap,
254			auth);
255  if (len > 0) {
256    struct sockaddr_in sin;
257    memset((voidp) &sin, 0, sizeof(sin));
258    sin = *fs->fs_ip;
259    sin.sin_port = htons(PMAPPORT);
260    error = fwd_packet(RPC_XID_PORTMAP, iobuf, len,
261		       &sin, &sin, (voidp) fs, got_portmap);
262  } else {
263    error = -len;
264  }
265
266  return error;
267}
268
269
270static void
271recompute_portmap(fserver *fs)
272{
273  int error;
274  u_long mnt_version;
275
276  /*
277   * No portmap calls for pure WebNFS servers.
278   */
279  if (fs->fs_flags & FSF_WEBNFS)
280    return;
281
282  if (nfs_auth)
283    error = 0;
284  else
285    error = make_nfs_auth();
286
287  if (error) {
288    nfs_private *np = (nfs_private *) fs->fs_private;
289    np->np_error = error;
290    return;
291  }
292
293  if (fs->fs_version == 0)
294    plog(XLOG_WARNING, "%s: nfs_version = 0 fixed", __func__);
295
296  plog(XLOG_INFO, "%s: NFS version %d on %s", __func__,
297       (int) fs->fs_version, fs->fs_host);
298#ifdef HAVE_FS_NFS3
299  if (fs->fs_version == NFS_VERSION3)
300    mnt_version = AM_MOUNTVERS3;
301  else
302#endif /* HAVE_FS_NFS3 */
303    mnt_version = MOUNTVERS;
304
305  plog(XLOG_INFO, "Using MOUNT version: %d", (int) mnt_version);
306  call_portmap(fs, nfs_auth, MOUNTPROG, mnt_version, (u_long) IPPROTO_UDP);
307}
308
309
310int
311get_mountd_port(fserver *fs, u_short *port, wchan_t wchan)
312{
313  int error = -1;
314
315  if (FSRV_ISDOWN(fs))
316    return EWOULDBLOCK;
317
318  if (FSRV_ISUP(fs)) {
319    nfs_private *np = (nfs_private *) fs->fs_private;
320    if (np->np_error == 0) {
321      *port = np->np_mountd;
322      error = 0;
323    } else {
324      error = np->np_error;
325    }
326    /*
327     * Now go get the port mapping again in case it changed.
328     * Note that it is used even if (np_mountd_inval)
329     * is True.  The flag is used simply as an
330     * indication that the mountd may be invalid, not
331     * that it is known to be invalid.
332     */
333    switch (np->np_mountd_inval) {
334    case 'Y':
335      recompute_portmap(fs);
336      break;
337    case 'N':
338      np->np_mountd_inval = 'Y';
339      break;
340    case 'P':
341      break;
342    default:
343      abort();
344    }
345  }
346  if (error < 0 && wchan && !(fs->fs_flags & FSF_WANT)) {
347    /*
348     * If a wait channel is supplied, and no
349     * error has yet occurred, then arrange
350     * that a wakeup is done on the wait channel,
351     * whenever a wakeup is done on this fs node.
352     * Wakeup's are done on the fs node whenever
353     * it changes state - thus causing control to
354     * come back here and new, better things to happen.
355     */
356    fs->fs_flags |= FSF_WANT;
357    sched_task(wakeup_task, wchan, (wchan_t) fs);
358  }
359  return error;
360}
361
362
363/*
364 * This is called when we get a reply to an RPC ping.
365 * The value of id was taken from the nfs_private
366 * structure when the ping was transmitted.
367 */
368static void
369nfs_keepalive_callback(voidp pkt, int len, struct sockaddr_in *sp, struct sockaddr_in *tsp, voidp idv, int done)
370{
371  int xid = (long) idv;		/* cast needed for 64-bit archs */
372  fserver *fs;
373  int found_map = 0;
374
375  if (!done)
376    return;
377
378  /*
379   * For each node...
380   */
381  ITER(fs, fserver, &nfs_srvr_list) {
382    nfs_private *np = (nfs_private *) fs->fs_private;
383    if (np->np_xid == xid && (fs->fs_flags & FSF_PINGING)) {
384      /*
385       * Reset the ping counter.
386       * Update the keepalive timer.
387       * Log what happened.
388       */
389      if (fs->fs_flags & FSF_DOWN) {
390	fs->fs_flags &= ~FSF_DOWN;
391	if (fs->fs_flags & FSF_VALID) {
392	  srvrlog(fs, "is up");
393	} else {
394	  if (np->np_ping > 1)
395	    srvrlog(fs, "ok");
396	  else
397	    srvrlog(fs, "starts up");
398	  fs->fs_flags |= FSF_VALID;
399	}
400
401	map_flush_srvr(fs);
402      } else {
403	if (fs->fs_flags & FSF_VALID) {
404	  dlog("file server %s type nfs is still up", fs->fs_host);
405	} else {
406	  if (np->np_ping > 1)
407	    srvrlog(fs, "ok");
408	  fs->fs_flags |= FSF_VALID;
409	}
410      }
411
412      /*
413       * Adjust ping interval
414       */
415      untimeout(fs->fs_cid);
416      fs->fs_cid = timeout(fs->fs_pinger, nfs_keepalive, (voidp) fs);
417
418      /*
419       * Update ttl for this server
420       */
421      np->np_ttl = clocktime(NULL) +
422	(MAX_ALLOWED_PINGS - 1) * FAST_NFS_PING + fs->fs_pinger - 1;
423
424      /*
425       * New RPC xid...
426       */
427      np->np_xid = XID_ALLOC();
428
429      /*
430       * Failed pings is zero...
431       */
432      np->np_ping = 0;
433
434      /*
435       * Recompute portmap information if not known
436       */
437      if (np->np_mountd_inval == 'Y')
438	recompute_portmap(fs);
439
440      found_map++;
441      break;
442    }
443  }
444
445  if (found_map == 0)
446    dlog("Spurious ping packet");
447}
448
449
450static void
451check_fs_addr_change(fserver *fs)
452{
453  struct hostent *hp = NULL;
454  struct in_addr ia;
455  char *old_ipaddr, *new_ipaddr;
456
457  hp = gethostbyname(fs->fs_host);
458  if (!hp ||
459      hp->h_addrtype != AF_INET ||
460      !STREQ((char *) hp->h_name, fs->fs_host) ||
461      memcmp((voidp) &fs->fs_ip->sin_addr,
462	     (voidp) hp->h_addr,
463	     sizeof(fs->fs_ip->sin_addr)) == 0)
464    return;
465  /* if got here: downed server changed IP address */
466  old_ipaddr = xstrdup(inet_ntoa(fs->fs_ip->sin_addr));
467  memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr));
468  new_ipaddr = inet_ntoa(ia);	/* ntoa uses static buf */
469  plog(XLOG_WARNING, "EZK: down fileserver %s changed ip: %s -> %s",
470       fs->fs_host, old_ipaddr, new_ipaddr);
471  XFREE(old_ipaddr);
472  /* copy new IP addr */
473  memmove((voidp) &fs->fs_ip->sin_addr,
474	  (voidp) hp->h_addr,
475	  sizeof(fs->fs_ip->sin_addr));
476  /* XXX: do we need to un/set these flags? */
477  fs->fs_flags &= ~FSF_DOWN;
478  fs->fs_flags |= FSF_VALID | FSF_WANT;
479  map_flush_srvr(fs);		/* XXX: a race with flush_srvr_nfs_cache? */
480  flush_srvr_nfs_cache(fs);
481  fs->fs_flags |= FSF_FORCE_UNMOUNT;
482
483#if 0
484  flush_nfs_fhandle_cache(fs);	/* done in caller: nfs_keepalive_timeout */
485  /* XXX: need to purge nfs_private so that somehow it will get re-initialized? */
486#endif /* 0 */
487}
488
489
490/*
491 * Called when no ping-reply received
492 */
493static void
494nfs_keepalive_timeout(voidp v)
495{
496  fserver *fs = v;
497  nfs_private *np = (nfs_private *) fs->fs_private;
498
499  /*
500   * Another ping has failed
501   */
502  np->np_ping++;
503  if (np->np_ping > 1)
504    srvrlog(fs, "not responding");
505
506  /*
507   * Not known to be up any longer
508   */
509  if (FSRV_ISUP(fs))
510    fs->fs_flags &= ~FSF_VALID;
511
512  /*
513   * If ttl has expired then guess that it is dead
514   */
515  if (np->np_ttl < clocktime(NULL)) {
516    int oflags = fs->fs_flags;
517    dlog("ttl has expired");
518    if ((fs->fs_flags & FSF_DOWN) == 0) {
519      /*
520       * Server was up, but is now down.
521       */
522      srvrlog(fs, "is down");
523      fs->fs_flags |= FSF_DOWN | FSF_VALID;
524      /*
525       * Since the server is down, the portmap
526       * information may now be wrong, so it
527       * must be flushed from the local cache
528       */
529      flush_nfs_fhandle_cache(fs);
530      np->np_error = -1;
531      check_fs_addr_change(fs); /* check if IP addr of fserver changed */
532    } else {
533      /*
534       * Known to be down
535       */
536      if ((fs->fs_flags & FSF_VALID) == 0)
537	srvrlog(fs, "starts down");
538      fs->fs_flags |= FSF_VALID;
539    }
540    if (oflags != fs->fs_flags && (fs->fs_flags & FSF_WANT))
541      wakeup_srvr(fs);
542    /*
543     * Reset failed ping count
544     */
545    np->np_ping = 0;
546  } else {
547    if (np->np_ping > 1)
548      dlog("%d pings to %s failed - at most %d allowed", np->np_ping, fs->fs_host, MAX_ALLOWED_PINGS);
549  }
550
551  /*
552   * New RPC xid, so any late responses to the previous ping
553   * get ignored...
554   */
555  np->np_xid = XID_ALLOC();
556
557  /*
558   * Run keepalive again
559   */
560  nfs_keepalive(fs);
561}
562
563
564/*
565 * Keep track of whether a server is alive
566 */
567static void
568nfs_keepalive(voidp v)
569{
570  fserver *fs = v;
571  int error;
572  nfs_private *np = (nfs_private *) fs->fs_private;
573  int fstimeo = -1;
574  int fs_version = nfs_valid_version(gopt.nfs_vers_ping) &&
575    gopt.nfs_vers_ping < fs->fs_version ? gopt.nfs_vers_ping : fs->fs_version;
576
577  /*
578   * Send an NFS ping to this node
579   */
580
581  if (ping_len[fs_version - NFS_VERSION] == 0)
582    create_ping_payload(fs_version);
583
584  /*
585   * Queue the packet...
586   */
587  error = fwd_packet(MK_RPC_XID(RPC_XID_NFSPING, np->np_xid),
588		     ping_buf[fs_version - NFS_VERSION],
589		     ping_len[fs_version - NFS_VERSION],
590		     fs->fs_ip,
591		     (struct sockaddr_in *) NULL,
592		     (voidp) ((long) np->np_xid), /* cast needed for 64-bit archs */
593		     nfs_keepalive_callback);
594
595  /*
596   * See if a hard error occurred
597   */
598  switch (error) {
599  case ENETDOWN:
600  case ENETUNREACH:
601  case EHOSTDOWN:
602  case EHOSTUNREACH:
603    np->np_ping = MAX_ALLOWED_PINGS;	/* immediately down */
604    np->np_ttl = (time_t) 0;
605    /*
606     * This causes an immediate call to nfs_keepalive_timeout
607     * whenever the server was thought to be up.
608     * See +++ below.
609     */
610    fstimeo = 0;
611    break;
612
613  case 0:
614    dlog("Sent NFS ping to %s", fs->fs_host);
615    break;
616  }
617
618  /*
619   * Back off the ping interval if we are not getting replies and
620   * the remote system is known to be down.
621   */
622  switch (fs->fs_flags & (FSF_DOWN | FSF_VALID)) {
623  case FSF_VALID:		/* Up */
624    if (fstimeo < 0)		/* +++ see above */
625      fstimeo = FAST_NFS_PING;
626    break;
627
628  case FSF_VALID | FSF_DOWN:	/* Down */
629    fstimeo = fs->fs_pinger;
630    break;
631
632  default:			/* Unknown */
633    fstimeo = FAST_NFS_PING;
634    break;
635  }
636
637  dlog("NFS timeout in %d seconds", fstimeo);
638
639  fs->fs_cid = timeout(fstimeo, nfs_keepalive_timeout, (voidp) fs);
640}
641
642
643static void
644start_nfs_pings(fserver *fs, int pingval)
645{
646  if (pingval == 0)	    /* could be because ping mnt option not found */
647    pingval = AM_PINGER;
648  /* if pings haven't been initalized, then init them for first time */
649  if (fs->fs_flags & FSF_PING_UNINIT) {
650    fs->fs_flags &= ~FSF_PING_UNINIT;
651    plog(XLOG_INFO, "initializing %s's pinger to %d sec", fs->fs_host, pingval);
652    goto do_pings;
653  }
654
655  if ((fs->fs_flags & FSF_PINGING)  &&  fs->fs_pinger == pingval) {
656    dlog("already running pings to %s", fs->fs_host);
657    return;
658  }
659
660  /* if got here, then we need to update the ping value */
661  plog(XLOG_INFO, "changing %s's ping value from %d%s to %d%s",
662       fs->fs_host,
663       fs->fs_pinger, (fs->fs_pinger < 0 ? " (off)" : ""),
664       pingval, (pingval < 0 ? " (off)" : ""));
665 do_pings:
666  fs->fs_pinger = pingval;
667
668  if (fs->fs_cid)
669    untimeout(fs->fs_cid);
670  if (pingval < 0) {
671    srvrlog(fs, "wired up (pings disabled)");
672    fs->fs_flags |= FSF_VALID;
673    fs->fs_flags &= ~FSF_DOWN;
674  } else {
675    fs->fs_flags |= FSF_PINGING;
676    nfs_keepalive(fs);
677  }
678}
679
680
681/*
682 * Find an nfs server for a host.
683 */
684fserver *
685find_nfs_srvr(mntfs *mf)
686{
687  char *host;
688  fserver *fs;
689  int pingval;
690  mntent_t mnt;
691  nfs_private *np;
692  struct hostent *hp = NULL;
693  struct sockaddr_in *ip = NULL;
694  u_long nfs_version = 0;	/* default is no version specified */
695  u_long best_nfs_version = 0;
696  char *nfs_proto = NULL;	/* no IP protocol either */
697  int nfs_port = 0;
698  int nfs_port_opt = 0;
699  int fserver_is_down = 0;
700
701  if (mf->mf_fo == NULL) {
702    plog(XLOG_ERROR, "%s: NULL mf_fo", __func__);
703    return NULL;
704  }
705  host = mf->mf_fo->opt_rhost;
706  /*
707   * Get ping interval from mount options.
708   * Current only used to decide whether pings
709   * are required or not.  < 0 = no pings.
710   */
711  mnt.mnt_opts = mf->mf_mopts;
712  pingval = hasmntval(&mnt, "ping");
713
714  if (mf->mf_flags & MFF_NFS_SCALEDOWN) {
715    /*
716     * the server granted us a filehandle, but we were unable to mount it.
717     * therefore, scale down to NFSv2/UDP and try again.
718     */
719    nfs_version = NFS_VERSION;
720    nfs_proto = "udp";
721    plog(XLOG_WARNING, "%s: NFS mount failed, trying again with NFSv2/UDP",
722      __func__);
723    mf->mf_flags &= ~MFF_NFS_SCALEDOWN;
724  } else {
725    /*
726     * Get the NFS version from the mount options. This is used
727     * to decide the highest NFS version to try.
728     */
729#ifdef MNTTAB_OPT_VERS
730    nfs_version = hasmntval(&mnt, MNTTAB_OPT_VERS);
731#endif /* MNTTAB_OPT_VERS */
732
733#ifdef MNTTAB_OPT_PROTO
734    {
735      char *proto_opt = hasmnteq(&mnt, MNTTAB_OPT_PROTO);
736      if (proto_opt) {
737	char **p;
738	for (p = protocols; *p; p++)
739	  if (NSTREQ(proto_opt, *p, strlen(*p))) {
740	    nfs_proto = *p;
741	    break;
742	  }
743	if (*p == NULL)
744	  plog(XLOG_WARNING, "ignoring unknown protocol option for %s:%s",
745	       host, mf->mf_fo->opt_rfs);
746      }
747    }
748#endif /* MNTTAB_OPT_PROTO */
749
750#ifdef HAVE_NFS_NFSV2_H
751    /* allow overriding if nfsv2 option is specified in mount options */
752    if (amu_hasmntopt(&mnt, "nfsv2")) {
753      nfs_version = NFS_VERSION;/* nullify any ``vers=X'' statements */
754      nfs_proto = "udp";	/* nullify any ``proto=tcp'' statements */
755      plog(XLOG_WARNING, "found compatibility option \"nfsv2\": set options vers=2,proto=udp for host %s", host);
756    }
757#endif /* HAVE_NFS_NFSV2_H */
758
759    /* check if we've globally overridden the NFS version/protocol */
760    if (gopt.nfs_vers) {
761      nfs_version = gopt.nfs_vers;
762      plog(XLOG_INFO, "%s: force NFS version to %d", __func__,
763	   (int) nfs_version);
764    }
765    if (gopt.nfs_proto) {
766      nfs_proto = gopt.nfs_proto;
767      plog(XLOG_INFO, "%s: force NFS protocol transport to %s", __func__,
768	nfs_proto);
769    }
770  }
771
772  /*
773   * lookup host address and canonical name
774   */
775  hp = gethostbyname(host);
776
777  /*
778   * New code from Bob Harris <harris@basil-rathbone.mit.edu>
779   * Use canonical name to keep track of file server
780   * information.  This way aliases do not generate
781   * multiple NFS pingers.  (Except when we're normalizing
782   * hosts.)
783   */
784  if (hp && !(gopt.flags & CFM_NORMALIZE_HOSTNAMES))
785    host = (char *) hp->h_name;
786
787  if (hp) {
788    switch (hp->h_addrtype) {
789    case AF_INET:
790      ip = CALLOC(struct sockaddr_in);
791      memset((voidp) ip, 0, sizeof(*ip));
792      /* as per POSIX, sin_len need not be set (used internally by kernel) */
793      ip->sin_family = AF_INET;
794      memmove((voidp) &ip->sin_addr, (voidp) hp->h_addr, sizeof(ip->sin_addr));
795      break;
796
797    default:
798      plog(XLOG_USER, "No IP address for host %s", host);
799      goto no_dns;
800    }
801  } else {
802    plog(XLOG_USER, "Unknown host: %s", host);
803    goto no_dns;
804  }
805
806  /*
807   * This may not be the best way to do things, but it really doesn't make
808   * sense to query a file server which is marked as 'down' for any
809   * version/proto combination.
810   */
811  ITER(fs, fserver, &nfs_srvr_list) {
812    if (FSRV_ISDOWN(fs) &&
813	STREQ(host, fs->fs_host)) {
814      plog(XLOG_WARNING, "fileserver %s is already hung - not running NFS proto/version discovery", host);
815      fs->fs_refc++;
816      XFREE(ip);
817      return fs;
818    }
819  }
820
821  /*
822   * Get the NFS Version, and verify server is up.
823   * If the client only supports NFSv2, hardcode it but still try to
824   * contact the remote portmapper to see if the service is running.
825   */
826#ifndef HAVE_FS_NFS3
827  nfs_version = NFS_VERSION;
828  nfs_proto = "udp";
829  plog(XLOG_INFO, "The client supports only NFS(2,udp)");
830#endif /* not HAVE_FS_NFS3 */
831
832
833  if (amu_hasmntopt(&mnt, MNTTAB_OPT_PUBLIC)) {
834    /*
835     * Use WebNFS to obtain file handles.
836     */
837    mf->mf_flags |= MFF_WEBNFS;
838    plog(XLOG_INFO, "%s option used, NOT contacting the portmapper on %s",
839	 MNTTAB_OPT_PUBLIC, host);
840    /*
841     * Prefer NFSv4/tcp if the client supports it (cf. RFC 2054, 7).
842     */
843    if (!nfs_version) {
844#if defined(HAVE_FS_NFS4)
845      nfs_version = NFS_VERSION4;
846#elif defined(HAVE_FS_NFS3)
847      nfs_version = NFS_VERSION3;
848#else /* not HAVE_FS_NFS3 */
849      nfs_version = NFS_VERSION;
850#endif /* not HAVE_FS_NFS3 */
851      plog(XLOG_INFO, "No NFS version specified, will use NFSv%d",
852	   (int) nfs_version);
853    }
854    if (!nfs_proto) {
855#if defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3) || defined(HAVE_FS_NFS4)
856      nfs_proto = "tcp";
857#else /* not defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3) || defined(HAVE_FS_NFS4) */
858      nfs_proto = "udp";
859#endif /* not defined(MNTTAB_OPT_PROTO) || defined(HAVE_FS_NFS3) || defined(HAVE_FS_NFS4) */
860      plog(XLOG_INFO, "No NFS protocol transport specified, will use %s",
861	   nfs_proto);
862    }
863  } else {
864    /*
865     * Find the best combination of NFS version and protocol.
866     * When given a choice, use the highest available version,
867     * and use TCP over UDP if available.
868     */
869    if (check_pmap_up(host, ip)) {
870      if (nfs_proto) {
871	best_nfs_version = get_nfs_version(host, ip, nfs_version, nfs_proto,
872	  gopt.nfs_vers);
873	nfs_port = ip->sin_port;
874      }
875#ifdef MNTTAB_OPT_PROTO
876      else {
877	u_int proto_nfs_version;
878	char **p;
879
880	for (p = protocols; *p; p++) {
881	  proto_nfs_version = get_nfs_version(host, ip, nfs_version, *p,
882	    gopt.nfs_vers);
883	  if (proto_nfs_version > best_nfs_version) {
884	    best_nfs_version = proto_nfs_version;
885	    nfs_proto = *p;
886	    nfs_port = ip->sin_port;
887	  }
888	}
889      }
890#endif /* MNTTAB_OPT_PROTO */
891    } else {
892      plog(XLOG_INFO, "portmapper service not running on %s", host);
893    }
894
895    /* use the portmapper results only nfs_version is not set yet */
896    if (!best_nfs_version) {
897      /*
898       * If the NFS server is down or does not support the portmapper call
899       * (such as certain Novell NFS servers) we mark it as version 2 and we
900       * let the nfs code deal with the case when it is down.  If/when the
901       * server comes back up and it can support NFSv3 and/or TCP, it will
902       * use those.
903       */
904      if (nfs_version == 0) {
905	nfs_version = NFS_VERSION;
906	nfs_proto = "udp";
907      }
908      plog(XLOG_INFO, "NFS service not running on %s", host);
909      fserver_is_down = 1;
910    } else {
911      if (nfs_version == 0)
912	nfs_version = best_nfs_version;
913      plog(XLOG_INFO, "Using NFS version %d, protocol %s on host %s",
914	   (int) nfs_version, nfs_proto, host);
915    }
916  }
917
918  /*
919   * Determine the NFS port.
920   *
921   * A valid "port" mount option overrides anything else.
922   * If the port has been determined from the portmapper, use that.
923   * Default to NFS_PORT otherwise (cf. RFC 2054, 3).
924   */
925  nfs_port_opt = hasmntval(&mnt, MNTTAB_OPT_PORT);
926  if (nfs_port_opt > 0)
927    nfs_port = htons(nfs_port_opt);
928  if (!nfs_port)
929    nfs_port = htons(NFS_PORT);
930
931  dlog("%s: using port %d for nfs on %s", __func__,
932    (int) ntohs(nfs_port), host);
933  ip->sin_port = nfs_port;
934
935no_dns:
936  /*
937   * Try to find an existing fs server structure for this host.
938   * Note that differing versions or protocols have their own structures.
939   * XXX: Need to fix the ping mechanism to actually use the NFS protocol
940   * chosen here (right now it always uses datagram sockets).
941   */
942  ITER(fs, fserver, &nfs_srvr_list) {
943    if (STREQ(host, fs->fs_host) &&
944 	nfs_version == fs->fs_version &&
945	STREQ(nfs_proto, fs->fs_proto)) {
946      /*
947       * fill in the IP address -- this is only needed
948       * if there is a chance an IP address will change
949       * between mounts.
950       * Mike Mitchell, mcm@unx.sas.com, 09/08/93
951       */
952      if (hp && fs->fs_ip &&
953	  memcmp((voidp) &fs->fs_ip->sin_addr,
954		 (voidp) hp->h_addr,
955		 sizeof(fs->fs_ip->sin_addr)) != 0) {
956	struct in_addr ia;
957	char *old_ipaddr, *new_ipaddr;
958	old_ipaddr = xstrdup(inet_ntoa(fs->fs_ip->sin_addr));
959	memmove((voidp) &ia, (voidp) hp->h_addr, sizeof(struct in_addr));
960	new_ipaddr = inet_ntoa(ia);	/* ntoa uses static buf */
961	plog(XLOG_WARNING, "fileserver %s changed ip: %s -> %s",
962	     fs->fs_host, old_ipaddr, new_ipaddr);
963	XFREE(old_ipaddr);
964	flush_nfs_fhandle_cache(fs);
965	memmove((voidp) &fs->fs_ip->sin_addr, (voidp) hp->h_addr, sizeof(fs->fs_ip->sin_addr));
966      }
967
968      /*
969       * If the new file systems doesn't use WebNFS, the nfs pings may
970       * try to contact the portmapper.
971       */
972      if (!(mf->mf_flags & MFF_WEBNFS))
973	fs->fs_flags &= ~FSF_WEBNFS;
974
975      /* check if pingval needs to be updated/set/reset */
976      start_nfs_pings(fs, pingval);
977
978      /*
979       * Following if statement from Mike Mitchell <mcm@unx.sas.com>
980       * Initialize the ping data if we aren't pinging now.  The np_ttl and
981       * np_ping fields are especially important.
982       */
983      if (!(fs->fs_flags & FSF_PINGING)) {
984	np = (nfs_private *) fs->fs_private;
985	if (np->np_mountd_inval != 'P') {
986	  np->np_mountd_inval = TRUE;
987	  np->np_xid = XID_ALLOC();
988	  np->np_error = -1;
989	  np->np_ping = 0;
990	  /*
991	   * Initially the server will be deemed dead
992	   * after MAX_ALLOWED_PINGS of the fast variety
993	   * have failed.
994	   */
995	  np->np_ttl = MAX_ALLOWED_PINGS * FAST_NFS_PING + clocktime(NULL) - 1;
996	  start_nfs_pings(fs, pingval);
997	  if (fserver_is_down)
998	    fs->fs_flags |= FSF_VALID | FSF_DOWN;
999	} else {
1000	  fs->fs_flags = FSF_VALID;
1001	}
1002
1003      }
1004
1005      fs->fs_refc++;
1006      XFREE(ip);
1007      return fs;
1008    }
1009  }
1010
1011  /*
1012   * Get here if we can't find an entry
1013   */
1014
1015  /*
1016   * Allocate a new server
1017   */
1018  fs = ALLOC(struct fserver);
1019  fs->fs_refc = 1;
1020  fs->fs_host = xstrdup(hp ? hp->h_name : "unknown_hostname");
1021  if (gopt.flags & CFM_NORMALIZE_HOSTNAMES)
1022    host_normalize(&fs->fs_host);
1023  fs->fs_ip = ip;
1024  fs->fs_cid = 0;
1025  if (ip) {
1026    fs->fs_flags = FSF_DOWN;	/* Starts off down */
1027  } else {
1028    fs->fs_flags = FSF_ERROR | FSF_VALID;
1029    mf->mf_flags |= MFF_ERROR;
1030    mf->mf_error = ENOENT;
1031  }
1032  if (mf->mf_flags & MFF_WEBNFS)
1033    fs->fs_flags |= FSF_WEBNFS;
1034  fs->fs_version = nfs_version;
1035  fs->fs_proto = nfs_proto;
1036  fs->fs_type = MNTTAB_TYPE_NFS;
1037  fs->fs_pinger = AM_PINGER;
1038  fs->fs_flags |= FSF_PING_UNINIT; /* pinger hasn't been initialized */
1039  np = ALLOC(struct nfs_private);
1040  memset((voidp) np, 0, sizeof(*np));
1041  np->np_mountd = htons(hasmntval(&mnt, "mountport"));
1042  if (np->np_mountd == 0) {
1043    np->np_mountd_inval = 'Y';
1044    np->np_xid = XID_ALLOC();
1045    np->np_error = -1;
1046  } else {
1047    plog(XLOG_INFO, "%s: using mountport: %d", __func__,
1048      (int) ntohs(np->np_mountd));
1049    np->np_mountd_inval = 'P';
1050    np->np_xid = 0;
1051    np->np_error = 0;
1052  }
1053
1054  /*
1055   * Initially the server will be deemed dead after
1056   * MAX_ALLOWED_PINGS of the fast variety have failed.
1057   */
1058  np->np_ttl = clocktime(NULL) + MAX_ALLOWED_PINGS * FAST_NFS_PING - 1;
1059  fs->fs_private = (voidp) np;
1060  fs->fs_prfree = (void (*)(voidp)) free;
1061
1062  if (!FSRV_ERROR(fs)) {
1063    /* start of keepalive timer, first updating pingval */
1064    start_nfs_pings(fs, pingval);
1065    if (fserver_is_down)
1066      fs->fs_flags |= FSF_VALID | FSF_DOWN;
1067  }
1068
1069  /*
1070   * Add to list of servers
1071   */
1072  ins_que(&fs->fs_q, &nfs_srvr_list);
1073
1074  return fs;
1075}
1076