ops_nfs.c revision 174294
1153323Srodrigc/*
2159451Srodrigc * Copyright (c) 1997-2006 Erez Zadok
3159451Srodrigc * Copyright (c) 1990 Jan-Simon Pendry
4153323Srodrigc * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
5159451Srodrigc * Copyright (c) 1990 The Regents of the University of California.
6159451Srodrigc * All rights reserved.
7159451Srodrigc *
8159451Srodrigc * This code is derived from software contributed to Berkeley by
9159451Srodrigc * Jan-Simon Pendry at Imperial College, London.
10159451Srodrigc *
11159451Srodrigc * Redistribution and use in source and binary forms, with or without
12159451Srodrigc * modification, are permitted provided that the following conditions
13153323Srodrigc * are met:
14159451Srodrigc * 1. Redistributions of source code must retain the above copyright
15159451Srodrigc *    notice, this list of conditions and the following disclaimer.
16159451Srodrigc * 2. Redistributions in binary form must reproduce the above copyright
17159451Srodrigc *    notice, this list of conditions and the following disclaimer in the
18159451Srodrigc *    documentation and/or other materials provided with the distribution.
19159451Srodrigc * 3. All advertising materials mentioning features or use of this software
20159451Srodrigc *    must display the following acknowledgment:
21159451Srodrigc *      This product includes software developed by the University of
22159451Srodrigc *      California, Berkeley and its contributors.
23159451Srodrigc * 4. Neither the name of the University nor the names of its contributors
24159451Srodrigc *    may be used to endorse or promote products derived from this software
25159451Srodrigc *    without specific prior written permission.
26159451Srodrigc *
27153323Srodrigc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28159451Srodrigc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29153323Srodrigc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30153323Srodrigc * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31153323Srodrigc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32153323Srodrigc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33153323Srodrigc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34153323Srodrigc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35153323Srodrigc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36153323Srodrigc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37153323Srodrigc * SUCH DAMAGE.
38153323Srodrigc *
39153323Srodrigc *
40153323Srodrigc * File: am-utils/amd/ops_nfs.c
41153323Srodrigc *
42159451Srodrigc */
43153323Srodrigc
44159451Srodrigc/*
45153323Srodrigc * Network file system
46153323Srodrigc */
47153323Srodrigc
48153323Srodrigc#ifdef HAVE_CONFIG_H
49153323Srodrigc# include <config.h>
50153323Srodrigc#endif /* HAVE_CONFIG_H */
51153323Srodrigc#include <am_defs.h>
52153323Srodrigc#include <amd.h>
53153323Srodrigc
/*
 * Map an nfsstat value onto a UN*X errno value.  The two numbering
 * schemes are treated as identical here, so this is a plain cast.
 */
#define unx_error(e)		((int) (e))

/*
 * Cache lifetimes, in seconds.
 *
 * FH_TTL:       how long a file handle stays cached after it was last
 *               used.  A handle that becomes invalid is flushed anyway.
 * FH_TTL_ERROR: shorter lifetime used for entries that record an error.
 *
 * FHID_ALLOC() hands out the next cache entry id by bumping fh_id.
 */
#define FH_TTL			(5 * 60)	/* five minutes */
#define FH_TTL_ERROR		(30)		/* 30 seconds */
#define FHID_ALLOC()		(++fh_id)
67153323Srodrigc
68153323Srodrigc/*
69153323Srodrigc * The NFS layer maintains a cache of file handles.
70153323Srodrigc * This is *fundamental* to the implementation and
71153323Srodrigc * also allows quick remounting when a filesystem
72153323Srodrigc * is accessed soon after timing out.
73153323Srodrigc *
74153323Srodrigc * The NFS server layer knows to flush this cache
75153323Srodrigc * when a server goes down so avoiding stale handles.
76153323Srodrigc *
77153323Srodrigc * Each cache entry keeps a hard reference to
78153323Srodrigc * the corresponding server.  This ensures that
79153323Srodrigc * the server keepalive information is maintained.
80153323Srodrigc *
81153323Srodrigc * The copy of the sockaddr_in here is taken so
82153323Srodrigc * that the port can be twiddled to talk to mountd
83153323Srodrigc * instead of portmap or the NFS server as used
84189878Skib * elsewhere.
85153323Srodrigc * The port# is flushed if a server goes down.
86153323Srodrigc * The IP address is never flushed - we assume
87153323Srodrigc * that the address of a mounted machine never
88153323Srodrigc * changes.  If it does, then you have other
89176249Sattilio * problems...
90153323Srodrigc */
91153323Srodrigctypedef struct fh_cache fh_cache;
92153323Srodrigcstruct fh_cache {
93153323Srodrigc  qelem			fh_q;		/* List header */
94153323Srodrigc  wchan_t		fh_wchan;	/* Wait channel */
95153323Srodrigc  int			fh_error;	/* Valid data? */
96153323Srodrigc  int			fh_id;		/* Unique id */
97153323Srodrigc  int			fh_cid;		/* Callout id */
98153323Srodrigc  u_long		fh_nfs_version;	/* highest NFS version on host */
99153323Srodrigc  am_nfs_handle_t	fh_nfs_handle;	/* Handle on filesystem */
100153323Srodrigc  int			fh_status;	/* Status of last rpc */
101153323Srodrigc  struct sockaddr_in	fh_sin;		/* Address of mountd */
102153323Srodrigc  fserver		*fh_fs;		/* Server holding filesystem */
103189878Skib  char			*fh_path;	/* Filesystem on host */
104153323Srodrigc};
105176249Sattilio
106153323Srodrigc/* forward definitions */
107153323Srodrigcstatic int nfs_init(mntfs *mf);
108153323Srodrigcstatic char *nfs_match(am_opts *fo);
109153323Srodrigcstatic int nfs_mount(am_node *am, mntfs *mf);
110153323Srodrigcstatic int nfs_umount(am_node *am, mntfs *mf);
111153323Srodrigcstatic void nfs_umounted(mntfs *mf);
112153323Srodrigcstatic int call_mountd(fh_cache *fp, u_long proc, fwd_fun f, wchan_t wchan);
113153323Srodrigcstatic int webnfs_lookup(fh_cache *fp, fwd_fun f, wchan_t wchan);
114153323Srodrigcstatic int fh_id = 0;
115153323Srodrigc
116153323Srodrigc/* globals */
117153323SrodrigcAUTH *nfs_auth;
118153323Srodrigcqelem fh_head = {&fh_head, &fh_head};
119153323Srodrigc
120153323Srodrigc/*
121153323Srodrigc * Network file system operations
122153323Srodrigc */
123153323Srodrigcam_ops nfs_ops =
124153323Srodrigc{
125153323Srodrigc  "nfs",
126153323Srodrigc  nfs_match,
127159451Srodrigc  nfs_init,
128159451Srodrigc  nfs_mount,
129159451Srodrigc  nfs_umount,
130159451Srodrigc  amfs_error_lookup_child,
131159451Srodrigc  amfs_error_mount_child,
132153323Srodrigc  amfs_error_readdir,
133153323Srodrigc  0,				/* nfs_readlink */
134153323Srodrigc  0,				/* nfs_mounted */
135153323Srodrigc  nfs_umounted,
136153323Srodrigc  find_nfs_srvr,
137153323Srodrigc  0,				/* nfs_get_wchan */
138159451Srodrigc  FS_MKMNT | FS_BACKGROUND | FS_AMQINFO,	/* nfs_fs_flags */
139153323Srodrigc#ifdef HAVE_FS_AUTOFS
140153323Srodrigc  AUTOFS_NFS_FS_FLAGS,
141153323Srodrigc#endif /* HAVE_FS_AUTOFS */
142153323Srodrigc};
143153323Srodrigc
144153323Srodrigc
145153323Srodrigcstatic fh_cache *
146153323Srodrigcfind_nfs_fhandle_cache(opaque_t arg, int done)
147153323Srodrigc{
148153323Srodrigc  fh_cache *fp, *fp2 = 0;
149153323Srodrigc  int id = (long) arg;		/* for 64-bit archs */
150153323Srodrigc
151153323Srodrigc  ITER(fp, fh_cache, &fh_head) {
152153323Srodrigc    if (fp->fh_id == id) {
153153323Srodrigc      fp2 = fp;
154153323Srodrigc      break;
155153323Srodrigc    }
156153323Srodrigc  }
157153323Srodrigc
158153323Srodrigc  if (fp2) {
159153323Srodrigc    dlog("fh cache gives fp %#lx, fs %s", (unsigned long) fp2, fp2->fh_path);
160153323Srodrigc  } else {
161153323Srodrigc    dlog("fh cache search failed");
162153323Srodrigc  }
163159451Srodrigc
164175486Sattilio  if (fp2 && !done) {
165153323Srodrigc    fp2->fh_error = ETIMEDOUT;
166153323Srodrigc    return 0;
167153323Srodrigc  }
168159451Srodrigc
169159451Srodrigc  return fp2;
170153323Srodrigc}
171153323Srodrigc
172153323Srodrigc
173153323Srodrigc/*
174153323Srodrigc * Called when a filehandle appears via the mount protocol
175153323Srodrigc */
176153323Srodrigcstatic void
177153323Srodrigcgot_nfs_fh_mount(voidp pkt, int len, struct sockaddr_in *sa, struct sockaddr_in *ia, opaque_t arg, int done)
178159451Srodrigc{
179153323Srodrigc  fh_cache *fp;
180153323Srodrigc  struct fhstatus res;
181153323Srodrigc#ifdef HAVE_FS_NFS3
182153323Srodrigc  struct am_mountres3 res3;
183153323Srodrigc#endif /* HAVE_FS_NFS3 */
184159451Srodrigc
185153323Srodrigc  fp = find_nfs_fhandle_cache(arg, done);
186153323Srodrigc  if (!fp)
187153323Srodrigc    return;
188153323Srodrigc
189153323Srodrigc  /*
190159451Srodrigc   * retrieve the correct RPC reply for the file handle, based on the
191153323Srodrigc   * NFS protocol version.
192153323Srodrigc   */
193153323Srodrigc#ifdef HAVE_FS_NFS3
194153323Srodrigc  if (fp->fh_nfs_version == NFS_VERSION3) {
195159451Srodrigc    memset(&res3, 0, sizeof(res3));
196153323Srodrigc    fp->fh_error = pickup_rpc_reply(pkt, len, (voidp) &res3,
197159451Srodrigc				    (XDRPROC_T_TYPE) xdr_am_mountres3);
198159451Srodrigc    fp->fh_status = unx_error(res3.fhs_status);
199153323Srodrigc    memset(&fp->fh_nfs_handle.v3, 0, sizeof(am_nfs_fh3));
200153323Srodrigc    fp->fh_nfs_handle.v3.am_fh3_length = res3.mountres3_u.mountinfo.fhandle.fhandle3_len;
201153323Srodrigc    memmove(fp->fh_nfs_handle.v3.am_fh3_data,
202159451Srodrigc	    res3.mountres3_u.mountinfo.fhandle.fhandle3_val,
203159451Srodrigc	    fp->fh_nfs_handle.v3.am_fh3_length);
204159451Srodrigc  } else {
205159451Srodrigc#endif /* HAVE_FS_NFS3 */
206159451Srodrigc    memset(&res, 0, sizeof(res));
207159451Srodrigc    fp->fh_error = pickup_rpc_reply(pkt, len, (voidp) &res,
208159451Srodrigc				    (XDRPROC_T_TYPE) xdr_fhstatus);
209159451Srodrigc    fp->fh_status = unx_error(res.fhs_status);
210159451Srodrigc    memmove(&fp->fh_nfs_handle.v2, &res.fhs_fh, NFS_FHSIZE);
211159451Srodrigc#ifdef HAVE_FS_NFS3
212159451Srodrigc  }
213159451Srodrigc#endif /* HAVE_FS_NFS3 */
214159451Srodrigc
215159451Srodrigc  if (!fp->fh_error) {
216159451Srodrigc    dlog("got filehandle for %s:%s", fp->fh_fs->fs_host, fp->fh_path);
217159451Srodrigc  } else {
218159451Srodrigc    plog(XLOG_USER, "filehandle denied for %s:%s", fp->fh_fs->fs_host, fp->fh_path);
219159451Srodrigc    /*
220159451Srodrigc     * Force the error to be EACCES. It's debatable whether it should be
221159451Srodrigc     * ENOENT instead, but the server really doesn't give us any clues, and
222159451Srodrigc     * EACCES is more in line with the "filehandle denied" message.
223159451Srodrigc     */
224159451Srodrigc    fp->fh_error = EACCES;
225159451Srodrigc  }
226159451Srodrigc
227159451Srodrigc  /*
228159451Srodrigc   * Wakeup anything sleeping on this filehandle
229159451Srodrigc   */
230159451Srodrigc  if (fp->fh_wchan) {
231159451Srodrigc    dlog("Calling wakeup on %#lx", (unsigned long) fp->fh_wchan);
232159451Srodrigc    wakeup(fp->fh_wchan);
233159451Srodrigc  }
234159451Srodrigc}
235159451Srodrigc
236159451Srodrigc
237159451Srodrigc/*
238159451Srodrigc * Called when a filehandle appears via WebNFS
239159451Srodrigc */
240159451Srodrigcstatic void
241159451Srodrigcgot_nfs_fh_webnfs(voidp pkt, int len, struct sockaddr_in *sa, struct sockaddr_in *ia, opaque_t arg, int done)
242159451Srodrigc{
243159451Srodrigc  fh_cache *fp;
244159451Srodrigc  nfsdiropres res;
245159451Srodrigc#ifdef HAVE_FS_NFS3
246159451Srodrigc  am_LOOKUP3res res3;
247159451Srodrigc#endif /* HAVE_FS_NFS3 */
248159451Srodrigc
249159451Srodrigc  fp = find_nfs_fhandle_cache(arg, done);
250159451Srodrigc  if (!fp)
251159451Srodrigc    return;
252159451Srodrigc
253159451Srodrigc  /*
254159451Srodrigc   * retrieve the correct RPC reply for the file handle, based on the
255159451Srodrigc   * NFS protocol version.
256159451Srodrigc   */
257159451Srodrigc#ifdef HAVE_FS_NFS3
258159451Srodrigc  if (fp->fh_nfs_version == NFS_VERSION3) {
259159451Srodrigc    memset(&res3, 0, sizeof(res3));
260159451Srodrigc    fp->fh_error = pickup_rpc_reply(pkt, len, (voidp) &res3,
261159451Srodrigc				    (XDRPROC_T_TYPE) xdr_am_LOOKUP3res);
262159451Srodrigc    fp->fh_status = unx_error(res3.status);
263159451Srodrigc    memset(&fp->fh_nfs_handle.v3, 0, sizeof(am_nfs_fh3));
264159451Srodrigc    fp->fh_nfs_handle.v3.am_fh3_length = res3.res_u.ok.object.am_fh3_length;
265159451Srodrigc    memmove(fp->fh_nfs_handle.v3.am_fh3_data,
266159451Srodrigc	    res3.res_u.ok.object.am_fh3_data,
267159451Srodrigc	    fp->fh_nfs_handle.v3.am_fh3_length);
268159451Srodrigc  } else {
269183754Sattilio#endif /* HAVE_FS_NFS3 */
270159451Srodrigc    memset(&res, 0, sizeof(res));
271159451Srodrigc    fp->fh_error = pickup_rpc_reply(pkt, len, (voidp) &res,
272159451Srodrigc				    (XDRPROC_T_TYPE) xdr_diropres);
273159451Srodrigc    fp->fh_status = unx_error(res.dr_status);
274159451Srodrigc    memmove(&fp->fh_nfs_handle.v2, &res.dr_u.dr_drok_u.drok_fhandle, NFS_FHSIZE);
275159451Srodrigc#ifdef HAVE_FS_NFS3
276159451Srodrigc  }
277159451Srodrigc#endif /* HAVE_FS_NFS3 */
278159451Srodrigc
279159451Srodrigc  if (!fp->fh_error) {
280159451Srodrigc    dlog("got filehandle for %s:%s", fp->fh_fs->fs_host, fp->fh_path);
281159451Srodrigc  } else {
282159451Srodrigc    plog(XLOG_USER, "filehandle denied for %s:%s", fp->fh_fs->fs_host, fp->fh_path);
283159451Srodrigc    /*
284159451Srodrigc     * Force the error to be EACCES. It's debatable whether it should be
285159451Srodrigc     * ENOENT instead, but the server really doesn't give us any clues, and
286159451Srodrigc     * EACCES is more in line with the "filehandle denied" message.
287159451Srodrigc     */
288159451Srodrigc    fp->fh_error = EACCES;
289159451Srodrigc  }
290159451Srodrigc
291159451Srodrigc  /*
292159451Srodrigc   * Wakeup anything sleeping on this filehandle
293159451Srodrigc   */
294159451Srodrigc  if (fp->fh_wchan) {
295159451Srodrigc    dlog("Calling wakeup on %#lx", (unsigned long) fp->fh_wchan);
296159451Srodrigc    wakeup(fp->fh_wchan);
297159451Srodrigc  }
298159451Srodrigc}
299159451Srodrigc
300159451Srodrigc
301159451Srodrigcvoid
302159451Srodrigcflush_nfs_fhandle_cache(fserver *fs)
303159451Srodrigc{
304159451Srodrigc  fh_cache *fp;
305159451Srodrigc
306159451Srodrigc  ITER(fp, fh_cache, &fh_head) {
307159451Srodrigc    if (fp->fh_fs == fs || fs == NULL) {
308159451Srodrigc      /*
309159451Srodrigc       * Only invalidate port info for non-WebNFS servers
310159451Srodrigc       */
311159451Srodrigc      if (!(fp->fh_fs->fs_flags & FSF_WEBNFS))
312159451Srodrigc	fp->fh_sin.sin_port = (u_short) 0;
313159451Srodrigc      fp->fh_error = -1;
314159451Srodrigc    }
315159451Srodrigc  }
316159451Srodrigc}
317159451Srodrigc
318159451Srodrigc
319159451Srodrigcstatic void
320159451Srodrigcdiscard_fh(opaque_t arg)
321159451Srodrigc{
322159451Srodrigc  fh_cache *fp = (fh_cache *) arg;
323159451Srodrigc
324159451Srodrigc  rem_que(&fp->fh_q);
325159451Srodrigc  if (fp->fh_fs) {
326159451Srodrigc    dlog("Discarding filehandle for %s:%s", fp->fh_fs->fs_host, fp->fh_path);
327159451Srodrigc    free_srvr(fp->fh_fs);
328159451Srodrigc  }
329159451Srodrigc  if (fp->fh_path)
330159451Srodrigc    XFREE(fp->fh_path);
331159451Srodrigc  XFREE(fp);
332159451Srodrigc}
333159451Srodrigc
334159451Srodrigc
335/*
336 * Determine the file handle for a node
337 */
338static int
339prime_nfs_fhandle_cache(char *path, fserver *fs, am_nfs_handle_t *fhbuf, mntfs *mf)
340{
341  fh_cache *fp, *fp_save = 0;
342  int error;
343  int reuse_id = FALSE;
344
345  dlog("Searching cache for %s:%s", fs->fs_host, path);
346
347  /*
348   * First search the cache
349   */
350  ITER(fp, fh_cache, &fh_head) {
351    if (fs != fp->fh_fs  ||  !STREQ(path, fp->fh_path))
352      continue;			/* skip to next ITER item */
353    /* else we got a match */
354    switch (fp->fh_error) {
355    case 0:
356      plog(XLOG_INFO, "prime_nfs_fhandle_cache: NFS version %d", (int) fp->fh_nfs_version);
357
358      error = fp->fh_error = fp->fh_status;
359
360      if (error == 0) {
361	if (mf->mf_flags & MFF_NFS_SCALEDOWN) {
362	  fp_save = fp;
363	  /* XXX: why reuse the ID? */
364	  reuse_id = TRUE;
365	  break;
366	}
367
368	if (fhbuf) {
369#ifdef HAVE_FS_NFS3
370	  if (fp->fh_nfs_version == NFS_VERSION3) {
371	    memmove((voidp) &(fhbuf->v3), (voidp) &(fp->fh_nfs_handle.v3),
372		    sizeof(fp->fh_nfs_handle.v3));
373	  } else
374#endif /* HAVE_FS_NFS3 */
375	    {
376	      memmove((voidp) &(fhbuf->v2), (voidp) &(fp->fh_nfs_handle.v2),
377		      sizeof(fp->fh_nfs_handle.v2));
378	    }
379	}
380	if (fp->fh_cid)
381	  untimeout(fp->fh_cid);
382	fp->fh_cid = timeout(FH_TTL, discard_fh, (opaque_t) fp);
383      } else if (error == EACCES) {
384	/*
385	 * Now decode the file handle return code.
386	 */
387	plog(XLOG_INFO, "Filehandle denied for \"%s:%s\"",
388	     fs->fs_host, path);
389      } else {
390	errno = error;	/* XXX */
391	plog(XLOG_INFO, "Filehandle error for \"%s:%s\": %m",
392	     fs->fs_host, path);
393      }
394
395      /*
396       * The error was returned from the remote mount daemon.
397       * Policy: this error will be cached for now...
398       */
399      return error;
400
401    case -1:
402      /*
403       * Still thinking about it, but we can re-use.
404       */
405      fp_save = fp;
406      reuse_id = TRUE;
407      break;
408
409    default:
410      /*
411       * Return the error.
412       * Policy: make sure we recompute if required again
413       * in case this was caused by a network failure.
414       * This can thrash mountd's though...  If you find
415       * your mountd going slowly then:
416       * 1.  Add a fork() loop to main.
417       * 2.  Remove the call to innetgr() and don't use
418       *     netgroups, especially if you don't use YP.
419       */
420      error = fp->fh_error;
421      fp->fh_error = -1;
422      return error;
423    }	/* end of switch statement */
424  } /* end of ITER loop */
425
426  /*
427   * Not in cache
428   */
429  if (fp_save) {
430    fp = fp_save;
431    /*
432     * Re-use existing slot
433     */
434    untimeout(fp->fh_cid);
435    free_srvr(fp->fh_fs);
436    XFREE(fp->fh_path);
437  } else {
438    fp = ALLOC(struct fh_cache);
439    memset((voidp) fp, 0, sizeof(struct fh_cache));
440    ins_que(&fp->fh_q, &fh_head);
441  }
442  if (!reuse_id)
443    fp->fh_id = FHID_ALLOC();
444  fp->fh_wchan = get_mntfs_wchan(mf);
445  fp->fh_error = -1;
446  fp->fh_cid = timeout(FH_TTL, discard_fh, (opaque_t) fp);
447
448  /*
449   * If fs->fs_ip is null, remote server is probably down.
450   */
451  if (!fs->fs_ip) {
452    /* Mark the fileserver down and invalid again */
453    fs->fs_flags &= ~FSF_VALID;
454    fs->fs_flags |= FSF_DOWN;
455    error = AM_ERRNO_HOST_DOWN;
456    return error;
457  }
458
459  /*
460   * Either fp has been freshly allocated or the address has changed.
461   * Initialize address and nfs version.  Don't try to re-use the port
462   * information unless using WebNFS where the port is fixed either by
463   * the spec or the "port" mount option.
464   */
465  if (fp->fh_sin.sin_addr.s_addr != fs->fs_ip->sin_addr.s_addr) {
466    fp->fh_sin = *fs->fs_ip;
467    if (!(mf->mf_flags & MFF_WEBNFS))
468	fp->fh_sin.sin_port = 0;
469    fp->fh_nfs_version = fs->fs_version;
470  }
471
472  fp->fh_fs = dup_srvr(fs);
473  fp->fh_path = strdup(path);
474
475  if (mf->mf_flags & MFF_WEBNFS)
476    error = webnfs_lookup(fp, got_nfs_fh_webnfs, get_mntfs_wchan(mf));
477  else
478    error = call_mountd(fp, MOUNTPROC_MNT, got_nfs_fh_mount, get_mntfs_wchan(mf));
479  if (error) {
480    /*
481     * Local error - cache for a short period
482     * just to prevent thrashing.
483     */
484    untimeout(fp->fh_cid);
485    fp->fh_cid = timeout(error < 0 ? 2 * ALLOWED_MOUNT_TIME : FH_TTL_ERROR,
486			 discard_fh, (opaque_t) fp);
487    fp->fh_error = error;
488  } else {
489    error = fp->fh_error;
490  }
491
492  return error;
493}
494
495
496int
497make_nfs_auth(void)
498{
499  AUTH_CREATE_GIDLIST_TYPE group_wheel = 0;
500
501  /* Some NFS mounts (particularly cross-domain) require FQDNs to succeed */
502
503#ifdef HAVE_TRANSPORT_TYPE_TLI
504  if (gopt.flags & CFM_FULLY_QUALIFIED_HOSTS) {
505    plog(XLOG_INFO, "Using NFS auth for FQHN \"%s\"", hostd);
506    nfs_auth = authsys_create(hostd, 0, 0, 1, &group_wheel);
507  } else {
508    nfs_auth = authsys_create_default();
509  }
510#else /* not HAVE_TRANSPORT_TYPE_TLI */
511  if (gopt.flags & CFM_FULLY_QUALIFIED_HOSTS) {
512    plog(XLOG_INFO, "Using NFS auth for FQHN \"%s\"", hostd);
513    nfs_auth = authunix_create(hostd, 0, 0, 1, &group_wheel);
514  } else {
515    nfs_auth = authunix_create_default();
516  }
517#endif /* not HAVE_TRANSPORT_TYPE_TLI */
518
519  if (!nfs_auth)
520    return ENOBUFS;
521
522  return 0;
523}
524
525
526static int
527call_mountd(fh_cache *fp, u_long proc, fwd_fun fun, wchan_t wchan)
528{
529  struct rpc_msg mnt_msg;
530  int len;
531  char iobuf[UDPMSGSIZE];
532  int error;
533  u_long mnt_version;
534
535  if (!nfs_auth) {
536    error = make_nfs_auth();
537    if (error)
538      return error;
539  }
540
541  if (fp->fh_sin.sin_port == 0) {
542    u_short mountd_port;
543    error = get_mountd_port(fp->fh_fs, &mountd_port, wchan);
544    if (error)
545      return error;
546    fp->fh_sin.sin_port = mountd_port;
547  }
548
549  /* find the right version of the mount protocol */
550#ifdef HAVE_FS_NFS3
551  if (fp->fh_nfs_version == NFS_VERSION3)
552    mnt_version = AM_MOUNTVERS3;
553  else
554#endif /* HAVE_FS_NFS3 */
555    mnt_version = MOUNTVERS;
556  plog(XLOG_INFO, "call_mountd: NFS version %d, mount version %d",
557       (int) fp->fh_nfs_version, (int) mnt_version);
558
559  rpc_msg_init(&mnt_msg, MOUNTPROG, mnt_version, MOUNTPROC_NULL);
560  len = make_rpc_packet(iobuf,
561			sizeof(iobuf),
562			proc,
563			&mnt_msg,
564			(voidp) &fp->fh_path,
565			(XDRPROC_T_TYPE) xdr_nfspath,
566			nfs_auth);
567
568  if (len > 0) {
569    error = fwd_packet(MK_RPC_XID(RPC_XID_MOUNTD, fp->fh_id),
570		       iobuf,
571		       len,
572		       &fp->fh_sin,
573		       &fp->fh_sin,
574		       (opaque_t) ((long) fp->fh_id), /* cast to long needed for 64-bit archs */
575		       fun);
576  } else {
577    error = -len;
578  }
579
580  /*
581   * It may be the case that we're sending to the wrong MOUNTD port.  This
582   * occurs if mountd is restarted on the server after the port has been
583   * looked up and stored in the filehandle cache somewhere.  The correct
584   * solution, if we're going to cache port numbers is to catch the ICMP
585   * port unreachable reply from the server and cause the portmap request
586   * to be redone.  The quick solution here is to invalidate the MOUNTD
587   * port.
588   */
589  fp->fh_sin.sin_port = 0;
590
591  return error;
592}
593
594
595static int
596webnfs_lookup(fh_cache *fp, fwd_fun fun, wchan_t wchan)
597{
598  struct rpc_msg wnfs_msg;
599  int len;
600  char iobuf[UDPMSGSIZE];
601  int error;
602  u_long proc;
603  XDRPROC_T_TYPE xdr_fn;
604  voidp argp;
605  nfsdiropargs args;
606#ifdef HAVE_FS_NFS3
607  am_LOOKUP3args args3;
608#endif
609  char *wnfs_path;
610  size_t l;
611
612  if (!nfs_auth) {
613    error = make_nfs_auth();
614    if (error)
615      return error;
616  }
617
618  if (fp->fh_sin.sin_port == 0) {
619    /* FIXME: wrong, don't discard sin_port in the first place for WebNFS. */
620    plog(XLOG_WARNING, "webnfs_lookup: port == 0 for nfs on %s, fixed",
621	 fp->fh_fs->fs_host);
622    fp->fh_sin.sin_port = htons(NFS_PORT);
623  }
624
625  /*
626   * Use native path like the rest of amd (cf. RFC 2054, 6.1).
627   */
628  l = strlen(fp->fh_path) + 2;
629  wnfs_path = (char *) xmalloc(l);
630  wnfs_path[0] = 0x80;
631  xstrlcpy(wnfs_path + 1, fp->fh_path, l - 1);
632
633  /* find the right program and lookup procedure */
634#ifdef HAVE_FS_NFS3
635  if (fp->fh_nfs_version == NFS_VERSION3) {
636    proc = AM_NFSPROC3_LOOKUP;
637    xdr_fn = (XDRPROC_T_TYPE) xdr_am_LOOKUP3args;
638    argp = &args3;
639    /* WebNFS public file handle */
640    args3.what.dir.am_fh3_length = 0;
641    args3.what.name = wnfs_path;
642  } else {
643#endif /* HAVE_FS_NFS3 */
644    proc = NFSPROC_LOOKUP;
645    xdr_fn = (XDRPROC_T_TYPE) xdr_diropargs;
646    argp = &args;
647    /* WebNFS public file handle */
648    memset(&args.da_fhandle, 0, NFS_FHSIZE);
649    args.da_name = wnfs_path;
650#ifdef HAVE_FS_NFS3
651  }
652#endif /* HAVE_FS_NFS3 */
653
654  plog(XLOG_INFO, "webnfs_lookup: NFS version %d", (int) fp->fh_nfs_version);
655
656  rpc_msg_init(&wnfs_msg, NFS_PROGRAM, fp->fh_nfs_version, proc);
657  len = make_rpc_packet(iobuf,
658			sizeof(iobuf),
659			proc,
660			&wnfs_msg,
661			argp,
662			(XDRPROC_T_TYPE) xdr_fn,
663			nfs_auth);
664
665  if (len > 0) {
666    error = fwd_packet(MK_RPC_XID(RPC_XID_WEBNFS, fp->fh_id),
667		       iobuf,
668		       len,
669		       &fp->fh_sin,
670		       &fp->fh_sin,
671		       (opaque_t) ((long) fp->fh_id), /* cast to long needed for 64-bit archs */
672		       fun);
673  } else {
674    error = -len;
675  }
676
677  XFREE(wnfs_path);
678  return error;
679}
680
681
682/*
683 * NFS needs the local filesystem, remote filesystem
684 * remote hostname.
685 * Local filesystem defaults to remote and vice-versa.
686 */
687static char *
688nfs_match(am_opts *fo)
689{
690  char *xmtab;
691  size_t l;
692
693  if (fo->opt_fs && !fo->opt_rfs)
694    fo->opt_rfs = fo->opt_fs;
695  if (!fo->opt_rfs) {
696    plog(XLOG_USER, "nfs: no remote filesystem specified");
697    return NULL;
698  }
699  if (!fo->opt_rhost) {
700    plog(XLOG_USER, "nfs: no remote host specified");
701    return NULL;
702  }
703
704  /*
705   * Determine magic cookie to put in mtab
706   */
707  l = strlen(fo->opt_rhost) + strlen(fo->opt_rfs) + 2;
708  xmtab = (char *) xmalloc(l);
709  xsnprintf(xmtab, l, "%s:%s", fo->opt_rhost, fo->opt_rfs);
710  dlog("NFS: mounting remote server \"%s\", remote fs \"%s\" on \"%s\"",
711       fo->opt_rhost, fo->opt_rfs, fo->opt_fs);
712
713  return xmtab;
714}
715
716
717/*
718 * Initialize am structure for nfs
719 */
720static int
721nfs_init(mntfs *mf)
722{
723  int error;
724  am_nfs_handle_t fhs;
725  char *colon;
726
727  if (mf->mf_private) {
728    if (mf->mf_flags & MFF_NFS_SCALEDOWN) {
729      fserver *fs;
730
731      /* tell remote mountd that we're done with this filehandle */
732      mf->mf_ops->umounted(mf);
733
734      mf->mf_prfree(mf->mf_private);
735      fs = mf->mf_ops->ffserver(mf);
736      free_srvr(mf->mf_server);
737      mf->mf_server = fs;
738    } else
739      return 0;
740  }
741
742  colon = strchr(mf->mf_info, ':');
743  if (colon == 0)
744    return ENOENT;
745
746  error = prime_nfs_fhandle_cache(colon + 1, mf->mf_server, &fhs, mf);
747  if (!error) {
748    mf->mf_private = (opaque_t) ALLOC(am_nfs_handle_t);
749    mf->mf_prfree = (void (*)(opaque_t)) free;
750    memmove(mf->mf_private, (voidp) &fhs, sizeof(fhs));
751  }
752  return error;
753}
754
755
756int
757mount_nfs_fh(am_nfs_handle_t *fhp, char *mntdir, char *fs_name, mntfs *mf)
758{
759  MTYPE_TYPE type;
760  char *colon;
761  char *xopts=NULL, transp_timeo_opts[40], transp_retrans_opts[40];
762  char host[MAXHOSTNAMELEN + MAXPATHLEN + 2];
763  fserver *fs = mf->mf_server;
764  u_long nfs_version = fs->fs_version;
765  char *nfs_proto = fs->fs_proto; /* "tcp" or "udp" */
766  int on_autofs = mf->mf_flags & MFF_ON_AUTOFS;
767  int error;
768  int genflags;
769  int retry;
770  int proto = AMU_TYPE_NONE;
771  mntent_t mnt;
772  nfs_args_t nfs_args;
773
774  /*
775   * Extract HOST name to give to kernel.
776   * Some systems like osf1/aix3/bsd44 variants may need old code
777   * for NFS_ARGS_NEEDS_PATH.
778   */
779  if (!(colon = strchr(fs_name, ':')))
780    return ENOENT;
781#ifdef MOUNT_TABLE_ON_FILE
782  *colon = '\0';
783#endif /* MOUNT_TABLE_ON_FILE */
784  xstrlcpy(host, fs_name, sizeof(host));
785#ifdef MOUNT_TABLE_ON_FILE
786  *colon = ':';
787#endif /* MOUNT_TABLE_ON_FILE */
788#ifdef MAXHOSTNAMELEN
789  /* most kernels have a name length restriction */
790  if (strlen(host) >= MAXHOSTNAMELEN)
791    xstrlcpy(host + MAXHOSTNAMELEN - 3, "..",
792	     sizeof(host) - MAXHOSTNAMELEN + 3);
793#endif /* MAXHOSTNAMELEN */
794
795  /*
796   * Create option=VAL for udp/tcp specific timeouts and retrans values, but
797   * only if these options were specified.
798   */
799
800  transp_timeo_opts[0] = transp_retrans_opts[0] = '\0';	/* initialize */
801  if (STREQ(nfs_proto, "udp"))
802    proto = AMU_TYPE_UDP;
803  else if (STREQ(nfs_proto, "tcp"))
804    proto = AMU_TYPE_TCP;
805  if (proto != AMU_TYPE_NONE) {
806    if (gopt.amfs_auto_timeo[proto] > 0)
807      xsnprintf(transp_timeo_opts, sizeof(transp_timeo_opts), "%s=%d,",
808		MNTTAB_OPT_TIMEO, gopt.amfs_auto_timeo[proto]);
809    if (gopt.amfs_auto_retrans[proto] > 0)
810      xsnprintf(transp_retrans_opts, sizeof(transp_retrans_opts), "%s=%d,",
811		MNTTAB_OPT_RETRANS, gopt.amfs_auto_retrans[proto]);
812  }
813
814  if (mf->mf_remopts && *mf->mf_remopts &&
815      !islocalnet(fs->fs_ip->sin_addr.s_addr)) {
816    plog(XLOG_INFO, "Using remopts=\"%s\"", mf->mf_remopts);
817    /* use transp_opts first, so map-specific opts will override */
818    xopts = str3cat(xopts, transp_timeo_opts, transp_retrans_opts, mf->mf_remopts);
819  } else {
820    /* use transp_opts first, so map-specific opts will override */
821    xopts = str3cat(xopts, transp_timeo_opts, transp_retrans_opts, mf->mf_mopts);
822  }
823
824  memset((voidp) &mnt, 0, sizeof(mnt));
825  mnt.mnt_dir = mntdir;
826  mnt.mnt_fsname = fs_name;
827  mnt.mnt_opts = xopts;
828
829  /*
830   * Set mount types accordingly
831   */
832#ifndef HAVE_FS_NFS3
833  type = MOUNT_TYPE_NFS;
834  mnt.mnt_type = MNTTAB_TYPE_NFS;
835#else /* HAVE_FS_NFS3 */
836  if (nfs_version == NFS_VERSION3) {
837    type = MOUNT_TYPE_NFS3;
838    /*
839     * Systems that include the mount table "vers" option generally do not
840     * set the mnttab entry to "nfs3", but to "nfs" and then they set
841     * "vers=3".  Setting it to "nfs3" works, but it may break some things
842     * like "df -t nfs" and the "quota" program (esp. on Solaris and Irix).
843     * So on those systems, set it to "nfs".
844     * Note: MNTTAB_OPT_VERS is always set for NFS3 (see am_compat.h).
845     */
846# if defined(MNTTAB_OPT_VERS) && defined(MOUNT_TABLE_ON_FILE)
847    mnt.mnt_type = MNTTAB_TYPE_NFS;
848# else /* defined(MNTTAB_OPT_VERS) && defined(MOUNT_TABLE_ON_FILE) */
849    mnt.mnt_type = MNTTAB_TYPE_NFS3;
850# endif /* defined(MNTTAB_OPT_VERS) && defined(MOUNT_TABLE_ON_FILE) */
851  } else {
852    type = MOUNT_TYPE_NFS;
853    mnt.mnt_type = MNTTAB_TYPE_NFS;
854  }
855#endif /* HAVE_FS_NFS3 */
856  plog(XLOG_INFO, "mount_nfs_fh: NFS version %d", (int) nfs_version);
857  plog(XLOG_INFO, "mount_nfs_fh: using NFS transport %s", nfs_proto);
858
859  retry = hasmntval(&mnt, MNTTAB_OPT_RETRY);
860  if (retry <= 0)
861    retry = 1;			/* XXX */
862
863  genflags = compute_mount_flags(&mnt);
864#ifdef HAVE_FS_AUTOFS
865  if (on_autofs)
866    genflags |= autofs_compute_mount_flags(&mnt);
867#endif /* HAVE_FS_AUTOFS */
868
869  /* setup the many fields and flags within nfs_args */
870  compute_nfs_args(&nfs_args,
871		   &mnt,
872		   genflags,
873		   NULL,	/* struct netconfig *nfsncp */
874		   fs->fs_ip,
875		   nfs_version,
876		   nfs_proto,
877		   fhp,
878		   host,
879		   fs_name);
880
881  /* finally call the mounting function */
882  if (amuDebug(D_TRACE)) {
883    print_nfs_args(&nfs_args, nfs_version);
884    plog(XLOG_DEBUG, "Generic mount flags 0x%x used for NFS mount", genflags);
885  }
886  error = mount_fs(&mnt, genflags, (caddr_t) &nfs_args, retry, type,
887		    nfs_version, nfs_proto, mnttab_file_name, on_autofs);
888  XFREE(xopts);
889
890#ifdef HAVE_TRANSPORT_TYPE_TLI
891  free_knetconfig(nfs_args.knconf);
892  if (nfs_args.addr)
893    XFREE(nfs_args.addr);	/* allocated in compute_nfs_args() */
894#endif /* HAVE_TRANSPORT_TYPE_TLI */
895
896  return error;
897}
898
899
900static int
901nfs_mount(am_node *am, mntfs *mf)
902{
903  int error = 0;
904  mntent_t mnt;
905
906  if (!mf->mf_private) {
907    plog(XLOG_ERROR, "Missing filehandle for %s", mf->mf_info);
908    return EINVAL;
909  }
910
911  mnt.mnt_opts = mf->mf_mopts;
912  if (amu_hasmntopt(&mnt, "softlookup") ||
913      (amu_hasmntopt(&mnt, "soft") && !amu_hasmntopt(&mnt, "nosoftlookup")))
914    am->am_flags |= AMF_SOFTLOOKUP;
915
916  error = mount_nfs_fh((am_nfs_handle_t *) mf->mf_private,
917		       mf->mf_mount,
918		       mf->mf_info,
919		       mf);
920
921  if (error) {
922    errno = error;
923    dlog("mount_nfs: %m");
924  }
925
926  return error;
927}
928
929
930static int
931nfs_umount(am_node *am, mntfs *mf)
932{
933  int unmount_flags, new_unmount_flags, error;
934
935  unmount_flags = (mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
936  error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
937
938#if defined(HAVE_UMOUNT2) && (defined(MNT2_GEN_OPT_FORCE) || defined(MNT2_GEN_OPT_DETACH))
939  /*
940   * If the attempt to unmount failed with EBUSY, and this fserver was
941   * marked for forced unmounts, then use forced/lazy unmounts.
942   */
943  if (error == EBUSY &&
944      gopt.flags & CFM_FORCED_UNMOUNTS &&
945      mf->mf_server->fs_flags & FSF_FORCE_UNMOUNT) {
946    plog(XLOG_INFO, "EZK: nfs_umount: trying forced/lazy unmounts");
947    /*
948     * XXX: turning off the FSF_FORCE_UNMOUNT may not be perfectly
949     * incorrect.  Multiple nodes may need to be timed out and restarted for
950     * a single hung fserver.
951     */
952    mf->mf_server->fs_flags &= ~FSF_FORCE_UNMOUNT;
953    new_unmount_flags = unmount_flags | AMU_UMOUNT_FORCE | AMU_UMOUNT_DETACH;
954    error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, new_unmount_flags);
955  }
956#endif /* HAVE_UMOUNT2 && (MNT2_GEN_OPT_FORCE || MNT2_GEN_OPT_DETACH) */
957
958  /*
959   * Here is some code to unmount 'restarted' file systems.
960   * The restarted file systems are marked as 'nfs', not
961   * 'host', so we only have the map information for the
962   * the top-level mount.  The unmount will fail (EBUSY)
963   * if there are anything else from the NFS server mounted
964   * below the mount-point.  This code checks to see if there
965   * is anything mounted with the same prefix as the
966   * file system to be unmounted ("/a/b/c" when unmounting "/a/b").
967   * If there is, and it is a 'restarted' file system, we unmount
968   * it.
969   * Added by Mike Mitchell, mcm@unx.sas.com, 09/08/93
970   */
971  if (error == EBUSY) {
972    mntfs *new_mf;
973    int len = strlen(mf->mf_mount);
974    int didsome = 0;
975
976    ITER(new_mf, mntfs, &mfhead) {
977      if (new_mf->mf_ops != mf->mf_ops ||
978	  new_mf->mf_refc > 1 ||
979	  mf == new_mf ||
980	  ((new_mf->mf_flags & (MFF_MOUNTED | MFF_UNMOUNTING | MFF_RESTART)) == (MFF_MOUNTED | MFF_RESTART)))
981	continue;
982
983      if (NSTREQ(mf->mf_mount, new_mf->mf_mount, len) &&
984	  new_mf->mf_mount[len] == '/') {
985	new_unmount_flags =
986	  (new_mf->mf_flags & MFF_ON_AUTOFS) ? AMU_UMOUNT_AUTOFS : 0;
987	UMOUNT_FS(new_mf->mf_mount, mnttab_file_name, new_unmount_flags);
988	didsome = 1;
989      }
990    }
991    if (didsome)
992      error = UMOUNT_FS(mf->mf_mount, mnttab_file_name, unmount_flags);
993  }
994  if (error)
995    return error;
996
997  return 0;
998}
999
1000
1001static void
1002nfs_umounted(mntfs *mf)
1003{
1004  fserver *fs;
1005  char *colon, *path;
1006
1007  if (mf->mf_error || mf->mf_refc > 1)
1008    return;
1009
1010  /*
1011   * No need to inform mountd when WebNFS is in use.
1012   */
1013  if (mf->mf_flags & MFF_WEBNFS)
1014    return;
1015
1016  /*
1017   * Call the mount daemon on the server to announce that we are not using
1018   * the fs any more.
1019   *
1020   * XXX: This is *wrong*.  The mountd should be called when the fhandle is
1021   * flushed from the cache, and a reference held to the cached entry while
1022   * the fs is mounted...
1023   */
1024  fs = mf->mf_server;
1025  colon = path = strchr(mf->mf_info, ':');
1026  if (fs && colon) {
1027    fh_cache f;
1028
1029    dlog("calling mountd for %s", mf->mf_info);
1030    *path++ = '\0';
1031    f.fh_path = path;
1032    f.fh_sin = *fs->fs_ip;
1033    f.fh_sin.sin_port = (u_short) 0;
1034    f.fh_nfs_version = fs->fs_version;
1035    f.fh_fs = fs;
1036    f.fh_id = 0;
1037    f.fh_error = 0;
1038    prime_nfs_fhandle_cache(colon + 1, mf->mf_server, (am_nfs_handle_t *) 0, mf);
1039    call_mountd(&f, MOUNTPROC_UMNT, (fwd_fun *) 0, (wchan_t) 0);
1040    *colon = ':';
1041  }
1042}
1043