vfs_cache.c (112342) | vfs_cache.c (112430) |
---|---|
1/* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without --- 20 unchanged lines hidden (view full) --- 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 | 1/* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Poul-Henning Kamp of the FreeBSD Project. 7 * 8 * Redistribution and use in source and binary forms, with or without --- 20 unchanged lines hidden (view full) --- 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 |
37 * $FreeBSD: head/sys/kern/vfs_cache.c 112342 2003-03-17 12:21:08Z phk $ | 37 * $FreeBSD: head/sys/kern/vfs_cache.c 112430 2003-03-20 10:40:45Z phk $ |
38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/mutex.h> 45#include <sys/sysctl.h> 46#include <sys/mount.h> 47#include <sys/vnode.h> 48#include <sys/namei.h> 49#include <sys/malloc.h> 50#include <sys/syscallsubr.h> 51#include <sys/sysproto.h> 52#include <sys/proc.h> 53#include <sys/filedesc.h> 54#include <sys/fnv_hash.h> | 38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/mutex.h> 45#include <sys/sysctl.h> 46#include <sys/mount.h> 47#include <sys/vnode.h> 48#include <sys/namei.h> 49#include <sys/malloc.h> 50#include <sys/syscallsubr.h> 51#include <sys/sysproto.h> 52#include <sys/proc.h> 53#include <sys/filedesc.h> 54#include <sys/fnv_hash.h> |
55#include <ufs/ufs/dir.h> /* XXX only for DIRBLKSIZ */ 56#include <sys/dirent.h> | |
57 58/* 59 * This structure describes the elements in the cache of recent 60 * names looked up by namei. 61 */ 62 63struct namecache { 64 LIST_ENTRY(namecache) nc_hash; /* hash chain */ --- 633 unchanged lines hidden (view full) --- 698#endif 699 700/* 701 * XXX All of these sysctls would probably be more productive dead. 702 */ 703static int disablecwd; 704SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 705 "Disable the getcwd syscall"); | 55 56/* 57 * This structure describes the elements in the cache of recent 58 * names looked up by namei. 59 */ 60 61struct namecache { 62 LIST_ENTRY(namecache) nc_hash; /* hash chain */ --- 633 unchanged lines hidden (view full) --- 696#endif 697 698/* 699 * XXX All of these sysctls would probably be more productive dead. 700 */ 701static int disablecwd; 702SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 703 "Disable the getcwd syscall"); |
706static int disable_cached_getcwd; 707SYSCTL_INT (_debug, OID_AUTO, disable_cached_getcwd, CTLFLAG_RW, 708 &disable_cached_getcwd, 0, "Disable getcwd using vfs vnode cache"); | |
709 710/* Various statistics for the getcwd syscall */ 711static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 712static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 713static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 714static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 715static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); | 704 705/* Various statistics for the getcwd syscall */ 706static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 707static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 708static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 709static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 710static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); |
716static u_long numcwdtraverse; STATNODE(CTLFLAG_RD, numcwdtraverse, &numcwdtraverse); | |
717static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 718 | 711static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 712 |
719#define GETCWD_CHECK_ACCESS 0x0001 720 721#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4) 722 723static int 724kern___getcwd_cached(struct thread *td, u_char *buf, enum uio_seg bufseg, 725 u_int buflen); 726static int 727kern___getcwd_traverse(struct vnode *lvp, struct vnode *rvp, char **bpp, 728 char *bufp, int limit, int flags, struct thread *td); 729static int 730kern___getcwd_scandir(struct vnode **lvpp, struct vnode **uvpp, 731 char **bpp, char *bufp, struct thread *td); 732 | |
733/* Implementation of the getcwd syscall */ 734int 735__getcwd(td, uap) 736 struct thread *td; 737 struct __getcwd_args *uap; 738{ | 713/* Implementation of the getcwd syscall */ 714int 715__getcwd(td, uap) 716 struct thread *td; 717 struct __getcwd_args *uap; 718{ |
719 |
|
739 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 740} 741 | 720 return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); 721} 722 |
/*
 * This part is mostly from linux_getcwd.  First try kern___getcwd_cached()
 * (the original kern___getcwd() routine), which uses the VFS vnode-to-name
 * reverse cache.  If that fails, fall back to the routines originally from
 * linux_getcwd.c, which traverse the directory contents (much slower!).
 */
748int | 723int |
749kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, 750 u_int buflen) | 724kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) |
751{ | 725{ |
752 int error; 753 char *bp, *bend; 754 755 if (disablecwd) 756 return (ENODEV); 757 758 if (kern___getcwd_cached (td, buf, bufseg, buflen) == 0) 759 return 0; 760 761 bp = &buf[buflen]; 762 bend = bp; 763 *(--bp) = '\0'; 764 error = kern___getcwd_traverse (td->td_proc->p_fd->fd_cdir, NULL, &bp, 765 buf, buflen / 2, GETCWD_CHECK_ACCESS, td); 766 if (!error) { 767 if (bufseg == UIO_USERSPACE) 768 error = copyout (bp, buf, bend - bp); 769 /* linux_getcwd has this -- needed? */ 770 td->td_retval[0] = bend - bp; 771 } 772 return error; 773} 774 775int 776kern___getcwd_cached (struct thread *td, u_char *buf, enum uio_seg bufseg, 777 u_int buflen) 778{ | |
779 char *bp, *tmpbuf; 780 int error, i, slash_prefixed; 781 struct filedesc *fdp; 782 struct namecache *ncp; 783 struct vnode *vp; 784 785 numcwdcalls++; | 726 char *bp, *tmpbuf; 727 int error, i, slash_prefixed; 728 struct filedesc *fdp; 729 struct namecache *ncp; 730 struct vnode *vp; 731 732 numcwdcalls++; |
786 if (disable_cached_getcwd) | 733 if (disablecwd) |
787 return (ENODEV); 788 if (buflen < 2) 789 return (EINVAL); 790 if (buflen > MAXPATHLEN) 791 buflen = MAXPATHLEN; 792 error = 0; 793 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK); 794 bp += buflen - 1; --- 64 unchanged lines hidden (view full) --- 859 bcopy(bp, buf, strlen(bp) + 1); 860 else 861 error = copyout(bp, buf, strlen(bp) + 1); 862 free(tmpbuf, M_TEMP); 863 return (error); 864} 865 866/* | 734 return (ENODEV); 735 if (buflen < 2) 736 return (EINVAL); 737 if (buflen > MAXPATHLEN) 738 buflen = MAXPATHLEN; 739 error = 0; 740 tmpbuf = bp = malloc(buflen, M_TEMP, M_WAITOK); 741 bp += buflen - 1; --- 64 unchanged lines hidden (view full) --- 806 bcopy(bp, buf, strlen(bp) + 1); 807 else 808 error = copyout(bp, buf, strlen(bp) + 1); 809 free(tmpbuf, M_TEMP); 810 return (error); 811} 812 813/* |
867 * Vnode variable naming conventions in this file: 868 * 869 * rvp: the current root we're aiming towards. 870 * lvp, *lvpp: the "lower" vnode 871 * uvp, *uvpp: the "upper" vnode. 872 * 873 * Since all the vnodes we're dealing with are directories, and the 874 * lookups are going *up* in the filesystem rather than *down*, the 875 * usual "pvp" (parent) or "dvp" (directory) naming conventions are 876 * too confusing. 877 */ 878 879/* 880 * XXX Will infinite loop in certain cases if a directory read reliably 881 * returns EINVAL on last block. 882 * XXX is EINVAL the right thing to return if a directory is malformed? 883 */ 884 885/* 886 * XXX Untested vs. mount -o union; probably does the wrong thing. 887 */ 888 889int 890kern___getcwd_traverse (struct vnode *lvp, struct vnode *rvp, char **bpp, 891 char *bufp, int limit, int flags, struct thread *td) 892{ 893 struct filedesc *fdp = td->td_proc->p_fd; 894 struct vnode *uvp = NULL; 895 char *bp = NULL; 896 int error; 897 int perms = VEXEC; 898 899 numcwdtraverse++; 900 901 if (rvp == NULL) { 902 rvp = fdp->fd_rdir; 903 if (rvp == NULL) 904 rvp = rootvnode; 905 } 906 907 VREF(rvp); 908 VREF(lvp); 909 910 /* 911 * Error handling invariant: 912 * Before a `goto out': 913 * lvp is either NULL, or locked and held. 914 * uvp is either NULL, or locked and held. 915 */ 916 917 error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); 918 if (error) { 919 vrele(lvp); 920 lvp = NULL; 921 goto out; 922 } 923 if (bufp) 924 bp = *bpp; 925 /* 926 * this loop will terminate when one of the following happens: 927 * - we hit the root 928 * - getdirentries or lookup fails 929 * - we run out of space in the buffer. 930 */ 931 if (lvp == rvp) { 932 if (bp) 933 *(--bp) = '/'; 934 goto out; 935 } 936 do { 937 if (lvp->v_type != VDIR) { 938 error = ENOTDIR; 939 goto out; 940 } 941 942 /* 943 * access check here is optional, depending on 944 * whether or not caller cares. 
945 */ 946 if (flags & GETCWD_CHECK_ACCESS) { 947 error = VOP_ACCESS(lvp, perms, td->td_ucred, td); 948 if (error) 949 goto out; 950 perms = VEXEC|VREAD; 951 } 952 953 /* 954 * step up if we're a covered vnode.. 955 */ 956 while (lvp->v_vflag & VV_ROOT) { 957 struct vnode *tvp; 958 959 if (lvp == rvp) 960 goto out; 961 962 tvp = lvp; 963 lvp = lvp->v_mount->mnt_vnodecovered; 964 vput(tvp); 965 /* 966 * hodie natus est radici frater 967 */ 968 if (lvp == NULL) { 969 error = ENOENT; 970 goto out; 971 } 972 VREF(lvp); 973 error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); 974 if (error != 0) { 975 vrele(lvp); 976 lvp = NULL; 977 goto out; 978 } 979 } 980 error = kern___getcwd_scandir(&lvp, &uvp, &bp, bufp, td); 981 if (error) 982 goto out; 983#if DIAGNOSTIC 984 if (lvp != NULL) 985 panic("getcwd: oops, forgot to null lvp"); 986 if (bufp && (bp <= bufp)) { 987 panic("getcwd: oops, went back too far"); 988 } 989#endif 990 if (bp) 991 *(--bp) = '/'; 992 lvp = uvp; 993 uvp = NULL; 994 limit--; 995 } while ((lvp != rvp) && (limit > 0)); 996 997out: 998 if (bpp) 999 *bpp = bp; 1000 if (uvp) 1001 vput(uvp); 1002 if (lvp) 1003 vput(lvp); 1004 vrele(rvp); 1005 return error; 1006} 1007 1008 1009/* 1010 * Find parent vnode of *lvpp, return in *uvpp 1011 * 1012 * If we care about the name, scan it looking for name of directory 1013 * entry pointing at lvp. 1014 * 1015 * Place the name in the buffer which starts at bufp, immediately 1016 * before *bpp, and move bpp backwards to point at the start of it. 1017 * 1018 * On entry, *lvpp is a locked vnode reference; on exit, it is vput and NULL'ed 1019 * On exit, *uvpp is either NULL or is a locked vnode reference. 
1020 */ 1021static int 1022kern___getcwd_scandir (struct vnode **lvpp, struct vnode **uvpp, 1023 char **bpp, char *bufp, struct thread *td) 1024{ 1025 int error = 0; 1026 int eofflag; 1027 off_t off; 1028 int tries; 1029 struct uio uio; 1030 struct iovec iov; 1031 char *dirbuf = NULL; 1032 int dirbuflen; 1033 ino_t fileno; 1034 struct vattr va; 1035 struct vnode *uvp = NULL; 1036 struct vnode *lvp = *lvpp; 1037 struct componentname cn; 1038 int len, reclen; 1039 tries = 0; 1040 1041 /* 1042 * If we want the filename, get some info we need while the 1043 * current directory is still locked. 1044 */ 1045 if (bufp != NULL) { 1046 error = VOP_GETATTR(lvp, &va, td->td_ucred, td); 1047 if (error) { 1048 vput(lvp); 1049 *lvpp = NULL; 1050 *uvpp = NULL; 1051 return error; 1052 } 1053 } 1054 1055 /* 1056 * Ok, we have to do it the hard way.. 1057 * Next, get parent vnode using lookup of .. 1058 */ 1059 cn.cn_nameiop = LOOKUP; 1060 cn.cn_flags = ISLASTCN | ISDOTDOT | RDONLY; 1061 cn.cn_thread = td; 1062 cn.cn_cred = td->td_ucred; 1063 cn.cn_pnbuf = NULL; 1064 cn.cn_nameptr = ".."; 1065 cn.cn_namelen = 2; 1066 cn.cn_consume = 0; 1067 1068 /* 1069 * At this point, lvp is locked and will be unlocked by the lookup. 
1070 * On successful return, *uvpp will be locked 1071 */ 1072 error = VOP_LOOKUP(lvp, uvpp, &cn); 1073 if (error) { 1074 vput(lvp); 1075 *lvpp = NULL; 1076 *uvpp = NULL; 1077 return error; 1078 } 1079 uvp = *uvpp; 1080 1081 /* If we don't care about the pathname, we're done */ 1082 if (bufp == NULL) { 1083 vrele(lvp); 1084 *lvpp = NULL; 1085 return 0; 1086 } 1087 1088 fileno = va.va_fileid; 1089 1090 dirbuflen = DIRBLKSIZ; 1091 if (dirbuflen < va.va_blocksize) 1092 dirbuflen = va.va_blocksize; 1093 dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK); 1094 1095#if 0 1096unionread: 1097#endif 1098 off = 0; 1099 do { 1100 /* call VOP_READDIR of parent */ 1101 iov.iov_base = dirbuf; 1102 iov.iov_len = dirbuflen; 1103 1104 uio.uio_iov = &iov; 1105 uio.uio_iovcnt = 1; 1106 uio.uio_offset = off; 1107 uio.uio_resid = dirbuflen; 1108 uio.uio_segflg = UIO_SYSSPACE; 1109 uio.uio_rw = UIO_READ; 1110 uio.uio_td = td; 1111 1112 eofflag = 0; 1113 1114#ifdef MAC 1115 error = mac_check_vnode_readdir(td->td_ucred, uvp); 1116 if (error == 0) 1117#endif /* MAC */ 1118 error = VOP_READDIR(uvp, &uio, td->td_ucred, &eofflag, 1119 0, 0); 1120 1121 off = uio.uio_offset; 1122 1123 /* 1124 * Try again if NFS tosses its cookies. 1125 * XXX this can still loop forever if the directory is busted 1126 * such that the second or subsequent page of it always 1127 * returns EINVAL 1128 */ 1129 if ((error == EINVAL) && (tries < 3)) { 1130 off = 0; 1131 tries++; 1132 continue; /* once more, with feeling */ 1133 } 1134 1135 if (!error) { 1136 char *cpos; 1137 struct dirent *dp; 1138 1139 cpos = dirbuf; 1140 tries = 0; 1141 1142 /* scan directory page looking for matching vnode */ 1143 for (len = (dirbuflen - uio.uio_resid); len > 0; len -= reclen) { 1144 dp = (struct dirent *) cpos; 1145 reclen = dp->d_reclen; 1146 1147 /* check for malformed directory.. 
*/ 1148 if (reclen < DIRENT_MINSIZE) { 1149 error = EINVAL; 1150 goto out; 1151 } 1152 /* 1153 * XXX should perhaps do VOP_LOOKUP to 1154 * check that we got back to the right place, 1155 * but getting the locking games for that 1156 * right would be heinous. 1157 */ 1158 if ((dp->d_type != DT_WHT) && 1159 (dp->d_fileno == fileno)) { 1160 char *bp = *bpp; 1161 bp -= dp->d_namlen; 1162 1163 if (bp <= bufp) { 1164 error = ERANGE; 1165 goto out; 1166 } 1167 bcopy(dp->d_name, bp, dp->d_namlen); 1168 error = 0; 1169 *bpp = bp; 1170 goto out; 1171 } 1172 cpos += reclen; 1173 } 1174 } 1175 } while (!eofflag); 1176 error = ENOENT; 1177 1178out: 1179 vrele(lvp); 1180 *lvpp = NULL; 1181 free(dirbuf, M_TEMP); 1182 return error; 1183} 1184 1185/* | |
1186 * Thus begins the fullpath magic. 1187 */ 1188 1189#undef STATNODE 1190#define STATNODE(name) \ 1191 static u_int name; \ 1192 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 1193 --- 99 unchanged lines hidden --- | 814 * Thus begins the fullpath magic. 815 */ 816 817#undef STATNODE 818#define STATNODE(name) \ 819 static u_int name; \ 820 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 821 --- 99 unchanged lines hidden --- |