1281681Srpaulo/*-
2281681Srpaulo * Copyright (c) 2003-2007 Tim Kientzle
3281681Srpaulo * All rights reserved.
4281681Srpaulo *
5281681Srpaulo * Redistribution and use in source and binary forms, with or without
6281681Srpaulo * modification, are permitted provided that the following conditions
7281681Srpaulo * are met:
8281681Srpaulo * 1. Redistributions of source code must retain the above copyright
9281681Srpaulo *    notice, this list of conditions and the following disclaimer.
10281681Srpaulo * 2. Redistributions in binary form must reproduce the above copyright
11281681Srpaulo *    notice, this list of conditions and the following disclaimer in the
12281681Srpaulo *    documentation and/or other materials provided with the distribution.
13281681Srpaulo *
14281681Srpaulo * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15281681Srpaulo * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16281681Srpaulo * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17281681Srpaulo * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18281681Srpaulo * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19281681Srpaulo * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20337817Scy * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21281681Srpaulo * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22281681Srpaulo * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23281681Srpaulo * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24281681Srpaulo */
25281681Srpaulo
26281681Srpaulo#include "archive_platform.h"
27281681Srpaulo__FBSDID("$FreeBSD$");
28281681Srpaulo
29281681Srpaulo#ifdef HAVE_SYS_TYPES_H
30281681Srpaulo#include <sys/types.h>
31281681Srpaulo#endif
32281681Srpaulo#ifdef HAVE_ERRNO_H
33281681Srpaulo#include <errno.h>
34281681Srpaulo#endif
35281681Srpaulo#ifdef HAVE_GRP_H
36281681Srpaulo#include <grp.h>
37281681Srpaulo#endif
38281681Srpaulo#ifdef HAVE_PWD_H
39281681Srpaulo#include <pwd.h>
40281681Srpaulo#endif
41281681Srpaulo#ifdef HAVE_STDLIB_H
42281681Srpaulo#include <stdlib.h>
43281681Srpaulo#endif
44281681Srpaulo#ifdef HAVE_STRING_H
45281681Srpaulo#include <string.h>
46281681Srpaulo#endif
47281681Srpaulo
48281681Srpaulo#include "archive.h"
49281681Srpaulo#include "archive_private.h"
50281681Srpaulo#include "archive_read_private.h"
51281681Srpaulo#include "archive_write_disk_private.h"
52281681Srpaulo
/*
 * One slot of a name->id cache.  The uid cache and the gid cache are each
 * a fixed-size array of these, indexed by the name's hash modulo the
 * array size.
 */
struct bucket {
	/* Heap-allocated copy of the cached name; NULL if the slot holds none. */
	char	*name;
	/* Full hash of name, compared before falling back to strcmp(). */
	int	 hash;
	/* Id stored for name (a uid or a gid, depending on the cache). */
	id_t	 id;
};
58281681Srpaulo
/* Number of slots in each of the uid and gid lookup caches. */
static const size_t cache_size = 127;

/* Hashes a NUL-terminated name; used to pick and verify a cache slot. */
static unsigned int	hash(const char *);
/* Name->id lookup callbacks; the void * is the cache they operate on. */
static gid_t	lookup_gid(void *, const char *gname, gid_t);
static uid_t	lookup_uid(void *, const char *uname, uid_t);
/* Releases one cache (every stored name, then the array itself). */
static void	cleanup(void *);
64281681Srpaulo
65281681Srpaulo/*
66281681Srpaulo * Installs functions that use getpwnam()/getgrnam()---along with
67281681Srpaulo * a simple cache to accelerate such lookups---into the archive_write_disk
68281681Srpaulo * object.  This is in a separate file because getpwnam()/getgrnam()
69281681Srpaulo * can pull in a LOT of library code (including NIS/LDAP functions, which
70281681Srpaulo * pull in DNS resolveers, etc).  This can easily top 500kB, which makes
71281681Srpaulo * it inappropriate for some space-constrained applications.
72281681Srpaulo *
73281681Srpaulo * Applications that are size-sensitive may want to just use the
74281681Srpaulo * real default functions (defined in archive_write_disk.c) that just
75281681Srpaulo * use the uid/gid without the lookup.  Or define your own custom functions
76281681Srpaulo * if you prefer.
77281681Srpaulo *
78281681Srpaulo * TODO: Replace these hash tables with simpler move-to-front LRU
79281681Srpaulo * lists with a bounded size (128 items?).  The hash is a bit faster,
80281681Srpaulo * but has a bad pathology in which it thrashes a single bucket.  Even
81281681Srpaulo * walking a list of 128 items is a lot faster than calling
82281681Srpaulo * getpwnam()!
83281681Srpaulo */
84281681Srpauloint
85289549Srpauloarchive_write_disk_set_standard_lookup(struct archive *a)
86289549Srpaulo{
87289549Srpaulo	struct bucket *ucache = malloc(cache_size * sizeof(struct bucket));
88289549Srpaulo	struct bucket *gcache = malloc(cache_size * sizeof(struct bucket));
89289549Srpaulo	memset(ucache, 0, cache_size * sizeof(struct bucket));
90289549Srpaulo	memset(gcache, 0, cache_size * sizeof(struct bucket));
91289549Srpaulo	archive_write_disk_set_group_lookup(a, gcache, lookup_gid, cleanup);
92289549Srpaulo	archive_write_disk_set_user_lookup(a, ucache, lookup_uid, cleanup);
93289549Srpaulo	return (ARCHIVE_OK);
94289549Srpaulo}
95289549Srpaulo
96289549Srpaulostatic gid_t
97281681Srpaulolookup_gid(void *private_data, const char *gname, gid_t gid)
98281681Srpaulo{
99281681Srpaulo	int h;
100281681Srpaulo	struct bucket *b;
101281681Srpaulo	struct bucket *gcache = (struct bucket *)private_data;
102289549Srpaulo
103281681Srpaulo	/* If no gname, just use the gid provided. */
104281681Srpaulo	if (gname == NULL || *gname == '\0')
105281681Srpaulo		return (gid);
106281681Srpaulo
107281681Srpaulo	/* Try to find gname in the cache. */
108281681Srpaulo	h = hash(gname);
109281681Srpaulo	b = &gcache[h % cache_size ];
110281681Srpaulo	if (b->name != NULL && b->hash == h && strcmp(gname, b->name) == 0)
111281681Srpaulo		return ((gid_t)b->id);
112281681Srpaulo
113281681Srpaulo	/* Free the cache slot for a new entry. */
114281681Srpaulo	if (b->name != NULL)
115281681Srpaulo		free(b->name);
116281681Srpaulo	b->name = strdup(gname);
117289549Srpaulo	/* Note: If strdup fails, that's okay; we just won't cache. */
118289549Srpaulo	b->hash = h;
119289549Srpaulo#if HAVE_GRP_H
120289549Srpaulo#  if HAVE_GETGRNAM_R
121289549Srpaulo	{
122289549Srpaulo		char _buffer[128];
123289549Srpaulo		size_t bufsize = 128;
124281681Srpaulo		char *buffer = _buffer;
125281681Srpaulo		struct group	grent, *result;
126281681Srpaulo		int r;
127281681Srpaulo
128281681Srpaulo		for (;;) {
129281681Srpaulo			result = &grent; /* Old getgrnam_r ignores last arg. */
130289549Srpaulo			r = getgrnam_r(gname, &grent, buffer, bufsize, &result);
131281681Srpaulo			if (r == 0)
132281681Srpaulo				break;
133289549Srpaulo			if (r != ERANGE)
134289549Srpaulo				break;
135281681Srpaulo			bufsize *= 2;
136281681Srpaulo			if (buffer != _buffer)
137281681Srpaulo				free(buffer);
138289549Srpaulo			buffer = malloc(bufsize);
139289549Srpaulo			if (buffer == NULL)
140289549Srpaulo				break;
141281681Srpaulo		}
142281681Srpaulo		if (result != NULL)
143281681Srpaulo			gid = result->gr_gid;
144281681Srpaulo		if (buffer != _buffer)
145281681Srpaulo			free(buffer);
146281681Srpaulo	}
147281681Srpaulo#  else /* HAVE_GETGRNAM_R */
148281681Srpaulo	{
149289549Srpaulo		struct group *result;
150281681Srpaulo
151281681Srpaulo		result = getgrnam(gname);
152281681Srpaulo		if (result != NULL)
153281681Srpaulo			gid = result->gr_gid;
154289549Srpaulo	}
155289549Srpaulo#  endif /* HAVE_GETGRNAM_R */
156289549Srpaulo#elif defined(_WIN32) && !defined(__CYGWIN__)
157281681Srpaulo	/* TODO: do a gname->gid lookup for Windows. */
158289549Srpaulo#else
159289549Srpaulo	#error No way to perform gid lookups on this platform
160281681Srpaulo#endif
161281681Srpaulo	b->id = gid;
162281681Srpaulo
163281681Srpaulo	return (gid);
164281681Srpaulo}
165281681Srpaulo
166281681Srpaulostatic uid_t
167281681Srpaulolookup_uid(void *private_data, const char *uname, uid_t uid)
168281681Srpaulo{
169281681Srpaulo	int h;
170281681Srpaulo	struct bucket *b;
171281681Srpaulo	struct bucket *ucache = (struct bucket *)private_data;
172281681Srpaulo
173281681Srpaulo	/* If no uname, just use the uid provided. */
174281681Srpaulo	if (uname == NULL || *uname == '\0')
175281681Srpaulo		return (uid);
176281681Srpaulo
177281681Srpaulo	/* Try to find uname in the cache. */
178281681Srpaulo	h = hash(uname);
179281681Srpaulo	b = &ucache[h % cache_size ];
180281681Srpaulo	if (b->name != NULL && b->hash == h && strcmp(uname, b->name) == 0)
181281681Srpaulo		return ((uid_t)b->id);
182281681Srpaulo
183281681Srpaulo	/* Free the cache slot for a new entry. */
184281681Srpaulo	if (b->name != NULL)
185346981Scy		free(b->name);
186281681Srpaulo	b->name = strdup(uname);
187	/* Note: If strdup fails, that's okay; we just won't cache. */
188	b->hash = h;
189#if HAVE_PWD_H
190#  if HAVE_GETPWNAM_R
191	{
192		char _buffer[128];
193		size_t bufsize = 128;
194		char *buffer = _buffer;
195		struct passwd	pwent, *result;
196		int r;
197
198		for (;;) {
199			result = &pwent; /* Old getpwnam_r ignores last arg. */
200			r = getpwnam_r(uname, &pwent, buffer, bufsize, &result);
201			if (r == 0)
202				break;
203			if (r != ERANGE)
204				break;
205			bufsize *= 2;
206			if (buffer != _buffer)
207				free(buffer);
208			buffer = malloc(bufsize);
209			if (buffer == NULL)
210				break;
211		}
212		if (result != NULL)
213			uid = result->pw_uid;
214		if (buffer != _buffer)
215			free(buffer);
216	}
217#  else /* HAVE_GETPWNAM_R */
218	{
219		struct passwd *result;
220
221		result = getpwnam(uname);
222		if (result != NULL)
223			uid = result->pw_uid;
224	}
225#endif	/* HAVE_GETPWNAM_R */
226#elif defined(_WIN32) && !defined(__CYGWIN__)
227	/* TODO: do a uname->uid lookup for Windows. */
228#else
229	#error No way to look up uids on this platform
230#endif
231	b->id = uid;
232
233	return (uid);
234}
235
236static void
237cleanup(void *private)
238{
239	size_t i;
240	struct bucket *cache = (struct bucket *)private;
241
242	for (i = 0; i < cache_size; i++)
243		free(cache[i].name);
244	free(cache);
245}
246
247
/*
 * Hash a NUL-terminated string with a 32-bit variant of Peter
 * Weinberger's (PJW) algorithm, as used by ELF for hashing function
 * names.  The top four bits are folded back in and cleared after every
 * character, so the result never exceeds 0x0FFFFFFF.
 */
static unsigned int
hash(const char *p)
{
	unsigned int acc = 0;
	const char *cursor;

	for (cursor = p; *cursor != '\0'; cursor++) {
		unsigned int top;

		acc = (acc << 4) + *cursor;
		top = acc & 0xF0000000;
		if (top == 0)
			continue;
		/* Fold the overflowing nibble into the low bits, then drop it. */
		acc ^= top >> 24;
		acc &= 0x0FFFFFFF;
	}
	return (acc);
}
263