/* archive_write_disk_set_standard_lookup.c revision 228753 */
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "archive_platform.h"
__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_disk_set_standard_lookup.c 201083 2009-12-28 02:09:57Z kientzle $");

#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_GRP_H
#include <grp.h>
#endif
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "archive.h"
#include "archive_private.h"
#include "archive_read_private.h"
#include "archive_write_disk_private.h"

53228753Smmstruct bucket {
54228753Smm	char	*name;
55228753Smm	int	 hash;
56228753Smm	id_t	 id;
57228753Smm};
58228753Smm
/* Number of slots in each of the two (user and group) lookup caches. */
static const size_t cache_size = 127;

static unsigned int	hash(const char *);
static gid_t	lookup_gid(void *, const char *gname, gid_t);
static uid_t	lookup_uid(void *, const char *uname, uid_t);
static void	cleanup(void *);

65228753Smm/*
66228753Smm * Installs functions that use getpwnam()/getgrnam()---along with
67228753Smm * a simple cache to accelerate such lookups---into the archive_write_disk
68228753Smm * object.  This is in a separate file because getpwnam()/getgrnam()
69228753Smm * can pull in a LOT of library code (including NIS/LDAP functions, which
70228753Smm * pull in DNS resolveers, etc).  This can easily top 500kB, which makes
71228753Smm * it inappropriate for some space-constrained applications.
72228753Smm *
73228753Smm * Applications that are size-sensitive may want to just use the
74228753Smm * real default functions (defined in archive_write_disk.c) that just
75228753Smm * use the uid/gid without the lookup.  Or define your own custom functions
76228753Smm * if you prefer.
77228753Smm *
78228753Smm * TODO: Replace these hash tables with simpler move-to-front LRU
79228753Smm * lists with a bounded size (128 items?).  The hash is a bit faster,
80228753Smm * but has a bad pathology in which it thrashes a single bucket.  Even
81228753Smm * walking a list of 128 items is a lot faster than calling
82228753Smm * getpwnam()!
83228753Smm */
84228753Smmint
85228753Smmarchive_write_disk_set_standard_lookup(struct archive *a)
86228753Smm{
87228753Smm	struct bucket *ucache = malloc(cache_size * sizeof(struct bucket));
88228753Smm	struct bucket *gcache = malloc(cache_size * sizeof(struct bucket));
89228753Smm	memset(ucache, 0, cache_size * sizeof(struct bucket));
90228753Smm	memset(gcache, 0, cache_size * sizeof(struct bucket));
91228753Smm	archive_write_disk_set_group_lookup(a, gcache, lookup_gid, cleanup);
92228753Smm	archive_write_disk_set_user_lookup(a, ucache, lookup_uid, cleanup);
93228753Smm	return (ARCHIVE_OK);
94228753Smm}
95228753Smm
96228753Smmstatic gid_t
97228753Smmlookup_gid(void *private_data, const char *gname, gid_t gid)
98228753Smm{
99228753Smm	int h;
100228753Smm	struct bucket *b;
101228753Smm	struct bucket *gcache = (struct bucket *)private_data;
102228753Smm
103228753Smm	/* If no gname, just use the gid provided. */
104228753Smm	if (gname == NULL || *gname == '\0')
105228753Smm		return (gid);
106228753Smm
107228753Smm	/* Try to find gname in the cache. */
108228753Smm	h = hash(gname);
109228753Smm	b = &gcache[h % cache_size ];
110228753Smm	if (b->name != NULL && b->hash == h && strcmp(gname, b->name) == 0)
111228753Smm		return ((gid_t)b->id);
112228753Smm
113228753Smm	/* Free the cache slot for a new entry. */
114228753Smm	if (b->name != NULL)
115228753Smm		free(b->name);
116228753Smm	b->name = strdup(gname);
117228753Smm	/* Note: If strdup fails, that's okay; we just won't cache. */
118228753Smm	b->hash = h;
119228753Smm#if HAVE_GRP_H
120228753Smm#  if HAVE_GETGRNAM_R
121228753Smm	{
122228753Smm		char _buffer[128];
123228753Smm		size_t bufsize = 128;
124228753Smm		char *buffer = _buffer;
125228753Smm		struct group	grent, *result;
126228753Smm		int r;
127228753Smm
128228753Smm		for (;;) {
129228753Smm			result = &grent; /* Old getgrnam_r ignores last arg. */
130228753Smm			r = getgrnam_r(gname, &grent, buffer, bufsize, &result);
131228753Smm			if (r == 0)
132228753Smm				break;
133228753Smm			if (r != ERANGE)
134228753Smm				break;
135228753Smm			bufsize *= 2;
136228753Smm			if (buffer != _buffer)
137228753Smm				free(buffer);
138228753Smm			buffer = malloc(bufsize);
139228753Smm			if (buffer == NULL)
140228753Smm				break;
141228753Smm		}
142228753Smm		if (result != NULL)
143228753Smm			gid = result->gr_gid;
144228753Smm		if (buffer != _buffer)
145228753Smm			free(buffer);
146228753Smm	}
147228753Smm#  else /* HAVE_GETGRNAM_R */
148228753Smm	{
149228753Smm		struct group *result;
150228753Smm
151228753Smm		result = getgrnam(gname);
152228753Smm		if (result != NULL)
153228753Smm			gid = result->gr_gid;
154228753Smm	}
155228753Smm#  endif /* HAVE_GETGRNAM_R */
156228753Smm#elif defined(_WIN32) && !defined(__CYGWIN__)
157228753Smm	/* TODO: do a gname->gid lookup for Windows. */
158228753Smm#else
159228753Smm	#error No way to perform gid lookups on this platform
160228753Smm#endif
161228753Smm	b->id = gid;
162228753Smm
163228753Smm	return (gid);
164228753Smm}
165228753Smm
166228753Smmstatic uid_t
167228753Smmlookup_uid(void *private_data, const char *uname, uid_t uid)
168228753Smm{
169228753Smm	int h;
170228753Smm	struct bucket *b;
171228753Smm	struct bucket *ucache = (struct bucket *)private_data;
172228753Smm
173228753Smm	/* If no uname, just use the uid provided. */
174228753Smm	if (uname == NULL || *uname == '\0')
175228753Smm		return (uid);
176228753Smm
177228753Smm	/* Try to find uname in the cache. */
178228753Smm	h = hash(uname);
179228753Smm	b = &ucache[h % cache_size ];
180228753Smm	if (b->name != NULL && b->hash == h && strcmp(uname, b->name) == 0)
181228753Smm		return ((uid_t)b->id);
182228753Smm
183228753Smm	/* Free the cache slot for a new entry. */
184228753Smm	if (b->name != NULL)
185228753Smm		free(b->name);
186228753Smm	b->name = strdup(uname);
187228753Smm	/* Note: If strdup fails, that's okay; we just won't cache. */
188228753Smm	b->hash = h;
189228753Smm#if HAVE_PWD_H
190228753Smm#  if HAVE_GETPWNAM_R
191228753Smm	{
192228753Smm		char _buffer[128];
193228753Smm		size_t bufsize = 128;
194228753Smm		char *buffer = _buffer;
195228753Smm		struct passwd	pwent, *result;
196228753Smm		int r;
197228753Smm
198228753Smm		for (;;) {
199228753Smm			result = &pwent; /* Old getpwnam_r ignores last arg. */
200228753Smm			r = getpwnam_r(uname, &pwent, buffer, bufsize, &result);
201228753Smm			if (r == 0)
202228753Smm				break;
203228753Smm			if (r != ERANGE)
204228753Smm				break;
205228753Smm			bufsize *= 2;
206228753Smm			if (buffer != _buffer)
207228753Smm				free(buffer);
208228753Smm			buffer = malloc(bufsize);
209228753Smm			if (buffer == NULL)
210228753Smm				break;
211228753Smm		}
212228753Smm		if (result != NULL)
213228753Smm			uid = result->pw_uid;
214228753Smm		if (buffer != _buffer)
215228753Smm			free(buffer);
216228753Smm	}
217228753Smm#  else /* HAVE_GETPWNAM_R */
218228753Smm	{
219228753Smm		struct passwd *result;
220228753Smm
221228753Smm		result = getpwnam(uname);
222228753Smm		if (result != NULL)
223228753Smm			uid = result->pw_uid;
224228753Smm	}
225228753Smm#endif	/* HAVE_GETPWNAM_R */
226228753Smm#elif defined(_WIN32) && !defined(__CYGWIN__)
227228753Smm	/* TODO: do a uname->uid lookup for Windows. */
228228753Smm#else
229228753Smm	#error No way to look up uids on this platform
230228753Smm#endif
231228753Smm	b->id = uid;
232228753Smm
233228753Smm	return (uid);
234228753Smm}
235228753Smm
236228753Smmstatic void
237228753Smmcleanup(void *private)
238228753Smm{
239228753Smm	size_t i;
240228753Smm	struct bucket *cache = (struct bucket *)private;
241228753Smm
242228753Smm	for (i = 0; i < cache_size; i++)
243228753Smm		free(cache[i].name);
244228753Smm	free(cache);
245228753Smm}
246228753Smm
247228753Smm
/*
 * Hash a NUL-terminated string.  The result always fits in the low
 * 28 bits: the top nibble is folded back in and cleared after every
 * character.  An empty string hashes to 0.
 */
static unsigned int
hash(const char *p)
{
	/* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
	   as used by ELF for hashing function names. */
	unsigned g, h = 0;
	while (*p != '\0') {
		/*
		 * Read each byte as unsigned char so bytes >= 0x80 are
		 * not sign-extended where plain char is signed; the
		 * hash is then the same on every platform.
		 */
		h = (h << 4) + (unsigned char)*p++;
		if ((g = h & 0xF0000000) != 0) {
			h ^= g >> 24;
			h &= 0x0FFFFFFF;
		}
	}
	return h;
}
263