1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27228763Smm__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_write_disk_set_standard_lookup.c 344674 2019-02-28 22:57:09Z mm $");
28228753Smm
29228753Smm#ifdef HAVE_SYS_TYPES_H
30228753Smm#include <sys/types.h>
31228753Smm#endif
32228753Smm#ifdef HAVE_ERRNO_H
33228753Smm#include <errno.h>
34228753Smm#endif
35228753Smm#ifdef HAVE_GRP_H
36228753Smm#include <grp.h>
37228753Smm#endif
38228753Smm#ifdef HAVE_PWD_H
39228753Smm#include <pwd.h>
40228753Smm#endif
41228753Smm#ifdef HAVE_STDLIB_H
42228753Smm#include <stdlib.h>
43228753Smm#endif
44228753Smm#ifdef HAVE_STRING_H
45228753Smm#include <string.h>
46228753Smm#endif
47228753Smm
48228753Smm#include "archive.h"
49228753Smm#include "archive_private.h"
50228753Smm#include "archive_read_private.h"
51228753Smm#include "archive_write_disk_private.h"
52228753Smm
53228753Smmstruct bucket {
54228753Smm	char	*name;
55228753Smm	int	 hash;
56228753Smm	id_t	 id;
57228753Smm};
58228753Smm
/* Number of slots in each name->id cache table (one table per id kind). */
static const size_t cache_size = 127;

/*
 * Forward declarations for the file-local helpers.  Parameter names
 * mirror the definitions further down in this file.
 */
static unsigned int	hash(const char *p);
static int64_t	lookup_gid(void *private_data, const char *gname, int64_t gid);
static int64_t	lookup_uid(void *private_data, const char *uname, int64_t uid);
static void	cleanup(void *private);
64228753Smm
65228753Smm/*
66228753Smm * Installs functions that use getpwnam()/getgrnam()---along with
67228753Smm * a simple cache to accelerate such lookups---into the archive_write_disk
68228753Smm * object.  This is in a separate file because getpwnam()/getgrnam()
69228753Smm * can pull in a LOT of library code (including NIS/LDAP functions, which
70302001Smm * pull in DNS resolvers, etc).  This can easily top 500kB, which makes
71228753Smm * it inappropriate for some space-constrained applications.
72228753Smm *
73228753Smm * Applications that are size-sensitive may want to just use the
74228753Smm * real default functions (defined in archive_write_disk.c) that just
75228753Smm * use the uid/gid without the lookup.  Or define your own custom functions
76228753Smm * if you prefer.
77228753Smm *
78228753Smm * TODO: Replace these hash tables with simpler move-to-front LRU
79228753Smm * lists with a bounded size (128 items?).  The hash is a bit faster,
80228753Smm * but has a bad pathology in which it thrashes a single bucket.  Even
81228753Smm * walking a list of 128 items is a lot faster than calling
82228753Smm * getpwnam()!
83228753Smm */
84228753Smmint
85228753Smmarchive_write_disk_set_standard_lookup(struct archive *a)
86228753Smm{
87311042Smm	struct bucket *ucache = calloc(cache_size, sizeof(struct bucket));
88311042Smm	struct bucket *gcache = calloc(cache_size, sizeof(struct bucket));
89302001Smm	if (ucache == NULL || gcache == NULL) {
90302001Smm		free(ucache);
91302001Smm		free(gcache);
92302001Smm		return (ARCHIVE_FATAL);
93302001Smm	}
94228753Smm	archive_write_disk_set_group_lookup(a, gcache, lookup_gid, cleanup);
95228753Smm	archive_write_disk_set_user_lookup(a, ucache, lookup_uid, cleanup);
96228753Smm	return (ARCHIVE_OK);
97228753Smm}
98228753Smm
99232153Smmstatic int64_t
100232153Smmlookup_gid(void *private_data, const char *gname, int64_t gid)
101228753Smm{
102228753Smm	int h;
103228753Smm	struct bucket *b;
104228753Smm	struct bucket *gcache = (struct bucket *)private_data;
105228753Smm
106228753Smm	/* If no gname, just use the gid provided. */
107228753Smm	if (gname == NULL || *gname == '\0')
108228753Smm		return (gid);
109228753Smm
110228753Smm	/* Try to find gname in the cache. */
111228753Smm	h = hash(gname);
112228753Smm	b = &gcache[h % cache_size ];
113228753Smm	if (b->name != NULL && b->hash == h && strcmp(gname, b->name) == 0)
114228753Smm		return ((gid_t)b->id);
115228753Smm
116228753Smm	/* Free the cache slot for a new entry. */
117344674Smm	free(b->name);
118228753Smm	b->name = strdup(gname);
119228753Smm	/* Note: If strdup fails, that's okay; we just won't cache. */
120228753Smm	b->hash = h;
121228753Smm#if HAVE_GRP_H
122228753Smm#  if HAVE_GETGRNAM_R
123228753Smm	{
124228753Smm		char _buffer[128];
125228753Smm		size_t bufsize = 128;
126228753Smm		char *buffer = _buffer;
127238856Smm		char *allocated = NULL;
128228753Smm		struct group	grent, *result;
129228753Smm		int r;
130228753Smm
131228753Smm		for (;;) {
132228753Smm			result = &grent; /* Old getgrnam_r ignores last arg. */
133228753Smm			r = getgrnam_r(gname, &grent, buffer, bufsize, &result);
134228753Smm			if (r == 0)
135228753Smm				break;
136228753Smm			if (r != ERANGE)
137228753Smm				break;
138228753Smm			bufsize *= 2;
139238856Smm			free(allocated);
140238856Smm			allocated = malloc(bufsize);
141238856Smm			if (allocated == NULL)
142228753Smm				break;
143238856Smm			buffer = allocated;
144228753Smm		}
145228753Smm		if (result != NULL)
146228753Smm			gid = result->gr_gid;
147238856Smm		free(allocated);
148228753Smm	}
149228753Smm#  else /* HAVE_GETGRNAM_R */
150228753Smm	{
151228753Smm		struct group *result;
152228753Smm
153228753Smm		result = getgrnam(gname);
154228753Smm		if (result != NULL)
155228753Smm			gid = result->gr_gid;
156228753Smm	}
157228753Smm#  endif /* HAVE_GETGRNAM_R */
158228753Smm#elif defined(_WIN32) && !defined(__CYGWIN__)
159228753Smm	/* TODO: do a gname->gid lookup for Windows. */
160228753Smm#else
161228753Smm	#error No way to perform gid lookups on this platform
162228753Smm#endif
163238856Smm	b->id = (gid_t)gid;
164228753Smm
165228753Smm	return (gid);
166228753Smm}
167228753Smm
168232153Smmstatic int64_t
169232153Smmlookup_uid(void *private_data, const char *uname, int64_t uid)
170228753Smm{
171228753Smm	int h;
172228753Smm	struct bucket *b;
173228753Smm	struct bucket *ucache = (struct bucket *)private_data;
174228753Smm
175228753Smm	/* If no uname, just use the uid provided. */
176228753Smm	if (uname == NULL || *uname == '\0')
177228753Smm		return (uid);
178228753Smm
179228753Smm	/* Try to find uname in the cache. */
180228753Smm	h = hash(uname);
181228753Smm	b = &ucache[h % cache_size ];
182228753Smm	if (b->name != NULL && b->hash == h && strcmp(uname, b->name) == 0)
183228753Smm		return ((uid_t)b->id);
184228753Smm
185228753Smm	/* Free the cache slot for a new entry. */
186344674Smm	free(b->name);
187228753Smm	b->name = strdup(uname);
188228753Smm	/* Note: If strdup fails, that's okay; we just won't cache. */
189228753Smm	b->hash = h;
190228753Smm#if HAVE_PWD_H
191228753Smm#  if HAVE_GETPWNAM_R
192228753Smm	{
193228753Smm		char _buffer[128];
194228753Smm		size_t bufsize = 128;
195228753Smm		char *buffer = _buffer;
196238856Smm		char *allocated = NULL;
197228753Smm		struct passwd	pwent, *result;
198228753Smm		int r;
199228753Smm
200228753Smm		for (;;) {
201228753Smm			result = &pwent; /* Old getpwnam_r ignores last arg. */
202228753Smm			r = getpwnam_r(uname, &pwent, buffer, bufsize, &result);
203228753Smm			if (r == 0)
204228753Smm				break;
205228753Smm			if (r != ERANGE)
206228753Smm				break;
207228753Smm			bufsize *= 2;
208238856Smm			free(allocated);
209238856Smm			allocated = malloc(bufsize);
210238856Smm			if (allocated == NULL)
211228753Smm				break;
212238856Smm			buffer = allocated;
213228753Smm		}
214228753Smm		if (result != NULL)
215228753Smm			uid = result->pw_uid;
216238856Smm		free(allocated);
217228753Smm	}
218228753Smm#  else /* HAVE_GETPWNAM_R */
219228753Smm	{
220228753Smm		struct passwd *result;
221228753Smm
222228753Smm		result = getpwnam(uname);
223228753Smm		if (result != NULL)
224228753Smm			uid = result->pw_uid;
225228753Smm	}
226228753Smm#endif	/* HAVE_GETPWNAM_R */
227228753Smm#elif defined(_WIN32) && !defined(__CYGWIN__)
228228753Smm	/* TODO: do a uname->uid lookup for Windows. */
229228753Smm#else
230228753Smm	#error No way to look up uids on this platform
231228753Smm#endif
232238856Smm	b->id = (uid_t)uid;
233228753Smm
234228753Smm	return (uid);
235228753Smm}
236228753Smm
237228753Smmstatic void
238228753Smmcleanup(void *private)
239228753Smm{
240228753Smm	size_t i;
241228753Smm	struct bucket *cache = (struct bucket *)private;
242228753Smm
243228753Smm	for (i = 0; i < cache_size; i++)
244228753Smm		free(cache[i].name);
245228753Smm	free(cache);
246228753Smm}
247228753Smm
248228753Smm
/*
 * A 32-bit version of Peter Weinberger's (PJW) hash algorithm, as used
 * by ELF for hashing function names.
 *
 * p: NUL-terminated string to hash (must not be NULL).
 *
 * Returns the hash; the top four bits of the 32-bit value are folded
 * back into the low bits each round, so the result never exceeds
 * 0x0FFFFFFF.  An empty string hashes to 0.
 */
static unsigned int
hash(const char *p)
{
	/*
	 * Read bytes as unsigned char: with plain char, bytes >= 0x80
	 * would sign-extend where char is signed, making the hash depend
	 * on the platform's char signedness.
	 */
	const unsigned char *s = (const unsigned char *)p;
	unsigned int h = 0;

	while (*s != '\0') {
		unsigned int g;

		h = (h << 4) + *s++;
		g = h & 0xF0000000;
		if (g != 0) {
			/* Fold the overflowing nibble back in, then drop it. */
			h ^= g >> 24;
			h &= 0x0FFFFFFF;
		}
	}
	return (h);
}
264