/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <_simple.h>            // as included by malloc.c, this defines ASL_LEVEL_INFO
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <fcntl.h>
#include <dirent.h>
#include <libkern/OSAtomic.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <os/tsd.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <paths.h>
#include <errno.h>
#include <assert.h>
#include <TargetConditionals.h>		// for TARGET_OS_EMBEDDED, TARGET_IPHONE_SIMULATOR
#include "stack_logging.h"
#include "malloc_printf.h"
#include "malloc_internal.h"

#pragma mark -
#pragma mark Defines

#if TARGET_OS_EMBEDDED || TARGET_IPHONE_SIMULATOR
// _malloc_printf(ASL_LEVEL_INFO...) on iOS doesn't show up in the Xcode Console log of the device,
// but ASL_LEVEL_NOTICE does.  So raising the log level is helpful.
#undef ASL_LEVEL_INFO
#define ASL_LEVEL_INFO ASL_LEVEL_NOTICE
#endif

#ifdef TEST_DISK_STACK_LOGGING
#define _malloc_printf fprintf
#undef ASL_LEVEL_INFO
#define ASL_LEVEL_INFO stderr
#endif

#define STACK_LOGGING_MAX_STACK_SIZE 512
#define STACK_LOGGING_BLOCK_WRITING_SIZE 8192
#define STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED 3

#define BACKTRACE_UNIQUING_DEBUG 0

// The expansion factor controls the shifting up of table size. A factor of 1 will double the size upon expanding,
// 2 will quadruple the size, etc. Maintaining a 66% fill in an ideal table requires the collision allowance to
// increase by 3 for every quadrupling of the table size (although this is just the constant applied to insertion
// performance, O(c*n)).
#define EXPAND_FACTOR 2
#define COLLISION_GROWTH_RATE 3

// For a uniquing table, the number of usable node slots is floor(table_byte_size / (2 * sizeof(mach_vm_address_t))).
// Some useful numbers for the initial max collision value (desiring 66% fill):
// 16K-23K slots -> 16 collisions
// 24K-31K slots -> 17 collisions
// 32K-47K slots -> 18 collisions
// 48K-79K slots -> 19 collisions
// 80K-96K slots -> 20 collisions
#define INITIAL_MAX_COLLIDE	19
#define DEFAULT_UNIQUING_PAGE_SIZE 256
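
// A quick sanity check of the sizing numbers above (illustrative sketch, not
// compiled into the build; it assumes a 4 KB VM page, which is not true on
// all devices):
#if 0
static void
check_initial_sizing(void)
{
	const uint64_t assumed_page_size = 4096;				// assumption for this example
	uint64_t table_bytes = DEFAULT_UNIQUING_PAGE_SIZE * assumed_page_size;	// 1 MB
	uint64_t slots = table_bytes / (2 * sizeof(mach_vm_address_t));		// 65536 == 64K slots
	// 64K slots falls in the "48K-79K slots -> 19 collisions" row above:
	assert(slots == 64 * 1024);
	assert(INITIAL_MAX_COLLIDE == 19);
}
#endif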

#pragma mark -
#pragma mark Macros

#define STACK_LOGGING_FLAGS_SHIFT 56
#define STACK_LOGGING_USER_TAG_SHIFT 24
#define STACK_LOGGING_FLAGS(longlongvar) (uint32_t)((uint64_t)(longlongvar) >> STACK_LOGGING_FLAGS_SHIFT)
#define STACK_LOGGING_FLAGS_AND_USER_TAG(longlongvar) (uint32_t)(STACK_LOGGING_FLAGS(longlongvar) | (((uint64_t)(longlongvar) & 0x00FF000000000000ull) >> STACK_LOGGING_USER_TAG_SHIFT) )

#define STACK_LOGGING_OFFSET_MASK 0x0000FFFFFFFFFFFFull
#define STACK_LOGGING_OFFSET(longlongvar) ((longlongvar) & STACK_LOGGING_OFFSET_MASK)

#define STACK_LOGGING_OFFSET_AND_FLAGS(longlongvar, type_flags) ( ((uint64_t)(longlongvar) & STACK_LOGGING_OFFSET_MASK) | ((uint64_t)(type_flags) << STACK_LOGGING_FLAGS_SHIFT) | (((uint64_t)(type_flags) & 0xFF000000ull) << STACK_LOGGING_USER_TAG_SHIFT) )
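
// Worked example of the bit layout these macros produce (illustrative sketch,
// not compiled; the flag value 0x05 and user tag 0x42 are made up):
// layout of the packed word is [63..56 flags][55..48 VM user tag][47..0 offset].
#if 0
static void
check_offset_and_flags_roundtrip(void)
{
	uint64_t offset = 0x0000123456789ABCull;		// any 48-bit file offset
	uint32_t type_flags = 0x05u | (0x42u << 24);	// low 8 bits of flags + user tag in bits 24-31
	uint64_t packed = STACK_LOGGING_OFFSET_AND_FLAGS(offset, type_flags);
	// packed == 0x0542123456789ABC
	assert(STACK_LOGGING_OFFSET(packed) == offset);
	assert(STACK_LOGGING_FLAGS(packed) == 0x05u);
	assert(STACK_LOGGING_FLAGS_AND_USER_TAG(packed) == type_flags);
}
#endif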

#pragma mark -
#pragma mark Types

typedef struct {
	uintptr_t	argument;
	uintptr_t	address;
	uint64_t	offset_and_flags; // top 8 bits are actually the flags!
} stack_logging_index_event;

typedef struct {
	uint32_t	argument;
	uint32_t	address;
	uint64_t	offset_and_flags; // top 8 bits are actually the flags!
} stack_logging_index_event32;

typedef struct {
	uint64_t	argument;
	uint64_t	address;
	uint64_t	offset_and_flags; // top 8 bits are actually the flags!
} stack_logging_index_event64;

// backtrace uniquing table chunks used in client-side stack log reading code,
// in case we can't read the whole table in one mach_vm_read() call.
typedef struct table_chunk_header {
	uint64_t							num_nodes_in_chunk;
	uint64_t							table_chunk_size;
	mach_vm_address_t					*table_chunk;
	struct table_chunk_header			*next_table_chunk_header;
} table_chunk_header_t;

#pragma pack(push,4)
typedef struct {
	uint64_t							numPages; // number of pages of the table
	uint64_t							numNodes;
	uint64_t							tableSize;
	uint64_t							untouchableNodes;
	mach_vm_address_t					table_address;
	int32_t								max_collide;
	// 'table_address' is just an always 64-bit version of the pointer-sized 'table' field to remotely read;
	// it's important that the offset of 'table_address' in the struct does not change between 32 and 64-bit.
#if BACKTRACE_UNIQUING_DEBUG
	uint64_t nodesFull;
	uint64_t backtracesContained;
#endif
	union {
		mach_vm_address_t				*table;		// in "target" process;  allocated using vm_allocate()
		table_chunk_header_t			*first_table_chunk_hdr;		// in analysis process
	} u;
} backtrace_uniquing_table;
#pragma pack(pop)

// for storing/looking up allocations that haven't yet been written to disk; consistent size across 32/64-bit processes.
// It's important that these fields don't change alignment due to the architecture because they may be accessed from an
// analyzing process with a different arch - hence the pragmas.
#pragma pack(push,4)
typedef struct {
	uint64_t			start_index_offset;
	uint32_t			next_free_index_buffer_offset;
	char				index_buffer[STACK_LOGGING_BLOCK_WRITING_SIZE];
	backtrace_uniquing_table	*uniquing_table;
} stack_buffer_shared_memory;
#pragma pack(pop)

// target process address -> record table (for __mach_stack_logging_get_frames)
typedef struct {
	uint64_t		address;
	uint64_t		index_file_offset;
} remote_index_node;

// for caching index information client-side:
typedef struct {
	size_t			cache_size;
	size_t			cache_node_capacity;
	uint32_t		collision_allowance;
	remote_index_node	*table_memory; // this can be malloced; it's on the client side.
	stack_buffer_shared_memory *shmem; // shared memory
	stack_buffer_shared_memory snapshot; // memory snapshot of the remote process' shared memory
	uint32_t		last_pre_written_index_size;
	uint64_t		last_index_file_offset;
	backtrace_uniquing_table uniquing_table_snapshot; // snapshot of the remote process' uniquing table
} remote_index_cache;

// for reading stack history information from remote processes:
typedef struct {
	task_t			remote_task;
	pid_t			remote_pid;
	int32_t			task_is_64_bit;
	int32_t			in_use_count;
	FILE			*index_file_stream;
	uint64_t		remote_stack_buffer_shared_memory_address;
	remote_index_cache	*cache;
} remote_task_file_streams;

#pragma mark -
#pragma mark Constants/Globals

static _malloc_lock_s stack_logging_lock = _MALLOC_LOCK_INIT;

// support for multi-threaded forks
extern void __stack_logging_fork_prepare();
extern void __stack_logging_fork_parent();
extern void __stack_logging_fork_child();
extern void __stack_logging_early_finished();

// support for gdb and others checking for stack_logging locks
extern boolean_t __stack_logging_locked();

// single-thread access variables
static stack_buffer_shared_memory *pre_write_buffers;
static vm_address_t *stack_buffer;
static uintptr_t last_logged_malloc_address = 0;

// Constants to define part of stack logging file path names.
// File names are of the form stack-logs.<pid>.<address>.<progname>.XXXXXX.index
// where <address> is the address of the pre_write_buffers VM region in the target
// process that will need to be mapped into analysis tool processes.
static const char *stack_log_file_base_name = "stack-logs.";
static const char *stack_log_file_suffix = ".index";

char *__stack_log_file_path__ = NULL;
static int index_file_descriptor = -1;

// for accessing remote log files
static remote_task_file_streams remote_fds[STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED];
static uint32_t next_remote_task_fd = 0;
static uint32_t remote_task_fd_count = 0;
static _malloc_lock_s remote_fd_list_lock = _MALLOC_LOCK_INIT;

// activation variables
static int logging_use_compaction = 1; // set this to zero to always disable compaction.

// We set malloc_logger to NULL to disable logging if we encounter errors
// during file writing.
typedef void (malloc_logger_t)(uint32_t type, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t result, uint32_t num_hot_frames_to_skip);
extern malloc_logger_t *malloc_logger;

extern malloc_logger_t *__syscall_logger;	// use this to set up syscall logging (e.g., vm_allocate, vm_deallocate, mmap, munmap)

#pragma mark -
#pragma mark In-Memory Backtrace Uniquing

static __attribute__((always_inline))
inline void*
allocate_pages(uint64_t memSize)
{
	mach_vm_address_t allocatedMem = 0ull;
	if (mach_vm_allocate(mach_task_self(), &allocatedMem, memSize, VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_MEMORY_ANALYSIS_TOOL)) != KERN_SUCCESS) {
		malloc_printf("allocate_pages(): virtual memory exhausted!\n");
	}
	return (void*)(uintptr_t)allocatedMem;
}

static __attribute__((always_inline))
inline int
deallocate_pages(void* memPointer, uint64_t memSize)
{
	return mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)(uintptr_t)memPointer, memSize);
}

static backtrace_uniquing_table*
__create_uniquing_table(void)
{
	backtrace_uniquing_table *uniquing_table = (backtrace_uniquing_table*)allocate_pages((uint64_t)round_page(sizeof(backtrace_uniquing_table)));
	if (!uniquing_table) return NULL;
	bzero(uniquing_table, sizeof(backtrace_uniquing_table));
	uniquing_table->numPages = DEFAULT_UNIQUING_PAGE_SIZE;
	uniquing_table->tableSize = uniquing_table->numPages * vm_page_size;
	uniquing_table->numNodes = ((uniquing_table->tableSize / (sizeof(mach_vm_address_t) * 2)) >> 1) << 1; // make sure it's even.
	uniquing_table->u.table = (mach_vm_address_t*)(uintptr_t)allocate_pages(uniquing_table->tableSize);
	uniquing_table->table_address = (uintptr_t)uniquing_table->u.table;
	uniquing_table->max_collide = INITIAL_MAX_COLLIDE;
	uniquing_table->untouchableNodes = 0;

#if BACKTRACE_UNIQUING_DEBUG
	malloc_printf("create_uniquing_table(): creating. size: %lldKB == %lldMB, numnodes: %lld (%lld untouchable)\n", uniquing_table->tableSize >> 10, uniquing_table->tableSize >> 20, uniquing_table->numNodes, uniquing_table->untouchableNodes);
	malloc_printf("create_uniquing_table(): table: %p; end: %p\n", uniquing_table->table, (void*)((uintptr_t)uniquing_table->table + (uintptr_t)uniquing_table->tableSize));
#endif
	return uniquing_table;
}

static void
__destroy_uniquing_table(backtrace_uniquing_table* table)
{
	deallocate_pages(table->u.table, table->tableSize);
	deallocate_pages(table, sizeof(backtrace_uniquing_table));
}

static void
__expand_uniquing_table(backtrace_uniquing_table *uniquing_table)
{
	mach_vm_address_t *oldTable = uniquing_table->u.table;
	uint64_t oldsize = uniquing_table->tableSize;
	uint64_t oldnumnodes = uniquing_table->numNodes;

	uniquing_table->numPages = uniquing_table->numPages << EXPAND_FACTOR;
	uniquing_table->tableSize = uniquing_table->numPages * vm_page_size;
	uniquing_table->numNodes = ((uniquing_table->tableSize / (sizeof(mach_vm_address_t) * 2)) >> 1) << 1; // make sure it's even.
	mach_vm_address_t *newTable = (mach_vm_address_t*)(uintptr_t)allocate_pages(uniquing_table->tableSize);

	uniquing_table->u.table = newTable;
	uniquing_table->table_address = (uintptr_t)uniquing_table->u.table;
	uniquing_table->max_collide = uniquing_table->max_collide + COLLISION_GROWTH_RATE;

	if (mach_vm_copy(mach_task_self(), (mach_vm_address_t)(uintptr_t)oldTable, oldsize, (mach_vm_address_t)(uintptr_t)newTable) != KERN_SUCCESS) {
		malloc_printf("expandUniquingTable(): VMCopyFailed\n");
	}
	uniquing_table->untouchableNodes = oldnumnodes;

#if BACKTRACE_UNIQUING_DEBUG
	malloc_printf("expandUniquingTable(): expanded from nodes full: %lld of: %lld (~%2d%%); to nodes: %lld (inactive = %lld); unique bts: %lld\n",
				  uniquing_table->nodesFull, oldnumnodes, (int)(((uniquing_table->nodesFull * 100.0) / (double)oldnumnodes) + 0.5),
				  uniquing_table->numNodes, uniquing_table->untouchableNodes, uniquing_table->backtracesContained);
	malloc_printf("expandUniquingTable(): allocate: %p; end: %p\n", newTable, (void*)((uintptr_t)newTable + (uintptr_t)(uniquing_table->tableSize)));
	malloc_printf("expandUniquingTable(): deallocate: %p; end: %p\n", oldTable, (void*)((uintptr_t)oldTable + (uintptr_t)oldsize));
#endif

	if (deallocate_pages(oldTable, oldsize) != KERN_SUCCESS) {
		malloc_printf("expandUniquingTable(): mach_vm_deallocate failed. [%p]\n", uniquing_table->u.table);
	}
}

static int
__enter_frames_in_table(backtrace_uniquing_table *uniquing_table, uint64_t *foundIndex, mach_vm_address_t *frames, int32_t count)
{
	// The hash values need to be the same size as the addresses (because we use the value -1). For clarity, define a new type.
	typedef mach_vm_address_t hash_index_t;

	mach_vm_address_t thisPC;
	hash_index_t hash, uParent = (hash_index_t)(-1ll), modulus = (uniquing_table->numNodes-uniquing_table->untouchableNodes-1);
	int32_t collisions, lcopy = count, returnVal = 1;
	hash_index_t hash_multiplier = ((uniquing_table->numNodes - uniquing_table->untouchableNodes)/(uniquing_table->max_collide*2+1));
	mach_vm_address_t *node;
	while (--lcopy >= 0) {
		thisPC = frames[lcopy];

		// hash = initialHash(uniquing_table, uParent, thisPC);
		hash = uniquing_table->untouchableNodes + (((uParent << 4) ^ (thisPC >> 2)) % modulus);
		collisions = uniquing_table->max_collide;

		while (collisions--) {
			node = uniquing_table->u.table + (hash * 2);

			if (*node == 0 && node[1] == 0) {
				// blank; store this entry!
				// Note that we need to test for both node[0] and node[1] as (0, -1) is a valid entry
				node[0] = thisPC;
				node[1] = uParent;
				uParent = hash;
#if BACKTRACE_UNIQUING_DEBUG
				uniquing_table->nodesFull++;
				if (lcopy == 0) {
					uniquing_table->backtracesContained++;
				}
#endif
				break;
			}
			if (*node == thisPC && node[1] == uParent) {
				// hit! retrieve index and go.
				uParent = hash;
				break;
			}

			hash += collisions * hash_multiplier + 1;

			if (hash >= uniquing_table->numNodes) {
				hash -= (uniquing_table->numNodes - uniquing_table->untouchableNodes); // wrap around.
			}
		}

		if (collisions < 0) {
			returnVal = 0;
			break;
		}
	}

	if (returnVal) *foundIndex = uParent;

	return returnVal;
}
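
// Illustrative usage sketch (not compiled): uniquing two backtraces that share
// their cold frames stores the shared frames once and yields distinct leaf
// indices. The frame addresses here are made up; frames are ordered hot -> cold,
// and the function walks them cold-first so shared cold chains collapse.
#if 0
static void
uniquing_example(void)
{
	backtrace_uniquing_table *table = __create_uniquing_table();
	mach_vm_address_t bt1[] = { 0x1004, 0x2008, 0x300C };	// hot -> cold
	mach_vm_address_t bt2[] = { 0x1010, 0x2008, 0x300C };	// identical cold frames
	uint64_t leaf1 = 0, leaf2 = 0;
	// a 0 return means "table full": expand and retry, as the logging code does
	while (!__enter_frames_in_table(table, &leaf1, bt1, 3)) __expand_uniquing_table(table);
	while (!__enter_frames_in_table(table, &leaf2, bt2, 3)) __expand_uniquing_table(table);
	// leaf1 != leaf2, but both parent chains pass through the same nodes for
	// { 0x2008, 0x300C } and end at the (mach_vm_address_t)(-1) terminator.
	__destroy_uniquing_table(table);
}
#endif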

#pragma mark -
#pragma mark Disk Stack Logging

// pre-declarations
static void delete_log_files(void);
static int delete_logging_file(char *log_location);
static bool getenv_from_process(pid_t pid, char *env_var_name, char *env_var_value_buf, size_t max_path_len);

#define BASE10 10
#define BASE16 16

static void
append_int(char * filename, uint64_t inputValue, unsigned base, size_t maxLength)
{
	const char *digits = "0123456789abcdef";
	if (base > strlen(digits)) return;		// sanity check

	size_t len = strlen(filename);

	uint32_t count = 0;
	uint64_t value = inputValue;
	while (value > 0) {
		value /= base;
		count++;
	}

	if (len + count >= maxLength) return; // don't modify the string if it would violate maxLength

	filename[len + count] = '\0';

	value = inputValue;
	uint32_t i;
	for (i = 0 ; i < count ; i ++) {
		filename[len + count - 1 - i] = digits[value % base];
		value /= base;
	}
}
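
// Illustrative usage (not compiled): building a name of the documented form
// "stack-logs.<pid>.<address>.<progname>.XXXXXX.index". The pid 1234, the
// address 0x10abc000, and "MyApp" are made-up values for the demonstration.
#if 0
static void
append_int_example(void)
{
	char name[PATH_MAX] = "stack-logs.";
	append_int(name, 1234, BASE10, sizeof(name));		// "stack-logs.1234"
	strlcat(name, ".", sizeof(name));
	append_int(name, 0x10abc000, BASE16, sizeof(name));	// "stack-logs.1234.10abc000"
	strlcat(name, ".MyApp.XXXXXX.index", sizeof(name));
	// name == "stack-logs.1234.10abc000.MyApp.XXXXXX.index"
}
#endif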

/*
 * <rdar://problem/11128080> If we need to call confstr() during init, setting this
 * flag will postpone stack logging until after Libsystem's initializer has run.
 */
static void
postpone_stack_logging(void)
{
	_malloc_printf(ASL_LEVEL_INFO, "stack logging postponed until after initialization.\n");
	stack_logging_postponed = 1;
}

/*
 * Check various logging directory options, in order of preference:
 *
 *      value of MallocStackLoggingDirectory env var if user has set it.  Typically
 *			used on Mac OS X to write to a non-root file system with more free space.
 *
 *      _PATH_TMP - /tmp usually writable for desktop apps and internal iOS apps
 *
 *      value of TMPDIR env var - for sandboxed apps that can't write into /tmp
 *
 *      confstr(_CS_DARWIN_USER_TEMP_DIR, ...) - should be same as TMPDIR if that is set,
 *          but will create it safely if it doesn't yet exist.  (See <rdar://problem/4706096>)
 *
 * Allocating and releasing the target buffer is the caller's responsibility.
 */
static bool
get_writeable_logging_directory(char* target)
{
	if (!target) return false;

	char *env_log_directory = getenv("MallocStackLoggingDirectory");
	if (env_log_directory) {
		if (-1 != access(env_log_directory, W_OK)) {
			strlcpy(target, env_log_directory, (size_t)PATH_MAX);
			return true;
		} else {
			_malloc_printf(ASL_LEVEL_INFO, "MallocStackLoggingDirectory env var set to unwritable path '%s'\n", env_log_directory);
		}
	}

	if (-1 != access(_PATH_TMP, W_OK)) {
		strlcpy(target, _PATH_TMP, (size_t)PATH_MAX);
		return true;
	}

	env_log_directory = getenv("TMPDIR");
	if (env_log_directory && (-1 != access(env_log_directory, W_OK))) {
		strlcpy(target, env_log_directory, (size_t)PATH_MAX);
		return true;
	}

	if (stack_logging_finished_init) {
		size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, target, (size_t) PATH_MAX);
		if ((n > 0) && (n < PATH_MAX)) return true;
	} else {
		/* <rdar://problem/11128080> Can't call confstr during init, so postpone
		 logging till after */
		postpone_stack_logging();
	}
	*target = '\0';
	return false;
}

// If successful, returns path to log file that was created, otherwise NULL.
static char *
create_log_file(void)
{
	pid_t pid = getpid();
	const char *progname = getprogname();
	char *created_log_location = NULL;

	if (__stack_log_file_path__ == NULL) {
		// On first use, allocate space directly from the OS without using malloc
		__stack_log_file_path__ = allocate_pages((uint64_t)round_page(PATH_MAX));
		if (__stack_log_file_path__ == NULL) {
			_malloc_printf(ASL_LEVEL_INFO, "unable to allocate memory for stack log file path\n");
			return NULL;
		}
	}

	if (!get_writeable_logging_directory(__stack_log_file_path__)) {
		if (!stack_logging_postponed) {
			_malloc_printf(ASL_LEVEL_INFO, "No writeable tmp dir\n");
		}
		return NULL;
	}

	// Add the '/' only if it's not already there.  Having multiple '/' characters works
	// but is unsightly when we print these stack log file names out.
	size_t stack_log_len = strlen(__stack_log_file_path__);
	if (__stack_log_file_path__[stack_log_len-1] != '/') {
		// use strlcat to null-terminate for the next strlcat call, and to check buffer size
		strlcat(__stack_log_file_path__ + stack_log_len, "/", (size_t)PATH_MAX);
	}

	// Append the file name to __stack_log_file_path__ but don't use snprintf since
	// it can cause malloc() calls.
	//
	// The file name is of the form "stack-logs.<pid>.<address>.<progname>.XXXXXX.index"
	// where <address> is the address of the pre_write_buffers VM region in the target
	// process that will need to be mapped into analysis tool processes. We used to just
	// use a shared memory segment for that, but sandbox'ed target apps may not be able
	// to create shared memory segments so including the address of the VM region in the
	// file name is a simple way to communicate the address to analysis tools so the
	// stack log reading code can map in the region with mach_vm_remap().

	strlcat(__stack_log_file_path__, stack_log_file_base_name, (size_t)PATH_MAX);
	append_int(__stack_log_file_path__, pid, BASE10, (size_t)PATH_MAX);
	strlcat(__stack_log_file_path__, ".", (size_t)PATH_MAX);
	append_int(__stack_log_file_path__, (uint64_t)(uintptr_t)pre_write_buffers, BASE16, (size_t)PATH_MAX);
	if (progname && progname[0] != '\0') {
		strlcat(__stack_log_file_path__, ".", (size_t)PATH_MAX);
		strlcat(__stack_log_file_path__, progname, (size_t)PATH_MAX);
	}
	strlcat(__stack_log_file_path__, ".XXXXXX", (size_t)PATH_MAX);
	strlcat(__stack_log_file_path__, stack_log_file_suffix, (size_t)PATH_MAX);

	// Securely create the log file.
	if ((index_file_descriptor = mkstemps(__stack_log_file_path__, (int)strlen(stack_log_file_suffix))) != -1) {
		_malloc_printf(ASL_LEVEL_INFO, "stack logs being written into %s\n", __stack_log_file_path__);
		created_log_location = __stack_log_file_path__;
	} else {
		_malloc_printf(ASL_LEVEL_INFO, "unable to create stack logs at %s\n", __stack_log_file_path__);
		__stack_log_file_path__[0] = '\0';
		created_log_location = NULL;
	}

	return created_log_location;
}

// This function may be called from the target process when exiting, or from either the target process or
// a stack log analysis process when reaping orphaned stack log files.
// Returns -1 if the files exist and they couldn't be removed, returns 0 otherwise.
static int
delete_logging_file(char *log_location)
{
	if (log_location == NULL || log_location[0] == '\0') return 0;

	struct stat statbuf;
	if (unlink(log_location) != 0 && stat(log_location, &statbuf) == 0) {
		return -1;
	}
	return 0;
}

// This function will be called from atexit() in the target process.
static void
delete_log_files(void)
{
	if (__stack_log_file_path__ && __stack_log_file_path__[0]) {
		if (delete_logging_file(__stack_log_file_path__) == 0) {
			_malloc_printf(ASL_LEVEL_INFO, "stack logs deleted from %s\n", __stack_log_file_path__);
			__stack_log_file_path__[0] = '\0';
		} else {
			_malloc_printf(ASL_LEVEL_INFO, "unable to delete stack logs from %s\n", __stack_log_file_path__);
		}
	}
}

static bool
is_process_running(pid_t pid)
{
	struct kinfo_proc kpt[1];
	size_t size = sizeof(struct kinfo_proc);
	int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid};

	sysctl(mib, 4, kpt, &size, NULL, (size_t)0); // size is either 1 or 0 entries when we ask for a single pid

	return (size==sizeof(struct kinfo_proc));
}

// Stack log files can be quite large and aren't useful after the process that created them no longer exists because
// the stack backtrace uniquing tree was only kept in the process memory, not on disk.  Normally the log files
// should get removed when the process exits, by the delete_log_files() atexit function.  However, there are
// several situations in which that atexit function doesn't get called so the log files remain:
//		- if the process crashes or is force-killed
//		- if the app supports sudden termination and was terminated through that
//		- if a process such as a shell execs another binary to transform the pid into a different process;
//			the new process will get a new log file but the old one would still be there.
//
// So, reap any stack log files for processes that no longer exist, or for the current process if we find a file
// other than __stack_log_file_path__
//
// This function looks for log files with prefix name "stack-logs.<pid>." underneath <directory>.
// <remaining_path_format> specifies a simple pattern of where stack logs can be found down inside <directory>.
// The pattern is essentially a relative path, where a level that starts with '<' matches any name; otherwise
// it has to be an exact name match.  See the calling function for examples.
static void
reap_orphaned_log_files_in_hierarchy(char *directory, char *remaining_path_format)
{
	DIR *dp;
	struct dirent *entry;

	// Ensure that we can access this directory - permissions or sandbox'ing might prevent it.
	if (access(directory, R_OK | W_OK | X_OK) == -1 || (dp = opendir(directory)) == NULL) {
		//_malloc_printf(ASL_LEVEL_INFO, "reaping: no access to %s\n", directory);
		return;
	}

	char pathname[PATH_MAX];
	strlcpy(pathname, directory, (size_t)PATH_MAX);
	size_t pathname_len = strlen(pathname);
	if (pathname[pathname_len-1] != '/') pathname[pathname_len++] = '/';
	char *suffix = pathname + pathname_len;

	// Recurse down to deeper levels of the temp directory hierarchy if necessary.
	if (remaining_path_format) {
		char *separator = remaining_path_format;
		while (*separator != '/' && *separator != '\0') { separator++; }
		size_t length_to_match = (*remaining_path_format == '<') ? 0 : separator - remaining_path_format;
		char *next_remaining_path_format = (*separator == '\0') ? NULL : separator + 1;

		while ( (entry = readdir(dp)) != NULL ) {
			if (entry->d_type == DT_DIR && entry->d_name[0] != '.') {
				if (length_to_match > 0 && strncmp(entry->d_name, remaining_path_format, length_to_match) != 0) {
					continue;
				}
				strlcpy(suffix, entry->d_name, (size_t)PATH_MAX - pathname_len);
				reap_orphaned_log_files_in_hierarchy(pathname, next_remaining_path_format);
			}
		}
		closedir(dp);

		return;
	}

	// OK, we found a lowest-level directory matching <remaining_path_format>, and we have access to it.
	// Reap any unnecessary stack log files in here.

	//_malloc_printf(ASL_LEVEL_INFO, "reaping: looking in %s\n", directory);

	// __stack_log_file_path__ may be NULL if this code is running in an analysis tool client process that is not
	// itself running with MallocStackLogging set.
	char *curproc_stack_log_file = __stack_log_file_path__ ? strrchr(__stack_log_file_path__, '/') + 1 : NULL;
	pid_t curpid = getpid();
	size_t prefix_length = strlen(stack_log_file_base_name);

	while ( (entry = readdir(dp)) != NULL ) {
		if ( (entry->d_type == DT_REG || entry->d_type == DT_LNK) && ( strncmp( entry->d_name, stack_log_file_base_name, prefix_length) == 0 ) ) {
			long pid = strtol(&entry->d_name[prefix_length], (char **)NULL, 10);
			if ( ! is_process_running((pid_t)pid) || (pid == curpid && curproc_stack_log_file && strcmp(entry->d_name, curproc_stack_log_file) != 0) ) {
				strlcpy(suffix, entry->d_name, (size_t)PATH_MAX - pathname_len);
				if (delete_logging_file(pathname) == 0) {
					if (pid == curpid) {
						_malloc_printf(ASL_LEVEL_INFO, "stack logs deleted from %s\n", pathname);
					} else {
						_malloc_printf(ASL_LEVEL_INFO, "process %ld no longer exists, stack logs deleted from %s\n", pid, pathname);
					}
				}
			}
		}
	}
	closedir(dp);
}

static void
reap_orphaned_log_files(pid_t pid)
{
	reap_orphaned_log_files_in_hierarchy(_PATH_TMP, NULL);

	char *env_var_names[] = { "TMPDIR", "MallocStackLoggingDirectory" };
	for (unsigned i = 0; i < sizeof(env_var_names) / sizeof(char *); i++) {
		char directory[PATH_MAX];
		bool success = getenv_from_process(pid, env_var_names[i], directory, sizeof(directory));
		if (success && strcmp(directory, _PATH_TMP) != 0) {
			reap_orphaned_log_files_in_hierarchy(directory, NULL);
		}
	}

	// Now reap files left over in any other accessible app-specific temp directories.
	// These could be from sandbox'ed apps.
#if TARGET_OS_EMBEDDED
	char *root_of_temp_directories = "/private/var/mobile/Containers/Data/Application";	// ugh - hard-coding to user name "mobile".  Works for all iOS's up to now.
	char *temp_directories_path_format = "<application-UUID>/tmp";
#else
	char *root_of_temp_directories = "/private/var/folders";
	char *temp_directories_path_format = "<xx>/<random>/T";
#endif
	reap_orphaned_log_files_in_hierarchy(root_of_temp_directories, temp_directories_path_format);
}

/*
 * Since there are many errors that could cause stack logging to get disabled, this is a convenience method
 * for disabling any future logging in this process and for informing the user.
 */
static void
disable_stack_logging(void)
{
	_malloc_printf(ASL_LEVEL_INFO, "stack logging disabled due to previous errors.\n");
	stack_logging_enable_logging = 0;
	malloc_logger = NULL;
	__syscall_logger = NULL;
}

/* A wrapper around write() that will try to reopen the index/stack file and
 * write to it if someone closed it underneath us (e.g. the process we just
 * started decided to close all file descriptors except stdin/stdout/stderr). Some
 * programs like to do that and calling abort() on them is rude.
 */
static ssize_t
robust_write(int fd, const void *buf, size_t nbyte) {
	extern int errno;
	ssize_t written = write(fd, buf, nbyte);
	if (written == -1 && errno == EBADF) {
		char *file_to_reopen = NULL;
		int *fd_to_reset = NULL;

		// descriptor was closed on us. We need to reopen it
		if (fd == index_file_descriptor) {
			file_to_reopen = __stack_log_file_path__;
			fd_to_reset = &index_file_descriptor;
		} else {
			// We don't know about this file. Return (and abort()).
			_malloc_printf(ASL_LEVEL_INFO, "Unknown file descriptor; expecting stack logging index file\n");
			return -1;
		}

		// The file *should* already exist. If not, fail.
		fd = open(file_to_reopen, O_WRONLY | O_APPEND);
		if (fd < 3) {
			// If we somehow got stdin/out/err, we need to relinquish them and
			// get another fd.
			int fds_to_close[3] = { 0 };
			while (fd < 3) {
				if (fd == -1) {
					_malloc_printf(ASL_LEVEL_INFO, "unable to re-open stack logging file %s\n", file_to_reopen);
					delete_log_files();
					return -1;
				}
				fds_to_close[fd] = 1;
				fd = dup(fd);
			}

			// We have an fd we like. Close the ones we opened.
			if (fds_to_close[0]) close(0);
			if (fds_to_close[1]) close(1);
			if (fds_to_close[2]) close(2);
		}

		*fd_to_reset = fd;
		written = write(fd, buf, nbyte);
	}
	return written;
}
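
// Minimal standalone illustration (not compiled) of the dup() dance above:
// dup() returns the lowest-numbered unused descriptor, so repeatedly dup'ing
// while keeping the low slots open walks the fd upward past 0/1/2; the
// temporarily-occupied low slots are then closed. raise_fd_above_stdio() is a
// hypothetical helper, not part of this file.
#if 0
static int
raise_fd_above_stdio(int fd)
{
	int fds_to_close[3] = { 0 };
	while (fd >= 0 && fd < 3) {
		fds_to_close[fd] = 1;	// we now occupy this low slot, so dup() can't return it again
		fd = dup(fd);			// returns the lowest free fd, or -1 on failure
	}
	if (fds_to_close[0]) close(0);
	if (fds_to_close[1]) close(1);
	if (fds_to_close[2]) close(2);
	return fd;					// >= 3 on success, -1 if dup() failed
}
#endif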

static void
flush_data(void)
{
	ssize_t written; // signed size_t
	size_t remaining;
	char * p;

	if (index_file_descriptor == -1) {
		if (create_log_file() == NULL) {
			return;
		}
	}

	// Write the events before the index so that hopefully the events will be on disk if the index refers to them.
	p = pre_write_buffers->index_buffer;
	remaining = (size_t)pre_write_buffers->next_free_index_buffer_offset;
	while (remaining > 0) {
		written = robust_write(index_file_descriptor, p, remaining);
		if (written == -1) {
			_malloc_printf(ASL_LEVEL_INFO, "Unable to write to stack logging file %s (%s)\n",
						   __stack_log_file_path__, strerror(errno));
			disable_stack_logging();
			return;
		}
		p += written;
		remaining -= written;
	}

	pre_write_buffers->start_index_offset += pre_write_buffers->next_free_index_buffer_offset;
	pre_write_buffers->next_free_index_buffer_offset = 0;
}

__attribute__((visibility("hidden")))
void
__prepare_to_log_stacks(void)
{
	if (!pre_write_buffers) {
		last_logged_malloc_address = 0ul;
		logging_use_compaction = (stack_logging_dontcompact ? 0 : logging_use_compaction);

		// Create a VM region to hold the pre-write index and stack buffers. The address of this VM region will be
		// encoded into the stack log file name, so that the stack log reading code running in remote analysis
		// processes can find it and map it into the analysis process. This allows remote analysis processes to access
		// these buffers to get logs for even the most recent allocations. The remote process will need to pause this
		// process to assure that the contents of these buffers don't change while being inspected.
		//
		// We used to use shm_open() to create a shared memory region for this, but since this code runs in arbitrary
		// processes that may have sandbox restrictions that don't allow the creation of shared memory regions,
		// we're using this "create a region and put its address in the stack log file name" approach.
		size_t full_shared_mem_size = sizeof(stack_buffer_shared_memory);
		pre_write_buffers = mmap(0, full_shared_mem_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, VM_MAKE_TAG(VM_MEMORY_ANALYSIS_TOOL), 0);
		if (MAP_FAILED == pre_write_buffers) {
			_malloc_printf(ASL_LEVEL_INFO, "error creating VM region for stack logging output buffers\n");
			disable_stack_logging();
			return;
		}

		// Store and use the buffer offsets in shared memory so that they can be accessed remotely
		pre_write_buffers->start_index_offset = 0ull;
		pre_write_buffers->next_free_index_buffer_offset = 0;

		// create the backtrace uniquing table
		pre_write_buffers->uniquing_table = __create_uniquing_table();
		if (!pre_write_buffers->uniquing_table) {
			_malloc_printf(ASL_LEVEL_INFO, "error while allocating stack uniquing table\n");
			disable_stack_logging();
			return;
		}

		uint64_t stack_buffer_sz = (uint64_t)round_page(sizeof(vm_address_t) * STACK_LOGGING_MAX_STACK_SIZE);
		stack_buffer = (vm_address_t*)allocate_pages(stack_buffer_sz);
		if (!stack_buffer) {
			_malloc_printf(ASL_LEVEL_INFO, "error while allocating stack trace buffer\n");
			disable_stack_logging();
			return;
		}

		// this call ensures that the log files exist; analyzing processes will rely on this assumption.
		if (create_log_file() == NULL) {
			/* postponement support requires cleaning up these structures now */
			__destroy_uniquing_table(pre_write_buffers->uniquing_table);
			deallocate_pages(stack_buffer, stack_buffer_sz);
			stack_buffer = NULL;

			munmap(pre_write_buffers, full_shared_mem_size);
			pre_write_buffers = NULL;

			if (!stack_logging_postponed) {
				disable_stack_logging();
			}
			return;
		}
	}
}
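
// Client-side counterpart of the naming scheme above (illustrative sketch, not
// compiled; parse_region_address() is hypothetical and not part of this file).
// The <address> field is the hex value between the second and third '.' of
// "stack-logs.<pid>.<address>.<progname>.XXXXXX.index"; an analysis tool can
// recover it and hand it to mach_vm_remap() via map_shared_memory_from_task().
#if 0
static mach_vm_address_t
parse_region_address(const char *file_name)
{
	const char *p = strchr(file_name, '.');		// skip "stack-logs"
	if (!p) return 0;
	p = strchr(p + 1, '.');						// skip "<pid>"
	if (!p) return 0;
	return (mach_vm_address_t)strtoull(p + 1, NULL, BASE16);	// stops at the next '.'
}
#endif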

static void
__prepare_to_log_stacks_stage2(void)
{
	static int stage2done = 0;

	if (! stage2done) {
		// malloc() can be called by the following, so these need to be done outside the stack_logging_lock but after the buffers have been set up.
		atexit(delete_log_files);				// atexit() can call malloc()

		// Reaping orphaned stack log files from dead processes is a nicety, to help
		// reduce wasted disk space.  But we don't *always* have to do it.  Specifically,
		// do not reap orphaned stack log files if the process name is sandboxd or taskgated,
		// or if the MallocStackLoggingNoReaping env var is set to any value other than "no"
		// (case-insensitive) or "0".  This provides multiple ways to fix
		// <rdar://problem/14409213> "processes hang if sandboxd is running with
		// MallocStackLogging enabled", which happened because there were two different
		// places down inside reap_orphaned_log_files() which called sysctl() for KERN_PROCARGS2
		// or KERN_PROC_PID, causing iteration of the process list in the kernel, which takes
		// a lock, which can't happen when processes are in a transitional state.
		bool should_reap = true;
		const char *progname = getprogname();
		if (progname && (strcmp(progname, "sandboxd") == 0 || strcmp(progname, "taskgated") == 0)) {
			should_reap = false;
		}
		if (should_reap) {
			char *noreap = getenv("MallocStackLoggingNoReaping");
			if (noreap && strcasecmp(noreap, "no") != 0 && strcmp(noreap, "0") != 0) {
				should_reap = false;
			}
		}
		if (should_reap) {
			reap_orphaned_log_files(getpid());	// this calls opendir() which calls malloc()
		}

		stage2done = 1;
	}
}


void
__disk_stack_logging_log_stack(uint32_t type_flags, uintptr_t zone_ptr, uintptr_t arg2, uintptr_t arg3, uintptr_t return_val, uint32_t num_hot_to_skip)
{
	if (!stack_logging_enable_logging || stack_logging_postponed) return;

	uintptr_t size;
	uintptr_t ptr_arg;

	// check incoming data
	if (type_flags & stack_logging_type_alloc && type_flags & stack_logging_type_dealloc) {
		size = arg3;
		ptr_arg = arg2; // the original pointer
		if (ptr_arg == return_val) return; // realloc had no effect, skipping

		if (ptr_arg == 0) { // realloc(NULL, size) same as malloc(size)
			type_flags ^= stack_logging_type_dealloc;
		} else {
			// realloc(arg1, arg2) -> result is same as free(arg1); malloc(arg2) -> result
			__disk_stack_logging_log_stack(stack_logging_type_dealloc, zone_ptr, ptr_arg, (uintptr_t)0, (uintptr_t)0, num_hot_to_skip + 1);
			__disk_stack_logging_log_stack(stack_logging_type_alloc, zone_ptr, size, (uintptr_t)0, return_val, num_hot_to_skip + 1);
			return;
		}
	}
	if (type_flags & stack_logging_type_dealloc || type_flags & stack_logging_type_vm_deallocate) {
		// For VM deallocations we need to know the size, since they don't always match the
		// VM allocations.  It would be nice if arg2 was the size, for consistency with alloc and
		// realloc events.  However we can't easily make that change because all projects
		// (malloc.c, GC auto_zone, and gmalloc) have historically put the pointer in arg2 and 0 as
		// the size in arg3.  We'd need to change all those projects in lockstep, which isn't worth
		// the trouble.
		ptr_arg = arg2;
		size = arg3;
		if (ptr_arg == 0) return; // free(nil)
	}
	if (type_flags & stack_logging_type_alloc || type_flags & stack_logging_type_vm_allocate) {
		if (return_val == 0) return; // alloc that failed
		size = arg2;
	}

	if (type_flags & stack_logging_type_vm_allocate || type_flags & stack_logging_type_vm_deallocate) {
		mach_port_t targetTask = (mach_port_t)zone_ptr;
		// For now, ignore "injections" of VM into other tasks.
		if (targetTask != mach_task_self()) return;
	}


	type_flags &= stack_logging_valid_type_flags;

	vm_address_t self_thread = (vm_address_t)_os_tsd_get_direct(__TSD_THREAD_SELF);
	static vm_address_t thread_doing_logging = 0;

	if (thread_doing_logging == self_thread) {
		// Prevent a thread from deadlocking against itself if vm_allocate() or malloc()
		// is called below here, from __prepare_to_log_stacks() or __prepare_to_log_stacks_stage2(),
		// or if we are logging an event and need to call __expand_uniquing_table() which calls
		// vm_allocate() to grow stack logging data structures.  Any such "administrative"
		// vm_allocate or malloc calls would attempt to recursively log those events.
		return;
	}

	// lock and enter
	_malloc_lock_lock(&stack_logging_lock);

	thread_doing_logging = self_thread;		// for preventing deadlock'ing on stack logging on a single thread

	// now actually begin
	__prepare_to_log_stacks();

	// since there could have been a fatal (to stack logging) error such as the log files not being created, check these variables before continuing
	if (!stack_logging_enable_logging || stack_logging_postponed) {
		thread_doing_logging = 0;
		_malloc_lock_unlock(&stack_logging_lock);
		return;
	}

	if (type_flags & stack_logging_type_alloc) {
		// Only do this second stage of setup when we first record a malloc (as opposed to a VM allocation),
		// to ensure that the malloc zone has already been created as is necessary for this.
		__prepare_to_log_stacks_stage2();
	}

	// compaction
	if (last_logged_malloc_address && (type_flags & stack_logging_type_dealloc) && STACK_LOGGING_DISGUISE(ptr_arg) == last_logged_malloc_address) {
		// *waves hand* the last allocation never occurred
		pre_write_buffers->next_free_index_buffer_offset -= (uint32_t)sizeof(stack_logging_index_event);
		last_logged_malloc_address = 0ul;

		thread_doing_logging = 0;
		_malloc_lock_unlock(&stack_logging_lock);
		return;
	}

	// gather stack
	uint32_t count;
	thread_stack_pcs(stack_buffer, STACK_LOGGING_MAX_STACK_SIZE-1, &count); // only gather up to STACK_LOGGING_MAX_STACK_SIZE-1 since we append thread id
	stack_buffer[count++] = self_thread + 1;		// stuffing thread # in the coldest slot.  Add 1 to match what the old stack logging did.
	num_hot_to_skip += 2;
	if (count <= num_hot_to_skip) {
		// Oops!  Didn't get a valid backtrace from thread_stack_pcs().
		thread_doing_logging = 0;
		_malloc_lock_unlock(&stack_logging_lock);
		return;
	}

	// unique stack in memory
	count -= num_hot_to_skip;
#if __LP64__
	mach_vm_address_t *frames = (mach_vm_address_t*)stack_buffer + num_hot_to_skip;
#else
	mach_vm_address_t frames[STACK_LOGGING_MAX_STACK_SIZE];
	uint32_t i;
	for (i = 0; i < count; i++) {
		frames[i] = stack_buffer[i+num_hot_to_skip];
	}
#endif

	uint64_t uniqueStackIdentifier = (uint64_t)(-1ll);
	while (!__enter_frames_in_table(pre_write_buffers->uniquing_table, &uniqueStackIdentifier, frames, (int32_t)count)) {
		__expand_uniquing_table(pre_write_buffers->uniquing_table);
	}

	stack_logging_index_event current_index;
	if (type_flags & stack_logging_type_alloc || type_flags & stack_logging_type_vm_allocate) {
		current_index.address = STACK_LOGGING_DISGUISE(return_val);
		current_index.argument = size;
		if (logging_use_compaction) {
			last_logged_malloc_address = current_index.address; // disguised
		}
	} else {
		current_index.address = STACK_LOGGING_DISGUISE(ptr_arg);
		current_index.argument = size;
		last_logged_malloc_address = 0ul;
	}
	current_index.offset_and_flags = STACK_LOGGING_OFFSET_AND_FLAGS(uniqueStackIdentifier, type_flags);

	//	the following line is a good debugging tool for logging each allocation event as it happens.
	//	malloc_printf("{0x%lx, %lld}\n", STACK_LOGGING_DISGUISE(current_index.address), uniqueStackIdentifier);

	// flush the data buffer to disk if necessary
	if (pre_write_buffers->next_free_index_buffer_offset + sizeof(stack_logging_index_event) >= STACK_LOGGING_BLOCK_WRITING_SIZE) {
		flush_data();
	}

	// store bytes in buffers
	memcpy(pre_write_buffers->index_buffer+pre_write_buffers->next_free_index_buffer_offset, &current_index, sizeof(stack_logging_index_event));
	pre_write_buffers->next_free_index_buffer_offset += (uint32_t)sizeof(stack_logging_index_event);

	thread_doing_logging = 0;
	_malloc_lock_unlock(&stack_logging_lock);
}
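
// A minimal stand-in logger (illustrative sketch, not compiled) showing the
// argument conventions that __disk_stack_logging_log_stack() decodes above:
// alloc-type events carry the size in arg2 and the new pointer in result;
// dealloc-type events carry the pointer in arg2; a realloc sets both the alloc
// and dealloc flags, with the old pointer in arg2 and the new size in arg3.
// stub_logger() is hypothetical; the flag constants come from stack_logging.h.
#if 0
static void
stub_logger(uint32_t type, uintptr_t zone, uintptr_t arg2, uintptr_t arg3,
			uintptr_t result, uint32_t num_hot_frames_to_skip)
{
	if ((type & stack_logging_type_alloc) && (type & stack_logging_type_dealloc)) {
		malloc_printf("realloc(%p, %lu) -> %p\n", (void *)arg2, (unsigned long)arg3, (void *)result);
	} else if (type & stack_logging_type_alloc) {
		malloc_printf("malloc(%lu) -> %p\n", (unsigned long)arg2, (void *)result);
	} else if (type & stack_logging_type_dealloc) {
		malloc_printf("free(%p)\n", (void *)arg2);
	}
}
// installed with:  malloc_logger = stub_logger;
#endif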

void
__stack_logging_fork_prepare(void) {
	_malloc_lock_lock(&stack_logging_lock);
}

void
__stack_logging_fork_parent(void) {
	_malloc_lock_unlock(&stack_logging_lock);
}

void
__stack_logging_fork_child(void) {
	malloc_logger = NULL;
	stack_logging_enable_logging = 0;
	_malloc_lock_unlock(&stack_logging_lock);
}

void
__stack_logging_early_finished(void) {
	stack_logging_finished_init = 1;
	stack_logging_postponed = 0;
}

__attribute__((visibility("hidden")))
boolean_t
__stack_logging_locked(void)
{
	bool acquired_lock = _malloc_lock_trylock(&stack_logging_lock);
	if (acquired_lock) _malloc_lock_unlock(&stack_logging_lock);
	return (acquired_lock ? false : true);
}

#pragma mark -
#pragma mark Remote Stack Log Access

#pragma mark - Design notes:

/*

 this first one will look through the index, find the "stack_identifier" (i.e. the offset in the log file), and call the third function listed here.
 extern kern_return_t __mach_stack_logging_get_frames(task_t task, mach_vm_address_t address, mach_vm_address_t *stack_frames_buffer, uint32_t max_stack_frames, uint32_t *num_frames);
 //  Gets the last allocation record about address

 if !address, will load index and iterate through (expensive)
 else will load just index, search for stack, and then use third function here to retrieve. (also expensive)
 extern kern_return_t __mach_stack_logging_enumerate_records(task_t task, mach_vm_address_t address, void enumerator(mach_stack_logging_record_t, void *), void *context);
 // Applies enumerator to all records involving address sending context as enumerator's second parameter; if !address, applies enumerator to all records

 this function will load the stack file, look for the stack, and follow up to STACK_LOGGING_FORCE_FULL_BACKTRACE_EVERY references to reconstruct.
 extern kern_return_t __mach_stack_logging_frames_for_uniqued_stack(task_t task, uint64_t stack_identifier, mach_vm_address_t *stack_frames_buffer, uint32_t max_stack_frames, uint32_t *count);
 // Given a uniqued_stack fills stack_frames_buffer

 */

#pragma mark -
#pragma mark Backtrace Uniquing Table Reading and Lookup

// This is client-side code to get a stack log from a uniquing_table.
static void
free_uniquing_table_chunks(backtrace_uniquing_table *uniquing_table) {
	table_chunk_header_t *table_chunk_header = uniquing_table->u.first_table_chunk_hdr;
	while (table_chunk_header) {
		mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)(uintptr_t)(table_chunk_header->table_chunk), table_chunk_header->table_chunk_size);
		table_chunk_header_t *next = table_chunk_header->next_table_chunk_header;
		free(table_chunk_header);
		table_chunk_header = next;
	}
}

static kern_return_t
read_uniquing_table_from_task(task_t remote_task, backtrace_uniquing_table *uniquing_table) {
	mach_vm_address_t next_address_to_read = uniquing_table->table_address;
	uint64_t remaining_size_to_read = uniquing_table->tableSize;
	const mach_vm_size_t two_gigabytes = 2ull * 1024 * 1024 * 1024;		// attempting to read 4 GB in one call fails, so try a max of 2 GB
	table_chunk_header_t **table_chunk_hdr_ptr = &(uniquing_table->u.first_table_chunk_hdr);
	*table_chunk_hdr_ptr = NULL;

	while (remaining_size_to_read > 0ull) {
		vm_address_t local_table_chunk_address = 0ul;
		mach_msg_type_number_t local_table_chunk_size = 0;

		mach_vm_size_t next_size_to_read = (remaining_size_to_read > two_gigabytes) ? two_gigabytes : remaining_size_to_read;
		while (1) {
			kern_return_t err = mach_vm_read(remote_task, next_address_to_read, next_size_to_read, &local_table_chunk_address, &local_table_chunk_size);
			if (err == KERN_SUCCESS) {
				*table_chunk_hdr_ptr = malloc(sizeof(table_chunk_header_t));
				table_chunk_header_t *table_chunk_hdr = *table_chunk_hdr_ptr;
				table_chunk_hdr->num_nodes_in_chunk = local_table_chunk_size / (sizeof(mach_vm_address_t) * 2);
				table_chunk_hdr->table_chunk = (mach_vm_address_t *)local_table_chunk_address;
				table_chunk_hdr->table_chunk_size = local_table_chunk_size;
				table_chunk_hdr->next_table_chunk_header = NULL;	// initialize it, in case it is the last chunk
				table_chunk_hdr_ptr = &(table_chunk_hdr->next_table_chunk_header);		// set up to assign next chunk to this

				next_address_to_read += local_table_chunk_size;
				remaining_size_to_read -= local_table_chunk_size;
				//fprintf(stderr, "requested %#qx, got %#x of %#qx at %p from backtrace uniquing table of target process\n", next_size_to_read, local_table_chunk_size, uniquing_table->tableSize, table_chunk_hdr);
				break;
			} else {
				//fprintf(stderr, "requested %#qx, failed\n", next_size_to_read);
				next_size_to_read /= 2;
				if (next_size_to_read <= 1024 * 1024) {
					// We couldn't even map one megabyte?  Let's call that an error...
					free_uniquing_table_chunks(uniquing_table);
					return err;
				}
			}
		}
	}
	return KERN_SUCCESS;
}

static mach_vm_address_t *
get_node_from_uniquing_table(backtrace_uniquing_table *uniquing_table, uint64_t index_pos)
{
	table_chunk_header_t *table_chunk_hdr = uniquing_table->u.first_table_chunk_hdr;
	uint64_t start_node_of_chunk = 0;
	while (table_chunk_hdr && index_pos >= start_node_of_chunk + table_chunk_hdr->num_nodes_in_chunk) {
		// Accumulate this chunk's node count *before* advancing, so the final
		// subtraction below is relative to the start of the chunk we stop in.
		start_node_of_chunk += table_chunk_hdr->num_nodes_in_chunk;
		table_chunk_hdr = table_chunk_hdr->next_table_chunk_header;
	}
	assert(table_chunk_hdr);
	uint64_t index_in_chunk = index_pos - start_node_of_chunk;
	mach_vm_address_t *node = table_chunk_hdr->table_chunk + (index_in_chunk * 2);
	return node;
}

static void
unwind_stack_from_table_index(backtrace_uniquing_table *uniquing_table, uint64_t index_pos, mach_vm_address_t *out_frames_buffer, uint32_t *out_frames_count, uint32_t max_frames)
{
	mach_vm_address_t *node = get_node_from_uniquing_table(uniquing_table, index_pos);
	uint32_t foundFrames = 0;
	if (index_pos < uniquing_table->numNodes) {
		while (foundFrames < max_frames) {
			out_frames_buffer[foundFrames++] = node[0];
			if (node[1] == (mach_vm_address_t)(-1ll)) break;
			node = get_node_from_uniquing_table(uniquing_table, node[1]);
		}
	}

	*out_frames_count = foundFrames;
}
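
// Illustrative usage (not compiled): each table node is two machine words,
// node[0] = frame PC and node[1] = parent node index (or (mach_vm_address_t)-1
// at the cold end), so unwinding a leaf index replays the backtrace hot-to-cold.
// This assumes a client-side table already populated by
// read_uniquing_table_from_task().
#if 0
static void
unwind_example(backtrace_uniquing_table *client_side_table, uint64_t leaf_index)
{
	mach_vm_address_t frames[STACK_LOGGING_MAX_STACK_SIZE];
	uint32_t count = 0;
	unwind_stack_from_table_index(client_side_table, leaf_index, frames, &count, STACK_LOGGING_MAX_STACK_SIZE);
	for (uint32_t i = 0; i < count; i++) {
		malloc_printf("frame %u: %p\n", i, (void *)(uintptr_t)frames[i]);
	}
}
#endif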

#pragma mark - caching

__attribute__((always_inline)) static inline size_t
hash_index(uint64_t address, size_t max_pos) {
	return (size_t)((address >> 2) % (max_pos-1)); // simplicity rules.
}

__attribute__((always_inline)) static inline size_t
hash_multiplier(size_t capacity, uint32_t allowed_collisions) {
	return (capacity/(allowed_collisions*2+1));
}

__attribute__((always_inline)) static inline size_t
next_hash(size_t hash, size_t multiplier, size_t capacity, uint32_t collisions) {
	hash += multiplier * collisions;
	if (hash >= capacity) hash -= capacity;
	return hash;
}

static void
transfer_node(remote_index_cache *cache, remote_index_node *old_node)
{
	uint32_t collisions = 0;
	size_t pos = hash_index(old_node->address, cache->cache_node_capacity);
	size_t multiplier = hash_multiplier(cache->cache_node_capacity, cache->collision_allowance);
	do {
		if (cache->table_memory[pos].address == old_node->address) { // hit like this shouldn't happen.
			fprintf(stderr, "impossible collision! two address==address lists! (transfer_node)\n");
			break;
		} else if (cache->table_memory[pos].address == 0) { // empty
			cache->table_memory[pos] = *old_node;
			break;
		} else {
			collisions++;
			pos = next_hash(pos, multiplier, cache->cache_node_capacity, collisions);
		}
	} while (collisions <= cache->collision_allowance);

	if (collisions > cache->collision_allowance) {
		fprintf(stderr, "reporting bad hash function! disk stack logging reader %lu bit. (transfer_node)\n", sizeof(void*)*8);
	}
}

static void
expand_cache(remote_index_cache *cache)
{
	// keep old stats
	size_t old_node_capacity = cache->cache_node_capacity;
	remote_index_node *old_table = cache->table_memory;

	// quadruple the size (the collision allowance grows by 3 per quadrupling; see the sizing notes at the top of the file)
	cache->cache_size <<= 2;
	cache->cache_node_capacity <<= 2;
	cache->collision_allowance += 3;
	cache->table_memory = (void*)calloc(cache->cache_node_capacity, sizeof(remote_index_node));

	// repopulate (expensive!)
	size_t i;
	for (i = 0; i < old_node_capacity; i++) {
		if (old_table[i].address) {
			transfer_node(cache, &old_table[i]);
		}
	}
	free(old_table);
	//	printf("cache expanded to %0.2f mb (eff: %3.0f%%, capacity: %lu, nodes: %llu, llnodes: %llu)\n", ((float)(cache->cache_size))/(1 << 20), ((float)(cache->cache_node_count)*100.0)/((float)(cache->cache_node_capacity)), cache->cache_node_capacity, cache->cache_node_count, cache->cache_llnode_count);
}

static void
insert_node(remote_index_cache *cache, uint64_t address, uint64_t index_file_offset)
{
	uint32_t collisions = 0;
	size_t pos = hash_index(address, cache->cache_node_capacity);
	size_t multiplier = hash_multiplier(cache->cache_node_capacity, cache->collision_allowance);

	while (1) {
		if (cache->table_memory[pos].address == 0ull || cache->table_memory[pos].address == address) { // hit or empty
			cache->table_memory[pos].address = address;
			cache->table_memory[pos].index_file_offset = index_file_offset;
			// Inserted it!  Break out of the loop.
			break;
		}

		collisions++;
		pos = next_hash(pos, multiplier, cache->cache_node_capacity, collisions);

		if (collisions > cache->collision_allowance) {
			expand_cache(cache);
			pos = hash_index(address, cache->cache_node_capacity);
			multiplier = hash_multiplier(cache->cache_node_capacity, cache->collision_allowance);
			collisions = 0;
		}
	}
}
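
// A lookup mirroring insert_node()'s probe sequence (illustrative sketch, not
// compiled; find_node() is hypothetical, the real reader's lookup lives in code
// beyond this section). Because inserting an existing address overwrites its
// stored offset, the cache always maps an address to its most recent index
// event, which is what "last allocation record" lookups need.
#if 0
static remote_index_node *
find_node(remote_index_cache *cache, uint64_t address)
{
	uint32_t collisions = 0;
	size_t pos = hash_index(address, cache->cache_node_capacity);
	size_t multiplier = hash_multiplier(cache->cache_node_capacity, cache->collision_allowance);
	while (collisions <= cache->collision_allowance) {
		if (cache->table_memory[pos].address == address) return &cache->table_memory[pos];
		if (cache->table_memory[pos].address == 0ull) return NULL;	// empty slot: not present
		collisions++;
		pos = next_hash(pos, multiplier, cache->cache_node_capacity, collisions);
	}
	return NULL;	// exhausted the probe sequence
}
#endif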

// Kudos to Daniel Delwood for this function.  This is called in an analysis tool process
// to share a VM region from a target process, without the target process needing to explicitly
// share the region itself via shm_open().  The VM_FLAGS_RETURN_DATA_ADDR flag is necessary
// for iOS in case the target process uses a different VM page size than the analysis tool process.
static mach_vm_address_t
map_shared_memory_from_task(task_t sourceTask, mach_vm_address_t sourceAddress, mach_vm_size_t sourceSize) {
#if TARGET_OS_EMBEDDED
	int mapRequestFlags = VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR;
	mach_vm_address_t mapRequestAddress = sourceAddress;
	mach_vm_size_t mapRequestSize = sourceSize;
#else
	// Sadly, VM_FLAGS_RETURN_DATA_ADDR isn't available to us; align everything manually.
	int mapRequestFlags = VM_FLAGS_ANYWHERE;
	mach_vm_address_t mapRequestAddress = trunc_page(sourceAddress);
	mach_vm_size_t mapRequestSize = round_page(sourceAddress + sourceSize) - mapRequestAddress;
#endif
	mach_vm_address_t mappedAddress = 0;
	vm_prot_t outCurrentProt = VM_PROT_NONE;
	vm_prot_t outMaxProt = VM_PROT_NONE;
	kern_return_t err = mach_vm_remap(mach_task_self(), &mappedAddress, mapRequestSize, 0, mapRequestFlags, sourceTask, mapRequestAddress, false, &outCurrentProt, &outMaxProt, VM_INHERIT_NONE);
	if (err != KERN_SUCCESS) {
		return 0;
	}
	return mappedAddress + (sourceAddress - mapRequestAddress);
}
1311
1312static kern_return_t
1313update_cache_for_file_streams(remote_task_file_streams *descriptors)
1314{
1315	remote_index_cache *cache = descriptors->cache;
1316
	// create from scratch if necessary.
	if (!cache) {
		descriptors->cache = cache = (remote_index_cache*)calloc((size_t)1, sizeof(remote_index_cache));
		cache->cache_node_capacity = 1 << 14;
		cache->collision_allowance = 17;
		cache->last_index_file_offset = 0;
		cache->cache_size = cache->cache_node_capacity*sizeof(remote_index_node);
		cache->table_memory = (void*)calloc(cache->cache_node_capacity, sizeof(remote_index_node));

		cache->shmem = map_shared_memory_from_task(descriptors->remote_task, descriptors->remote_stack_buffer_shared_memory_address, sizeof(stack_buffer_shared_memory));
		if (! cache->shmem) {
			// failed to connect to the shared memory region; warn and continue.
			_malloc_printf(ASL_LEVEL_INFO, "warning: unable to map shared memory from %llx in target process %d; no stack backtraces will be available.\n", descriptors->remote_stack_buffer_shared_memory_address, descriptors->remote_pid);
		}
	}

	// Suspend the target and see how much updating there is to do; there are three scenarios, listed below.
	bool update_snapshot = false;
	if (descriptors->remote_task != mach_task_self()) {
		task_suspend(descriptors->remote_task);
	}

	struct stat file_statistics;
	fstat(fileno(descriptors->index_file_stream), &file_statistics);
	size_t read_size = (descriptors->task_is_64_bit ? sizeof(stack_logging_index_event64) : sizeof(stack_logging_index_event32));
	uint64_t read_this_update = 0;

	// The number of new index records (delta_indices) is computed differently in three cases:
	// 1. There is no shared memory (or we couldn't connect); diff last_index_file_offset against the file size.
	// 2. The only updates have been in shared memory; the disk file didn't change at all. delta_indices
	//    should be zero, so scan the snapshot only.
	// 3. The updates have been flushed to disk, meaning there is most likely new data on disk that wasn't
	//    already read from shared memory; correct delta_indices for the pre-scanned amount, then read the
	//    new data from both disk and shmem.
	uint64_t delta_indices = (file_statistics.st_size - cache->last_index_file_offset) / read_size;
	uint32_t last_snapshot_scan_index = 0;
	if (delta_indices && cache->shmem) {
		// case 3: account for the records already scanned from the snapshot, then recalculate
		cache->last_index_file_offset += cache->snapshot.next_free_index_buffer_offset;
		delta_indices = (file_statistics.st_size - cache->last_index_file_offset) / read_size;
		update_snapshot = true;
	} else if (cache->shmem) {
		// case 2: record how far we've already scanned into the snapshot so we don't rescan those entries.
		last_snapshot_scan_index = cache->snapshot.next_free_index_buffer_offset / (uint32_t)read_size;
	}

	// No update necessary for the file; check whether a snapshot is needed.
	if (delta_indices == 0) {
		if (cache->shmem && !update_snapshot) {
			update_snapshot = (cache->shmem->next_free_index_buffer_offset != cache->snapshot.next_free_index_buffer_offset);
		}
	}

	// If a snapshot is necessary, memcpy from the frozen remote process's memory.
	// Note: there were two ways to do this - spin lock or suspend. Suspending allows us to
	// analyze processes even if they were artificially suspended. With a lock, there'd be
	// worry that the target was suspended with the lock taken.
	kern_return_t err = KERN_SUCCESS;
	if (update_snapshot) {
		memcpy(&cache->snapshot, cache->shmem, sizeof(stack_buffer_shared_memory));
		// also need to update our version of the remote uniquing table
		vm_address_t local_uniquing_address = 0ul;
		mach_msg_type_number_t local_uniquing_size = 0;
		mach_vm_size_t desired_size = round_page(sizeof(backtrace_uniquing_table));
		if ((err = mach_vm_read(descriptors->remote_task, cache->shmem->uniquing_table, desired_size, &local_uniquing_address, &local_uniquing_size)) != KERN_SUCCESS
			|| local_uniquing_size != desired_size) {
			fprintf(stderr, "error while attempting to mach_vm_read remote stack uniquing table (%d): %s\n", err, mach_error_string(err));
		} else {
			// the mach_vm_read was successful, so acquire the uniquing table

			// need to re-read the table, so deallocate the current memory
			free_uniquing_table_chunks(&cache->uniquing_table_snapshot);

			// The following line copies the uniquing table structure data, but the actual uniquing table memory is invalid
			// since it's a pointer from the remote process.
			cache->uniquing_table_snapshot = *((backtrace_uniquing_table*)local_uniquing_address);

			// Read the uniquing table memory from the target process.
			err = read_uniquing_table_from_task(descriptors->remote_task, &(cache->uniquing_table_snapshot));
			if (err) {
				fprintf(stderr, "error while attempting to mach_vm_read remote stack uniquing table contents (%d): %s\n", err, mach_error_string(err));
			}
			// Check the error status below, after further deallocating and resuming the target task.

			mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)local_uniquing_address, (mach_vm_size_t)local_uniquing_size);
		}
	}

	// resume
	if (descriptors->remote_task != mach_task_self()) {
		task_resume(descriptors->remote_task);
	}

	if (err != KERN_SUCCESS) {
		// TODO: further clean up allocated resources, and try to prevent printing numerous identical "out of memory" errors (maybe we should abort?).
		return err;
	}

	if (!update_snapshot && delta_indices == 0) return KERN_SUCCESS; // absolutely no updating needed.

	FILE *the_index = (descriptors->index_file_stream);

	// prepare for the read; target process could be 32 or 64 bit.

	stack_logging_index_event32 *target_32_index = NULL;
	stack_logging_index_event64 *target_64_index = NULL;

	// perform the update from the file
	uint32_t i;
	if (delta_indices) {
		char bufferSpace[4096]; // 4 KB
		target_32_index = (stack_logging_index_event32*)bufferSpace;
		target_64_index = (stack_logging_index_event64*)bufferSpace;
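		// Both pointers alias the same buffer; which one is dereferenced below depends on the
		// bitness of the target task.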
		size_t number_slots = (size_t)(4096/read_size);

		size_t read_count = 0;
		if (fseeko(the_index, (off_t)(cache->last_index_file_offset), SEEK_SET)) {
			fprintf(stderr, "error while attempting to cache information from remote stack index file. (update_cache_for_file_streams)\n");
		}
		off_t current_index_position = cache->last_index_file_offset;
		do {
			number_slots = (size_t)MIN(delta_indices - read_this_update, number_slots);
			read_count = fread(bufferSpace, read_size, number_slots, the_index);
			if (descriptors->task_is_64_bit) {
				for (i = 0; i < read_count; i++) {
					insert_node(cache, STACK_LOGGING_DISGUISE(target_64_index[i].address), (uint64_t)current_index_position);
					read_this_update++;
					current_index_position += read_size;
				}
			} else {
				for (i = 0; i < read_count; i++) {
					insert_node(cache, (mach_vm_address_t)STACK_LOGGING_DISGUISE(target_32_index[i].address), (uint64_t)current_index_position);
					read_this_update++;
					current_index_position += read_size;
				}
			}
		} while (read_count);

		if (read_this_update < delta_indices) {
			fprintf(stderr, "insufficient data in remote stack index file; expected more records.\n");
		}
		cache->last_index_file_offset += read_this_update * read_size;
	}

	if (update_snapshot) {
		target_32_index = (stack_logging_index_event32*)(cache->snapshot.index_buffer);
		target_64_index = (stack_logging_index_event64*)(cache->snapshot.index_buffer);

		uint32_t free_snapshot_scan_index = cache->snapshot.next_free_index_buffer_offset / (uint32_t)read_size;
		off_t current_index_position = cache->snapshot.start_index_offset;
		if (descriptors->task_is_64_bit) {
			for (i = last_snapshot_scan_index; i < free_snapshot_scan_index; i++) {
				insert_node(cache, STACK_LOGGING_DISGUISE(target_64_index[i].address), (uint64_t)(current_index_position + (i * read_size)));
			}
		} else {
			for (i = last_snapshot_scan_index; i < free_snapshot_scan_index; i++) {
				insert_node(cache, (mach_vm_address_t)STACK_LOGGING_DISGUISE(target_32_index[i].address), (uint64_t)(current_index_position + (i * read_size)));
			}
		}
	}

	return KERN_SUCCESS;
}

static void
destroy_cache_for_file_streams(remote_task_file_streams *descriptors)
{
	if (descriptors->cache->shmem) {
		munmap(descriptors->cache->shmem, sizeof(stack_buffer_shared_memory));
	}
	free(descriptors->cache->table_memory);
	free(descriptors->cache);
	descriptors->cache = NULL;
}

#pragma mark - internal

// In the stack log analysis process, find the stack logging file for target process <pid>
// by scanning the given directory for entries with names of the form "stack-logs.<pid>.*.index".
// If we find such an entry then open that stack logging file.
static FILE *
open_log_file_from_directory(pid_t pid, char* directory, remote_task_file_streams *streams)
{
	DIR *dp;
	struct dirent *entry;
	char prefix_and_pid[PATH_MAX];
	char pathname[PATH_MAX];
	FILE *file = NULL;

	// Check for access permissions in case we're sandbox'ed.
	if (access(directory, R_OK | X_OK) == 0 && (dp = opendir(directory)) != NULL) {
		// It's OK to use snprintf in this routine since it is only called by clients of stack
		// logging (analysis tools), not from the malloc logging path itself, so calls to malloc are OK.
		snprintf(prefix_and_pid, (size_t)PATH_MAX, "%s%d.", stack_log_file_base_name, pid);	// make sure to use "%s%d." rather than just "%s%d" to match the whole pid
		size_t prefix_and_pid_length = strlen(prefix_and_pid);

		while ( (entry = readdir(dp)) != NULL ) {
			if ( strncmp( entry->d_name, prefix_and_pid, prefix_and_pid_length) == 0 ) {
				snprintf(pathname, (size_t)PATH_MAX, "%s/%s", directory, entry->d_name);
				file = fopen(pathname, "r");

				// The hex address of the stack buffer shared memory region in the target process
				// is given in the stack log file name, following the pid and a period.
				streams->remote_stack_buffer_shared_memory_address = strtoll(entry->d_name + prefix_and_pid_length, NULL, 16);
				break;
			}
		}
		closedir(dp);
	}
	streams->index_file_stream = file;
	return file;
}

// Read the launch data of the target process from the kernel to find the
// value of the environment variable named env_var_name.  Since this function
// uses alloca() to temporarily allocate space for data copied from the kernel,
// and we don't want to malloc space so that this can be called from malloc stack
// logging code in the target process as well, we copy the result into the
// env_var_value_buf of length buf_length supplied by the caller.
static bool
getenv_from_process(pid_t pid, char *env_var_name, char *env_var_value_buf, size_t buf_length)
{
	env_var_value_buf[0] = '\0';

	// Just call getenv() if pid is the current process, partly to avoid the sysctl()
	// call which can cause system deadlock (<rdar://problem/14409213> "processes hang
	// if sandboxd is running with MallocStackLogging enabled").  But it probably
	// doesn't completely fix that since there is another sysctl() call in is_process_running()
	// when checking to see if the process corresponding to an existing stack log file
	// is still running.
	if (pid == getpid()) {
		char *env_var_value = getenv(env_var_name);
		if (! env_var_value) {
			return false;
		} else {
			strlcpy(env_var_value_buf, env_var_value, buf_length);
			return true;
		}
	}

	int mib[3];
	size_t argbufSize = 0;	// Must initialize this to 0: KERN_ARGMAX stores an int, so on 64-bit the high bytes of this size_t would otherwise be left uninitialized.

	// First get the maximum arguments size, to determine the necessary buffer size.
	mib[0] = CTL_KERN;
	mib[1] = KERN_ARGMAX;

	size_t size = sizeof(argbufSize);
	int ret = sysctl(mib, 2, &argbufSize, &size, NULL, 0);
	if (ret != 0) return false;

	mib[0] = CTL_KERN;
	mib[1] = KERN_PROCARGS2;	// The older KERN_PROCARGS is deprecated.
	mib[2] = pid;

	char *argbuf = (char *) alloca(argbufSize);
	ret = sysctl(mib, 3, argbuf, &argbufSize, (void*)NULL, 0);
	if (ret != 0) return false;
	argbuf[argbufSize - 1] = '\0';	// make sure the buffer is NUL-terminated
	char *p = argbuf;
	char *endp = &argbuf[argbufSize];

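	// The KERN_PROCARGS2 buffer is laid out as: the argument count (a 4-byte int), then the saved
	// exec path (NUL-terminated, possibly padded with extra NULs), then the argv strings, and
	// finally the "NAME=value" environment strings, each NUL-terminated.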
	// Skip over argc, which is always 4 bytes long (int-sized), even on 64-bit architectures.
	int argumentCount = *((int*)argbuf);
	p += sizeof(argumentCount);

	// Skip over arguments, using the argumentCount read from the start of argbuf.
	argumentCount++;	// increment argumentCount to also skip the saved exec path, which comes first
	for (int argumentNum = 0; argumentNum < argumentCount && p < endp; argumentNum++) {
		while (p < endp && *p != '\0') p++;
		while (p < endp && *p == '\0') p++;		// the saved exec path sometimes has multiple NULs
	}

	size_t env_var_name_length = strlen(env_var_name);

	// Examine environment variables.
	while ((p + env_var_name_length + 1) < endp && *p != '\0') {
		if (strncmp(p, env_var_name, env_var_name_length) == 0 && p[env_var_name_length] == '=') {
			p += env_var_name_length + 1;
			strlcpy(env_var_value_buf, p, buf_length);
			//_malloc_printf(ASL_LEVEL_INFO, "found env var %s='%s'\n", env_var_name, env_var_value_buf);
			return true;
		}
		while (p < endp && *p != '\0') p++;
		p++;
	}
	return false;
}

static FILE *
open_log_file(pid_t pid, remote_task_file_streams *streams)
{
	static bool already_reaped = false;
	if (! already_reaped) {
		reap_orphaned_log_files(pid);	// reap any left-over log files (for non-existent processes, but not for this analysis process)
		already_reaped = true;
	}

	// Since we're searching for the log file here, not creating it, we can search in any order we want.
	// So look at MallocStackLoggingDirectory last since that is almost never set.
	FILE *file = open_log_file_from_directory(pid, _PATH_TMP, streams);
	if (! file) {
		char *env_var_names[] = { "TMPDIR", "MallocStackLoggingDirectory" };
		for (unsigned i = 0; i < sizeof(env_var_names) / sizeof(char *); i++) {
			char directory[PATH_MAX];
			bool success = getenv_from_process(pid, env_var_names[i], directory, sizeof(directory));
			if (success) {
				file = open_log_file_from_directory(pid, directory, streams);
				if (file) break;
			}
		}
	}
	return file;
}

static remote_task_file_streams*
retain_file_streams_for_task(task_t task)
{
	if (task == MACH_PORT_NULL) return NULL;

	_malloc_lock_lock(&remote_fd_list_lock);

	// see if they're already in use
	uint32_t i = 0;
	for (i = 0; i < remote_task_fd_count; i++) {
		if (remote_fds[i].remote_task == task) {
			remote_fds[i].in_use_count++;
			_malloc_lock_unlock(&remote_fd_list_lock);
			return &remote_fds[i];
		}
	}

	// Not already open. If the streams table is full, recycle a slot whose streams are no longer in use.
	uint32_t failures = 0;
	if (remote_task_fd_count == STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED) {
		while (remote_fds[next_remote_task_fd].in_use_count > 0) {
			next_remote_task_fd++;
			if (next_remote_task_fd == STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED) next_remote_task_fd = 0;
			failures++;
			if (failures >= STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED) {
				_malloc_lock_unlock(&remote_fd_list_lock);
				return NULL;
			}
		}
		fclose(remote_fds[next_remote_task_fd].index_file_stream);
		destroy_cache_for_file_streams(&remote_fds[next_remote_task_fd]);
	}

	pid_t pid;
	kern_return_t err = pid_for_task(task, &pid);
	if (err != KERN_SUCCESS) {
		_malloc_lock_unlock(&remote_fd_list_lock);
		return NULL;
	}

	remote_task_file_streams *this_task_streams = &remote_fds[next_remote_task_fd];

	open_log_file(pid, this_task_streams);

	// check if opens failed
	if (this_task_streams->index_file_stream == NULL) {
		_malloc_lock_unlock(&remote_fd_list_lock);
		return NULL;
	}

	// check if target pid is running 64-bit
	int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, pid };
	struct kinfo_proc processInfo;
	size_t bufsize = sizeof(processInfo);
	if (sysctl(mib, (unsigned)(sizeof(mib)/sizeof(int)), &processInfo, &bufsize, NULL, (size_t)0) == 0 && bufsize > 0) {
		this_task_streams->task_is_64_bit = processInfo.kp_proc.p_flag & P_LP64;
	} else {
		this_task_streams->task_is_64_bit = 0;
	}

	// otherwise set vars and go
	this_task_streams->in_use_count = 1;
	this_task_streams->remote_task = task;
	this_task_streams->remote_pid = pid;
	next_remote_task_fd++;
	if (next_remote_task_fd == STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED) next_remote_task_fd = 0;
	remote_task_fd_count = MIN(remote_task_fd_count + 1, STACK_LOGGING_MAX_SIMUL_REMOTE_TASKS_INSPECTED);

	_malloc_lock_unlock(&remote_fd_list_lock);
	return this_task_streams;
}

static void
release_file_streams_for_task(task_t task)
{
	_malloc_lock_lock(&remote_fd_list_lock);

	// decrement in-use count
	uint32_t i = 0;
	for (i = 0; i < remote_task_fd_count; i++) {
		if (remote_fds[i].remote_task == task) {
			remote_fds[i].in_use_count--;
			break;
		}
	}

	_malloc_lock_unlock(&remote_fd_list_lock);
}

#pragma mark - extern

// This function is no longer used.  It was a hack that required an analysis tool process
// to read the target task's __stack_log_file_path__ variable then pass the value of
// that to this function.  This is now handled automatically all within this file, by
// having the stack log reading code read the environment variables of the target process.
// This function should be removed once no clients are calling it.
kern_return_t
__mach_stack_logging_set_file_path(task_t task, char* file_path)
{
	return KERN_SUCCESS;
}

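/*
 * A minimal usage sketch from an analysis tool (hypothetical caller; assumes `task` is a task
 * port with read access and `block_address` is the address of a malloc'd block in that task):
 *
 *	mach_vm_address_t frames[STACK_LOGGING_MAX_STACK_SIZE];
 *	uint32_t frame_count = 0;
 *	if (__mach_stack_logging_get_frames(task, block_address, frames, STACK_LOGGING_MAX_STACK_SIZE, &frame_count) == KERN_SUCCESS) {
 *		// frames[0..frame_count-1] are the return addresses recorded when the block was allocated.
 *	}
 */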
kern_return_t
__mach_stack_logging_get_frames(task_t task, mach_vm_address_t address, mach_vm_address_t *stack_frames_buffer, uint32_t max_stack_frames, uint32_t *count)
{
	remote_task_file_streams *remote_fd = retain_file_streams_for_task(task);
	if (remote_fd == NULL) {
		return KERN_FAILURE;
	}

	kern_return_t err = update_cache_for_file_streams(remote_fd);
	if (err != KERN_SUCCESS) {
		release_file_streams_for_task(task);
		return err;
	}

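	// Look up the address in the index cache using the same probe sequence (hash_index/next_hash)
	// that insert_node() used; once the probe count exceeds the collision allowance the entry
	// cannot be present, so give up.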
	uint32_t collisions = 0;
	size_t hash = hash_index(address, remote_fd->cache->cache_node_capacity);
	size_t multiplier = hash_multiplier(remote_fd->cache->cache_node_capacity, remote_fd->cache->collision_allowance);
	uint64_t located_file_position = 0;

	bool found = false;
	do {
		if (remote_fd->cache->table_memory[hash].address == address) { // hit!
			located_file_position = remote_fd->cache->table_memory[hash].index_file_offset;
			found = true;
			break;
		} else if (remote_fd->cache->table_memory[hash].address == 0ull) { // failure!
			break;
		}

		collisions++;
		hash = next_hash(hash, multiplier, remote_fd->cache->cache_node_capacity, collisions);

	} while (collisions <= remote_fd->cache->collision_allowance);

	if (found) {
		// prepare for the read; target process could be 32 or 64 bit.
		stack_logging_index_event32 *target_32_index = NULL;
		stack_logging_index_event64 *target_64_index = NULL;

		if (located_file_position >= remote_fd->cache->last_index_file_offset) {
			// must be in shared memory
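			// Records at or beyond the last flushed file offset haven't hit the disk yet; their
			// bytes live in the snapshot's index_buffer at (file offset - start_index_offset).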
			if (remote_fd->cache->shmem) {
				if (remote_fd->task_is_64_bit) {
					target_64_index = (stack_logging_index_event64*)(remote_fd->cache->snapshot.index_buffer + (located_file_position - remote_fd->cache->snapshot.start_index_offset));
					located_file_position = STACK_LOGGING_OFFSET(target_64_index->offset_and_flags);
				} else {
					target_32_index = (stack_logging_index_event32*)(remote_fd->cache->snapshot.index_buffer + (located_file_position - remote_fd->cache->snapshot.start_index_offset));
					located_file_position = STACK_LOGGING_OFFSET(target_32_index->offset_and_flags);
				}
			} else {
				found = false;
			}

		} else {
			// it's written to disk
			char bufferSpace[128];

			size_t read_size = (remote_fd->task_is_64_bit ? sizeof(stack_logging_index_event64) : sizeof(stack_logging_index_event32));
			fseeko(remote_fd->index_file_stream, (off_t)located_file_position, SEEK_SET);
			size_t read_count = fread(bufferSpace, read_size, (size_t)1, remote_fd->index_file_stream);
			if (read_count) {
				if (remote_fd->task_is_64_bit) {
					target_64_index = (stack_logging_index_event64*)bufferSpace;
					located_file_position = STACK_LOGGING_OFFSET(target_64_index->offset_and_flags);
				} else {
					target_32_index = (stack_logging_index_event32*)bufferSpace;
					located_file_position = STACK_LOGGING_OFFSET(target_32_index->offset_and_flags);
				}
			} else {
				found = false;
			}
		}
	}

	release_file_streams_for_task(task);

	if (!found) {
		return KERN_FAILURE;
	}

	return __mach_stack_logging_frames_for_uniqued_stack(task, located_file_position, stack_frames_buffer, max_stack_frames, count);
}


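/*
 * A minimal usage sketch (hypothetical caller; the enumerator callback and the acquisition of
 * the task port are up to the analysis tool):
 *
 *	static void print_record(mach_stack_logging_record_t record, void *context)
 *	{
 *		printf("event flags 0x%x at address 0x%llx (stack id 0x%llx)\n",
 *				record.type_flags, record.address, record.stack_identifier);
 *	}
 *	...
 *	// Pass address == 0 to enumerate every record logged by the task.
 *	kern_return_t err = __mach_stack_logging_enumerate_records(task, 0, print_record, NULL);
 */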
kern_return_t
__mach_stack_logging_enumerate_records(task_t task, mach_vm_address_t address, void enumerator(mach_stack_logging_record_t, void *), void *context)
{
	remote_task_file_streams *remote_fd = retain_file_streams_for_task(task);
	if (remote_fd == NULL) {
		return KERN_FAILURE;
	}

	bool reading_all_addresses = (address == 0);
	mach_stack_logging_record_t pass_record;
	kern_return_t err = KERN_SUCCESS;

	// update (read index file once and only once)
	err = update_cache_for_file_streams(remote_fd);
	if (err != KERN_SUCCESS) {
		release_file_streams_for_task(task);
		return err;
	}

	FILE *the_index = (remote_fd->index_file_stream);

	// prepare for the read; target process could be 32 or 64 bit.
	char bufferSpace[2048]; // 2 KB
	stack_logging_index_event32 *target_32_index = (stack_logging_index_event32*)bufferSpace;
	stack_logging_index_event64 *target_64_index = (stack_logging_index_event64*)bufferSpace;
	uint32_t target_addr_32 = (uint32_t)STACK_LOGGING_DISGUISE((uint32_t)address);
	uint64_t target_addr_64 = STACK_LOGGING_DISGUISE((uint64_t)address);
	size_t read_size = (remote_fd->task_is_64_bit ? sizeof(stack_logging_index_event64) : sizeof(stack_logging_index_event32));
	size_t number_slots = (size_t)(2048/read_size);
	uint64_t total_slots = remote_fd->cache->last_index_file_offset / read_size;
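	// total_slots caps file reading at the records that update_cache_for_file_streams() has already
	// seen; anything past that point is picked up from the shared memory snapshot inside the loop.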

	// perform the search
	size_t read_count = 0;
	int64_t current_file_offset = 0;
	uint32_t i;
	do {
		// At this point we need to read index events: read them from the file until it's necessary to
		// grab them from the shared memory snapshot, cropping file reading to the point where we last scanned.
		number_slots = (size_t)MIN(number_slots, total_slots);

		// if out of file to read (as of the time we entered this function), try to use the shared memory snapshot
		if (number_slots == 0) {
			if (remote_fd->cache->shmem && remote_fd->cache->snapshot.start_index_offset + remote_fd->cache->snapshot.next_free_index_buffer_offset > (uint64_t)current_file_offset) {
				// use shared memory
				target_32_index = (stack_logging_index_event32*)remote_fd->cache->snapshot.index_buffer;
				target_64_index = (stack_logging_index_event64*)remote_fd->cache->snapshot.index_buffer;
				read_count = (uint32_t)(remote_fd->cache->snapshot.start_index_offset + remote_fd->cache->snapshot.next_free_index_buffer_offset - current_file_offset) / read_size;
				current_file_offset += read_count * read_size;
			} else {
				break;
			}
		} else {
			// get and save index (enumerator could modify)
			fseeko(the_index, current_file_offset, SEEK_SET);
			read_count = fread(bufferSpace, read_size, number_slots, the_index);
			current_file_offset = ftello(the_index);
			total_slots -= read_count;
		}

		if (remote_fd->task_is_64_bit) {
			for (i = 0; i < read_count; i++) {
				if (reading_all_addresses || target_64_index[i].address == target_addr_64) {
					pass_record.address = STACK_LOGGING_DISGUISE(target_64_index[i].address);
					pass_record.argument = target_64_index[i].argument;
					pass_record.stack_identifier = STACK_LOGGING_OFFSET(target_64_index[i].offset_and_flags);
					pass_record.type_flags = STACK_LOGGING_FLAGS_AND_USER_TAG(target_64_index[i].offset_and_flags);
					enumerator(pass_record, context);
				}
			}
		} else {
			for (i = 0; i < read_count; i++) {
				if (reading_all_addresses || target_32_index[i].address == target_addr_32) {
					pass_record.address = STACK_LOGGING_DISGUISE(target_32_index[i].address);
					pass_record.argument = target_32_index[i].argument;
					pass_record.stack_identifier = STACK_LOGGING_OFFSET(target_32_index[i].offset_and_flags);
					pass_record.type_flags = STACK_LOGGING_FLAGS_AND_USER_TAG(target_32_index[i].offset_and_flags);
					enumerator(pass_record, context);
				}
			}
		}
	} while (read_count);

	release_file_streams_for_task(task);
	return err;
}


kern_return_t
__mach_stack_logging_frames_for_uniqued_stack(task_t task, uint64_t stack_identifier, mach_vm_address_t *stack_frames_buffer, uint32_t max_stack_frames, uint32_t *count)
{
	remote_task_file_streams *remote_fd = retain_file_streams_for_task(task);
	if (remote_fd == NULL) return KERN_FAILURE;

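	// Walk the uniqued backtrace out of our local snapshot of the target's uniquing table;
	// unwind_stack_from_table_index() sets *count to the number of frames written to stack_frames_buffer.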
	unwind_stack_from_table_index(&remote_fd->cache->uniquing_table_snapshot, stack_identifier, stack_frames_buffer, count, max_stack_frames);

	release_file_streams_for_task(task);

	if (*count) return KERN_SUCCESS;
	else return KERN_FAILURE;
}


#ifdef TEST_DISK_STACK_LOGGING

// cc -o stack_logging_disk stack_logging_disk.c -DTEST_DISK_STACK_LOGGING

#include <sys/wait.h>

int
main()
{
	int status;
	int i;
	size_t total_globals = 0ul;

	fprintf(stderr, "master test process is %d\n", getpid());
	fprintf(stderr, "sizeof pre_write_buffers: %lu\n", sizeof(pre_write_buffers)); total_globals += sizeof(pre_write_buffers);
	fprintf(stderr, "sizeof stack_buffer: %lu\n", sizeof(stack_buffer)); total_globals += sizeof(stack_buffer);
	fprintf(stderr, "sizeof last_logged_malloc_address: %lu\n", sizeof(last_logged_malloc_address)); total_globals += sizeof(last_logged_malloc_address);
	fprintf(stderr, "sizeof stack_log_file_base_name: %lu\n", sizeof(stack_log_file_base_name)); total_globals += sizeof(stack_log_file_base_name);
	fprintf(stderr, "sizeof stack_log_file_suffix: %lu\n", sizeof(stack_log_file_suffix)); total_globals += sizeof(stack_log_file_suffix);
	fprintf(stderr, "sizeof __stack_log_file_path__ (index_file_path): %lu\n", (size_t)PATH_MAX); total_globals += (size_t)PATH_MAX;
	fprintf(stderr, "sizeof index_file_descriptor: %lu\n", sizeof(index_file_descriptor)); total_globals += sizeof(index_file_descriptor);
	fprintf(stderr, "sizeof remote_fds: %lu\n", sizeof(remote_fds)); total_globals += sizeof(remote_fds);
	fprintf(stderr, "sizeof next_remote_task_fd: %lu\n", sizeof(next_remote_task_fd)); total_globals += sizeof(next_remote_task_fd);
	fprintf(stderr, "sizeof remote_task_fd_count: %lu\n", sizeof(remote_task_fd_count)); total_globals += sizeof(remote_task_fd_count);
	fprintf(stderr, "sizeof remote_fd_list_lock: %lu\n", sizeof(remote_fd_list_lock)); total_globals += sizeof(remote_fd_list_lock);
	fprintf(stderr, "sizeof logging_use_compaction: %lu\n", sizeof(logging_use_compaction)); total_globals += sizeof(logging_use_compaction);

	fprintf(stderr, "size of all global data: %lu\n", total_globals);

	create_log_file();

	// create a few child processes and exit them cleanly so their logs should get cleaned up
	fprintf(stderr, "\ncreating child processes and exiting cleanly\n");
	for (i = 0; i < 3; i++) {
		if (fork() == 0) {
			fprintf(stderr, "\nin child process %d\n", getpid());
			create_log_file();
			fprintf(stderr, "exiting child process %d\n", getpid());
			exit(1);
		}
		wait(&status);
	}

	// create a few child processes and abruptly _exit them, leaving their logs around
	fprintf(stderr, "\ncreating child processes and exiting abruptly, leaving logs around\n");
	for (i = 0; i < 3; i++) {
		if (fork() == 0) {
			fprintf(stderr, "\nin child process %d\n", getpid());
			create_log_file();
			fprintf(stderr, "exiting child process %d\n", getpid());
			_exit(1);
		}
		wait(&status);
	}

	// this should reap any remaining logs
	fprintf(stderr, "\nexiting master test process %d\n", getpid());
	delete_log_files();
	return 0;
}

#endif

/* vim: set noet:ts=4:sw=4:cindent: */