1/*-
2 * Copyright (c) 2005 Robert N. M. Watson
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <sys/types.h>
30#include <sys/socket.h>
31#include <sys/stdint.h>
32#include <sys/time.h>
33#include <sys/utsname.h>
34#include <sys/wait.h>
35
36#include <netinet/in.h>
37
38#include <err.h>
39#include <errno.h>
40#include <pthread.h>
41#include <signal.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46
47/*
48 * juggle is a simple IPC/context switch performance test, which works on
49 * pairs of file descriptors of various types.  In various runs, it considers
50 * the cost of bouncing a message synchronously across the descriptor pair,
51 * either in the same thread, two different threads, or two different
52 * processes.  Timing measurements for each series of I/O's are reported, but
 * the first measurement in each series is discarded as "warmup" on the IPC
54 * primitive.  Variations on the test permit for pipelining, or the insertion
55 * of more than one packet into the stream at a time, intended to permit
56 * greater parallelism, hopefully allowing performance numbers to reflect
57 * use of available parallelism, and/or intelligence in context switching to
58 * avoid premature switching when multiple messages are queued.
59 */
60
61/*
62 * The UDP test uses UDP over the loopback interface.  Two arbitrary but
63 * fixed port numbers.
64 */
65#define	UDP_PORT1	2020
66#define	UDP_PORT2	2021
67
68/*
69 * Size of each message.  Must be smaller than the socket buffer or pipe
70 * buffer maximum size, as we want to send it atomically without blocking.
71 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
72 * messages into the send queue.
73 */
74#define	MESSAGELEN	128
75
76/*
77 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
78 * fd1.  By counting in cycles, we allow the master thread or process to
79 * perform timing without explicitly synchronizing with the secondary thread
80 * or process.
81 */
82#define	NUMCYCLES	1024
83
84/*
85 * Number of times to run each test.
86 */
87#define	LOOPS		10
88
89/*
 * Number of in-flight messages per cycle.  In adjusting this value, be
91 * careful not to exceed the socket/etc buffer depth, or messages may be lost
92 * or result in blocking.
93 */
94#define	PIPELINE_MAX	4
95
96/*
97 * As in all programs, steal timespecsub() from time.h.
98 */
/* *vvp -= *uvp; normalize tv_nsec into [0, 1e9).  do/while(0) keeps it statement-safe. */
#define timespecsub(vvp, uvp)                                           \
        do {                                                            \
                (vvp)->tv_sec -= (uvp)->tv_sec;                         \
                (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
                if ((vvp)->tv_nsec < 0) {                               \
                        (vvp)->tv_sec--;                                \
                        (vvp)->tv_nsec += 1000000000;                   \
                }                                                       \
        } while (0)
108
109static int
110udp_create(int *fd1p, int *fd2p)
111{
112	struct sockaddr_in sin1, sin2;
113	int sock1, sock2;
114
115	sock1 = socket(PF_INET, SOCK_DGRAM, 0);
116	if (sock1 == -1)
117		return (-1);
118
119	sock2 = socket(PF_INET, SOCK_DGRAM, 0);
120	if (sock2 == -1) {
121		close(sock1);
122		return (-1);
123	}
124
125	bzero(&sin1, sizeof(sin1));
126	sin1.sin_len = sizeof(sin1);
127	sin1.sin_family = AF_INET;
128	sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
129	sin1.sin_port = htons(UDP_PORT1);
130
131	bzero(&sin2, sizeof(sin2));
132	sin2.sin_len = sizeof(sin2);
133	sin2.sin_family = AF_INET;
134	sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
135	sin2.sin_port = htons(UDP_PORT2);
136
137	if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
138		close(sock1);
139		close(sock2);
140		return (-1);
141	}
142
143	if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
144		close(sock1);
145		close(sock2);
146		return (-1);
147	}
148
149	if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
150		close(sock1);
151		close(sock2);
152		return (-1);
153	}
154
155	if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
156		close(sock1);
157		close(sock2);
158		return (-1);
159	}
160
161	*fd1p = sock1;
162	*fd2p = sock2;
163
164	return (0);
165}
166
/*
 * Wrap pipe(2): on success store the read end in *fd1p and the write
 * end in *fd2p and return 0; otherwise return -1 with errno set.
 */
static int
pipe_create(int *fd1p, int *fd2p)
{
	int pfd[2];

	if (pipe(pfd) == -1)
		return (-1);

	*fd1p = pfd[0];
	*fd2p = pfd[1];
	return (0);
}
180
/*
 * Build a connected local-domain datagram socketpair; the two
 * descriptors are returned through *fd1p and *fd2p.  Returns 0 on
 * success, -1 with errno set on failure.
 */
static int
socketpairdgram_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];
	return (0);
}
194
/*
 * Build a connected local-domain stream socketpair; the two
 * descriptors are returned through *fd1p and *fd2p.  Returns 0 on
 * success, -1 with errno set on failure.
 */
static int
socketpairstream_create(int *fd1p, int *fd2p)
{
	int sv[2];

	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, sv) == -1)
		return (-1);

	*fd1p = sv[0];
	*fd2p = sv[1];
	return (0);
}
208
209static int
210message_send(int s)
211{
212	u_char buffer[MESSAGELEN];
213	ssize_t len;
214
215	bzero(buffer, sizeof(buffer));
216
217	len = write(s, buffer, sizeof(buffer));
218	if (len == -1)
219		return (-1);
220	if (len != sizeof(buffer)) {
221		errno = EMSGSIZE;
222		return (-1);
223	}
224	return (0);
225}
226
227static int
228message_recv(int s)
229{
230	u_char buffer[MESSAGELEN];
231	ssize_t len;
232
233	len = read(s, buffer, sizeof(buffer));
234	if (len == -1)
235		return (-1);
236	if (len != sizeof(buffer)) {
237		errno = EMSGSIZE;
238		return (-1);
239	}
240	return (0);
241}
242
243/*
244 * Juggle messages between two file descriptors in a single thread/process,
245 * so simply a measure of IPC performance.
246 */
247static struct timespec
248juggle(int fd1, int fd2, int pipeline)
249{
250	struct timespec tstart, tfinish;
251	int i, j;
252
253	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
254		err(-1, "juggle: clock_gettime");
255
256	for (i = 0; i < NUMCYCLES; i++) {
257
258		for (j = 0; j < pipeline; j++) {
259			if (message_send(fd1) < 0)
260				err(-1, "message_send fd1");
261		}
262
263		for (j = 0; j < pipeline; j++) {
264			if (message_recv(fd2) < 0)
265				err(-1, "message_recv fd2");
266
267			if (message_send(fd2) < 0)
268				err(-1, "message_send fd2");
269		}
270
271		for (j = 0; j < pipeline; j++) {
272			if (message_recv(fd1) < 0)
273				err(-1, "message_recv fd1");
274		}
275	}
276
277	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
278		err(-1, "juggle: clock_gettime");
279
280	timespecsub(&tfinish, &tstart);
281
282	return (tfinish);
283}
284
285/*
286 * Juggle messages between two file descriptors in two threads, so measure
287 * the cost of IPC and the cost of a thread context switch.
288 *
289 * In order to avoid measuring thread creation time, we make use of a
290 * condition variable to decide when both threads are ready to begin
291 * juggling.
292 */
static int threaded_child_ready;	/* set to 1 by the child under threaded_mtx */
static pthread_mutex_t threaded_mtx;	/* protects threaded_child_ready */
static pthread_cond_t threaded_cond;	/* signaled once the child is running */
static int threaded_pipeline;		/* pipeline depth for the child's loop */
297
298static void *
299juggling_thread(void *arg)
300{
301	int fd2, i, j;
302
303	fd2 = *(int *)arg;
304
305	if (pthread_mutex_lock(&threaded_mtx) != 0)
306		err(-1, "juggling_thread: pthread_mutex_lock");
307
308	threaded_child_ready = 1;
309
310	if (pthread_cond_signal(&threaded_cond) != 0)
311		err(-1, "juggling_thread: pthread_cond_signal");
312
313	if (pthread_mutex_unlock(&threaded_mtx) != 0)
314		err(-1, "juggling_thread: pthread_mutex_unlock");
315
316	for (i = 0; i < NUMCYCLES; i++) {
317		for (j = 0; j < threaded_pipeline; j++) {
318			if (message_recv(fd2) < 0)
319				err(-1, "message_recv fd2");
320
321			if (message_send(fd2) < 0)
322				err(-1, "message_send fd2");
323		}
324	}
325
326	return (NULL);
327}
328
329static struct timespec
330thread_juggle(int fd1, int fd2, int pipeline)
331{
332	struct timespec tstart, tfinish;
333	pthread_t thread;
334	int i, j;
335
336	threaded_pipeline = pipeline;
337
338	if (pthread_mutex_init(&threaded_mtx, NULL) != 0)
339		err(-1, "thread_juggle: pthread_mutex_init");
340
341	if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
342		err(-1, "thread_juggle: pthread_create");
343
344	if (pthread_mutex_lock(&threaded_mtx) != 0)
345		err(-1, "thread_juggle: pthread_mutex_lock");
346
347	while (!threaded_child_ready) {
348		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
349			err(-1, "thread_juggle: pthread_cond_wait");
350	}
351
352	if (pthread_mutex_unlock(&threaded_mtx) != 0)
353		err(-1, "thread_juggle: pthread_mutex_unlock");
354
355	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
356		err(-1, "thread_juggle: clock_gettime");
357
358	for (i = 0; i < NUMCYCLES; i++) {
359		for (j = 0; j < pipeline; j++) {
360			if (message_send(fd1) < 0)
361				err(-1, "message_send fd1");
362		}
363
364		for (j = 0; j < pipeline; j++) {
365			if (message_recv(fd1) < 0)
366				err(-1, "message_recv fd1");
367		}
368	}
369
370	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
371		err(-1, "thread_juggle: clock_gettime");
372
373	if (pthread_join(thread, NULL) != 0)
374		err(-1, "thread_juggle: pthread_join");
375
376	timespecsub(&tfinish, &tstart);
377
378	return (tfinish);
379}
380
381/*
382 * Juggle messages between two file descriptors in two processes, so measure
383 * the cost of IPC and the cost of a process context switch.
384 *
385 * Since we can't use a mutex between the processes, we simply do an extra
386 * write on the child to let the parent know that it's ready to start.
387 */
static struct timespec
process_juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	pid_t pid, ppid, wpid;
	int error, i, j;

	ppid = getpid();

	pid = fork();
	if (pid < 0)
		err(-1, "process_juggle: fork");

	if (pid == 0) {
		/*
		 * Child: send one unsolicited message so the parent knows
		 * we are running before it starts the clock.
		 */
		if (message_send(fd2) < 0) {
			/* Preserve errno across kill() so err() is accurate. */
			error = errno;
			kill(ppid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: child: message_send");
		}

		/* Echo loop: each cycle sends, then receives, 'pipeline' messages. */
		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd2) < 0)
					err(-1, "message_send fd2");

				if (message_recv(fd2) < 0)
					err(-1, "message_recv fd2");
			}
		}

		exit(0);
	} else {
		/* Parent: block until the child's ready message arrives. */
		if (message_recv(fd1) < 0) {
			error = errno;
			kill(pid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: parent: message_recv");
		}

		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
			err(-1, "process_juggle: clock_gettime");

		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd1) < 0) {
					/* Kill the child so it cannot block forever. */
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_send fd1");
				}
			}

			for (j = 0; j < pipeline; j++) {
				if (message_recv(fd1) < 0) {
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_recv fd1");
				}
			}
		}

		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
			err(-1, "process_juggle: clock_gettime");
	}

	/* Reap the child; anything other than our pid is unexpected. */
	wpid = waitpid(pid, NULL, 0);
	if (wpid < 0)
		err(-1, "process_juggle: waitpid");
	if (wpid != pid)
		errx(-1, "process_juggle: waitpid: pid != wpid");

	timespecsub(&tfinish, &tstart);

	return (tfinish);
}
465
466/*
467 * When we print out results for larger pipeline sizes, we scale back by the
468 * depth of the pipeline.  This generally means dividing by the pipeline
469 * depth.  Except when it means dividing by zero.
470 */
/*
 * Normalize a measured duration by the pipeline depth p (no-op for
 * p == 0).  Divide the total duration rather than tv_sec and tv_nsec
 * independently: the original field-wise division discarded the
 * remainder of the seconds division (e.g. 3s / 2 yielded 1s instead of
 * 1.5s).  Durations here are seconds-scale, so the 64-bit nanosecond
 * total cannot overflow.
 */
static void
scale_timespec(struct timespec *ts, int p)
{
	int64_t total;

	if (p == 0)
		return;

	total = (int64_t)ts->tv_sec * 1000000000 + ts->tv_nsec;
	total /= p;
	ts->tv_sec = total / 1000000000;
	ts->tv_nsec = total % 1000000000;
}
481
/*
 * Table of IPC primitives under test: each entry pairs a constructor
 * that yields a connected descriptor pair with the label used in the
 * CSV output.
 */
static const struct ipctype {
	int		(*it_create)(int *fd1p, int *fd2p);
	const char	*it_name;
} ipctypes[] = {
	{ pipe_create, "pipe" },
	{ udp_create, "udp" },
	{ socketpairdgram_create, "socketpairdgram" },
	{ socketpairstream_create, "socketpairstream" },
};
static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));
492
493int
494main(int argc, char *argv[])
495{
496	struct timespec juggle_results[LOOPS], process_results[LOOPS];
497	struct timespec thread_results[LOOPS];
498	int fd1, fd2, i, j, p;
499	struct utsname uts;
500
501	printf("version, juggle.c %s\n", "$FreeBSD$");
502
503	if (uname(&uts) < 0)
504		err(-1, "utsname");
505	printf("sysname, %s\n", uts.sysname);
506	printf("nodename, %s\n", uts.nodename);
507	printf("release, %s\n", uts.release);
508	printf("version, %s\n", uts.version);
509	printf("machine, %s\n", uts.machine);
510	printf("\n");
511
512	printf("MESSAGELEN, %d\n", MESSAGELEN);
513	printf("NUMCYCLES, %d\n", NUMCYCLES);
514	printf("LOOPS, %d\n", LOOPS);
515	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
516	printf("\n\n");
517
518	printf("ipctype, test, pipeline_depth");
519	for (j = 0; j < LOOPS; j++)
520		printf(", data%d", j);
521	printf("\n");
522	fflush(stdout);
523	for (p = 0; p < PIPELINE_MAX + 1; p++) {
524		for (i = 0; i < ipctypes_len; i++) {
525			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
526				err(-1, "main: %s", ipctypes[i].it_name);
527
528			/*
529			 * For each test, do one uncounted warmup, then LOOPS
530			 * runs of the actual test.
531			 */
532			juggle(fd1, fd2, p);
533			for (j = 0; j < LOOPS; j++)
534				juggle_results[j] = juggle(fd1, fd2, p);
535			process_juggle(fd1, fd2, p);
536			for (j = 0; j < LOOPS; j++)
537				process_results[j] = process_juggle(fd1, fd2,
538				    p);
539			thread_juggle(fd1, fd2, p);
540			for (j = 0; j < LOOPS; j++)
541				thread_results[j] = thread_juggle(fd1, fd2,
542				    p);
543			for (j = 0; j < LOOPS; j++) {
544				thread_results[j].tv_sec = 0;
545				thread_results[j].tv_nsec = 0;
546			}
547			close(fd1);
548			close(fd2);
549		}
550		/*
551		 * When printing results for the round, normalize the results
552		 * with respect to the pipeline depth.  We're doing p times
553		 * as much work, and are we taking p times as long?
554		 */
555		for (i = 0; i < ipctypes_len; i++) {
556			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
557			for (j = 0; j < LOOPS; j++) {
558				if (j != 0)
559					printf(", ");
560				scale_timespec(&juggle_results[j], p);
561				printf("%jd.%09lu",
562				    (intmax_t)juggle_results[j].tv_sec,
563				    juggle_results[j].tv_nsec);
564			}
565			printf("\n");
566			printf("%s, process_juggle, %d, ",
567			    ipctypes[i].it_name, p);
568			for (j = 0; j < LOOPS; j++) {
569				if (j != 0)
570					printf(", ");
571				scale_timespec(&process_results[j], p);
572				printf("%jd.%09lu",
573                                    (intmax_t)process_results[j].tv_sec,
574				    process_results[j].tv_nsec);
575			}
576			printf("\n");
577			printf("%s, thread_juggle, %d, ",
578			    ipctypes[i].it_name, p);
579			for (j = 0; j < LOOPS; j++) {
580				if (j != 0)
581					printf(", ");
582				scale_timespec(&thread_results[j], p);
583				printf("%jd.%09lu",
584				    (intmax_t)thread_results[j].tv_sec,
585				    thread_results[j].tv_nsec);
586			}
587			printf("\n");
588		}
589		fflush(stdout);
590	}
591	return (0);
592}
593