/*-
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/tools/tools/netrate/juggle/juggle.c 153790 2005-12-28 12:42:12Z rwatson $
 */
28
29#include <sys/types.h>
30#include <sys/socket.h>
31#include <sys/time.h>
32#include <sys/utsname.h>
33#include <sys/wait.h>
34
35#include <netinet/in.h>
36
37#include <err.h>
38#include <errno.h>
39#include <pthread.h>
40#include <signal.h>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include <unistd.h>
45
/*
 * juggle is a simple IPC/context switch performance test, which operates on
 * pairs of file descriptors of various types.  In various runs, it considers
 * the cost of bouncing a message synchronously across the descriptor pair,
 * either in the same thread, in two different threads, or in two different
 * processes.  Timing measurements for each series of I/O's are reported, but
 * the first measurement in each series is discarded as "warmup" on the IPC
 * primitive.  Variations on the test permit pipelining -- the insertion of
 * more than one message into the stream at a time -- intended to expose
 * available parallelism and/or intelligence in context switching that
 * avoids premature switching when multiple messages are queued.
 */

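/*
 * Output is CSV-like, one row per (ipctype, test, pipeline depth), with
 * LOOPS timing columns.  The rows below sketch the shape of the output;
 * the timing values are purely illustrative, not measured results:
 *
 *	ipctype, test, pipeline_depth, data0, data1, ...
 *	pipe, juggle, 1, 0.015000000, 0.014800000, ...
 *	pipe, process_juggle, 1, 0.042000000, 0.041500000, ...
 */
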
/*
 * The UDP test uses UDP over the loopback interface, with two arbitrary
 * but fixed port numbers.
 */
#define	UDP_PORT1	2020
#define	UDP_PORT2	2021

/*
 * Size of each message.  Must be smaller than the socket buffer or pipe
 * buffer maximum size, as we want to send it atomically without blocking.
 * If pipelining is in use, must be able to fit PIPELINE_MAX of these
 * messages into the send queue.
 */
#define	MESSAGELEN	128

/*
 * Number of message cycles -- into fd1, out of fd2, into fd2, and out of
 * fd1.  By counting in cycles, we allow the master thread or process to
 * perform timing without explicitly synchronizing with the secondary thread
 * or process.
 */
#define	NUMCYCLES	1024

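/*
 * One cycle at pipeline depth 1, master on the left, secondary on the
 * right:
 *
 *	send(fd1)  -->  recv(fd2)
 *	recv(fd1)  <--  send(fd2)
 *
 * The master times NUMCYCLES of these round trips as a batch, which is why
 * no explicit synchronization with the secondary is needed during the run.
 */
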
/*
 * Number of times to run each test.
 */
#define	LOOPS		10

/*
 * Number of in-flight messages per cycle.  If adjusting this value, be
 * careful not to exceed the socket/etc buffer depth, or messages may be lost
 * or result in blocking.
 */
#define	PIPELINE_MAX	4

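/*
 * A minimal compile-time sanity check for the constraint above, assuming a
 * default socket/pipe buffer of at least 8KB (an assumption, not a value
 * queried from the kernel).  The negative array size forces a compile
 * error if a full pipeline of messages could not be queued at once.
 */
typedef char pipeline_fits_buffer_check[
    (PIPELINE_MAX * MESSAGELEN <= 8192) ? 1 : -1];
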
/*
 * As in all programs, steal timespecsub() from <sys/time.h>, which defines
 * it only for the kernel.
 */
#define timespecsub(vvp, uvp)                                           \
        do {                                                            \
                (vvp)->tv_sec -= (uvp)->tv_sec;                         \
                (vvp)->tv_nsec -= (uvp)->tv_nsec;                       \
                if ((vvp)->tv_nsec < 0) {                               \
                        (vvp)->tv_sec--;                                \
                        (vvp)->tv_nsec += 1000000000;                   \
                }                                                       \
        } while (0)

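/*
 * Worked example of the borrow: { 2, 100000000 } minus { 1, 900000000 }
 * first yields { 1, -800000000 }, which the borrow normalizes to
 * { 0, 200000000 }, i.e., 0.2 seconds, keeping tv_nsec in
 * [0, 1000000000).
 */
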
static int
udp_create(int *fd1p, int *fd2p)
{
	struct sockaddr_in sin1, sin2;
	int sock1, sock2;

	sock1 = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock1 == -1)
		return (-1);

	sock2 = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock2 == -1) {
		close(sock1);
		return (-1);
	}

	bzero(&sin1, sizeof(sin1));
	sin1.sin_len = sizeof(sin1);
	sin1.sin_family = AF_INET;
	sin1.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	sin1.sin_port = htons(UDP_PORT1);

	bzero(&sin2, sizeof(sin2));
	sin2.sin_len = sizeof(sin2);
	sin2.sin_family = AF_INET;
	sin2.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	sin2.sin_port = htons(UDP_PORT2);

	if (bind(sock1, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
		close(sock1);
		close(sock2);
		return (-1);
	}

	if (bind(sock2, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
		close(sock1);
		close(sock2);
		return (-1);
	}

	if (connect(sock1, (struct sockaddr *) &sin2, sizeof(sin2)) < 0) {
		close(sock1);
		close(sock2);
		return (-1);
	}

	if (connect(sock2, (struct sockaddr *) &sin1, sizeof(sin1)) < 0) {
		close(sock1);
		close(sock2);
		return (-1);
	}

	*fd1p = sock1;
	*fd2p = sock2;

	return (0);
}

static int
pipe_create(int *fd1p, int *fd2p)
{
	int fds[2];

	if (pipe(fds) < 0)
		return (-1);

	*fd1p = fds[0];
	*fd2p = fds[1];

	return (0);
}

static int
socketpairdgram_create(int *fd1p, int *fd2p)
{
	int fds[2];

	if (socketpair(PF_LOCAL, SOCK_DGRAM, 0, fds) < 0)
		return (-1);

	*fd1p = fds[0];
	*fd2p = fds[1];

	return (0);
}

static int
socketpairstream_create(int *fd1p, int *fd2p)
{
	int fds[2];

	if (socketpair(PF_LOCAL, SOCK_STREAM, 0, fds) < 0)
		return (-1);

	*fd1p = fds[0];
	*fd2p = fds[1];

	return (0);
}

static int
message_send(int s)
{
	u_char buffer[MESSAGELEN];
	ssize_t len;

	bzero(buffer, sizeof(buffer));

	len = write(s, buffer, sizeof(buffer));
	if (len == -1)
		return (-1);
	if (len != sizeof(buffer)) {
		errno = EMSGSIZE;
		return (-1);
	}
	return (0);
}

static int
message_recv(int s)
{
	u_char buffer[MESSAGELEN];
	ssize_t len;

	len = read(s, buffer, sizeof(buffer));
	if (len == -1)
		return (-1);
	if (len != sizeof(buffer)) {
		errno = EMSGSIZE;
		return (-1);
	}
	return (0);
}

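/*
 * Note an assumption encoded above: each MESSAGELEN-byte write is expected
 * to be delivered intact, so a short read() or write() is treated as a
 * fatal EMSGSIZE error rather than retried.  This holds for the descriptor
 * types used here as long as MESSAGELEN stays well under the buffer sizes;
 * a general-purpose stream reader would instead loop until the full message
 * arrived.
 */
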
/*
 * Juggle messages between two file descriptors in a single thread/process,
 * so the result is simply a measure of IPC performance.
 */
static struct timespec
juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	int i, j;

	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
		err(-1, "juggle: clock_gettime");

	for (i = 0; i < NUMCYCLES; i++) {

		for (j = 0; j < pipeline; j++) {
			if (message_send(fd1) < 0)
				err(-1, "message_send fd1");
		}

		for (j = 0; j < pipeline; j++) {
			if (message_recv(fd2) < 0)
				err(-1, "message_recv fd2");

			if (message_send(fd2) < 0)
				err(-1, "message_send fd2");
		}

		for (j = 0; j < pipeline; j++) {
			if (message_recv(fd1) < 0)
				err(-1, "message_recv fd1");
		}
	}

	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
		err(-1, "juggle: clock_gettime");

	timespecsub(&tfinish, &tstart);

	return (tfinish);
}

/*
 * Juggle messages between two file descriptors in two threads, measuring
 * the cost of IPC plus the cost of a thread context switch.
 *
 * In order to avoid measuring thread creation time, we make use of a
 * condition variable to decide when both threads are ready to begin
 * juggling.
 */
static int threaded_child_ready;
static pthread_mutex_t threaded_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t threaded_cond = PTHREAD_COND_INITIALIZER;
static int threaded_pipeline;

static void *
juggling_thread(void *arg)
{
	int fd2, i, j;

	fd2 = *(int *)arg;

	/*
	 * pthread functions return a positive error number on failure, not
	 * -1, so test against zero.
	 */
	if (pthread_mutex_lock(&threaded_mtx) != 0)
		err(-1, "juggling_thread: pthread_mutex_lock");

	threaded_child_ready = 1;

	if (pthread_cond_signal(&threaded_cond) != 0)
		err(-1, "juggling_thread: pthread_cond_signal");

	if (pthread_mutex_unlock(&threaded_mtx) != 0)
		err(-1, "juggling_thread: pthread_mutex_unlock");

	for (i = 0; i < NUMCYCLES; i++) {
		for (j = 0; j < threaded_pipeline; j++) {
			if (message_recv(fd2) < 0)
				err(-1, "message_recv fd2");

			if (message_send(fd2) < 0)
				err(-1, "message_send fd2");
		}
	}

	return (NULL);
}

static struct timespec
thread_juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	pthread_t thread;
	int i, j;

	threaded_pipeline = pipeline;

	/*
	 * Reset the ready flag for this run; the mutex and condition
	 * variable are statically initialized above.
	 */
	threaded_child_ready = 0;

	if (pthread_create(&thread, NULL, juggling_thread, &fd2) != 0)
		err(-1, "thread_juggle: pthread_create");

	if (pthread_mutex_lock(&threaded_mtx) != 0)
		err(-1, "thread_juggle: pthread_mutex_lock");

	while (!threaded_child_ready) {
		if (pthread_cond_wait(&threaded_cond, &threaded_mtx) != 0)
			err(-1, "thread_juggle: pthread_cond_wait");
	}

	if (pthread_mutex_unlock(&threaded_mtx) != 0)
		err(-1, "thread_juggle: pthread_mutex_unlock");

	if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
		err(-1, "thread_juggle: clock_gettime");

	for (i = 0; i < NUMCYCLES; i++) {
		for (j = 0; j < pipeline; j++) {
			if (message_send(fd1) < 0)
				err(-1, "message_send fd1");
		}

		for (j = 0; j < pipeline; j++) {
			if (message_recv(fd1) < 0)
				err(-1, "message_recv fd1");
		}
	}

	if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
		err(-1, "thread_juggle: clock_gettime");

	if (pthread_join(thread, NULL) != 0)
		err(-1, "thread_juggle: pthread_join");

	timespecsub(&tfinish, &tstart);

	return (tfinish);
}

/*
 * Juggle messages between two file descriptors in two processes, measuring
 * the cost of IPC plus the cost of a process context switch.
 *
 * Since we can't use a mutex between the processes, we simply do an extra
 * write on the child to let the parent know that it's ready to start.
 */
static struct timespec
process_juggle(int fd1, int fd2, int pipeline)
{
	struct timespec tstart, tfinish;
	pid_t pid, ppid, wpid;
	int error, i, j;

	ppid = getpid();

	pid = fork();
	if (pid < 0)
		err(-1, "process_juggle: fork");

	if (pid == 0) {
		if (message_send(fd2) < 0) {
			error = errno;
			kill(ppid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: child: message_send");
		}

		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd2) < 0)
					err(-1, "message_send fd2");

				if (message_recv(fd2) < 0)
					err(-1, "message_recv fd2");
			}
		}

		exit(0);
	} else {
		if (message_recv(fd1) < 0) {
			error = errno;
			kill(pid, SIGTERM);
			errno = error;
			err(-1, "process_juggle: parent: message_recv");
		}

		if (clock_gettime(CLOCK_REALTIME, &tstart) < 0)
			err(-1, "process_juggle: clock_gettime");

		for (i = 0; i < NUMCYCLES; i++) {
			for (j = 0; j < pipeline; j++) {
				if (message_send(fd1) < 0) {
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_send fd1");
				}
			}

			for (j = 0; j < pipeline; j++) {
				if (message_recv(fd1) < 0) {
					error = errno;
					kill(pid, SIGTERM);
					errno = error;
					err(-1, "message_recv fd1");
				}
			}
		}

		if (clock_gettime(CLOCK_REALTIME, &tfinish) < 0)
			err(-1, "process_juggle: clock_gettime");
	}

	wpid = waitpid(pid, NULL, 0);
	if (wpid < 0)
		err(-1, "process_juggle: waitpid");
	if (wpid != pid)
		errx(-1, "process_juggle: waitpid: pid != wpid");

	timespecsub(&tfinish, &tstart);

	return (tfinish);
}

/*
 * When we print out results for larger pipeline sizes, we scale back by the
 * depth of the pipeline.  This generally means dividing by the pipeline
 * depth.  Except when it means dividing by zero.
 */
static void
scale_timespec(struct timespec *ts, int p)
{

	if (p == 0)
		return;

	/*
	 * Carry the remainder of the seconds division into the nanosecond
	 * field, so that fractions of a second are not simply dropped.
	 */
	ts->tv_nsec = (long)(((long long)ts->tv_nsec +
	    (long long)(ts->tv_sec % p) * 1000000000LL) / p);
	ts->tv_sec /= p;
}

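/*
 * For example, scaling { tv_sec = 1, tv_nsec = 0 } by a pipeline depth of
 * 4 yields { 0, 250000000 } (0.25s), where dividing the two fields
 * independently would have produced { 0, 0 }.
 */
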
static const struct ipctype {
	int		(*it_create)(int *fd1p, int *fd2p);
	const char	*it_name;
} ipctypes[] = {
	{ pipe_create, "pipe" },
	{ udp_create, "udp" },
	{ socketpairdgram_create, "socketpairdgram" },
	{ socketpairstream_create, "socketpairstream" },
};
static const int ipctypes_len = (sizeof(ipctypes) / sizeof(struct ipctype));

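/*
 * To benchmark an additional IPC primitive, supply a creator function with
 * the same signature and add it to the table above.  A hypothetical sketch
 * (fifo_create is not implemented here):
 *
 *	static int	fifo_create(int *fd1p, int *fd2p);
 *	...
 *	{ fifo_create, "fifo" },
 */
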
int
main(int argc, char *argv[])
{
	struct timespec juggle_results[LOOPS], process_results[LOOPS];
	struct timespec thread_results[LOOPS];
	int fd1, fd2, i, j, p;
	struct utsname uts;

	printf("version, juggle.c %s\n", "$P4: //depot/projects/netsmp/src/tools/netperf/juggle/juggle.c#3 $");

	if (uname(&uts) < 0)
		err(-1, "utsname");
	printf("sysname, %s\n", uts.sysname);
	printf("nodename, %s\n", uts.nodename);
	printf("release, %s\n", uts.release);
	printf("version, %s\n", uts.version);
	printf("machine, %s\n", uts.machine);
	printf("\n");

	printf("MESSAGELEN, %d\n", MESSAGELEN);
	printf("NUMCYCLES, %d\n", NUMCYCLES);
	printf("LOOPS, %d\n", LOOPS);
	printf("PIPELINE_MAX, %d\n", PIPELINE_MAX);
	printf("\n\n");

	printf("ipctype, test, pipeline_depth");
	for (j = 0; j < LOOPS; j++)
		printf(", data%d", j);
	printf("\n");
	fflush(stdout);
	for (p = 0; p < PIPELINE_MAX + 1; p++) {
		for (i = 0; i < ipctypes_len; i++) {
			if (ipctypes[i].it_create(&fd1, &fd2) < 0)
				err(-1, "main: %s", ipctypes[i].it_name);

			/*
			 * For each test, do one uncounted warmup, then LOOPS
			 * runs of the actual test.
			 */
			juggle(fd1, fd2, p);
			for (j = 0; j < LOOPS; j++)
				juggle_results[j] = juggle(fd1, fd2, p);
			process_juggle(fd1, fd2, p);
			for (j = 0; j < LOOPS; j++)
				process_results[j] = process_juggle(fd1, fd2,
				    p);
			thread_juggle(fd1, fd2, p);
			for (j = 0; j < LOOPS; j++)
				thread_results[j] = thread_juggle(fd1, fd2,
				    p);
			close(fd1);
			close(fd2);
		}
		/*
		 * When printing results for the round, normalize the results
		 * with respect to the pipeline depth.  We're doing p times
		 * as much work; are we taking p times as long?
		 */
		for (i = 0; i < ipctypes_len; i++) {
			printf("%s, juggle, %d, ", ipctypes[i].it_name, p);
			for (j = 0; j < LOOPS; j++) {
				if (j != 0)
					printf(", ");
				scale_timespec(&juggle_results[j], p);
				printf("%lld.%09ld",
				    (long long)juggle_results[j].tv_sec,
				    juggle_results[j].tv_nsec);
			}
			printf("\n");
			printf("%s, process_juggle, %d, ",
			    ipctypes[i].it_name, p);
			for (j = 0; j < LOOPS; j++) {
				if (j != 0)
					printf(", ");
				scale_timespec(&process_results[j], p);
				printf("%lld.%09ld",
				    (long long)process_results[j].tv_sec,
				    process_results[j].tv_nsec);
			}
			printf("\n");
			printf("%s, thread_juggle, %d, ",
			    ipctypes[i].it_name, p);
			for (j = 0; j < LOOPS; j++) {
				if (j != 0)
					printf(", ");
				scale_timespec(&thread_results[j], p);
				printf("%lld.%09ld",
				    (long long)thread_results[j].tv_sec,
				    thread_results[j].tv_nsec);
			}
			printf("\n");
		}
		fflush(stdout);
	}
	return (0);
}