1/*
2 * $Id: b_workload.c,v 1.16 2008/04/14 02:21:47 david Exp $
3 */
4
5#include "bench.h"
6#include "b_workload.h"
7
8static int   dump_verbose_stats __P((DB *, CONFIG *));
9static int   is_del_workload __P((int));
10static int   is_get_workload __P((int));
11static int   is_put_workload __P((int));
12static int   run_mixed_workload __P((DB *, CONFIG *));
13static int   run_std_workload __P((DB *, CONFIG *));
14static int   usage __P((void));
15static char *workload_str __P((int));
16
17/*
18 * General TODO list:
19 * * The workload type. Might work better as a bitmask than the current enum.
20 * * Improve the verbose stats, so they can be easily parsed.
21 * * Think about doing automatic btree/hash comparison in here.
22 */
23int
24b_workload(argc, argv)
25	int argc;
26	char *argv[];
27{
28	extern char *optarg;
29	extern int optind;
30	CONFIG conf;
31	DB *dbp;
32	DB_ENV *dbenv;
33	int ch, ffactor, ksz;
34
35	dbenv = NULL;
36	memset(&conf, 0, sizeof(conf));
37	conf.seed = 124087;
38	srand(conf.seed);
39
40	conf.pcount = 100000;
41	conf.ts = "Btree";
42	conf.type = DB_BTREE;
43	conf.dsize = 20;
44	conf.presize = 0;
45	conf.workload = T_PUT_GET_DELETE;
46
47	while ((ch = getopt(argc, argv, "b:c:d:e:g:ik:m:op:r:t:vw:")) != EOF)
48		switch (ch) {
49		case 'b':
50			conf.cachesz = atoi(optarg);
51			break;
52		case 'c':
53			conf.pcount = atoi(optarg);
54			break;
55		case 'd':
56			conf.dsize = atoi(optarg);
57			break;
58		case 'e':
59			conf.cursor_del = atoi(optarg);
60			break;
61		case 'g':
62			conf.gcount = atoi(optarg);
63			break;
64		case 'i':
65			conf.presize = 1;
66			break;
67		case 'k':
68			conf.ksize = atoi(optarg);
69			break;
70		case 'm':
71			conf.message = optarg;
72			break;
73		case 'o':
74			conf.orderedkeys = 1;
75			break;
76		case 'p':
77			conf.pagesz = atoi(optarg);
78			break;
79		case 'r':
80			conf.num_dups = atoi(optarg);
81			break;
82		case 't':
83			switch (optarg[0]) {
84			case 'B': case 'b':
85				conf.ts = "Btree";
86				conf.type = DB_BTREE;
87				break;
88			case 'H': case 'h':
89				if (b_util_have_hash())
90					return (0);
91				conf.ts = "Hash";
92				conf.type = DB_HASH;
93				break;
94			default:
95				return (usage());
96			}
97			break;
98		case 'v':
99			conf.verbose = 1;
100			break;
101		case 'w':
102			switch (optarg[0]) {
103			case 'A':
104				conf.workload = T_PUT_GET_DELETE;
105				break;
106			case 'B':
107				conf.workload = T_GET;
108				break;
109			case 'C':
110				conf.workload = T_PUT;
111				break;
112			case 'D':
113				conf.workload = T_DELETE;
114				break;
115			case 'E':
116				conf.workload = T_PUT_GET;
117				break;
118			case 'F':
119				conf.workload = T_PUT_DELETE;
120				break;
121			case 'G':
122				conf.workload = T_GET_DELETE;
123				break;
124			case 'H':
125				conf.workload = T_MIXED;
126				break;
127			default:
128				return (usage());
129			}
130			break;
131		case '?':
132		default:
133			fprintf(stderr, "Invalid option: %c\n", ch);
134			return (usage());
135		}
136	argc -= optind;
137	argv += optind;
138	if (argc != 0)
139		return (usage());
140
141	/*
142	 * Validate the input parameters if specified.
143	 */
144	if (conf.pagesz != 0)
145		DB_BENCH_ASSERT(conf.pagesz >= 512 && conf.pagesz <= 65536 &&
146		   ((conf.pagesz & (conf.pagesz - 1)) == 0));
147
148	if (conf.cachesz != 0)
149		DB_BENCH_ASSERT(conf.cachesz > 20480);
150	DB_BENCH_ASSERT(conf.ksize == 0 || conf.orderedkeys == 0);
151
152	/* Create the environment. */
153	DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0);
154	dbenv->set_errfile(dbenv, stderr);
155	if (conf.cachesz != 0)
156		DB_BENCH_ASSERT(
157		    dbenv->set_cachesize(dbenv, 0, conf.cachesz, 0) == 0);
158
159#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1
160	DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR",
161	    NULL, DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0);
162#else
163	DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR",
164	    DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0);
165#endif
166
167	DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0);
168	if (conf.pagesz != 0)
169		DB_BENCH_ASSERT(
170		    dbp->set_pagesize(dbp, conf.pagesz) == 0);
171	if (conf.presize != 0 && conf.type == DB_HASH) {
172		ksz = (conf.orderedkeys != 0) ? sizeof(u_int32_t) : conf.ksize;
173		if (ksz == 0)
174			ksz = 10;
175		ffactor = (conf.pagesz - 32)/(ksz + conf.dsize + 8);
176		fprintf(stderr, "ffactor: %d\n", ffactor);
177		DB_BENCH_ASSERT(
178		    dbp->set_h_ffactor(dbp, ffactor) == 0);
179		DB_BENCH_ASSERT(
180		    dbp->set_h_nelem(dbp, conf.pcount*10) == 0);
181	}
182#if DB_VERSION_MAJOR >= 4 && DB_VERSION_MINOR >= 1
183	DB_BENCH_ASSERT(dbp->open(
184	    dbp, NULL, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0);
185#else
186	DB_BENCH_ASSERT(dbp->open(
187	    dbp, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0);
188#endif
189
190	if (conf.workload == T_MIXED)
191		 run_mixed_workload(dbp, &conf);
192	else
193		run_std_workload(dbp, &conf);
194
195	if (is_put_workload(conf.workload) == 0)
196		timespecadd(&conf.tot_time, &conf.put_time);
197	if (is_get_workload(conf.workload) == 0)
198		timespecadd(&conf.tot_time, &conf.get_time);
199	if (is_del_workload(conf.workload) == 0)
200		timespecadd(&conf.tot_time, &conf.del_time);
201
202	/* Ensure data is flushed for following measurements. */
203	DB_BENCH_ASSERT(dbp->sync(dbp, 0) == 0);
204
205	if (conf.verbose != 0)
206		dump_verbose_stats(dbp, &conf);
207
208	DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0);
209	DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0);
210
211	/*
212	 * Construct a string for benchmark output.
213	 *
214	 * Insert HTML in-line to make the output prettier -- ugly, but easy.
215	 */
216	printf("# workload test: %s: %s<br>%lu ops",
217	    conf.ts, workload_str(conf.workload), (u_long)conf.pcount);
218	if (conf.ksize != 0)
219		printf(", key size: %lu", (u_long)conf.ksize);
220	if (conf.dsize != 0)
221		printf(", data size: %lu", (u_long)conf.dsize);
222	if (conf.pagesz != 0)
223		printf(", page size: %lu", (u_long)conf.pagesz);
224	else
225		printf(", page size: default");
226	if (conf.cachesz != 0)
227		printf(", cache size: %lu", (u_long)conf.cachesz);
228	else
229		printf(", cache size: default");
230	printf(", %s keys", conf.orderedkeys == 1 ? "ordered" : "unordered");
231	printf(", num dups: %lu", (u_long)conf.num_dups);
232	printf("\n");
233
234	if (conf.workload != T_MIXED) {
235		if (conf.message != NULL)
236			printf("%s %s ", conf.message, conf.ts);
237		TIME_DISPLAY(conf.pcount, conf.tot_time);
238	} else
239		TIMER_DISPLAY(conf.pcount);
240
241	return (0);
242}
243
244/*
245 * The mixed workload is designed to simulate a somewhat real
246 * usage scenario.
247 * NOTES: * rand is used to decide on the current operation. This will
248 *        be repeatable, since the same seed is always used.
249 *        * All added keys are stored in a FIFO queue, this is not very
250 *        space efficient, but is the best way I could come up with to
251 *        insert random key values, and be able to retrieve/delete them.
252 *        * TODO: the workload will currently only work with unordered
253 *        fixed length keys.
254 */
255#define	GET_PROPORTION 90
256#define	PUT_PROPORTION 7
257#define	DEL_PROPORTION 3
258
259static int
260run_mixed_workload(dbp, config)
261	DB *dbp;
262	CONFIG *config;
263{
264	DBT key, data;
265	size_t next_op, i, ioff;
266	char kbuf[KBUF_LEN];
267	struct bench_q operation_queue;
268
269	/* Having ordered insertion does not make sense here */
270	DB_BENCH_ASSERT(config->orderedkeys == 0);
271
272	srand(config->seed);
273	memset(&operation_queue, 0, sizeof(struct bench_q));
274
275	ioff = 0;
276	INIT_KEY(key, config);
277	memset(&data, 0, sizeof(data));
278	DB_BENCH_ASSERT(
279	    (data.data = malloc(data.size = config->dsize)) != NULL);
280
281	/*
282	 * Add an initial sample set of data to the DB.
283	 * This should add some stability, and reduce the likelihood
284	 * of deleting all of the entries in the DB.
285	 */
286	for (i = 0; i < 2 * config->pcount; ++i) {
287		GET_KEY_NEXT(key, config, kbuf, i);
288		BENCH_Q_TAIL_INSERT(operation_queue, kbuf);
289		DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0);
290	}
291
292	TIMER_START;
293	for (i = 0; i < config->pcount; ++i) {
294		next_op = rand()%100;
295
296		if (next_op < GET_PROPORTION ) {
297			BENCH_Q_POP_PUSH(operation_queue, kbuf);
298			key.data = kbuf;
299			key.size = sizeof(kbuf);
300			dbp->get(dbp, NULL, &key, &data, 0);
301		} else if (next_op < GET_PROPORTION+PUT_PROPORTION) {
302			GET_KEY_NEXT(key, config, kbuf, i);
303			BENCH_Q_TAIL_INSERT(operation_queue, kbuf);
304			dbp->put(dbp, NULL, &key, &data, 0);
305		} else {
306			BENCH_Q_POP(operation_queue, kbuf);
307			key.data = kbuf;
308			key.size = sizeof(kbuf);
309			dbp->del(dbp, NULL, &key, 0);
310		}
311	}
312	TIMER_STOP;
313	TIMER_GET(config->tot_time);
314
315	return (0);
316}
317
318static int
319run_std_workload(dbp, config)
320	DB *dbp;
321	CONFIG *config;
322{
323	DBT key, data;
324	DBC *dbc;
325	u_int32_t i;
326	int ret;
327	char kbuf[KBUF_LEN];
328
329	/* Setup a key/data pair. */
330	INIT_KEY(key, config);
331	memset(&data, 0, sizeof(data));
332	DB_BENCH_ASSERT(
333	    (data.data = malloc(data.size = config->dsize)) != NULL);
334
335	/* Store the key/data pair count times. */
336	TIMER_START;
337	for (i = 0; i < config->pcount; ++i) {
338		GET_KEY_NEXT(key, config, kbuf, i);
339		DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0);
340	}
341	TIMER_STOP;
342	TIMER_GET(config->put_time);
343
344	if (is_get_workload(config->workload) == 0) {
345		TIMER_START;
346		for (i = 0; i <= config->gcount; ++i) {
347			DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0);
348			while ((dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0);
349			DB_BENCH_ASSERT(dbc->c_close(dbc) == 0);
350		}
351		TIMER_STOP;
352		TIMER_GET(config->get_time);
353	}
354
355	if (is_del_workload(config->workload) == 0) {
356		/* reset rand to reproduce key sequence. */
357		srand(config->seed);
358
359		TIMER_START;
360		if (config->cursor_del != 0) {
361			DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0);
362			while (
363			    (ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0)
364				DB_BENCH_ASSERT(dbc->c_del(dbc, 0) == 0);
365			DB_BENCH_ASSERT (ret == DB_NOTFOUND);
366		} else {
367			INIT_KEY(key, config);
368			for (i = 0; i < config->pcount; ++i) {
369				GET_KEY_NEXT(key, config, kbuf, i);
370
371				ret = dbp->del(dbp, NULL, &key, 0);
372				/*
373				 * Random key generation can cause dups,
374				 * so NOTFOUND result is OK.
375				 */
376				if (config->ksize == 0)
377					DB_BENCH_ASSERT
378					    (ret == 0 || ret == DB_NOTFOUND);
379				else
380					DB_BENCH_ASSERT(ret == 0);
381			}
382		}
383		TIMER_STOP;
384		TIMER_GET(config->del_time);
385	}
386	return (0);
387}
388
389static int
390dump_verbose_stats(dbp, config)
391	DB *dbp;
392	CONFIG *config;
393{
394/*
395 * It would be nice to be able to define stat as _stat on
396 * Windows, but that substitutes _stat for the db call as well.
397 */
398#ifdef DB_WIN32
399	struct _stat fstat;
400#else
401	struct stat fstat;
402#endif
403	DB_HASH_STAT *hstat;
404	DB_BTREE_STAT *bstat;
405	double free_prop;
406	char path[1024];
407
408#ifdef DB_BENCH_INCLUDE_CONFIG_SUMMARY
409	printf("Completed workload benchmark.\n");
410	printf("Configuration summary:\n");
411	printf("\tworkload type: %d\n", (int)config->workload);
412	printf("\tdatabase type: %s\n", config->ts);
413	if (config->cachesz != 0)
414		printf("\tcache size: %lu\n", (u_long)config->cachesz);
415	if (config->pagesz != 0)
416		printf("\tdatabase page size: %lu\n", (u_long)config->pagesz);
417	printf("\tput element count: %lu\n", (u_long)config->pcount);
418	if ( is_get_workload(config->workload) == 0)
419		printf("\tget element count: %lu\n", (u_long)config->gcount);
420	if (config->orderedkeys)
421		printf("\tInserting items in order\n");
422	else if (config->ksize == 0)
423		printf("\tInserting keys with size 10\n");
424	else
425		printf(
426		    "\tInserting keys with size: %lu\n", (u_long)config->ksize);
427
428	printf("\tInserting data elements size: %lu\n", (u_long)config->dsize);
429
430	if (is_del_workload(config->workload) == 0) {
431		if (config->cursor_del)
432			printf("\tDeleting items using a cursor\n");
433		else
434			printf("\tDeleting items without a cursor\n");
435	}
436#endif /* DB_BENCH_INCLUDE_CONFIG_SUMMARY */
437
438	if (is_put_workload(config->workload) == 0)
439		printf("%s Time spent inserting (%lu) (%s) items: %lu/%lu\n",
440		    config->message[0] == '\0' ? "" : config->message,
441		    (u_long)config->pcount, config->ts,
442		    (u_long)config->put_time.tv_sec, config->put_time.tv_nsec);
443
444	if (is_get_workload(config->workload) == 0)
445		printf("%s Time spent getting (%lu) (%s) items: %lu/%lu\n",
446		    config->message[0] == '\0' ? "" : config->message,
447		    (u_long)config->pcount * ((config->gcount == 0) ?
448		    1 : config->gcount), config->ts,
449		    (u_long)config->get_time.tv_sec, config->get_time.tv_nsec);
450
451	if (is_del_workload(config->workload) == 0)
452		printf("%s Time spent deleting (%lu) (%s) items: %lu/%lu\n",
453		    config->message[0] == '\0' ? "" : config->message,
454		    (u_long)config->pcount, config->ts,
455		    (u_long)config->del_time.tv_sec, config->del_time.tv_nsec);
456
457	(void)snprintf(path, sizeof(path),
458	    "%s%c%s", TESTDIR, PATH_SEPARATOR[0], TESTFILE);
459#ifdef DB_WIN32
460	if (_stat(path, &fstat) == 0) {
461#else
462	if (stat(path, &fstat) == 0) {
463#endif
464		printf("%s Size of db file (%s): %lu K\n",
465		    config->message[0] == '\0' ? "" : config->message,
466		    config->ts, (u_long)fstat.st_size/1024);
467	}
468
469	if (config->type == DB_HASH) {
470#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2
471		DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, NULL, 0) == 0);
472#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2
473		DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, 0) == 0);
474#else
475		DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &hstat, 0) == 0);
476#endif
477		/*
478		 * Hash fill factor is a bit tricky. Want to include
479		 * both bucket and overflow buckets (not offpage).
480		 */
481		free_prop = hstat->hash_pagesize*hstat->hash_buckets;
482		free_prop += hstat->hash_pagesize*hstat->hash_overflows;
483		free_prop =
484		    (free_prop - hstat->hash_bfree - hstat->hash_ovfl_free)/
485		    free_prop;
486		printf("%s db fill factor (%s): %.2f%%\n",
487		    config->message[0] == '\0' ? "" : config->message,
488		    config->ts, free_prop*100);
489		free(hstat);
490	} else { /* Btree */
491#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2
492		DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, NULL, 0) == 0);
493#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2
494		DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, 0) == 0);
495#else
496		DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &bstat, 0) == 0);
497#endif
498		free_prop = bstat->bt_pagesize*bstat->bt_leaf_pg;
499		free_prop = (free_prop-bstat->bt_leaf_pgfree)/free_prop;
500		printf("%s db fill factor (%s): %.2f%%\n",
501		    config->message[0] == '\0' ? "" : config->message,
502		    config->ts, free_prop*100);
503		free(bstat);
504	}
505	return (0);
506}
507
508static char *
509workload_str(workload)
510	int workload;
511{
512	static char buf[128];
513
514	switch (workload) {
515	case T_PUT_GET_DELETE:
516		return ("PUT/GET/DELETE");
517		/* NOTREACHED */
518	case T_GET:
519		return ("GET");
520		/* NOTREACHED */
521	case T_PUT:
522		return ("PUT");
523		/* NOTREACHED */
524	case T_DELETE:
525		return ("DELETE");
526		/* NOTREACHED */
527	case T_PUT_GET:
528		return ("PUT/GET");
529		/* NOTREACHED */
530	case T_PUT_DELETE:
531		return ("PUT/DELETE");
532		/* NOTREACHED */
533	case T_GET_DELETE:
534		return ("GET/DELETE");
535		/* NOTREACHED */
536	case T_MIXED:
537		snprintf(buf, sizeof(buf), "MIXED (get: %d, put: %d, del: %d)",
538		    (int)GET_PROPORTION,
539		    (int)PUT_PROPORTION, (int)DEL_PROPORTION);
540		return (buf);
541	default:
542		break;
543	}
544
545	exit(usage());
546	/* NOTREACHED */
547}
548
549static int
550is_get_workload(workload)
551	int workload;
552{
553	switch (workload) {
554	case T_GET:
555	case T_PUT_GET:
556	case T_PUT_GET_DELETE:
557	case T_GET_DELETE:
558		return 0;
559	}
560	return 1;
561}
562
563static int
564is_put_workload(workload)
565	int workload;
566{
567	switch (workload) {
568	case T_PUT:
569	case T_PUT_GET:
570	case T_PUT_GET_DELETE:
571	case T_PUT_DELETE:
572		return 0;
573	}
574	return 1;
575}
576
577static int
578is_del_workload(workload)
579	int workload;
580{
581	switch (workload) {
582	case T_DELETE:
583	case T_PUT_DELETE:
584	case T_PUT_GET_DELETE:
585	case T_GET_DELETE:
586		return 0;
587	}
588	return 1;
589}
590
/*
 * usage --
 *	Print the command-line synopsis and option descriptions to stderr.
 *	Returns EXIT_FAILURE so callers can return or exit with the result.
 *
 *	The text mirrors the getopt string in b_workload: -e takes a numeric
 *	argument and -v is a supported flag.
 */
static int
usage()
{
	(void)fprintf(stderr,
	    "usage: b_workload [-b cachesz] [-c count] [-d bytes] [-e 0|1]\n");
	(void)fprintf(stderr,
	    "\t[-g getitrs] [-i] [-k keysize] [-m message] [-o] [-p pagesz]\n");
	(void)fprintf(stderr, "\t[-r dup_count] [-t type] [-v] [-w type]\n");

	(void)fprintf(stderr, "Where:\n");
	(void)fprintf(stderr, "\t-b the size of the DB cache.\n");
	(void)fprintf(stderr, "\t-c the number of elements to be measured.\n");
	(void)fprintf(stderr, "\t-d the size of each data element.\n");
	(void)fprintf(stderr,
	    "\t-e non-zero to delete entries using a cursor.\n");
	(void)fprintf(stderr, "\t-g number of get cursor traverses.\n");
	(void)fprintf(stderr, "\t-i Pre-init hash DB bucket count.\n");
	(void)fprintf(stderr, "\t-k the size of each key inserted.\n");
	(void)fprintf(stderr, "\t-m message pre-pended to log output.\n");
	(void)fprintf(stderr, "\t-o keys should be ordered for insert.\n");
	(void)fprintf(stderr, "\t-p the page size for the database.\n");
	(void)fprintf(stderr, "\t-r the number of duplicates to insert\n");
	(void)fprintf(stderr, "\t-t type of the underlying database.\n");
	(void)fprintf(stderr, "\t-v print verbose statistics after the run.\n");
	(void)fprintf(stderr, "\t-w the workload to measure, available:\n");
	(void)fprintf(stderr, "\t\tA - PUT_GET_DELETE\n");
	(void)fprintf(stderr, "\t\tB - GET\n");
	(void)fprintf(stderr, "\t\tC - PUT\n");
	(void)fprintf(stderr, "\t\tD - DELETE\n");
	(void)fprintf(stderr, "\t\tE - PUT_GET\n");
	(void)fprintf(stderr, "\t\tF - PUT_DELETE\n");
	(void)fprintf(stderr, "\t\tG - GET_DELETE\n");
	(void)fprintf(stderr, "\t\tH - MIXED\n");
	return (EXIT_FAILURE);
}
624