1/*
2 * $Id: b_workload.c,v 1.16 2008/04/14 02:21:47 david Exp $
3 */
4
5#include "bench.h"
6#include "b_workload.h"
7
8static int   b_workload_dump_verbose_stats __P((DB *, CONFIG *));
9static int   b_workload_is_del_workload __P((int));
10static int   b_workload_is_get_workload __P((int));
11static int   b_workload_is_put_workload __P((int));
12static int   b_workload_run_mixed_workload __P((DB *, CONFIG *));
13static int   b_workload_run_std_workload __P((DB *, CONFIG *));
14static int   b_workload_usage __P((void));
15static char *b_workload_workload_str __P((int));
16
17/*
18 * General TODO list:
19 * * The workload type. Might work better as a bitmask than the current enum.
20 * * Improve the verbose stats, so they can be easily parsed.
21 * * Think about doing automatic btree/hash comparison in here.
22 */
23int
24b_workload(argc, argv)
25	int argc;
26	char *argv[];
27{
28	extern char *optarg;
29	extern int optind, __db_getopt_reset;
30	CONFIG conf;
31	DB *dbp;
32	DB_ENV *dbenv;
33	int ch, ffactor, ksz;
34
35	dbenv = NULL;
36	memset(&conf, 0, sizeof(conf));
37	conf.seed = 124087;
38	srand(conf.seed);
39
40	conf.pcount = 100000;
41	conf.ts = "Btree";
42	conf.type = DB_BTREE;
43	conf.dsize = 20;
44	conf.presize = 0;
45	conf.workload = T_PUT_GET_DELETE;
46
47	__db_getopt_reset = 1;
48	while ((ch = getopt(argc, argv, "b:c:d:e:g:ik:m:op:r:t:vw:")) != EOF)
49		switch (ch) {
50		case 'b':
51			conf.cachesz = atoi(optarg);
52			break;
53		case 'c':
54			conf.pcount = atoi(optarg);
55			break;
56		case 'd':
57			conf.dsize = atoi(optarg);
58			break;
59		case 'e':
60			conf.cursor_del = atoi(optarg);
61			break;
62		case 'g':
63			conf.gcount = atoi(optarg);
64			break;
65		case 'i':
66			conf.presize = 1;
67			break;
68		case 'k':
69			conf.ksize = atoi(optarg);
70			break;
71		case 'm':
72			conf.message = optarg;
73			break;
74		case 'o':
75			conf.orderedkeys = 1;
76			break;
77		case 'p':
78			conf.pagesz = atoi(optarg);
79			break;
80		case 'r':
81			conf.num_dups = atoi(optarg);
82			break;
83		case 't':
84			switch (optarg[0]) {
85			case 'B': case 'b':
86				conf.ts = "Btree";
87				conf.type = DB_BTREE;
88				break;
89			case 'H': case 'h':
90				if (b_util_have_hash())
91					return (0);
92				conf.ts = "Hash";
93				conf.type = DB_HASH;
94				break;
95			default:
96				return (b_workload_usage());
97			}
98			break;
99		case 'v':
100			conf.verbose = 1;
101			break;
102		case 'w':
103			switch (optarg[0]) {
104			case 'A':
105				conf.workload = T_PUT_GET_DELETE;
106				break;
107			case 'B':
108				conf.workload = T_GET;
109				break;
110			case 'C':
111				conf.workload = T_PUT;
112				break;
113			case 'D':
114				conf.workload = T_DELETE;
115				break;
116			case 'E':
117				conf.workload = T_PUT_GET;
118				break;
119			case 'F':
120				conf.workload = T_PUT_DELETE;
121				break;
122			case 'G':
123				conf.workload = T_GET_DELETE;
124				break;
125			case 'H':
126				conf.workload = T_MIXED;
127				break;
128			default:
129				return (b_workload_usage());
130			}
131			break;
132		case '?':
133		default:
134			fprintf(stderr, "Invalid option: %c\n", ch);
135			return (b_workload_usage());
136		}
137	argc -= optind;
138	argv += optind;
139	if (argc != 0)
140		return (b_workload_usage());
141
142	/*
143	 * Validate the input parameters if specified.
144	 */
145	if (conf.pagesz != 0)
146		DB_BENCH_ASSERT(conf.pagesz >= 512 && conf.pagesz <= 65536 &&
147		   ((conf.pagesz & (conf.pagesz - 1)) == 0));
148
149	if (conf.cachesz != 0)
150		DB_BENCH_ASSERT(conf.cachesz > 20480);
151	DB_BENCH_ASSERT(conf.ksize == 0 || conf.orderedkeys == 0);
152
153	/* Create the environment. */
154	DB_BENCH_ASSERT(db_env_create(&dbenv, 0) == 0);
155	dbenv->set_errfile(dbenv, stderr);
156	if (conf.cachesz != 0)
157		DB_BENCH_ASSERT(
158		    dbenv->set_cachesize(dbenv, 0, conf.cachesz, 0) == 0);
159
160#if DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR < 1
161	DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR",
162	    NULL, DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0);
163#else
164	DB_BENCH_ASSERT(dbenv->open(dbenv, "TESTDIR",
165	    DB_CREATE | DB_INIT_MPOOL | DB_PRIVATE, 0666) == 0);
166#endif
167
168	DB_BENCH_ASSERT(db_create(&dbp, dbenv, 0) == 0);
169	if (conf.pagesz != 0)
170		DB_BENCH_ASSERT(
171		    dbp->set_pagesize(dbp, conf.pagesz) == 0);
172	if (conf.presize != 0 && conf.type == DB_HASH) {
173		ksz = (conf.orderedkeys != 0) ? sizeof(u_int32_t) : conf.ksize;
174		if (ksz == 0)
175			ksz = 10;
176		ffactor = (conf.pagesz - 32)/(ksz + conf.dsize + 8);
177		fprintf(stderr, "ffactor: %d\n", ffactor);
178		DB_BENCH_ASSERT(
179		    dbp->set_h_ffactor(dbp, ffactor) == 0);
180		DB_BENCH_ASSERT(
181		    dbp->set_h_nelem(dbp, conf.pcount*10) == 0);
182	}
183#if DB_VERSION_MAJOR >= 4 && DB_VERSION_MINOR >= 1
184	DB_BENCH_ASSERT(dbp->open(
185	    dbp, NULL, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0);
186#else
187	DB_BENCH_ASSERT(dbp->open(
188	    dbp, TESTFILE, NULL, conf.type, DB_CREATE, 0666) == 0);
189#endif
190
191	if (conf.workload == T_MIXED)
192		 b_workload_run_mixed_workload(dbp, &conf);
193	else
194		b_workload_run_std_workload(dbp, &conf);
195
196	if (b_workload_is_put_workload(conf.workload) == 0)
197		timespecadd(&conf.tot_time, &conf.put_time);
198	if (b_workload_is_get_workload(conf.workload) == 0)
199		timespecadd(&conf.tot_time, &conf.get_time);
200	if (b_workload_is_del_workload(conf.workload) == 0)
201		timespecadd(&conf.tot_time, &conf.del_time);
202
203	/* Ensure data is flushed for following measurements. */
204	DB_BENCH_ASSERT(dbp->sync(dbp, 0) == 0);
205
206	if (conf.verbose != 0)
207		b_workload_dump_verbose_stats(dbp, &conf);
208
209	DB_BENCH_ASSERT(dbp->close(dbp, 0) == 0);
210	DB_BENCH_ASSERT(dbenv->close(dbenv, 0) == 0);
211
212	/*
213	 * Construct a string for benchmark output.
214	 *
215	 * Insert HTML in-line to make the output prettier -- ugly, but easy.
216	 */
217	printf("# workload test: %s: %s<br>%lu ops",
218	    conf.ts, b_workload_workload_str(conf.workload), (u_long)conf.pcount);
219	if (conf.ksize != 0)
220		printf(", key size: %lu", (u_long)conf.ksize);
221	if (conf.dsize != 0)
222		printf(", data size: %lu", (u_long)conf.dsize);
223	if (conf.pagesz != 0)
224		printf(", page size: %lu", (u_long)conf.pagesz);
225	else
226		printf(", page size: default");
227	if (conf.cachesz != 0)
228		printf(", cache size: %lu", (u_long)conf.cachesz);
229	else
230		printf(", cache size: default");
231	printf(", %s keys", conf.orderedkeys == 1 ? "ordered" : "unordered");
232	printf(", num dups: %lu", (u_long)conf.num_dups);
233	printf("\n");
234
235	if (conf.workload != T_MIXED) {
236		if (conf.message != NULL)
237			printf("%s %s ", conf.message, conf.ts);
238		TIME_DISPLAY(conf.pcount, conf.tot_time);
239	} else
240		TIMER_DISPLAY(conf.pcount);
241
242	return (0);
243}
244
245/*
246 * The mixed workload is designed to simulate a somewhat real
247 * usage scenario.
248 * NOTES: * rand is used to decide on the current operation. This will
249 *        be repeatable, since the same seed is always used.
250 *        * All added keys are stored in a FIFO queue, this is not very
251 *        space efficient, but is the best way I could come up with to
252 *        insert random key values, and be able to retrieve/delete them.
253 *        * TODO: the workload will currently only work with unordered
254 *        fixed length keys.
255 */
256#define	GET_PROPORTION 90
257#define	PUT_PROPORTION 7
258#define	DEL_PROPORTION 3
259
260static int
261b_workload_run_mixed_workload(dbp, config)
262	DB *dbp;
263	CONFIG *config;
264{
265	DBT key, data;
266	size_t next_op, i, ioff;
267	char kbuf[KBUF_LEN];
268	struct bench_q operation_queue;
269
270	/* Having ordered insertion does not make sense here */
271	DB_BENCH_ASSERT(config->orderedkeys == 0);
272
273	srand(config->seed);
274	memset(&operation_queue, 0, sizeof(struct bench_q));
275
276	ioff = 0;
277	INIT_KEY(key, config);
278	memset(&data, 0, sizeof(data));
279	DB_BENCH_ASSERT(
280	    (data.data = malloc(data.size = config->dsize)) != NULL);
281
282	/*
283	 * Add an initial sample set of data to the DB.
284	 * This should add some stability, and reduce the likelihood
285	 * of deleting all of the entries in the DB.
286	 */
287	for (i = 0; i < 2 * config->pcount; ++i) {
288		GET_KEY_NEXT(key, config, kbuf, i);
289		BENCH_Q_TAIL_INSERT(operation_queue, kbuf);
290		DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0);
291	}
292
293	TIMER_START;
294	for (i = 0; i < config->pcount; ++i) {
295		next_op = rand()%100;
296
297		if (next_op < GET_PROPORTION ) {
298			BENCH_Q_POP_PUSH(operation_queue, kbuf);
299			key.data = kbuf;
300			key.size = sizeof(kbuf);
301			dbp->get(dbp, NULL, &key, &data, 0);
302		} else if (next_op < GET_PROPORTION+PUT_PROPORTION) {
303			GET_KEY_NEXT(key, config, kbuf, i);
304			BENCH_Q_TAIL_INSERT(operation_queue, kbuf);
305			dbp->put(dbp, NULL, &key, &data, 0);
306		} else {
307			BENCH_Q_POP(operation_queue, kbuf);
308			key.data = kbuf;
309			key.size = sizeof(kbuf);
310			dbp->del(dbp, NULL, &key, 0);
311		}
312	}
313	TIMER_STOP;
314	TIMER_GET(config->tot_time);
315
316	return (0);
317}
318
319static int
320b_workload_run_std_workload(dbp, config)
321	DB *dbp;
322	CONFIG *config;
323{
324	DBT key, data;
325	DBC *dbc;
326	u_int32_t i;
327	int ret;
328	char kbuf[KBUF_LEN];
329
330	/* Setup a key/data pair. */
331	INIT_KEY(key, config);
332	memset(&data, 0, sizeof(data));
333	DB_BENCH_ASSERT(
334	    (data.data = malloc(data.size = config->dsize)) != NULL);
335
336	/* Store the key/data pair count times. */
337	TIMER_START;
338	for (i = 0; i < config->pcount; ++i) {
339		GET_KEY_NEXT(key, config, kbuf, i);
340		DB_BENCH_ASSERT(dbp->put(dbp, NULL, &key, &data, 0) == 0);
341	}
342	TIMER_STOP;
343	TIMER_GET(config->put_time);
344
345	if (b_workload_is_get_workload(config->workload) == 0) {
346		TIMER_START;
347		for (i = 0; i <= config->gcount; ++i) {
348			DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0);
349			while ((dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0);
350			DB_BENCH_ASSERT(dbc->c_close(dbc) == 0);
351		}
352		TIMER_STOP;
353		TIMER_GET(config->get_time);
354	}
355
356	if (b_workload_is_del_workload(config->workload) == 0) {
357		/* reset rand to reproduce key sequence. */
358		srand(config->seed);
359
360		TIMER_START;
361		if (config->cursor_del != 0) {
362			DB_BENCH_ASSERT(dbp->cursor(dbp, NULL, &dbc, 0) == 0);
363			while (
364			    (ret = dbc->c_get(dbc, &key, &data, DB_NEXT)) == 0)
365				DB_BENCH_ASSERT(dbc->c_del(dbc, 0) == 0);
366			DB_BENCH_ASSERT (ret == DB_NOTFOUND);
367		} else {
368			INIT_KEY(key, config);
369			for (i = 0; i < config->pcount; ++i) {
370				GET_KEY_NEXT(key, config, kbuf, i);
371
372				ret = dbp->del(dbp, NULL, &key, 0);
373				/*
374				 * Random key generation can cause dups,
375				 * so NOTFOUND result is OK.
376				 */
377				if (config->ksize == 0)
378					DB_BENCH_ASSERT
379					    (ret == 0 || ret == DB_NOTFOUND);
380				else
381					DB_BENCH_ASSERT(ret == 0);
382			}
383		}
384		TIMER_STOP;
385		TIMER_GET(config->del_time);
386	}
387	return (0);
388}
389
390static int
391b_workload_dump_verbose_stats(dbp, config)
392	DB *dbp;
393	CONFIG *config;
394{
395/*
396 * It would be nice to be able to define stat as _stat on
397 * Windows, but that substitutes _stat for the db call as well.
398 */
399#ifdef DB_WIN32
400	struct _stat fstat;
401#else
402	struct stat fstat;
403#endif
404	DB_HASH_STAT *hstat;
405	DB_BTREE_STAT *bstat;
406	double free_prop;
407	char path[1024];
408
409#ifdef DB_BENCH_INCLUDE_CONFIG_SUMMARY
410	printf("Completed workload benchmark.\n");
411	printf("Configuration summary:\n");
412	printf("\tworkload type: %d\n", (int)config->workload);
413	printf("\tdatabase type: %s\n", config->ts);
414	if (config->cachesz != 0)
415		printf("\tcache size: %lu\n", (u_long)config->cachesz);
416	if (config->pagesz != 0)
417		printf("\tdatabase page size: %lu\n", (u_long)config->pagesz);
418	printf("\tput element count: %lu\n", (u_long)config->pcount);
419	if ( b_workload_is_get_workload(config->workload) == 0)
420		printf("\tget element count: %lu\n", (u_long)config->gcount);
421	if (config->orderedkeys)
422		printf("\tInserting items in order\n");
423	else if (config->ksize == 0)
424		printf("\tInserting keys with size 10\n");
425	else
426		printf(
427		    "\tInserting keys with size: %lu\n", (u_long)config->ksize);
428
429	printf("\tInserting data elements size: %lu\n", (u_long)config->dsize);
430
431	if (b_workload_is_del_workload(config->workload) == 0) {
432		if (config->cursor_del)
433			printf("\tDeleting items using a cursor\n");
434		else
435			printf("\tDeleting items without a cursor\n");
436	}
437#endif /* DB_BENCH_INCLUDE_CONFIG_SUMMARY */
438
439	if (b_workload_is_put_workload(config->workload) == 0)
440		printf("%s Time spent inserting (%lu) (%s) items: %lu/%lu\n",
441		    config->message[0] == '\0' ? "" : config->message,
442		    (u_long)config->pcount, config->ts,
443		    (u_long)config->put_time.tv_sec, config->put_time.tv_nsec);
444
445	if (b_workload_is_get_workload(config->workload) == 0)
446		printf("%s Time spent getting (%lu) (%s) items: %lu/%lu\n",
447		    config->message[0] == '\0' ? "" : config->message,
448		    (u_long)config->pcount * ((config->gcount == 0) ?
449		    1 : config->gcount), config->ts,
450		    (u_long)config->get_time.tv_sec, config->get_time.tv_nsec);
451
452	if (b_workload_is_del_workload(config->workload) == 0)
453		printf("%s Time spent deleting (%lu) (%s) items: %lu/%lu\n",
454		    config->message[0] == '\0' ? "" : config->message,
455		    (u_long)config->pcount, config->ts,
456		    (u_long)config->del_time.tv_sec, config->del_time.tv_nsec);
457
458	(void)snprintf(path, sizeof(path),
459	    "%s%c%s", TESTDIR, PATH_SEPARATOR[0], TESTFILE);
460#ifdef DB_WIN32
461	if (_stat(path, &fstat) == 0) {
462#else
463	if (stat(path, &fstat) == 0) {
464#endif
465		printf("%s Size of db file (%s): %lu K\n",
466		    config->message[0] == '\0' ? "" : config->message,
467		    config->ts, (u_long)fstat.st_size/1024);
468	}
469
470	if (config->type == DB_HASH) {
471#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2
472		DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, NULL, 0) == 0);
473#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2
474		DB_BENCH_ASSERT(dbp->stat(dbp, &hstat, 0) == 0);
475#else
476		DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &hstat, 0) == 0);
477#endif
478		/*
479		 * Hash fill factor is a bit tricky. Want to include
480		 * both bucket and overflow buckets (not offpage).
481		 */
482		free_prop = hstat->hash_pagesize*hstat->hash_buckets;
483		free_prop += hstat->hash_pagesize*hstat->hash_overflows;
484		free_prop =
485		    (free_prop - hstat->hash_bfree - hstat->hash_ovfl_free)/
486		    free_prop;
487		printf("%s db fill factor (%s): %.2f%%\n",
488		    config->message[0] == '\0' ? "" : config->message,
489		    config->ts, free_prop*100);
490		free(hstat);
491	} else { /* Btree */
492#if DB_VERSION_MAJOR < 3 || DB_VERSION_MAJOR == 3 && DB_VERSION_MINOR <= 2
493		DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, NULL, 0) == 0);
494#elif DB_VERSION_MAJOR < 4 || DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR <= 2
495		DB_BENCH_ASSERT(dbp->stat(dbp, &bstat, 0) == 0);
496#else
497		DB_BENCH_ASSERT(dbp->stat(dbp, NULL, &bstat, 0) == 0);
498#endif
499		free_prop = bstat->bt_pagesize*bstat->bt_leaf_pg;
500		free_prop = (free_prop-bstat->bt_leaf_pgfree)/free_prop;
501		printf("%s db fill factor (%s): %.2f%%\n",
502		    config->message[0] == '\0' ? "" : config->message,
503		    config->ts, free_prop*100);
504		free(bstat);
505	}
506	return (0);
507}
508
509static char *
510b_workload_workload_str(workload)
511	int workload;
512{
513	static char buf[128];
514
515	switch (workload) {
516	case T_PUT_GET_DELETE:
517		return ("PUT/GET/DELETE");
518		/* NOTREACHED */
519	case T_GET:
520		return ("GET");
521		/* NOTREACHED */
522	case T_PUT:
523		return ("PUT");
524		/* NOTREACHED */
525	case T_DELETE:
526		return ("DELETE");
527		/* NOTREACHED */
528	case T_PUT_GET:
529		return ("PUT/GET");
530		/* NOTREACHED */
531	case T_PUT_DELETE:
532		return ("PUT/DELETE");
533		/* NOTREACHED */
534	case T_GET_DELETE:
535		return ("GET/DELETE");
536		/* NOTREACHED */
537	case T_MIXED:
538		snprintf(buf, sizeof(buf), "MIXED (get: %d, put: %d, del: %d)",
539		    (int)GET_PROPORTION,
540		    (int)PUT_PROPORTION, (int)DEL_PROPORTION);
541		return (buf);
542	default:
543		break;
544	}
545
546	exit(b_workload_usage());
547	/* NOTREACHED */
548}
549
550static int
551b_workload_is_get_workload(workload)
552	int workload;
553{
554	switch (workload) {
555	case T_GET:
556	case T_PUT_GET:
557	case T_PUT_GET_DELETE:
558	case T_GET_DELETE:
559		return 0;
560	}
561	return 1;
562}
563
564static int
565b_workload_is_put_workload(workload)
566	int workload;
567{
568	switch (workload) {
569	case T_PUT:
570	case T_PUT_GET:
571	case T_PUT_GET_DELETE:
572	case T_PUT_DELETE:
573		return 0;
574	}
575	return 1;
576}
577
578static int
579b_workload_is_del_workload(workload)
580	int workload;
581{
582	switch (workload) {
583	case T_DELETE:
584	case T_PUT_DELETE:
585	case T_PUT_GET_DELETE:
586	case T_GET_DELETE:
587		return 0;
588	}
589	return 1;
590}
591
/*
 * b_workload_usage --
 *	Print the command-line synopsis and option descriptions on stderr.
 *
 *	Returns EXIT_FAILURE so callers can return or exit with the result
 *	directly.  The text mirrors the getopt string used by b_workload():
 *	-e takes a value and -v is an accepted flag.
 */
static int
b_workload_usage()
{
	(void)fprintf(stderr,
	    "usage: b_workload [-b cachesz] [-c count] [-d bytes] [-e 0|1]\n");
	(void)fprintf(stderr,
	    "\t[-g getitrs] [-i] [-k keysize] [-m message] [-o] [-p pagesz]\n");
	(void)fprintf(stderr, "\t[-r dup_count] [-t type] [-v] [-w type]\n");

	(void)fprintf(stderr, "Where:\n");
	(void)fprintf(stderr, "\t-b the size of the DB cache.\n");
	(void)fprintf(stderr, "\t-c the number of elements to be measured.\n");
	(void)fprintf(stderr, "\t-d the size of each data element.\n");
	(void)fprintf(stderr,
	    "\t-e non-zero to delete entries using a cursor.\n");
	(void)fprintf(stderr, "\t-g number of get cursor traverses.\n");
	(void)fprintf(stderr, "\t-i Pre-init hash DB bucket count.\n");
	(void)fprintf(stderr, "\t-k the size of each key inserted.\n");
	(void)fprintf(stderr, "\t-m message pre-pended to log output.\n");
	(void)fprintf(stderr, "\t-o keys should be ordered for insert.\n");
	(void)fprintf(stderr, "\t-p the page size for the database.\n");
	(void)fprintf(stderr, "\t-r the number of duplicates to insert\n");
	(void)fprintf(stderr, "\t-t type of the underlying database.\n");
	(void)fprintf(stderr, "\t-v print verbose statistics after the run.\n");
	(void)fprintf(stderr, "\t-w the workload to measure, available:\n");
	(void)fprintf(stderr, "\t\tA - PUT_GET_DELETE\n");
	(void)fprintf(stderr, "\t\tB - GET\n");
	(void)fprintf(stderr, "\t\tC - PUT\n");
	(void)fprintf(stderr, "\t\tD - DELETE\n");
	(void)fprintf(stderr, "\t\tE - PUT_GET\n");
	(void)fprintf(stderr, "\t\tF - PUT_DELETE\n");
	(void)fprintf(stderr, "\t\tG - GET_DELETE\n");
	(void)fprintf(stderr, "\t\tH - MIXED\n");
	return (EXIT_FAILURE);
}
625