1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <stdio.h>
28#include <stdlib.h>
29#include <sys/types.h>
30#include <unistd.h>
31#include <errno.h>
32#include <libintl.h>
33#include <string.h>
34#include <rcm_module.h>
35#include <sys/pset.h>
36
37#include <pool.h>
38
/*
 * RCM module ops.
 *
 * Forward declarations of the static entry points that are installed in
 * the pool_ops table below.
 */
static int pool_register(rcm_handle_t *);
static int pool_unregister(rcm_handle_t *);
static int pool_get_info(rcm_handle_t *, char *, id_t, uint_t, char **,
    char **, nvlist_t *, rcm_info_t **);
static int pool_request_suspend(rcm_handle_t *, char *, id_t,
    timespec_t *, uint_t, char **, rcm_info_t **);
static int pool_notify_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int pool_notify_remove(rcm_handle_t *, char *, id_t, uint_t,
    char **, rcm_info_t **);
static int pool_request_offline(rcm_handle_t *, char *, id_t, uint_t,
    char **, rcm_info_t **);
static int pool_notify_online(rcm_handle_t *, char *, id_t, uint_t, char **,
    rcm_info_t **);
static int pool_request_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
    nvlist_t *, char **, rcm_info_t **);
static int pool_notify_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
    nvlist_t *, char **, rcm_info_t **);
60
/*
 * Pool-specific callback functions.
 *
 * These are not RCM ops themselves; they are reached through the
 * capacity_change_cb member of the registrations[] table.
 */
static int pset_validate_remove(nvlist_t *, char **);
65
/*
 * Table of the resources this module deals with. The list is terminated
 * by an entry whose rsrc is NULL. pool_register() and pool_unregister()
 * walk it to (un)register each resource name, and
 * pool_request_capacity_change() compares the incoming resource name with
 * rsrc and invokes the matching entry's capacity_change_cb.
 */
static struct {
	/* resource name passed to the capacity (un)registration calls */
	const char *rsrc;
	/* validator called with the request nvlist and the error out-pointer */
	int (*capacity_change_cb)(nvlist_t *, char **);
} registrations[] = {
	{ "SUNW_cpu", pset_validate_remove },
	{ NULL, NULL }
};
73
/*
 * Non-zero once pool_register() has walked the registrations[] table;
 * decremented again by pool_unregister().
 */
static int registered = 0;
75
/*
 * Operations vector returned by rcm_mod_init(). The initializer is
 * positional, so the order of the entries has to match the member order
 * of struct rcm_mod_ops, which is declared outside this file.
 */
static struct rcm_mod_ops pool_ops = {
	RCM_MOD_OPS_VERSION,
	pool_register,
	pool_unregister,
	pool_get_info,
	pool_request_suspend,
	pool_notify_resume,
	pool_request_offline,
	pool_notify_online,
	pool_notify_remove,
	pool_request_capacity_change,
	pool_notify_capacity_change,
	/*
	 * NOTE(review): trailing slot left unset; presumably an optional op
	 * this module does not implement -- confirm against rcm_module.h.
	 */
	NULL
};
90
91struct rcm_mod_ops *
92rcm_mod_init(void)
93{
94	rcm_log_message(RCM_TRACE1, "Pools RCM module created\n");
95	return (&pool_ops);
96}
97
98
99int
100rcm_mod_fini(void)
101{
102	rcm_log_message(RCM_TRACE1, "Pools RCM module unloaded\n");
103	return (RCM_SUCCESS);
104}
105
/*
 * rcm_mod_info()
 *	Return the constant string that names this module and its version.
 */
const char *
rcm_mod_info(void)
{
	static const char description[] = "Pools RCM module 1.4";

	return (description);
}
111
112static int
113pool_check_pset(pool_conf_t *conf, pool_resource_t *res,
114    processorid_t *del_cpus, char **errorp)
115{
116	int64_t tmp;
117	int i, j;
118	uint_t num_cpus;
119	uint64_t min_cpus;
120	uint_t num_found = 0;
121	processorid_t *cpulist;
122	psetid_t psetid;
123	pool_value_t *pval;
124	pool_elem_t *elem = pool_resource_to_elem(conf, res);
125
126	if ((pval = pool_value_alloc()) == NULL)
127		return (-1);
128	if (pool_get_property(conf, elem, "pset.min", pval) != POC_UINT) {
129		rcm_log_message(RCM_ERROR,
130		    gettext("POOL: cannot find property 'pset.min' in pset\n"));
131		pool_value_free(pval);
132		return (-1);
133	}
134	(void) pool_value_get_uint64(pval, &min_cpus);
135	if (pool_get_property(conf, elem, "pset.sys_id", pval) != POC_INT) {
136		rcm_log_message(RCM_ERROR,
137		    gettext("POOL: cannot get pset.sys_id\n"));
138		pool_value_free(pval);
139		return (-1);
140	}
141	(void) pool_value_get_int64(pval, &tmp);
142	pool_value_free(pval);
143	psetid = (psetid_t)tmp;
144	rcm_log_message(RCM_TRACE1, "POOL: checking pset: %d\n", psetid);
145
146	rcm_log_message(RCM_TRACE1, "POOL: min_cpus is %llu\n", min_cpus);
147	if (pset_info(psetid, NULL, &num_cpus, NULL) != 0) {
148		rcm_log_message(RCM_ERROR,
149		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
150		    strerror(errno));
151		return (-1);
152	}
153	if ((cpulist = malloc(num_cpus * sizeof (processorid_t))) == NULL) {
154		rcm_log_message(RCM_ERROR,
155		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
156		return (-1);
157	}
158	if (pset_info(psetid, NULL, &num_cpus, cpulist) != 0) {
159		free(cpulist);
160		rcm_log_message(RCM_ERROR,
161		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
162		    strerror(errno));
163		return (-1);
164	}
165	for (i = 0; del_cpus[i] != -1; i++)
166		for (j = 0; j < num_cpus; j++)
167			if (cpulist[j] == del_cpus[i])
168				num_found++;
169	free(cpulist);
170	if (num_found > 0 && (num_cpus - num_found) < (uint_t)min_cpus) {
171		int len;
172		char *errval;
173		const char *errfmt =
174		    gettext("POOL: processor set (%1$d) would go "
175		    "below its minimum value of %2$u\n");
176
177		/*
178		 * We would go below the min value. Fail this request.
179		 */
180		len = strlen(errfmt) + 4 * 2; /* 4 digits for psetid and min */
181		if ((errval = malloc((len + 1) * sizeof (char))) != NULL) {
182			(void) snprintf(errval, len + 1, errfmt, psetid,
183			    (uint_t)min_cpus);
184			*errorp = errval;
185		}
186
187		rcm_log_message(RCM_ERROR, (char *)errfmt, psetid,
188		    (uint_t)min_cpus);
189
190		return (-1);
191	}
192	rcm_log_message(RCM_TRACE1, "POOL: pset %d is fine\n", psetid);
193	return (0);
194}
195
196/*
197 * pset_validate_remove()
198 * 	Check to see if the requested cpu removal would be acceptable.
199 * 	Returns RCM_FAILURE if not.
200 */
201static int
202pset_validate_remove(nvlist_t *nvl, char **errorp)
203{
204	int error = RCM_SUCCESS;
205	int32_t old_total, new_total, removed_total;
206	processorid_t *removed_list = NULL; /* list terminated by (-1). */
207	processorid_t *old_cpu_list = NULL, *new_cpu_list = NULL;
208	int i, j;
209	pool_conf_t *conf;
210	pool_value_t *pvals[] = { NULL, NULL };
211	pool_resource_t **res = NULL;
212	uint_t nelem;
213	const char *generic_error = gettext("POOL: Error processing request\n");
214
215	if ((conf = pool_conf_alloc()) == NULL)
216		return (RCM_FAILURE);
217	if (pool_conf_open(conf, pool_dynamic_location(), PO_RDONLY) < 0) {
218		rcm_log_message(RCM_TRACE1,
219		    "POOL: failed to parse config file: '%s'\n",
220		    pool_dynamic_location());
221		pool_conf_free(conf);
222		return (RCM_SUCCESS);
223	}
224
225	if ((error = nvlist_lookup_int32(nvl, "old_total", &old_total)) != 0) {
226		(void) pool_conf_close(conf);
227		pool_conf_free(conf);
228		rcm_log_message(RCM_ERROR,
229		    gettext("POOL: unable to find 'old_total' in nvlist: %s\n"),
230		    strerror(error));
231		*errorp = strdup(generic_error);
232		return (RCM_FAILURE);
233	}
234	if ((error = nvlist_lookup_int32(nvl, "new_total", &new_total)) != 0) {
235		(void) pool_conf_close(conf);
236		pool_conf_free(conf);
237		rcm_log_message(RCM_ERROR,
238		    gettext("POOL: unable to find 'new_total' in nvlist: %s\n"),
239		    strerror(error));
240		*errorp = strdup(generic_error);
241		return (RCM_FAILURE);
242	}
243	if (new_total >= old_total) {
244		(void) pool_conf_close(conf);
245		pool_conf_free(conf);
246		/*
247		 * This doesn't look like a cpu removal.
248		 */
249		rcm_log_message(RCM_TRACE1,
250		    gettext("POOL: 'old_total' (%d) is less than 'new_total' "
251		    "(%d)\n"), old_total, new_total);
252		return (RCM_SUCCESS);
253	}
254	if ((removed_list = malloc((old_total - new_total + 1) * sizeof (int)))
255	    == NULL) {
256		rcm_log_message(RCM_ERROR,
257		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
258
259		error = RCM_FAILURE;
260		goto out;
261	}
262	if ((error = nvlist_lookup_int32_array(nvl, "old_cpu_list",
263	    &old_cpu_list, &nelem)) != 0) {
264		rcm_log_message(RCM_ERROR,
265		    gettext("POOL: 'old_cpu_list' not found in nvlist: %s\n"),
266		    strerror(error));
267		error = RCM_FAILURE;
268		goto out;
269	}
270	if ((int32_t)nelem != old_total) {
271		rcm_log_message(RCM_ERROR,
272		    gettext("POOL: 'old_cpu_list' size mismatch: %1$d vs "
273		    "%2$d\n"), nelem, old_total);
274		error = RCM_FAILURE;
275		goto out;
276	}
277	if ((error = nvlist_lookup_int32_array(nvl, "new_cpu_list",
278	    &new_cpu_list, &nelem)) != 0) {
279		rcm_log_message(RCM_ERROR,
280		    gettext("POOL: 'new_cpu_list' not found in nvlist: %s\n"),
281		    strerror(error));
282		error = RCM_FAILURE;
283		goto out;
284	}
285	if (nelem != new_total) {
286		rcm_log_message(RCM_ERROR,
287		    gettext("POOL: 'new_cpu_list' size mismatch: %1$d vs "
288		    "%2$d\n"), nelem, new_total);
289		error = RCM_FAILURE;
290		goto out;
291	}
292
293	for (i = 0, removed_total = 0; i < old_total; i++) {
294		for (j = 0; j < new_total; j++)
295			if (old_cpu_list[i] == new_cpu_list[j])
296				break;
297		if (j == new_total) /* not found in new_cpu_list */
298			removed_list[removed_total++] = old_cpu_list[i];
299	}
300	removed_list[removed_total] = -1;
301
302	if (removed_total != (old_total - new_total)) {
303		rcm_log_message(RCM_ERROR,
304		    gettext("POOL: error finding removed cpu list\n"));
305		error = RCM_FAILURE;
306		goto out;
307	}
308	if ((pvals[0] = pool_value_alloc()) == NULL) {
309		rcm_log_message(RCM_ERROR, gettext("POOL: pool_value_alloc"
310		    " failed: %s\n"), strerror(errno));
311		error = RCM_FAILURE;
312		goto out;
313	}
314	/*
315	 * Look for resources with "'type' = 'pset'"
316	 */
317	(void) pool_value_set_name(pvals[0], "type");
318	(void) pool_value_set_string(pvals[0], "pset");
319	if ((res = pool_query_resources(conf, &nelem, pvals)) == NULL) {
320		rcm_log_message(RCM_ERROR,
321		    gettext("POOL: No psets found in configuration\n"));
322		pool_value_free(pvals[0]);
323		error =	 RCM_FAILURE;
324		goto out;
325	}
326	pool_value_free(pvals[0]);
327	for (i = 0; res[i] != NULL; i++)
328		/*
329		 * Ask each pset if removing these cpus would cause it to go
330		 * below it's minimum value.
331		 */
332		if (pool_check_pset(conf, res[i], removed_list, errorp) < 0) {
333			error = RCM_FAILURE;
334			break;
335		}
336	free(res);
337out:
338	if (removed_list)
339		free(removed_list);
340	if (conf) {
341		(void) pool_conf_close(conf);
342		pool_conf_free(conf);
343	}
344
345	/*
346	 * Set the error string if not already set.
347	 */
348	if (error != RCM_SUCCESS && *errorp == NULL)
349		*errorp = strdup(generic_error);
350	return (error);
351}
352
353/*
354 * Returns RCM_SUCCESS in a number of error cases, since RCM_FAILURE would
355 * mean that the capacity change would be disallowed by this module,
356 * which is not what we mean.
357 */
358static int
359pool_request_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
360    uint_t flags, nvlist_t *nvlist, char **errorp, rcm_info_t **dependent_info)
361{
362	int i;
363
364	*errorp = NULL;
365	rcm_log_message(RCM_TRACE1,
366	    "POOL: requesting capacity change for: %s (flag: %d)\n",
367	    rsrcname, flags);
368	if (flags & RCM_FORCE) {
369		rcm_log_message(RCM_TRACE1,
370		    "POOL: Allowing forced operation to pass through...\n");
371		return (RCM_SUCCESS);
372	}
373	for (i = 0; registrations[i].rsrc != NULL; i++) {
374		if (strcmp(rsrcname, registrations[i].rsrc) == 0) {
375			return ((*registrations[i].capacity_change_cb)(nvlist,
376			    errorp));
377		}
378	}
379
380	return (RCM_SUCCESS);
381}
382
383static int
384pool_notify_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
385    uint_t flags, nvlist_t *nvlist, char **info, rcm_info_t **dependent_info)
386{
387	rcm_log_message(RCM_TRACE1,
388	    "POOL: notifying capacity change for: %s (flags: %d)\n",
389	    rsrcname, flags);
390	return (RCM_SUCCESS);
391}
392
393static int
394pool_register(rcm_handle_t *hdl)
395{
396	int i;
397
398	rcm_log_message(RCM_TRACE1, "Registering Pools RCM module\n");
399	if (registered)
400		return (RCM_SUCCESS);
401	registered++;
402	for (i = 0; registrations[i].rsrc != NULL; i++) {
403		if (rcm_register_capacity(hdl, (char *)registrations[i].rsrc,
404		    0, NULL) != RCM_SUCCESS) {
405			rcm_log_message(RCM_ERROR,
406			    gettext("POOL: failed to register capacity "
407			    "change for '%s'\n"),
408			    registrations[i].rsrc);
409		}
410	}
411	return (RCM_SUCCESS);
412}
413
414static int
415pool_unregister(rcm_handle_t *hdl)
416{
417	int i;
418
419	rcm_log_message(RCM_TRACE1, "Pools RCM un-registered\n");
420	if (registered) {
421		registered--;
422		for (i = 0; registrations[i].rsrc != NULL; i++)
423			if (rcm_unregister_capacity(hdl,
424			    (char *)registrations[i].rsrc, 0) != RCM_SUCCESS) {
425				rcm_log_message(RCM_ERROR,
426				    gettext("POOL: unregister capacity failed "
427				    "for '%s'\n"), registrations[i].rsrc);
428			}
429	}
430	return (RCM_SUCCESS);
431}
432
433static int
434pool_get_info(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
435    char **infop, char **errorp, nvlist_t *props, rcm_info_t **dependent_info)
436{
437	rcm_log_message(RCM_TRACE1, "POOL: RCM get info: '%s'\n", rsrcname);
438	if ((*infop = strdup(gettext("POOL: In use by pool(4) subsystem")))
439	    == NULL) {
440		rcm_log_message(RCM_ERROR, gettext("POOL: get info(%s) malloc "
441		    "failure\n"), rsrcname);
442		*infop = NULL;
443		*errorp = NULL;
444		return (RCM_FAILURE);
445	}
446	return (RCM_SUCCESS);
447}
448
449
450static int
451pool_request_suspend(rcm_handle_t *hdl, char *rsrcname,
452    id_t id, timespec_t *time, uint_t flags, char **reason,
453    rcm_info_t **dependent_info)
454{
455	rcm_log_message(RCM_TRACE1,
456	    "POOL: requesting suspend for: %s\n", rsrcname);
457	return (RCM_SUCCESS);
458}
459
460static int
461pool_notify_resume(rcm_handle_t *hdl, char *rsrcname,
462    id_t pid, uint_t flags, char **reason, rcm_info_t **dependent_info)
463{
464	rcm_log_message(RCM_TRACE1,
465	    "POOL: notifying resume of: %s\n", rsrcname);
466	return (RCM_SUCCESS);
467}
468
469static int
470pool_request_offline(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
471    char **reason, rcm_info_t **dependent_info)
472{
473	rcm_log_message(RCM_TRACE1,
474	    "POOL: requesting offline for: %s\n", rsrcname);
475	return (RCM_SUCCESS);
476}
477
478static int
479pool_notify_online(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flags,
480    char **reason, rcm_info_t **dependent_info)
481{
482	rcm_log_message(RCM_TRACE1,
483	    "POOL: notifying online for: %s\n", rsrcname);
484	return (RCM_SUCCESS);
485}
486static int
487pool_notify_remove(rcm_handle_t *hdl, char *rsrcname, id_t pid,
488    uint_t flag, char **reason, rcm_info_t **dependent_info)
489{
490	rcm_log_message(RCM_TRACE1,
491	    "POOL: notifying removal of: %s\n", rsrcname);
492	return (RCM_SUCCESS);
493}
494