/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */

/*
 * DR memory support routines for the ACPI-based x86 DR driver.
 */

#include <sys/note.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/kobj.h>
#include <sys/conf.h>
#include <sys/dditypes.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/machsystm.h>
#include <sys/promif.h>
#include <sys/lgrp.h>
#include <sys/mem_config.h>
#include <vm/seg_kmem.h>
#include <vm/page.h>

#include <sys/dr.h>
#include <sys/dr_util.h>
#include <sys/drmach.h>

extern struct memlist	*phys_install;

/* TODO: push this reference below drmach line */
extern int		kcage_on;

/* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
static char *dr_ie_fmt = "dr_mem_acpi.c %d";

static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);

/*
 * dr_mem_unit_t.sbm_flags
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
#define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
#define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
#define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */

/* helper macros */
#define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
#define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
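/*
 * Example, assuming the common x86 4 KB base page (PAGESHIFT == 12):
 * _ptob64(0x100) == 0x100000, i.e. page 256 starts at 1 MB, and
 * _b64top(0x100000) == 0x100.  The uint64_t cast keeps the shift in
 * 64-bit arithmetic so large page numbers do not truncate.
 */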

static struct memlist *
dr_get_memlist(dr_mem_unit_t *mp)
{
	struct memlist	*mlist = NULL;
	sbd_error_t	*err;
	static fn_t	f = "dr_get_memlist";

	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);

	/*
	 * Return the cached memlist, if present.
	 * This memlist will be present following an
	 * unconfigure (a.k.a. detach) of this memunit.
	 * It should only be used in the case where a configure
	 * is bringing this memunit back in without going
	 * through the disconnect and connect states.
	 */
	if (mp->sbm_mlist) {
		PR_MEM("%s: found cached memlist\n", f);

		mlist = memlist_dup(mp->sbm_mlist);
	} else {
		uint64_t basepa = _ptob64(mp->sbm_basepfn);

		/* attempt to construct a memlist using phys_install */

		/* round down to slice base address */
		basepa &= ~mp->sbm_alignment_mask;
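		/*
		 * Illustrative values: with a 1 GB slice the alignment
		 * mask is 0x3fffffff, so a base PA of 0x42800000 rounds
		 * down to the slice base 0x40000000.
		 */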

		/* get a copy of phys_install to edit */
		memlist_read_lock();
		mlist = memlist_dup(phys_install);
		memlist_read_unlock();

		/* trim lower irrelevant span */
		if (mlist)
			mlist = memlist_del_span(mlist, 0ull, basepa);

		/* trim upper irrelevant span */
		if (mlist) {
			uint64_t endpa, toppa;

			toppa = mp->sbm_slice_top;
			endpa = _ptob64(physmax + 1);
			if (endpa > toppa)
				mlist = memlist_del_span(
				    mlist, toppa,
				    endpa - toppa);
		}

		if (mlist) {
			/* successfully built a memlist */
			PR_MEM("%s: derived memlist from phys_install\n", f);
		}

		/* if no mlist yet, try platform layer */
		if (!mlist) {
			err = drmach_mem_get_memlist(
			    mp->sbm_cm.sbdev_id, &mlist);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				mlist = NULL; /* paranoia */
			}
		}
	}

	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(mlist);

	return (mlist);
}

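/*
 * Memory release (delete) is not implemented in this driver;
 * dr_release_mem is an intentional no-op, and the detach entry
 * points farther below fail unconditionally.
 */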
/*ARGSUSED*/
void
dr_release_mem(dr_common_unit_t *cp)
{
}

void
dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
	struct memlist	*ml, *mc;
	sbd_error_t	*err;
	static fn_t	f = "dr_attach_mem";
	uint64_t	dr_physmax;

	PR_MEM("%s...\n", f);

	dr_lock_status(hp->h_bd);
	err = drmach_configure(cp->sbdev_id, 0);
	dr_unlock_status(hp->h_bd);
	if (err) {
		DRERR_SET_C(&cp->sbdev_error, &err);
		return;
	}

	ml = dr_get_memlist(mp);

	/* Trim spans above plat_dr_physmax or kpm_size, whichever is lower */
	dr_physmax = plat_dr_physmax ? ptob(plat_dr_physmax) : UINT64_MAX;
	if (kpm_size < dr_physmax)
		dr_physmax = kpm_size;
	ml = memlist_del_span(ml, dr_physmax, UINT64_MAX - dr_physmax);

	for (mc = ml; mc; mc = mc->ml_next) {
		int		rv;
		sbd_error_t	*err;

		rv = kphysm_add_memory_dynamic(
		    (pfn_t)btop(mc->ml_address),
		    (pgcnt_t)btop(mc->ml_size));
		if (rv != KPHYSM_OK) {
			/*
			 * translate kphysm error and
			 * store in devlist error
			 */
			switch (rv) {
			case KPHYSM_ERESOURCE:
				rv = ESBD_NOMEM;
				break;

			case KPHYSM_EFAULT:
				rv = ESBD_FAULT;
				break;

			default:
				rv = ESBD_INTERNAL;
				break;
			}

			if (rv == ESBD_INTERNAL) {
				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
			} else {
				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
			}
			break;
		}

		err = drmach_mem_add_span(
		    mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			break;
		}
	}

	memlist_delete(ml);
	dr_init_mem_unit_data(mp);

	/* back out if configure failed */
	if (mp->sbm_cm.sbdev_error != NULL) {
		dr_lock_status(hp->h_bd);
		err = drmach_unconfigure(cp->sbdev_id, 0);
		if (err)
			sbd_err_clear(&err);
		dr_unlock_status(hp->h_bd);
	}
}

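/*
 * Memory detach is likewise unsupported; this stub leaves the unit
 * untouched and relies on dr_pre_detach_mem() below rejecting the
 * operation before this entry point is reached.
 */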
/*ARGSUSED*/
void
dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
{
}

/*
 * This routine acts as a wrapper for kphysm_del_span_query in order to
 * support potential memory holes in a board's physical address space.
 * It would call kphysm_del_span_query for each node in a memlist and
 * accumulate the results in *mp.  Since memory delete is not supported
 * by this driver, the current implementation simply zeroes *mp,
 * reporting no managed and no non-relocatable pages.
 */
static int
dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
{
	int		rv = 0;

	if (mlist == NULL)
		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");

	mp->phys_pages = 0;
	mp->managed = 0;
	mp->nonrelocatable = 0;
	mp->first_nonrelocatable = 0;
	mp->last_nonrelocatable = 0;

	return (rv);
}

/*
 * NOTE: This routine is only partially smart about multiple
 *	 mem-units.  Need to make mem-status structure smart
 *	 about them also.
 */
int
dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
{
	int		m, mix;
	memquery_t	mq;
	dr_board_t	*bp;
	dr_mem_unit_t	*mp;
	sbd_mem_stat_t	*msp;
	static fn_t	f = "dr_mem_status";

	bp = hp->h_bd;
	devset &= DR_DEVS_PRESENT(bp);

	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
		int		rv;
		sbd_error_t	*err;
		drmach_status_t	pstat;
		dr_mem_unit_t	*p_mp;

		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
			continue;

		mp = dr_get_mem_unit(bp, m);

		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
			/* present, but not fully initialized */
			continue;
		}

		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
			continue;

		/* fetch platform status */
		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
		if (err) {
			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
			continue;
		}

		msp = &dsp->d_mem;
		bzero((caddr_t)msp, sizeof (*msp));

		(void) strlcpy(msp->ms_cm.c_id.c_name, pstat.type,
		    sizeof (msp->ms_cm.c_id.c_name));
		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
		msp->ms_cm.c_id.c_unit = mp->sbm_cm.sbdev_unum;
		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;

		msp->ms_totpages = mp->sbm_npages;
		msp->ms_basepfn = mp->sbm_basepfn;
		msp->ms_pageslost = mp->sbm_pageslost;
		msp->ms_cage_enabled = kcage_on;

		if (mp->sbm_flags & DR_MFLAG_RESERVED)
			p_mp = mp->sbm_peer;
		else
			p_mp = NULL;

		if (p_mp == NULL) {
			msp->ms_peer_is_target = 0;
			msp->ms_peer_ap_id[0] = '\0';
		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
			char *minor;

			/*
			 * b_dip doesn't have to be held for ddi_pathname()
			 * because the board struct (dr_board_t) will be
			 * destroyed before b_dip detaches.
			 */
			(void) ddi_pathname(bp->b_dip, path);
			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');

			(void) snprintf(msp->ms_peer_ap_id,
			    sizeof (msp->ms_peer_ap_id), "%s%s",
			    path, (minor == NULL) ? "" : minor);

			kmem_free(path, MAXPATHLEN);

			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
				msp->ms_peer_is_target = 1;
		}

		/*
		 * kphysm_del_span_query can report non-reloc pages = total
		 * pages for memory that is not yet configured
		 */
		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
			struct memlist *ml;

			ml = dr_get_memlist(mp);
			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
			memlist_delete(ml);

			if (rv == KPHYSM_OK) {
				msp->ms_managed_pages = mq.managed;
				msp->ms_noreloc_pages = mq.nonrelocatable;
				msp->ms_noreloc_first =
				    mq.first_nonrelocatable;
				msp->ms_noreloc_last =
				    mq.last_nonrelocatable;
				msp->ms_cm.c_sflags = 0;
				if (mq.nonrelocatable &&
				    drmach_copy_rename_need_suspend(
				    mp->sbm_cm.sbdev_id)) {
					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
					    msp->ms_cm.c_sflags);
				}
			} else {
				PR_MEM("%s: kphysm_del_span_query() = %d\n",
				    f, rv);
			}
		}

		/*
		 * Check source unit state during copy-rename
		 */
		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;

		mix++;
		dsp++;
	}

	return (mix);
}

/*ARGSUSED*/
int
dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	int		err_flag = 0;
	int		d;
	sbd_error_t	*err;
	static fn_t	f = "dr_pre_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		dr_state_t	state;

		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);

		state = mp->sbm_cm.sbdev_state;
		switch (state) {
		case DR_STATE_UNCONFIGURED:
			PR_MEM("%s: recovering from UNCONFIG for %s\n",
			    f, mp->sbm_cm.sbdev_path);

			/* use memlist cached by dr_post_detach_mem_unit */
			ASSERT(mp->sbm_mlist != NULL);
			PR_MEM("%s: re-configuring cached memlist for %s:\n",
			    f, mp->sbm_cm.sbdev_path);
			PR_MEMLIST_DUMP(mp->sbm_mlist);

			/* kphysm del handle should have been freed */
			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

			/*FALLTHROUGH*/

		case DR_STATE_CONNECTED:
			PR_MEM("%s: reprogramming mem hardware on %s\n",
			    f, mp->sbm_cm.sbdev_bp->b_path);

			PR_MEM("%s: enabling %s\n",
			    f, mp->sbm_cm.sbdev_path);

			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
			if (err) {
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
				err_flag = 1;
			}
			break;

		default:
			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
			err_flag = 1;
			break;
		}

		/* exit for loop if error encountered */
		if (err_flag)
			break;
	}

	return (err_flag ? -1 : 0);
}

/*ARGSUSED*/
int
dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	int		d;
	static fn_t	f = "dr_post_attach_mem";

	PR_MEM("%s...\n", f);

	for (d = 0; d < devnum; d++) {
		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
		struct memlist	*mlist, *ml;

		mlist = dr_get_memlist(mp);

		/*
		 * Verify the memory really did successfully attach
		 * by checking for its existence in phys_install.
		 */
		memlist_read_lock();
		if (memlist_intersect(phys_install, mlist) == 0) {
			memlist_read_unlock();

			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);

			PR_MEM("%s: %s memlist not in phys_install",
			    f, mp->sbm_cm.sbdev_path);

			memlist_delete(mlist);
			continue;
		}
		memlist_read_unlock();

		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
			sbd_error_t *err;

			err = drmach_mem_add_span(
			    mp->sbm_cm.sbdev_id,
			    ml->ml_address,
			    ml->ml_size);
			if (err)
				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		}

		memlist_delete(mlist);

		/*
		 * Destroy the cached memlist, if any.
		 * There will be a cached memlist in sbm_mlist if
		 * this board is being configured directly after
		 * an unconfigure.
		 * To support this transition, dr_post_detach_mem
		 * left a copy of the last known memlist in sbm_mlist.
		 * This memlist could differ from one derived from
		 * hardware if, while this memunit was last configured,
		 * the system detected and deleted bad pages from
		 * phys_install.  The location of those bad pages
		 * will be reflected in the cached memlist.
		 */
		if (mp->sbm_mlist) {
			memlist_delete(mp->sbm_mlist);
			mp->sbm_mlist = NULL;
		}
	}

	return (0);
}

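/*
 * The remaining detach/release/disconnect/cancel entry points fail
 * unconditionally (return -1): memory removal is not supported by
 * this driver, so the DR framework backs out of any such request.
 */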
/*ARGSUSED*/
int
dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	return (-1);
}

/*ARGSUSED*/
int
dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	return (-1);
}

/*
 * A successful return from this function would leave the memory
 * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
 * and waiting.  This routine's job is to select the memory that
 * actually has to be released (detached), which is not necessarily
 * the same memory node that was passed in via devlist[],
 * i.e. a copy-rename may be needed.
 */
/*ARGSUSED*/
int
dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
{
	return (-1);
}

/*ARGSUSED*/
void
dr_release_mem_done(dr_common_unit_t *cp)
{
}

/*ARGSUSED*/
int
dr_disconnect_mem(dr_mem_unit_t *mp)
{
	return (-1);
}

/*ARGSUSED*/
int
dr_cancel_mem(dr_mem_unit_t *s_mp)
{
	return (-1);
}

void
dr_init_mem_unit(dr_mem_unit_t *mp)
{
	dr_state_t	new_state;

	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
		new_state = DR_STATE_CONFIGURED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
		new_state = DR_STATE_CONNECTED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
		new_state = DR_STATE_OCCUPIED;
	} else {
		new_state = DR_STATE_EMPTY;
	}

	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
		dr_init_mem_unit_data(mp);

	/* delay transition until fully initialized */
	dr_device_transition(&mp->sbm_cm, new_state);
}

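/*
 * Populate the mem-unit's base PFN, page count, and slice geometry
 * from the platform layer via drmach_mem_get_info().
 */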
static void
dr_init_mem_unit_data(dr_mem_unit_t *mp)
{
	drmachid_t	id = mp->sbm_cm.sbdev_id;
	drmach_mem_info_t	minfo;
	sbd_error_t	*err;
	static fn_t	f = "dr_init_mem_unit_data";

	PR_MEM("%s...\n", f);

	/* a little sanity checking */
	ASSERT(mp->sbm_peer == NULL);
	ASSERT(mp->sbm_flags == 0);

	if ((err = drmach_mem_get_info(id, &minfo)) != NULL) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		return;
	}
	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
	mp->sbm_npages = _b64top(minfo.mi_size);
	mp->sbm_alignment_mask = minfo.mi_alignment_mask;
	mp->sbm_slice_base = minfo.mi_slice_base;
	mp->sbm_slice_top = minfo.mi_slice_top;
	mp->sbm_slice_size = minfo.mi_slice_size;

	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
}