mcamd_drv.c revision 8391:e7d7cef504a4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/conf.h>
28#include <sys/ddi.h>
29#include <sys/ddifm.h>
30#include <sys/sunddi.h>
31#include <sys/sunndi.h>
32#include <sys/stat.h>
33#include <sys/modctl.h>
34#include <sys/types.h>
35#include <sys/cpuvar.h>
36#include <sys/cmn_err.h>
37#include <sys/kmem.h>
38#include <sys/cred.h>
39#include <sys/ksynch.h>
40#include <sys/rwlock.h>
41#include <sys/pghw.h>
42#include <sys/open.h>
43#include <sys/policy.h>
44#include <sys/x86_archext.h>
45#include <sys/cpu_module.h>
46#include <qsort.h>
47#include <sys/pci_cfgspace.h>
48#include <sys/mc.h>
49#include <sys/mc_amd.h>
50#include <mcamd.h>
51#include <mcamd_dimmcfg.h>
52#include <mcamd_pcicfg.h>
53#include <mcamd_api.h>
54#include <sys/fm/cpu/AMD.h>
55
56/*
57 * Set to prevent mc-amd from attaching.
58 */
59int mc_no_attach = 0;
60
/*
 * Of the 754/939/940 packages, only socket 940 supports quadrank registered
 * dimms.  Unfortunately, no memory-controller register indicates the
 * presence of quadrank dimm support or of such dimms themselves (i.e.,
 * in terms of number of slots per cpu, and chip-select lines per slot).
 * The following may be set in /etc/system to indicate the presence of
 * quadrank support on a motherboard.
 *
 * There is no need to set this for F(1207) and S1g1.
 */
70int mc_quadranksupport = 0;
71
/*
 * Global list of memory-controller state structures discovered at
 * attach time (linked via mc_next), with mc_last apparently tracking
 * the list tail for appends -- TODO confirm against the attach path.
 * mc_lock protects the list and per-mc snapshot state (see the
 * RW_LOCK_HELD assertions in the snapshot routines below).
 */
mc_t *mc_list, *mc_last;
krwlock_t mc_lock;
int mc_hold_attached = 1;	/* tunable: keep driver instances attached */

#define	MAX(m, n) ((m) >= (n) ? (m) : (n))
#define	MIN(m, n) ((m) <= (n) ? (m) : (n))
78
79/*
80 * The following tuneable is used to determine the DRAM scrubbing rate.
81 * The values range from 0x00-0x16 as described in the BKDG.  Zero
82 * disables DRAM scrubbing.  Values above zero indicate rates in descending
83 * order.
84 *
85 * The default value below is used on several Sun systems.  In the future
86 * this code should assign values dynamically based on memory sizing.
87 */
88uint32_t mc_scrub_rate_dram = 0xd;	/* 64B every 163.8 us; 1GB per 45 min */
89
90enum {
91	MC_SCRUB_BIOSDEFAULT,	/* retain system default value */
92	MC_SCRUB_FIXED,		/* assign mc_scrub_rate_* values */
93	MC_SCRUB_MAX		/* assign max of system and tunables */
94} mc_scrub_policy = MC_SCRUB_MAX;
95
96static void
97mc_snapshot_destroy(mc_t *mc)
98{
99	ASSERT(RW_LOCK_HELD(&mc_lock));
100
101	if (mc->mc_snapshot == NULL)
102		return;
103
104	kmem_free(mc->mc_snapshot, mc->mc_snapshotsz);
105	mc->mc_snapshot = NULL;
106	mc->mc_snapshotsz = 0;
107	mc->mc_snapshotgen++;
108}
109
110static int
111mc_snapshot_update(mc_t *mc)
112{
113	ASSERT(RW_LOCK_HELD(&mc_lock));
114
115	if (mc->mc_snapshot != NULL)
116		return (0);
117
118	if (nvlist_pack(mc->mc_nvl, &mc->mc_snapshot, &mc->mc_snapshotsz,
119	    NV_ENCODE_XDR, KM_SLEEP) != 0)
120		return (-1);
121
122	return (0);
123}
124
125static mc_t *
126mc_lookup_by_chipid(int chipid)
127{
128	mc_t *mc;
129
130	ASSERT(RW_LOCK_HELD(&mc_lock));
131
132	for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
133		if (mc->mc_props.mcp_num  == chipid)
134			return (mc);
135	}
136
137	return (NULL);
138}
139
140/*
141 * Read config register pairs into the two arrays provided on the given
142 * handle and at offsets as follows:
143 *
144 *	Index	Array r1 offset			Array r2 offset
145 *	0	r1addr				r2addr
146 *	1	r1addr + incr			r2addr + incr
147 *	2	r1addr + 2 * incr		r2addr + 2 * incr
148 *	...
149 *	n - 1	r1addr + (n - 1) * incr		r2addr + (n - 1) * incr
150 *
151 * The number of registers to read into the r1 array is r1n; the number
152 * for the r2 array is r2n.
153 */
154static void
155mc_prop_read_pair(mc_pcicfg_hdl_t cfghdl, uint32_t *r1, off_t r1addr,
156    int r1n, uint32_t *r2, off_t r2addr, int r2n, off_t incr)
157{
158	int i;
159
160	for (i = 0; i < MAX(r1n, r2n); i++, r1addr += incr, r2addr += incr) {
161		if (i < r1n)
162			r1[i] = mc_pcicfg_get32(cfghdl, r1addr);
163		if (i < r2n)
164			r2[i] = mc_pcicfg_get32(cfghdl, r2addr);
165	}
166}
167
168#define	NSKT	6
169
170static void
171mc_nvl_add_socket(nvlist_t *nvl, mc_t *mc)
172{
173	const char *s = "Unknown";
174	int i;
175
176	static const struct {
177		uint32_t type;
178		const char *name;
179	} sktnames[NSKT] = {
180		{ X86_SOCKET_754, "Socket 754" },
181		{ X86_SOCKET_939, "Socket 939" },
182		{ X86_SOCKET_940, "Socket 940" },
183		{ X86_SOCKET_AM2, "Socket AM2" },
184		{ X86_SOCKET_F1207, "Socket F(1207)" },
185		{ X86_SOCKET_S1g1, "Socket S1g1" },
186	};
187
188	for (i = 0; i < NSKT; i++) {
189		if (mc->mc_socket == sktnames[i].type) {
190			s = sktnames[i].name;
191			break;
192		}
193	}
194
195	(void) nvlist_add_string(nvl, "socket", s);
196}
197
/*
 * Return the EccEn field from the cached NB Configuration register;
 * nonzero means ECC is enabled on this node.  The field is accessed
 * via revision-specific macros: pre-F (revs B-E) versus rev F/G.
 */
static uint32_t
mc_ecc_enabled(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;

	MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;

	return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
	    MCREG_FIELD_F_preF(&nbcfg, EccEn) :
	    MCREG_FIELD_F_revFG(&nbcfg, EccEn));
}
210
/*
 * Return the ChipKillEccEn field from the cached NB Configuration
 * register; nonzero means ChipKill ECC is enabled.  As with
 * mc_ecc_enabled, the field accessor depends on the mc revision.
 */
static uint32_t
mc_ck_enabled(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;

	MCREG_VAL32(&nbcfg) = mc->mc_cfgregs.mcr_nbcfg;

	return (MC_REV_MATCH(rev, MC_F_REVS_BCDE) ?
	    MCREG_FIELD_F_preF(&nbcfg, ChipKillEccEn) :
	    MCREG_FIELD_F_revFG(&nbcfg, ChipKillEccEn));
}
223
224static void
225mc_nvl_add_ecctype(nvlist_t *nvl, mc_t *mc)
226{
227	(void) nvlist_add_string(nvl, "ecc-type", mc_ecc_enabled(mc) ?
228	    (mc_ck_enabled(mc) ? "ChipKill 128/16" : "Normal 64/8") : "None");
229}
230
231static void
232mc_nvl_add_prop(nvlist_t *nvl, void *node, mcamd_propcode_t code, int reqval)
233{
234	int valfound;
235	uint64_t value;
236	const char *name = mcamd_get_propname(code);
237
238	valfound = mcamd_get_numprop(NULL, (mcamd_node_t *)node, code, &value);
239
240	ASSERT(name != NULL && valfound);
241	if (name != NULL && valfound && (!reqval || value != MC_INVALNUM))
242		(void) nvlist_add_uint64(nvl, name, value);
243}
244
/*
 * Add a "cslist" nvlist array to the mc nvlist, one member per
 * chip-select on this controller.  Each member carries the cs number,
 * base, mask and size, and - where dimm configuration was successfully
 * derived - the associated dimm number(s) and chip-select line names.
 */
static void
mc_nvl_add_cslist(nvlist_t *mcnvl, mc_t *mc)
{
	mc_cs_t *mccs = mc->mc_cslist;
	nvlist_t *cslist[MC_CHIP_NCS];
	int nelem, i;

	for (nelem = 0; mccs != NULL; mccs = mccs->mccs_next, nelem++) {
		nvlist_t **csp = &cslist[nelem];
		char csname[MCDCFG_CSNAMELEN];

		(void) nvlist_alloc(csp, NV_UNIQUE_NAME, KM_SLEEP);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_NUM, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_BASE_ADDR, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_MASK, 0);
		mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_SIZE, 0);

		/*
		 * It is possible for an mc_cs_t not to have associated
		 * DIMM info if mcdcfg_lookup failed.
		 */
		if (mccs->mccs_csl[0] != NULL) {
			mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM1, 1);
			mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[0], csname,
			    sizeof (csname));
			(void) nvlist_add_string(*csp, "dimm1-csname", csname);
		}

		/* Second cs line exists only in 128-bit (two-dimm) configs */
		if (mccs->mccs_csl[1] != NULL) {
			mc_nvl_add_prop(*csp, mccs, MCAMD_PROP_CSDIMM2, 1);
			mcdcfg_csname(mc->mc_socket, mccs->mccs_csl[1], csname,
			    sizeof (csname));
			(void) nvlist_add_string(*csp, "dimm2-csname", csname);
		}
	}

	/* Add cslist nvlist array even if zero members */
	(void) nvlist_add_nvlist_array(mcnvl, "cslist", cslist, nelem);
	for (i = 0; i < nelem; i++)
		nvlist_free(cslist[i]);
}
286
287static void
288mc_nvl_add_dimmlist(nvlist_t *mcnvl, mc_t *mc)
289{
290	nvlist_t *dimmlist[MC_CHIP_NDIMM];
291	mc_dimm_t *mcd;
292	int nelem, i;
293
294	for (nelem = 0, mcd = mc->mc_dimmlist; mcd != NULL;
295	    mcd = mcd->mcd_next, nelem++) {
296		nvlist_t **dimmp = &dimmlist[nelem];
297		uint64_t csnums[MC_CHIP_DIMMRANKMAX];
298		char csname[4][MCDCFG_CSNAMELEN];
299		char *csnamep[4];
300		int ncs = 0;
301
302		(void) nvlist_alloc(dimmp, NV_UNIQUE_NAME, KM_SLEEP);
303
304		mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_NUM, 1);
305		mc_nvl_add_prop(*dimmp, mcd, MCAMD_PROP_SIZE, 1);
306
307		for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
308			if (mcd->mcd_cs[i] != NULL) {
309				csnums[ncs] =
310				    mcd->mcd_cs[i]->mccs_props.csp_num;
311				mcdcfg_csname(mc->mc_socket, mcd->mcd_csl[i],
312				    csname[ncs], MCDCFG_CSNAMELEN);
313				csnamep[ncs] = csname[ncs];
314				ncs++;
315			}
316		}
317
318		(void) nvlist_add_uint64_array(*dimmp, "csnums", csnums, ncs);
319		(void) nvlist_add_string_array(*dimmp, "csnames", csnamep, ncs);
320	}
321
322	/* Add dimmlist nvlist array even if zero members */
323	(void) nvlist_add_nvlist_array(mcnvl, "dimmlist", dimmlist, nelem);
324	for (i = 0; i < nelem; i++)
325		nvlist_free(dimmlist[i]);
326}
327
/*
 * Add an "htconfig" nvlist describing this node's HyperTransport
 * configuration: node/unit id fields from the cached function 0
 * registers, plus per-node broadcast/response/request routing tables
 * when the coherent node count fits in our routing array.
 */
static void
mc_nvl_add_htconfig(nvlist_t *mcnvl, mc_t *mc)
{
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	union mcreg_htroute *htrp = (union mcreg_htroute *)&mcr->mcr_htroute[0];
	union mcreg_nodeid *nip = (union mcreg_nodeid *)&mcr->mcr_htnodeid;
	union mcreg_unitid *uip = (union mcreg_unitid *)&mcr->mcr_htunitid;
	int ndcnt = HT_COHERENTNODES(nip);
	uint32_t BCRte[MC_CHIP_MAXNODES];
	uint32_t RPRte[MC_CHIP_MAXNODES];
	uint32_t RQRte[MC_CHIP_MAXNODES];
	nvlist_t *nvl;
	int i;

	(void) nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);

	/* Node ID register fields */
	(void) nvlist_add_uint32(nvl, "NodeId", MCREG_FIELD_CMN(nip, NodeId));
	(void) nvlist_add_uint32(nvl, "CoherentNodes", HT_COHERENTNODES(nip));
	(void) nvlist_add_uint32(nvl, "SbNode", MCREG_FIELD_CMN(nip, SbNode));
	(void) nvlist_add_uint32(nvl, "LkNode", MCREG_FIELD_CMN(nip, LkNode));
	(void) nvlist_add_uint32(nvl, "SystemCoreCount",
	    HT_SYSTEMCORECOUNT(nip));

	/* Unit ID register fields */
	(void) nvlist_add_uint32(nvl, "C0Unit", MCREG_FIELD_CMN(uip, C0Unit));
	(void) nvlist_add_uint32(nvl, "C1Unit", MCREG_FIELD_CMN(uip, C1Unit));
	(void) nvlist_add_uint32(nvl, "McUnit", MCREG_FIELD_CMN(uip, McUnit));
	(void) nvlist_add_uint32(nvl, "HbUnit", MCREG_FIELD_CMN(uip, HbUnit));
	(void) nvlist_add_uint32(nvl, "SbLink", MCREG_FIELD_CMN(uip, SbLink));

	/* Skip routing tables entirely if they would overrun our arrays */
	if (ndcnt <= MC_CHIP_MAXNODES) {
		for (i = 0; i < ndcnt; i++, htrp++) {
			BCRte[i] = MCREG_FIELD_CMN(htrp, BCRte);
			RPRte[i] = MCREG_FIELD_CMN(htrp, RPRte);
			RQRte[i] = MCREG_FIELD_CMN(htrp, RQRte);
		}

		(void) nvlist_add_uint32_array(nvl, "BroadcastRoutes",
		    &BCRte[0], ndcnt);
		(void) nvlist_add_uint32_array(nvl, "ResponseRoutes",
		    &RPRte[0], ndcnt);
		(void) nvlist_add_uint32_array(nvl, "RequestRoutes",
		    &RQRte[0], ndcnt);
	}

	(void) nvlist_add_nvlist(mcnvl, "htconfig", nvl);
	nvlist_free(nvl);
}
375
376static nvlist_t *
377mc_nvl_create(mc_t *mc)
378{
379	nvlist_t *mcnvl;
380
381	(void) nvlist_alloc(&mcnvl, NV_UNIQUE_NAME, KM_SLEEP);
382
383	/*
384	 * Since this nvlist is used in populating the topo tree changes
385	 * made here may propogate through to changed property names etc
386	 * in the topo tree.  Some properties in the topo tree will be
387	 * contracted via ARC, so be careful what you change here.
388	 */
389	(void) nvlist_add_uint8(mcnvl, MC_NVLIST_VERSTR, MC_NVLIST_VERS1);
390
391	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_NUM, 0);
392	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_REV, 0);
393	(void) nvlist_add_string(mcnvl, "revname", mc->mc_revname);
394	mc_nvl_add_socket(mcnvl, mc);
395	mc_nvl_add_ecctype(mcnvl, mc);
396
397	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BASE_ADDR, 0);
398	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_LIM_ADDR, 0);
399	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILEN, 0);
400	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ILSEL, 0);
401	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSINTLVFCTR, 0);
402	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_DRAMHOLE_SIZE, 0);
403	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_ACCESS_WIDTH, 0);
404	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_CSBANKMAPREG, 0);
405	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BANKSWZL, 0);
406	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_MOD64MUX, 0);
407	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_SPARECS, 1);
408	mc_nvl_add_prop(mcnvl, mc, MCAMD_PROP_BADCS, 1);
409
410	mc_nvl_add_cslist(mcnvl, mc);
411	mc_nvl_add_dimmlist(mcnvl, mc);
412	mc_nvl_add_htconfig(mcnvl, mc);
413
414	return (mcnvl);
415}
416
/*
 * Link a dimm to its associated chip-selects and chip-select lines.
 * Total the size of all ranks of this dimm.
 *
 * In 128-bit access mode each chip-select spans two dimms, so only
 * half of a chip-select's size is attributed to any one dimm (factor
 * of 2 below).
 */
static void
mc_dimm_csadd(mc_t *mc, mc_dimm_t *mcd, mc_cs_t *mccs, const mcdcfg_csl_t *csl)
{
	int factor = (mc->mc_props.mcp_accwidth == 128) ? 2 : 1;
	uint64_t sz = 0;
	int i;

	/* Skip to first unused rank slot, totalling ranks already linked */
	for (i = 0; i < MC_CHIP_DIMMRANKMAX; i++) {
		if (mcd->mcd_cs[i] == NULL) {
			mcd->mcd_cs[i] = mccs;
			mcd->mcd_csl[i] = csl;
			sz += mccs->mccs_props.csp_size / factor;
			break;
		} else {
			sz += mcd->mcd_cs[i]->mccs_props.csp_size / factor;
		}
	}

	/* A free rank slot must always exist for the new chip-select */
	ASSERT(i != MC_CHIP_DIMMRANKMAX);

	mcd->mcd_size = sz;
}
444
445/*
446 * Create a dimm structure and call to link it to its associated chip-selects.
447 */
448static mc_dimm_t *
449mc_dimm_create(mc_t *mc, uint_t num)
450{
451	mc_dimm_t *mcd = kmem_zalloc(sizeof (mc_dimm_t), KM_SLEEP);
452
453	mcd->mcd_hdr.mch_type = MC_NT_DIMM;
454	mcd->mcd_mc = mc;
455	mcd->mcd_num = num;
456
457	return (mcd);
458}
459
/*
 * The chip-select structure includes an array of dimms associated with
 * that chip-select.  This function fills that array, and also builds
 * the list of all dimms on this memory controller mc_dimmlist.  The
 * caller has filled a structure (rsltp) with all there is to know about
 * the associated dimm(s).
 */
static void
mc_csdimms_create(mc_t *mc, mc_cs_t *mccs, mcdcfg_rslt_t *rsltp)
{
	mc_dimm_t *found[MC_CHIP_DIMMPERCS];
	mc_dimm_t *mcd;
	int nfound = 0;
	int i;

	/*
	 * Has some other chip-select already created this dimm or dimms?
	 * If so then link to the dimm(s) from the mccs_dimm array,
	 * record their topo numbers in the csp_dimmnums array, and link
	 * the dimm(s) to the additional chip-select.
	 */
	for (mcd = mc->mc_dimmlist; mcd != NULL; mcd = mcd->mcd_next) {
		for (i = 0; i < rsltp->ndimm; i++) {
			if (mcd->mcd_num == rsltp->dimm[i].toponum)
				found[nfound++] = mcd;
		}
	}
	/* Either every constituent dimm already exists, or none do */
	ASSERT(nfound == 0 || nfound == rsltp->ndimm);

	for (i = 0; i < rsltp->ndimm; i++) {
		if (nfound == 0) {
			/* First sighting: create and append to mc_dimmlist */
			mcd = mc_dimm_create(mc, rsltp->dimm[i].toponum);
			if (mc->mc_dimmlist == NULL)
				mc->mc_dimmlist = mcd;
			else
				mc->mc_dimmlast->mcd_next = mcd;
			mc->mc_dimmlast = mcd;
		} else {
			/*
			 * NOTE(review): found[i] assumes the discovery loop
			 * above collected dimms in the same order as
			 * rsltp->dimm[] - appears to hold since dimms were
			 * appended in rsltp order; confirm if ndimm > 1.
			 */
			mcd = found[i];
		}

		mccs->mccs_dimm[i] = mcd;
		mccs->mccs_csl[i] = rsltp->dimm[i].cslp;
		mccs->mccs_props.csp_dimmnums[i] = mcd->mcd_num;
		mc_dimm_csadd(mc, mcd, mccs, rsltp->dimm[i].cslp);

	}

	/* The rank number is constant across all constituent dimm(s) */
	mccs->mccs_props.csp_dimmrank = rsltp->dimm[0].cslp->csl_rank;
}
511
/*
 * mc_dimmlist_create is called after we have discovered all enabled
 * (and spare or testfailed on revs F and G) chip-selects on the
 * given memory controller.  For each chip-select we must derive
 * the associated dimms, remembering that a chip-select csbase/csmask
 * pair may be associated with up to 2 chip-select lines (in 128 bit mode)
 * and that any one dimm may be associated with 1, 2, or 4 chip-selects
 * depending on whether it is single, dual or quadrank.
 */
static void
mc_dimmlist_create(mc_t *mc)
{
	union mcreg_dramcfg_hi *drcfghip =
	    (union mcreg_dramcfg_hi *)(&mc->mc_cfgregs.mcr_dramcfghi);
	mc_props_t *mcp = &mc->mc_props;
	uint32_t rev = mcp->mcp_rev;
	mc_cs_t *mccs;
	int r4 = 0, s4 = 0;	/* quadrank registered / quadrank SO-DIMM */

	/*
	 * Are we dealing with quadrank registered dimms?
	 *
	 * For socket 940 we can't tell and we'll assume we're not.
	 * This can be over-ridden by the admin in /etc/system by setting
	 * mc_quadranksupport nonzero.  A possible optimisation in systems
	 * that export an SMBIOS table would be to count the number of
	 * dimm slots per cpu - more than 4 would indicate no quadrank support
	 * and 4 or fewer would indicate that if we see any of the upper
	 * chip-selects enabled then a quadrank dimm is present.
	 *
	 * For socket F(1207) we can check a bit in the dram config high reg.
	 *
	 * Other socket types do not support registered dimms.
	 */
	if (mc->mc_socket == X86_SOCKET_940)
		r4 = mc_quadranksupport != 0;
	else if (mc->mc_socket == X86_SOCKET_F1207)
		r4 = MCREG_FIELD_F_revFG(drcfghip, FourRankRDimm);

	/*
	 * Are we dealing with quadrank SO-DIMMs?  These are supported
	 * in AM2 and S1g1 packages only, but in all rev F/G cases we
	 * can detect their presence via a bit in the dram config high reg.
	 */
	if (MC_REV_MATCH(rev, MC_F_REVS_FG))
		s4 = MCREG_FIELD_F_revFG(drcfghip, FourRankSODimm);

	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
		mcdcfg_rslt_t rslt;

		/*
		 * If lookup fails we will not create dimm structures for
		 * this chip-select.  In the mc_cs_t we will have both
		 * csp_dimmnum members set to MC_INVALNUM and patounum
		 * code will see from those that we do not have dimm info
		 * for this chip-select.
		 */
		if (mcdcfg_lookup(rev, mcp->mcp_mod64mux, mcp->mcp_accwidth,
		    mccs->mccs_props.csp_num, mc->mc_socket,
		    r4, s4, &rslt) < 0)
			continue;

		mc_csdimms_create(mc, mccs, &rslt);
	}
}
577
578static mc_cs_t *
579mc_cs_create(mc_t *mc, uint_t num, uint64_t base, uint64_t mask, size_t sz,
580    int csbe, int spare, int testfail)
581{
582	mc_cs_t *mccs = kmem_zalloc(sizeof (mc_cs_t), KM_SLEEP);
583	mccs_props_t *csp = &mccs->mccs_props;
584	int i;
585
586	mccs->mccs_hdr.mch_type = MC_NT_CS;
587	mccs->mccs_mc = mc;
588	csp->csp_num = num;
589	csp->csp_base = base;
590	csp->csp_mask = mask;
591	csp->csp_size = sz;
592	csp->csp_csbe = csbe;
593	csp->csp_spare = spare;
594	csp->csp_testfail = testfail;
595
596	for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
597		csp->csp_dimmnums[i] = MC_INVALNUM;
598
599	if (spare)
600		mc->mc_props.mcp_sparecs = num;
601
602	return (mccs);
603}
604
605/*
606 * For any cs# of this mc marked TestFail generate an ereport with
607 * resource identifying the associated dimm(s).
608 */
609static void
610mc_report_testfails(mc_t *mc)
611{
612	mc_unum_t unum;
613	mc_cs_t *mccs;
614	int i;
615
616	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
617		if (mccs->mccs_props.csp_testfail) {
618			unum.unum_board = 0;
619			unum.unum_chip = mc->mc_props.mcp_num;
620			unum.unum_mc = 0;
621			unum.unum_chan = MC_INVALNUM;
622			unum.unum_cs = mccs->mccs_props.csp_num;
623			unum.unum_rank = mccs->mccs_props.csp_dimmrank;
624			unum.unum_offset = MCAMD_RC_INVALID_OFFSET;
625			for (i = 0; i < MC_CHIP_DIMMPERCS; i++)
626				unum.unum_dimms[i] = MC_INVALNUM;
627
628			mcamd_ereport_post(mc, FM_EREPORT_CPU_AMD_MC_TESTFAIL,
629			    &unum,
630			    FM_EREPORT_PAYLOAD_FLAGS_CPU_AMD_MC_TESTFAIL);
631		}
632	}
633}
634
635/*
636 * Function 0 - HyperTransport Technology Configuration
637 */
638static void
639mc_mkprops_htcfg(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
640{
641	union mcreg_nodeid nodeid;
642	off_t offset;
643	int i;
644
645	mc->mc_cfgregs.mcr_htnodeid = MCREG_VAL32(&nodeid) =
646	    mc_pcicfg_get32(cfghdl, MC_HT_REG_NODEID);
647
648	mc->mc_cfgregs.mcr_htunitid = mc_pcicfg_get32(cfghdl, MC_HT_REG_UNITID);
649
650	for (i = 0, offset = MC_HT_REG_RTBL_NODE_0;
651	    i < HT_COHERENTNODES(&nodeid);
652	    i++, offset += MC_HT_REG_RTBL_INCR)
653		mc->mc_cfgregs.mcr_htroute[i] = mc_pcicfg_get32(cfghdl, offset);
654}
655
/*
 * Function 1 Configuration - Address Map (see BKDG 3.4.4 DRAM Address Map)
 *
 * Read the Function 1 Address Map for each potential DRAM node.  The Base
 * Address for a node gives the starting system address mapped at that node,
 * and the limit gives the last valid address mapped at that node.  Regions for
 * different nodes should not overlap, unless node-interleaving is enabled.
 * The base register also indicates the node-interleaving settings (IntlvEn).
 * The limit register includes IntlvSel which determines which 4K blocks will
 * be routed to this node and the destination node ID for addresses that fall
 * within the [base, limit] range - this must match the pair number.
 */
static void
mc_mkprops_addrmap(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
{
	union mcreg_drambase basereg;
	union mcreg_dramlimit limreg;
	mc_props_t *mcp = &mc->mc_props;
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	union mcreg_dramhole hole;
	int nodeid = mc->mc_props.mcp_num;

	/* Raw base/limit registers for this node, cached for later use */
	mcr->mcr_drambase = MCREG_VAL32(&basereg) = mc_pcicfg_get32(cfghdl,
	    MC_AM_REG_DRAMBASE_0 + nodeid * MC_AM_REG_DRAM_INCR);

	mcr->mcr_dramlimit = MCREG_VAL32(&limreg) = mc_pcicfg_get32(cfghdl,
	    MC_AM_REG_DRAMLIM_0 + nodeid * MC_AM_REG_DRAM_INCR);

	/*
	 * Derive some "cooked" properties for nodes that have a range of
	 * physical addresses that are read or write enabled and for which
	 * the DstNode matches the node we are attaching.
	 */
	if (MCREG_FIELD_CMN(&limreg, DRAMLimiti) != 0 &&
	    MCREG_FIELD_CMN(&limreg, DstNode) == nodeid &&
	    (MCREG_FIELD_CMN(&basereg, WE) || MCREG_FIELD_CMN(&basereg, RE))) {
		mcp->mcp_base = MC_DRAMBASE(&basereg);
		mcp->mcp_lim = MC_DRAMLIM(&limreg);
		mcp->mcp_ilen = MCREG_FIELD_CMN(&basereg, IntlvEn);
		mcp->mcp_ilsel = MCREG_FIELD_CMN(&limreg, IntlvSel);
	}

	/*
	 * The Function 1 DRAM Hole Address Register tells us which node(s)
	 * own the DRAM space that is hoisted above 4GB, together with the
	 * hole base and offset for this node.  This was introduced in
	 * revision E.
	 */
	if (MC_REV_ATLEAST(mc->mc_props.mcp_rev, MC_F_REV_E)) {
		mcr->mcr_dramhole = MCREG_VAL32(&hole) =
		    mc_pcicfg_get32(cfghdl, MC_AM_REG_HOLEADDR);

		if (MCREG_FIELD_CMN(&hole, DramHoleValid))
			mcp->mcp_dramhole_size = MC_DRAMHOLE_SIZE(&hole);
	}
}
712
/*
 * Read some function 3 parameters via PCI Mechanism 1 accesses (which
 * will serialize any NB accesses).
 *
 * Caches the NB Configuration register and, on revs F and G, the
 * online-spare control register; when a spare swap has completed
 * (SwapDone) the number of the bad swapped-from chip-select is
 * recorded in mcp_badcs.
 */
static void
mc_getmiscctl(mc_t *mc)
{
	uint32_t rev = mc->mc_props.mcp_rev;
	union mcreg_nbcfg nbcfg;
	union mcreg_sparectl sparectl;

	mc->mc_cfgregs.mcr_nbcfg = MCREG_VAL32(&nbcfg) =
	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_NBCFG);

	if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
		mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl) =
		    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
		    MC_CTL_REG_SPARECTL);

		if (MCREG_FIELD_F_revFG(&sparectl, SwapDone)) {
			mc->mc_props.mcp_badcs =
			    MCREG_FIELD_F_revFG(&sparectl, BadDramCs);
		}
	}
}
738
739static int
740csbasecmp(mc_cs_t **csapp, mc_cs_t **csbpp)
741{
742	uint64_t basea = (*csapp)->mccs_props.csp_base;
743	uint64_t baseb = (*csbpp)->mccs_props.csp_base;
744
745	if (basea == baseb)
746		return (0);
747	else if (basea < baseb)
748		return (-1);
749	else
750		return (1);
751}
752
/*
 * The following are for use in simulating TestFail for a chip-select
 * without poking at the hardware (which tends to get upset if you do
 * since the BIOS needs to restart to map a failed cs out).  For internal
 * testing only!  Note that setting these does not give the full
 * experience - the selected chip-select *is* enabled and can give
 * errors etc and the patounum logic will get confused.
 */
int testfail_mcnum = -1;	/* chip id to fake a TestFail on */
int testfail_csnum = -1;	/* chip-select number to fake as failed */
763
/*
 * Function 2 configuration - DRAM Controller
 *
 * Reads the DRAM controller registers for this node, derives the cooked
 * properties (access width, Mod64Mux, bank-swizzle, cs bank mapping),
 * creates a cs node for each chip-select that is enabled, a designated
 * online spare, or marked TestFail, detects chip-select interleave and
 * rank discontiguity, and finally derives the dimm configuration.
 */
static void
mc_mkprops_dramctl(mc_pcicfg_hdl_t cfghdl, mc_t *mc)
{
	union mcreg_csbase base[MC_CHIP_NCS];
	union mcreg_csmask mask[MC_CHIP_NCS];
	union mcreg_dramcfg_lo drcfg_lo;
	union mcreg_dramcfg_hi drcfg_hi;
	union mcreg_drammisc drmisc;
	union mcreg_bankaddrmap baddrmap;
	mc_props_t *mcp = &mc->mc_props;
	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
	int maskdivisor;
	int wide = 0;
	uint32_t rev = mc->mc_props.mcp_rev;
	int i;
	mcamd_hdl_t hdl;

	mcamd_mkhdl(&hdl);	/* to call into common code */

	/*
	 * Read Function 2 DRAM Configuration High and Low registers.  The High
	 * part is mostly concerned with memory clocks etc and we'll not have
	 * any use for that.  The Low component tells us if ECC is enabled,
	 * if we're in 64- or 128-bit MC mode, how the upper chip-selects
	 * are mapped, which chip-select pairs are using x4 parts, etc.
	 */
	MCREG_VAL32(&drcfg_lo) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGLO);
	MCREG_VAL32(&drcfg_hi) = mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMCFGHI);
	mcr->mcr_dramcfglo = MCREG_VAL32(&drcfg_lo);
	mcr->mcr_dramcfghi = MCREG_VAL32(&drcfg_hi);

	/*
	 * Note the DRAM controller width.  The 64/128 bit is in a different
	 * bit position for revision F and G.
	 */
	if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
		wide = MCREG_FIELD_F_revFG(&drcfg_lo, Width128);
	} else {
		wide = MCREG_FIELD_F_preF(&drcfg_lo, Width128);
	}
	mcp->mcp_accwidth = wide ? 128 : 64;

	/*
	 * Read Function 2 DRAM Controller Miscellaneous Register for those
	 * revs that support it.  This includes the Mod64Mux indication on
	 * these revs - for rev E it is in DRAM config low.
	 */
	if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
		mcr->mcr_drammisc = MCREG_VAL32(&drmisc) =
		    mc_pcicfg_get32(cfghdl, MC_DC_REG_DRAMMISC);
		mcp->mcp_mod64mux = MCREG_FIELD_F_revFG(&drmisc, Mod64Mux);
	} else if (MC_REV_MATCH(rev, MC_F_REV_E)) {
		mcp->mcp_mod64mux = MCREG_FIELD_F_preF(&drcfg_lo, Mod64BitMux);
	}

	/*
	 * Read Function 2 DRAM Bank Address Mapping.  This encodes the
	 * type of DIMM module in use for each chip-select pair.
	 * Prior to revision F it also tells us whether BankSwizzle mode
	 * is enabled - in rev F that has moved to dram config hi register.
	 */
	mcp->mcp_csbankmapreg = MCREG_VAL32(&baddrmap) =
	    mc_pcicfg_get32(cfghdl, MC_DC_REG_BANKADDRMAP);

	/*
	 * Determine whether bank swizzle mode is active.  Bank swizzling was
	 * introduced as an option in rev E,  but the bit that indicates it
	 * is enabled has moved in revs F/G.
	 */
	if (MC_REV_MATCH(rev, MC_F_REV_E)) {
		mcp->mcp_bnkswzl =
		    MCREG_FIELD_F_preF(&baddrmap, BankSwizzleMode);
	} else if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
		mcp->mcp_bnkswzl = MCREG_FIELD_F_revFG(&drcfg_hi,
		    BankSwizzleMode);
	}

	/*
	 * Read the DRAM CS Base and DRAM CS Mask registers.  Revisions prior
	 * to F have an equal number of base and mask registers; revision F
	 * has twice as many base registers as masks.
	 */
	maskdivisor = MC_REV_MATCH(rev, MC_F_REVS_FG) ? 2 : 1;

	mc_prop_read_pair(cfghdl,
	    (uint32_t *)base, MC_DC_REG_CSBASE_0, MC_CHIP_NCS,
	    (uint32_t *)mask, MC_DC_REG_CSMASK_0, MC_CHIP_NCS / maskdivisor,
	    MC_DC_REG_CS_INCR);

	/*
	 * Create a cs node for each enabled chip-select as well as
	 * any appointed online spare chip-selects and for any that have
	 * failed test.
	 */
	for (i = 0; i < MC_CHIP_NCS; i++) {
		mc_cs_t *mccs;
		uint64_t csbase, csmask;
		size_t sz;
		int csbe, spare, testfail;

		/* Spare/TestFail indications exist on revs F/G only */
		if (MC_REV_MATCH(rev, MC_F_REVS_FG)) {
			csbe = MCREG_FIELD_F_revFG(&base[i], CSEnable);
			spare = MCREG_FIELD_F_revFG(&base[i], Spare);
			testfail = MCREG_FIELD_F_revFG(&base[i], TestFail);
		} else {
			csbe = MCREG_FIELD_F_preF(&base[i], CSEnable);
			spare = 0;
			testfail = 0;
		}

		/* Testing hook */
		if (testfail_mcnum != -1 && testfail_csnum != -1 &&
		    mcp->mcp_num == testfail_mcnum && i == testfail_csnum) {
			csbe = spare = 0;
			testfail = 1;
			cmn_err(CE_NOTE, "Pretending MC %d CS %d failed test",
			    testfail_mcnum, testfail_csnum);
		}

		/*
		 * If the chip-select is not enabled then skip it unless
		 * it is a designated online spare or is marked with TestFail.
		 */
		if (!csbe && !(spare || testfail))
			continue;

		/*
		 * For an enabled or spare chip-select the Bank Address Mapping
		 * register will be valid as will the chip-select mask.  The
		 * base will not be valid but we'll read and store it anyway.
		 * We will not know whether the spare is already swapped in
		 * until MC function 3 attaches.
		 */
		if (csbe || spare) {
			if (mcamd_cs_size(&hdl, (mcamd_node_t *)mc, i, &sz) < 0)
				continue;
			csbase = MC_CSBASE(&base[i], rev);
			csmask = MC_CSMASK(&mask[i / maskdivisor], rev);
		} else {
			sz = 0;
			csbase = csmask = 0;
		}

		mccs = mc_cs_create(mc, i, csbase, csmask, sz,
		    csbe, spare, testfail);

		/* Append to the mc's chip-select list */
		if (mc->mc_cslist == NULL)
			mc->mc_cslist = mccs;
		else
			mc->mc_cslast->mccs_next = mccs;
		mc->mc_cslast = mccs;

		mccs->mccs_cfgregs.csr_csbase = MCREG_VAL32(&base[i]);
		mccs->mccs_cfgregs.csr_csmask =
		    MCREG_VAL32(&mask[i / maskdivisor]);

		/*
		 * Check for cs bank interleaving - some bits clear in the
		 * lower mask.  All banks must/will have the same lomask bits
		 * if cs interleaving is active.
		 */
		if (csbe && !mcp->mcp_csintlvfctr) {
			int bitno, ibits = 0;
			for (bitno = MC_CSMASKLO_LOBIT(rev);
			    bitno <= MC_CSMASKLO_HIBIT(rev); bitno++) {
				if (!(csmask & (1 << bitno)))
					ibits++;
			}
			/* Interleave factor is 2^(number of clear bits) */
			mcp->mcp_csintlvfctr = 1 << ibits;
		}
	}

	/*
	 * If there is no chip-select interleave on this node determine
	 * whether the chip-select ranks are contiguous or if there
	 * is a hole.
	 */
	if (mcp->mcp_csintlvfctr == 1) {
		mc_cs_t *csp[MC_CHIP_NCS];
		mc_cs_t *mccs;
		int ncsbe = 0;

		for (mccs = mc->mc_cslist; mccs != NULL;
		    mccs = mccs->mccs_next) {
			if (mccs->mccs_props.csp_csbe)
				csp[ncsbe++] = mccs;
		}

		if (ncsbe != 0) {
			/* Sort by base address, then look for gaps */
			qsort((void *)csp, ncsbe, sizeof (mc_cs_t *),
			    (int (*)(const void *, const void *))csbasecmp);

			for (i = 1; i < ncsbe; i++) {
				if (csp[i]->mccs_props.csp_base !=
				    csp[i - 1]->mccs_props.csp_base +
				    csp[i - 1]->mccs_props.csp_size)
					mc->mc_csdiscontig = 1;
			}
		}
	}

	/*
	 * Since we do not attach to MC function 3 go ahead and read some
	 * config parameters from it now.
	 */
	mc_getmiscctl(mc);

	/*
	 * Now that we have discovered all enabled/spare/testfail chip-selects
	 * we divine the associated DIMM configuration.
	 */
	mc_dimmlist_create(mc);
}
981
982typedef struct mc_bind_map {
983	const char *bm_bindnm;	 /* attachment binding name */
984	enum mc_funcnum bm_func; /* PCI config space function number for bind */
985	const char *bm_model;	 /* value for device node model property */
986	void (*bm_mkprops)(mc_pcicfg_hdl_t, mc_t *);
987} mc_bind_map_t;
988
989/*
990 * Do not attach to MC function 3 - agpgart already attaches to that.
991 * Function 3 may be a good candidate for a nexus driver to fan it out
992 * into virtual devices by functionality.  We will use pci_mech1_getl
993 * to retrieve the function 3 parameters we require.
994 */
995
/*
 * Binding names this driver attaches to, terminated by a NULL entry.
 * MC function 3 (misc control) is deliberately absent - see the block
 * comment above; its registers are read via pci_mech1_getl instead.
 */
static const mc_bind_map_t mc_bind_map[] = {
	{ MC_FUNC_HTCONFIG_BINDNM, MC_FUNC_HTCONFIG,
	    "AMD Memory Controller (HT Configuration)", mc_mkprops_htcfg },
	{ MC_FUNC_ADDRMAP_BINDNM, MC_FUNC_ADDRMAP,
	    "AMD Memory Controller (Address Map)", mc_mkprops_addrmap },
	{ MC_FUNC_DRAMCTL_BINDNM, MC_FUNC_DRAMCTL,
	    "AMD Memory Controller (DRAM Controller & HT Trace)",
	    mc_mkprops_dramctl },
	NULL
};
1006
1007/*ARGSUSED*/
1008static int
1009mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1010{
1011	if (otyp != OTYP_CHR)
1012		return (EINVAL);
1013
1014	rw_enter(&mc_lock, RW_READER);
1015	if (mc_lookup_by_chipid(getminor(*devp)) == NULL) {
1016		rw_exit(&mc_lock);
1017		return (EINVAL);
1018	}
1019	rw_exit(&mc_lock);
1020
1021	return (0);
1022}
1023
/*
 * Close entry point.  mc_open allocates no per-open state, so there is
 * nothing to release here.
 */
/*ARGSUSED*/
static int
mc_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	return (0);
}
1030
1031/*
1032 * Enable swap from chip-select csnum to the spare chip-select on this
1033 * memory controller (if any).
1034 */
1035
/* Tunable: max seconds to poll for SwapDone; mc_onlinespare clamps to 120 */
int mc_swapdonetime = 30;
1037
/*
 * Attempt to swap chip-select 'csnum' out in favor of the configured
 * spare chip-select on this memory controller.  Returns 0 on success or
 * an errno value describing why the swap cannot be performed.  The
 * caller must hold mc_lock as writer since we update mc state on
 * success.
 */
static int
mc_onlinespare(mc_t *mc, int csnum)
{
	mc_props_t *mcp = &mc->mc_props;
	union mcreg_sparectl sparectl;
	union mcreg_scrubctl scrubctl;
	mc_cs_t *mccs;
	hrtime_t tmax;
	int i = 0;

	ASSERT(RW_WRITE_HELD(&mc_lock));

	if (!MC_REV_MATCH(mcp->mcp_rev, MC_F_REVS_FG))
		return (ENOTSUP);	/* MC rev does not offer online spare */
	else if (mcp->mcp_sparecs == MC_INVALNUM)
		return (ENODEV);	/* Supported, but no spare configured */
	else if (mcp->mcp_badcs != MC_INVALNUM)
		return (EBUSY);		/* Spare already swapped in */
	else if (csnum == mcp->mcp_sparecs)
		return (EINVAL);	/* Can't spare the spare! */

	/* The nominated chip-select must exist on this controller. */
	for (mccs = mc->mc_cslist; mccs != NULL; mccs = mccs->mccs_next) {
		if (mccs->mccs_props.csp_num == csnum)
			break;
	}
	if (mccs == NULL)
		return (EINVAL);	/* nominated bad CS does not exist */

	/*
	 * If the DRAM Scrubber is not enabled then the swap cannot succeed.
	 */
	MCREG_VAL32(&scrubctl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
	    MC_CTL_REG_SCRUBCTL);
	if (MCREG_FIELD_CMN(&scrubctl, DramScrub) == 0)
		return (ENODEV);	/* DRAM scrubber not enabled */

	/*
	 * Read Online Spare Control Register again, just in case our
	 * state does not reflect reality.
	 */
	MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL,
	    MC_CTL_REG_SPARECTL);

	if (MCREG_FIELD_F_revFG(&sparectl, SwapDone))
		return (EBUSY);

	/* Write to the BadDramCs field */
	MCREG_FIELD_F_revFG(&sparectl, BadDramCs) = csnum;
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
	    MCREG_VAL32(&sparectl));

	/* And request that the swap to the spare start */
	MCREG_FIELD_F_revFG(&sparectl, SwapEn) = 1;
	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL,
	    MCREG_VAL32(&sparectl));

	/*
	 * Poll for SwapDone - we have disabled notification by interrupt.
	 * Swap takes "several CPU cycles, depending on the DRAM speed, but
	 * is performed in the background" (Family 0Fh Bios Porting Guide).
	 * We're in a slow ioctl path so there is no harm in waiting around
	 * a bit - consumers of the ioctl must be aware that it may take
	 * a moment.  We will poll for up to mc_swapdonetime seconds,
	 * limiting that to 120s.
	 *
	 * The swap is performed by the DRAM scrubber (which must be enabled)
	 * whose scrub rate is accelerated for the duration of the swap.
	 * The maximum swap rate is 40.0ns per 64 bytes, so the maximum
	 * supported cs size of 16GB would take 10.7s at that max rate
	 * of 25000000 scrubs/second.
	 */
	tmax = gethrtime() + MIN(mc_swapdonetime, 120) * 1000000000ULL;
	do {
		if (i++ < 20)
			delay(drv_usectohz(100000));	/* 0.1s for up to 2s */
		else
			delay(drv_usectohz(500000));	/* 0.5s */

		MCREG_VAL32(&sparectl) = mc_pcicfg_get32_nohdl(mc,
		    MC_FUNC_MISCCTL, MC_CTL_REG_SPARECTL);
	} while (!MCREG_FIELD_F_revFG(&sparectl, SwapDone) &&
	    gethrtime() < tmax);

	if (!MCREG_FIELD_F_revFG(&sparectl, SwapDone))
		return (ETIME);		/* Operation timed out */

	/* Record the swap so later attempts return EBUSY. */
	mcp->mcp_badcs = csnum;
	mc->mc_cfgregs.mcr_sparectl = MCREG_VAL32(&sparectl);
	mc->mc_spareswaptime = gethrtime();

	return (0);
}
1130
/*
 * Ioctl entry point.  Supports snapshot size/generation queries
 * (MC_IOC_SNAPSHOT_INFO), snapshot retrieval (MC_IOC_SNAPSHOT) and the
 * privileged online-spare swap (MC_IOC_ONLINESPARE_EN).  All other
 * commands fail with EINVAL.
 */
/*ARGSUSED*/
static int
mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	int rc = 0;
	mc_t *mc;

	if (cmd != MC_IOC_SNAPSHOT_INFO && cmd != MC_IOC_SNAPSHOT &&
	    cmd != MC_IOC_ONLINESPARE_EN)
		return (EINVAL);

	rw_enter(&mc_lock, RW_READER);

	/* The minor number selects the per-chip controller. */
	if ((mc = mc_lookup_by_chipid(getminor(dev))) == NULL) {
		rw_exit(&mc_lock);
		return (EINVAL);
	}

	switch (cmd) {
	case MC_IOC_SNAPSHOT_INFO: {
		mc_snapshot_info_t mcs;

		if (mc_snapshot_update(mc) < 0) {
			rw_exit(&mc_lock);
			return (EIO);
		}

		mcs.mcs_size = mc->mc_snapshotsz;
		mcs.mcs_gen = mc->mc_snapshotgen;

		if (ddi_copyout(&mcs, (void *)arg, sizeof (mc_snapshot_info_t),
		    mode) < 0)
			rc = EFAULT;
		break;
	}

	case MC_IOC_SNAPSHOT:
		if (mc_snapshot_update(mc) < 0) {
			rw_exit(&mc_lock);
			return (EIO);
		}

		if (ddi_copyout(mc->mc_snapshot, (void *)arg, mc->mc_snapshotsz,
		    mode) < 0)
			rc = EFAULT;
		break;

	case MC_IOC_ONLINESPARE_EN:
		if (drv_priv(credp) != 0) {
			rw_exit(&mc_lock);
			return (EPERM);
		}

		/*
		 * mc_onlinespare requires the writer lock; try to upgrade
		 * and return EAGAIN rather than block behind other readers.
		 */
		if (!rw_tryupgrade(&mc_lock)) {
			rw_exit(&mc_lock);
			return (EAGAIN);
		}

		/* On success invalidate the snapshot and rebuild the nvlist */
		if ((rc = mc_onlinespare(mc, (int)arg)) == 0) {
			mc_snapshot_destroy(mc);
			nvlist_free(mc->mc_nvl);
			mc->mc_nvl = mc_nvl_create(mc);
		}

		break;
	}

	rw_exit(&mc_lock);

	return (rc);
}
1202
/*
 * Character device entry points.  Only open, close, ioctl and the
 * default property routine are provided.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,
	mc_close,
	nodev,		/* not a block driver */
	nodev,		/* no print routine */
	nodev,		/* no dump routine */
	nodev,		/* no read routine */
	nodev,		/* no write routine */
	mc_ioctl,
	nodev,		/* no devmap routine */
	nodev,		/* no mmap routine */
	nodev,		/* no segmap routine */
	nochpoll,	/* no chpoll routine */
	ddi_prop_op,
	0,		/* not a STREAMS driver */
	D_NEW | D_MP,	/* safe for multi-thread/multi-processor */
};
1220
1221/*ARGSUSED*/
1222static int
1223mc_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1224{
1225	int rc = DDI_SUCCESS;
1226	mc_t *mc;
1227
1228	if (infocmd != DDI_INFO_DEVT2DEVINFO &&
1229	    infocmd != DDI_INFO_DEVT2INSTANCE) {
1230		*result = NULL;
1231		return (DDI_FAILURE);
1232	}
1233
1234	rw_enter(&mc_lock, RW_READER);
1235
1236	if ((mc = mc_lookup_by_chipid(getminor((dev_t)arg))) == NULL ||
1237	    mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi == NULL) {
1238		rc = DDI_FAILURE;
1239	} else if (infocmd == DDI_INFO_DEVT2DEVINFO) {
1240		*result = mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_devi;
1241	} else {
1242		*result = (void *)(uintptr_t)
1243		    mc->mc_funcs[MC_FUNC_DEVIMAP].mcf_instance;
1244	}
1245
1246	rw_exit(&mc_lock);
1247
1248	return (rc);
1249}
1250
/*
 * FMA error-handler callback: post a PCI ereport for the error and
 * report the fault-management status back to the framework.
 */
/*ARGSUSED2*/
static int
mc_fm_handle(dev_info_t *dip, ddi_fm_error_t *fmerr, const void *arg)
{
	pci_ereport_post(dip, fmerr, NULL);
	return (fmerr->fme_status);
}
1258
/*
 * Declare FMA capabilities (ereport generation plus an error callback)
 * for this devinfo node, set up PCI ereports and register our handler.
 */
static void
mc_fm_init(dev_info_t *dip)
{
	int fmcap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE;
	ddi_fm_init(dip, &fmcap, NULL);
	pci_ereport_setup(dip);
	ddi_fm_handler_register(dip, mc_fm_handle, NULL);
}
1267
1268/*ARGSUSED*/
1269static int
1270mc_create_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1271{
1272	chipid_t chipid = *((chipid_t *)arg1);
1273	cmi_hdl_t *hdlp = (cmi_hdl_t *)arg2;
1274
1275	if (cmi_hdl_chipid(whdl) == chipid) {
1276		cmi_hdl_hold(whdl);	/* short-term hold */
1277		*hdlp = whdl;
1278		return (CMI_HDL_WALK_DONE);
1279	} else {
1280		return (CMI_HDL_WALK_NEXT);
1281	}
1282}
1283
/*
 * Allocate a new mc_t for 'chipid', initialize it from a cpu handle on
 * that chip and append it to the global mc_list.  Returns NULL if no
 * cpu handle exists yet for the chip.  mc_lock must be held as writer.
 */
static mc_t *
mc_create(chipid_t chipid)
{
	mc_t *mc;
	cmi_hdl_t hdl = NULL;

	ASSERT(RW_WRITE_HELD(&mc_lock));

	/*
	 * Find a handle for one of a chip's CPU.
	 *
	 * We can use one of the chip's CPUs since all cores
	 * of a chip share the same revision and socket type.
	 */
	cmi_hdl_walk(mc_create_cb, (void *)&chipid, (void *)&hdl, NULL);
	if (hdl == NULL)
		return (NULL);	/* no cpu for this chipid found! */

	mc = kmem_zalloc(sizeof (mc_t), KM_SLEEP);

	mc->mc_hdr.mch_type = MC_NT_MC;
	mc->mc_props.mcp_num = chipid;
	mc->mc_props.mcp_sparecs = MC_INVALNUM;
	mc->mc_props.mcp_badcs = MC_INVALNUM;

	/* Revision and socket type, common to all cores of the chip. */
	mc->mc_props.mcp_rev = cmi_hdl_chiprev(hdl);
	mc->mc_revname = cmi_hdl_chiprevstr(hdl);
	mc->mc_socket = cmi_hdl_getsockettype(hdl);

	/* Append to the global list, maintaining head and tail pointers. */
	if (mc_list == NULL)
		mc_list = mc;
	if (mc_last != NULL)
		mc_last->mc_next = mc;

	mc->mc_next = NULL;
	mc_last = mc;

	cmi_hdl_rele(hdl);	/* drop the hold taken in mc_create_cb */

	return (mc);
}
1325
1326/*
1327 * Return the maximum scrubbing rate between r1 and r2, where r2 is extracted
1328 * from the specified 'cfg' register value using 'mask' and 'shift'.  If a
1329 * value is zero, scrubbing is off so return the opposite value.  Otherwise
1330 * the maximum rate is the smallest non-zero value of the two values.
1331 */
1332static uint32_t
1333mc_scrubber_max(uint32_t r1, uint32_t cfg, uint32_t mask, uint32_t shift)
1334{
1335	uint32_t r2 = (cfg & mask) >> shift;
1336
1337	if (r1 != 0 && r2 != 0)
1338		return (MIN(r1, r2));
1339
1340	return (r1 ? r1 : r2);
1341}
1342
1343
1344/*
1345 * Enable the memory scrubber.  We must use the mc_pcicfg_{get32,put32}_nohdl
1346 * interfaces since we do not bind to function 3.
1347 */
1348cmi_errno_t
1349mc_scrubber_enable(mc_t *mc)
1350{
1351	mc_props_t *mcp = &mc->mc_props;
1352	mc_cfgregs_t *mcr = &mc->mc_cfgregs;
1353	union mcreg_scrubctl scrubctl;
1354	union mcreg_dramscrublo dalo;
1355	union mcreg_dramscrubhi dahi;
1356
1357	mcr->mcr_scrubctl = MCREG_VAL32(&scrubctl) =
1358	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL);
1359
1360	mcr->mcr_scrubaddrlo = MCREG_VAL32(&dalo) =
1361	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO);
1362
1363	mcr->mcr_scrubaddrhi = MCREG_VAL32(&dahi) =
1364	    mc_pcicfg_get32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI);
1365
1366	if (mc_scrub_policy == MC_SCRUB_BIOSDEFAULT)
1367		return (MCREG_FIELD_CMN(&scrubctl, DramScrub) !=
1368		    AMD_NB_SCRUBCTL_RATE_NONE ?
1369		    CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);
1370
1371	/*
1372	 * Disable DRAM scrubbing while we fiddle.
1373	 */
1374	MCREG_FIELD_CMN(&scrubctl, DramScrub) = AMD_NB_SCRUBCTL_RATE_NONE;
1375	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
1376	    MCREG_VAL32(&scrubctl));
1377
1378	/*
1379	 * Setup DRAM Scrub Address Low and High registers for the
1380	 * base address of this node, and to select srubber redirect.
1381	 */
1382	MCREG_FIELD_CMN(&dalo, ScrubReDirEn) = 1;
1383	MCREG_FIELD_CMN(&dalo, ScrubAddrLo) =
1384	    AMD_NB_SCRUBADDR_MKLO(mcp->mcp_base);
1385
1386	MCREG_FIELD_CMN(&dahi, ScrubAddrHi) =
1387	    AMD_NB_SCRUBADDR_MKHI(mcp->mcp_base);
1388
1389	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_LO,
1390	    MCREG_VAL32(&dalo));
1391	mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBADDR_HI,
1392	    MCREG_VAL32(&dahi));
1393
1394	if (mc_scrub_rate_dram > AMD_NB_SCRUBCTL_RATE_MAX) {
1395		cmn_err(CE_WARN, "mc_scrub_rate_dram is too large; "
1396		    "resetting to 0x%x\n", AMD_NB_SCRUBCTL_RATE_MAX);
1397		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_MAX;
1398	}
1399
1400	switch (mc_scrub_policy) {
1401	case MC_SCRUB_FIXED:
1402		/* Use the system value checked above */
1403		break;
1404
1405	default:
1406		cmn_err(CE_WARN, "Unknown mc_scrub_policy value %d - "
1407		    "using default policy of MC_SCRUB_MAX", mc_scrub_policy);
1408		/*FALLTHRU*/
1409
1410	case MC_SCRUB_MAX:
1411		mc_scrub_rate_dram = mc_scrubber_max(mc_scrub_rate_dram,
1412		    mcr->mcr_scrubctl, AMD_NB_SCRUBCTL_DRAM_MASK,
1413		    AMD_NB_SCRUBCTL_DRAM_SHIFT);
1414		break;
1415	}
1416
1417#ifdef	OPTERON_ERRATUM_99
1418	/*
1419	 * This erratum applies on revisions D and earlier.
1420	 * This erratum also applies on revisions E and later,
1421	 * if BIOS uses chip-select hoisting instead of DRAM hole
1422	 * mapping.
1423	 *
1424	 * Do not enable the dram scrubber if the chip-select ranges
1425	 * for the node are not contiguous.
1426	 */
1427	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
1428	    mc->mc_csdiscontig)
1429		cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
1430		    "%s chip %d because DRAM hole is present on this node",
1431		    mc->mc_revname, chipid);
1432		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
1433	}
1434#endif
1435
1436#ifdef OPTERON_ERRATUM_101
1437	/*
1438	 * This erratum applies on revisions D and earlier.
1439	 *
1440	 * If the DRAM Base Address register's IntlvEn field indicates that
1441	 * node interleaving is enabled, we must disable the DRAM scrubber
1442	 * and return zero to indicate that Solaris should use s/w instead.
1443	 */
1444	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE &&
1445	    mcp->mcp_ilen != 0 &&
1446	    !X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_E)) {
1447		cmn_err(CE_CONT, "?Opteron DRAM scrubber disabled on revision "
1448		    "%s chip %d because DRAM memory is node-interleaved",
1449		    mc->mc_revname, chipid);
1450		mc_scrub_rate_dram = AMD_NB_SCRUBCTL_RATE_NONE;
1451	}
1452#endif
1453
1454	if (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE) {
1455		MCREG_FIELD_CMN(&scrubctl, DramScrub) = mc_scrub_rate_dram;
1456		mc_pcicfg_put32_nohdl(mc, MC_FUNC_MISCCTL, MC_CTL_REG_SCRUBCTL,
1457		    MCREG_VAL32(&scrubctl));
1458	}
1459
1460	return (mc_scrub_rate_dram != AMD_NB_SCRUBCTL_RATE_NONE ?
1461	    CMI_SUCCESS : CMIERR_MC_NOMEMSCRUB);
1462}
1463
1464/*ARGSUSED*/
1465static int
1466mc_attach_cb(cmi_hdl_t whdl, void *arg1, void *arg2, void *arg3)
1467{
1468	mc_t *mc = (mc_t *)arg1;
1469	mcamd_prop_t chipid = *((mcamd_prop_t *)arg2);
1470
1471	if (cmi_hdl_chipid(whdl) == chipid) {
1472		mcamd_mc_register(whdl, mc);
1473	}
1474
1475	return (CMI_HDL_WALK_NEXT);
1476}
1477
/* Non-zero once the s/w memory scrubber has been disabled; counts enables */
static int mc_sw_scrub_disabled = 0;
1479
/*
 * Attach one PCI function of an AMD memory controller.  Each chip
 * presents several config-space functions which attach independently;
 * state is accumulated into a shared per-chip mc_t and final setup
 * (minor node, scrubber enabling, cpu-module registration) is performed
 * when the MC_FUNC_DEVIMAP function attaches.
 */
static int
mc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	mc_pcicfg_hdl_t cfghdl;
	const mc_bind_map_t *bm;
	const char *bindnm;
	char *unitstr = NULL;
	enum mc_funcnum func;
	long unitaddr;
	int chipid, rc;
	mc_t *mc;

	/*
	 * This driver has no hardware state, but does
	 * claim to have a reg property, so it will be
	 * called on suspend.  It is probably better to
	 * make sure it doesn't get called on suspend,
	 * but it is just as easy to make sure we just
	 * return DDI_SUCCESS if called.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH || mc_no_attach != 0)
		return (DDI_FAILURE);

	/* Identify which MC function this node binds as. */
	bindnm = ddi_binding_name(dip);
	for (bm = mc_bind_map; bm->bm_bindnm != NULL; bm++) {
		if (strcmp(bindnm, bm->bm_bindnm) == 0) {
			func = bm->bm_func;
			break;
		}
	}

	if (bm->bm_bindnm == NULL)
		return (DDI_FAILURE);

	/*
	 * We need the device number, which corresponds to the processor node
	 * number plus 24.  The node number can then be used to associate this
	 * memory controller device with a given processor chip.
	 */
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "unit-address", &unitstr) != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "failed to find unit-address for %s", bindnm);
		return (DDI_FAILURE);
	}

	rc = ddi_strtol(unitstr, NULL, 16, &unitaddr);
	ASSERT(rc == 0 && unitaddr >= MC_AMD_DEV_OFFSET);

	if (rc != 0 || unitaddr < MC_AMD_DEV_OFFSET) {
		cmn_err(CE_WARN, "failed to parse unit address %s for %s\n",
		    unitstr, bindnm);
		ddi_prop_free(unitstr);
		return (DDI_FAILURE);
	}
	ddi_prop_free(unitstr);

	chipid = unitaddr - MC_AMD_DEV_OFFSET;

	rw_enter(&mc_lock, RW_WRITER);

	/* Look for an existing mc_t for this chip. */
	for (mc = mc_list; mc != NULL; mc = mc->mc_next) {
		if (mc->mc_props.mcp_num == chipid)
			break;
	}

	/* Integrate this memory controller device into existing set */
	if (mc == NULL) {
		mc = mc_create(chipid);

		if (mc == NULL) {
			/*
			 * We don't complain here because this is a legitimate
			 * path for MP systems.  On those machines, we'll attach
			 * before all CPUs have been initialized, and thus the
			 * chip verification in mc_create will fail.  We'll be
			 * reattached later for those CPUs.
			 */
			rw_exit(&mc_lock);
			return (DDI_FAILURE);
		}
	} else {
		/* Existing set: invalidate any cached snapshot. */
		mc_snapshot_destroy(mc);
	}

	/* Beyond this point, we're committed to creating this node */

	mc_fm_init(dip);

	ASSERT(mc->mc_funcs[func].mcf_devi == NULL);
	mc->mc_funcs[func].mcf_devi = dip;
	mc->mc_funcs[func].mcf_instance = ddi_get_instance(dip);

	/* One more function node now references this mc. */
	mc->mc_ref++;

	/*
	 * Add the common properties to this node, and then add any properties
	 * that are specific to this node based upon its configuration space.
	 */
	(void) ddi_prop_update_string(DDI_DEV_T_NONE,
	    dip, "model", (char *)bm->bm_model);

	(void) ddi_prop_update_int(DDI_DEV_T_NONE,
	    dip, "chip-id", mc->mc_props.mcp_num);

	if (bm->bm_mkprops != NULL &&
	    mc_pcicfg_setup(mc, bm->bm_func, &cfghdl) == DDI_SUCCESS) {
		bm->bm_mkprops(cfghdl, mc);
		mc_pcicfg_teardown(cfghdl);
	}

	/*
	 * If this is the last node to be attached for this memory controller,
	 * then create the minor node, enable scrubbers, and register with
	 * cpu module(s) for this chip.
	 */
	if (func == MC_FUNC_DEVIMAP) {
		mc_props_t *mcp = &mc->mc_props;
		int dram_present = 0;

		if (ddi_create_minor_node(dip, "mc-amd", S_IFCHR,
		    mcp->mcp_num, "ddi_mem_ctrl",
		    0) != DDI_SUCCESS) {
			cmn_err(CE_WARN, "failed to create minor node for chip "
			    "%d memory controller\n",
			    (chipid_t)mcp->mcp_num);
		}

		/*
		 * Register the memory controller for every CPU of this chip.
		 *
		 * If there is memory present on this node and ECC is enabled
		 * attempt to enable h/w memory scrubbers for this node.
		 * If we are successful in enabling *any* hardware scrubbers,
		 * disable the software memory scrubber.
		 */
		cmi_hdl_walk(mc_attach_cb, (void *)mc, (void *)&mcp->mcp_num,
		    NULL);

		if (mcp->mcp_lim != mcp->mcp_base) {
			/*
			 * This node may map non-dram memory alone, so we
			 * must check for an enabled chip-select to be
			 * sure there is dram present.
			 */
			mc_cs_t *mccs;

			for (mccs = mc->mc_cslist; mccs != NULL;
			    mccs = mccs->mccs_next) {
				if (mccs->mccs_props.csp_csbe) {
					dram_present = 1;
					break;
				}
			}
		}

		if (dram_present && !mc_ecc_enabled(mc)) {
			/*
			 * On a single chip system there is no point in
			 * scrubbing if there is no ECC on the single node.
			 * On a multichip system, necessarily Opteron using
			 * registered ECC-capable DIMMs, if there is memory
			 * present on a node but no ECC there then we'll assume
			 * ECC is disabled for all nodes and we will not enable
			 * the scrubber and wll also disable the software
			 * memscrub thread.
			 */
			rc = 1;
		} else if (!dram_present) {
			/* No memory on this node - others decide memscrub */
			rc = 0;
		} else {
			/*
			 * There is memory on this node and ECC is enabled.
			 * Call via the cpu module to enable memory scrubbing
			 * on this node - we could call directly but then
			 * we may overlap with a request to enable chip-cache
			 * scrubbing.
			 */
			rc = mc_scrubber_enable(mc);
		}

		if (rc == CMI_SUCCESS && !mc_sw_scrub_disabled++)
			cmi_mc_sw_memscrub_disable();

		mc_report_testfails(mc);
	}

	/*
	 * Update nvlist for as far as we have gotten in attach/init.
	 */
	nvlist_free(mc->mc_nvl);
	mc->mc_nvl = mc_nvl_create(mc);

	rw_exit(&mc_lock);
	return (DDI_SUCCESS);
}
1679
1680/*ARGSUSED*/
1681static int
1682mc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1683{
1684	/*
1685	 * See the comment about suspend in
1686	 * mc_attach().
1687	 */
1688	if (cmd == DDI_SUSPEND)
1689		return (DDI_SUCCESS);
1690	else
1691		return (DDI_FAILURE);
1692}
1693
1694
/*
 * Autoconfiguration entry points; quiesce is declared unnecessary via
 * ddi_quiesce_not_needed.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	mc_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	mc_attach,		/* devo_attach */
	mc_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&mc_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_needed,		/* devo_quiesce */
};
1709
/* Module linkage: a single device-driver module using mc_ops. */
static struct modldrv modldrv = {
	&mod_driverops,
	"Memory Controller for AMD processors",
	&mc_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
1721
/*
 * Loadable module entry point.  The global lock is initialized before
 * mod_install so it is valid by the time any attach can run.
 */
int
_init(void)
{
	/*
	 * Refuse to load if there is no PCI config space support.
	 */
	if (pci_getl_func == NULL)
		return (ENOTSUP);

	rw_init(&mc_lock, NULL, RW_DRIVER, NULL);
	return (mod_install(&modlinkage));
}
1734
/* Loadable module information entry point. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
1740
/*
 * Loadable module unload entry point.  The lock is destroyed only after
 * mod_remove confirms the driver is no longer in use.
 */
int
_fini(void)
{
	int rc;

	if ((rc = mod_remove(&modlinkage)) != 0)
		return (rc);

	rw_destroy(&mc_lock);
	return (0);
}
1752