1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27/*
28 * Support routines for managing per-Lxcache state.
29 */
30
31#include <cmd_Lxcache.h>
32#include <cmd_mem.h>
33#include <cmd_cpu.h>
34#include <cmd.h>
35#include <errno.h>
36#include <fcntl.h>
37#include <unistd.h>
38#include <stdio.h>
39#include <strings.h>
40#include <fm/fmd_api.h>
41#include <sys/fm/protocol.h>
42#include <sys/cheetahregs.h>
43#include <sys/mem_cache.h>
44
45#define	PN_ECSTATE_NA	5
/*
 * These values are our threshold values for SERDing CPUs based on
 * the number of times we have retired a cache line for each category.
 */
50
51#define	CMD_CPU_SERD_AGG_1  	64
52#define	CMD_CPU_SERD_AGG_2	64
53
/*
 * Read-only lookup tables indexed by a 4-bit mask of cache ways, where
 * bit N of the index stands for way N.
 *
 * cmd_lowest_way[mask]  - lowest-numbered way set in mask, or -1 if the
 *                         mask is empty.
 * cmd_num_of_bits[mask] - number of ways set in mask.
 */
static const int8_t cmd_lowest_way[16] = {
/*	0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
	-1,  0,  1,  0,  2,  0,  1,  0,  3,  0,  1,  0,  2,  0,  1,  0};
static const int cmd_num_of_bits[16] = {
/*	0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xa 0xb 0xc 0xd 0xe 0xf */
	0,  1,  1,  2,  1,  2,  2,  3,  1,  2,  2,  3,  2,  3,  3,  4};
60
61
62void
63cmd_Lxcache_write(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
64{
65	fmd_buf_write(hdl, NULL, Lxcache->Lxcache_bufname, Lxcache,
66	    sizeof (cmd_Lxcache_pers_t));
67}
68
69const char *
70cmd_type_to_str(cmd_ptrsubtype_t pstype)
71{
72	switch (pstype) {
73		case CMD_PTR_CPU_L2DATA:
74			return ("l2data");
75			break;
76		case CMD_PTR_CPU_L3DATA:
77			return ("l3data");
78			break;
79		case CMD_PTR_CPU_L2TAG:
80			return ("l2tag");
81			break;
82		case CMD_PTR_CPU_L3TAG:
83			return ("l3tag");
84			break;
85		default:
86			return ("unknown");
87			break;
88	}
89}
90
91const char *
92cmd_flags_to_str(int flags)
93{
94	switch (flags) {
95		case CMD_LxCACHE_F_ACTIVE:
96			return ("ACTIVE");
97		case CMD_LxCACHE_F_FAULTING:
98			return ("FAULTING");
99		case CMD_LxCACHE_F_RETIRED:
100			return ("RETIRED");
101		case CMD_LxCACHE_F_UNRETIRED:
102			return ("UNRETIRED");
103		case CMD_LxCACHE_F_RERETIRED:
104			return ("RERETIRED");
105		default:
106			return ("Unknown_flags");
107	}
108}
109
110const char *
111cmd_reason_to_str(int reason)
112{
113	switch (reason) {
114		case CMD_LXSUSPECT_DATA:
115			return ("SUSPECT_DATA");
116		case CMD_LXSUSPECT_0_TAG:
117			return ("SUSPECT_0_TAG");
118		case CMD_LXSUSPECT_1_TAG:
119			return ("SUSPECT_1_TAG");
120		case CMD_LXCONVICTED:
121			return ("CONVICTED");
122		case CMD_LXFUNCTIONING:
123			return ("FUNCTIONING");
124		default:
125			return ("Unknown_reason");
126	}
127}
128
129static void
130cmd_pretty_print_Lxcache(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
131{
132	fmd_hdl_debug(hdl,
133	    "\n"
134	    "	cpu	= %s\n"
135	    "	type	= %s\n"
136	    "	index	= %d\n"
137	    "	way	= %d\n"
138	    "	bit	= %d\n"
139	    "	reason	= %s\n"
140	    "	flags	= %s\n",
141	    Lxcache->Lxcache_cpu_bufname,
142	    cmd_type_to_str(Lxcache->Lxcache_type),
143	    Lxcache->Lxcache_index,
144	    Lxcache->Lxcache_way,
145	    Lxcache->Lxcache_bit,
146	    cmd_reason_to_str(Lxcache->Lxcache_reason),
147	    cmd_flags_to_str(Lxcache->Lxcache_flags));
148}
149
150void
151cmd_Lxcache_free(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
152    int destroy)
153{
154	cmd_case_t *cc = &Lxcache->Lxcache_case;
155
156	fmd_hdl_debug(hdl, "Entering cmd_Lxcache_free for %s destroy = %d\n",
157	    Lxcache->Lxcache_bufname, destroy);
158
159	if (cc->cc_cp != NULL)
160		cmd_case_fini(hdl, cc->cc_cp, destroy);
161	if (cc->cc_serdnm != NULL) {
162		if (fmd_serd_exists(hdl, cc->cc_serdnm) && destroy) {
163			fmd_serd_destroy(hdl, cc->cc_serdnm);
164			fmd_hdl_strfree(hdl, cc->cc_serdnm);
165			cc->cc_serdnm = NULL;
166		}
167	}
168	if (Lxcache->Lxcache_nvl) {
169		nvlist_free(Lxcache->Lxcache_nvl);
170		Lxcache->Lxcache_nvl = NULL;
171	}
172	/*
173	 * Clean up the SERD engine created to handle recheck of TAGS.
174	 * This SERD engine was created to save the event pointer.
175	 */
176	if (Lxcache->Lxcache_serdnm != NULL) {
177		if (fmd_serd_exists(hdl, Lxcache->Lxcache_serdnm) && destroy) {
178			fmd_serd_destroy(hdl, Lxcache->Lxcache_serdnm);
179			fmd_hdl_strfree(hdl, Lxcache->Lxcache_serdnm);
180			Lxcache->Lxcache_serdnm = NULL;
181		}
182	}
183	Lxcache->Lxcache_timeout_id = -1;
184	Lxcache->Lxcache_ep = NULL;
185	Lxcache->Lxcache_retry_count = 0;
186	if (destroy)
187		fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
188	cmd_fmri_fini(hdl, &Lxcache->Lxcache_asru, destroy);
189	cmd_list_delete(&cpu->cpu_Lxcaches, Lxcache);
190	fmd_hdl_free(hdl, Lxcache, sizeof (cmd_Lxcache_t));
191}
192
/*
 * Free an Lxcache and remove its persistent state as well; this is
 * cmd_Lxcache_free() called with the destroy argument set to FMD_B_TRUE.
 */
void
cmd_Lxcache_destroy(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
{
	cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_TRUE);
}
198
199cmd_Lxcache_t *
200cmd_Lxcache_lookup_by_type_index_way_bit(cmd_cpu_t *cpu,
201    cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int16_t bit)
202{
203	cmd_Lxcache_t *Lxcache;
204
205	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
206	    Lxcache = cmd_list_next(Lxcache)) {
207		if ((Lxcache->Lxcache_type == pstype) &&
208		    (Lxcache->Lxcache_index == (uint32_t)index) &&
209		    (Lxcache->Lxcache_way == (uint32_t)way) &&
210		    (Lxcache->Lxcache_bit == (uint16_t)bit))
211			return (Lxcache);
212	}
213
214	return (NULL);
215}
216
217cmd_Lxcache_t *
218cmd_Lxcache_create(fmd_hdl_t *hdl, cmd_xr_t *xr, cmd_cpu_t *cpu,
219    nvlist_t *modasru, cmd_ptrsubtype_t pstype, int32_t index,
220    int8_t way, int16_t bit)
221{
222	cmd_Lxcache_t *Lxcache;
223	nvlist_t *asru;
224	const char	*pstype_name;
225	uint8_t	fmri_Lxcache_type;
226
227	pstype_name = cmd_type_to_str(pstype);
228	fmd_hdl_debug(hdl,
229	    "\n%s:cpu_id %d:Creating new Lxcache for index=%d way=%d bit=%d\n",
230	    pstype_name, cpu->cpu_cpuid, index, way, bit);
231
232	CMD_CPU_STAT_BUMP(cpu, Lxcache_creat);
233
234	Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
235	(void) strncpy(Lxcache->Lxcache_cpu_bufname,
236	    cpu->cpu_bufname, CMD_BUFNMLEN);
237	Lxcache->Lxcache_nodetype = CMD_NT_LxCACHE;
238	Lxcache->Lxcache_version = CMD_LxCACHE_VERSION;
239	Lxcache->Lxcache_type = pstype;
240	Lxcache->Lxcache_index = (uint32_t)index;
241	Lxcache->Lxcache_way = (uint32_t)way;
242	Lxcache->Lxcache_bit = (uint16_t)bit;
243	Lxcache->Lxcache_reason = CMD_LXFUNCTIONING;
244	Lxcache->Lxcache_flags = CMD_LxCACHE_F_ACTIVE;
245	Lxcache->Lxcache_timeout_id = -1;
246	Lxcache->Lxcache_retry_count = 0;
247	Lxcache->Lxcache_nvl = NULL;
248	Lxcache->Lxcache_ep = NULL;
249	Lxcache->Lxcache_serdnm = NULL;
250	Lxcache->Lxcache_clcode = 0;
251	Lxcache->xr = xr;
252	Lxcache->Lxcache_retired_fmri[0] = '\0';
253	switch (pstype) {
254		case CMD_PTR_CPU_L2DATA:
255			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
256			break;
257		case CMD_PTR_CPU_L3DATA:
258			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
259			break;
260		case CMD_PTR_CPU_L2TAG:
261			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L2;
262			break;
263		case CMD_PTR_CPU_L3TAG:
264			fmri_Lxcache_type = FM_FMRI_CPU_CACHE_TYPE_L3;
265			break;
266		default:
267			break;
268	}
269
270	cmd_bufname(Lxcache->Lxcache_bufname, sizeof (Lxcache->Lxcache_bufname),
271	    "Lxcache_%s_%d_%d_%d_%d", pstype_name, cpu->cpu_cpuid,
272	    index, way, bit);
273	fmd_hdl_debug(hdl,
274	    "\n%s:cpu_id %d: new Lxcache name is %s\n",
275	    pstype_name, cpu->cpu_cpuid, Lxcache->Lxcache_bufname);
276	if ((errno = nvlist_dup(modasru, &asru, 0)) != 0 ||
277	    (errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_INDEX,
278	    index)) != 0 ||
279	    (errno = nvlist_add_uint32(asru, FM_FMRI_CPU_CACHE_WAY,
280	    (uint32_t)way)) != 0 ||
281	    (errno = nvlist_add_uint16(asru, FM_FMRI_CPU_CACHE_BIT,
282	    bit)) != 0 ||
283	    (errno = nvlist_add_uint8(asru, FM_FMRI_CPU_CACHE_TYPE,
284	    fmri_Lxcache_type)) != 0 ||
285	    (errno = fmd_nvl_fmri_expand(hdl, asru)) != 0)
286		fmd_hdl_abort(hdl, "failed to build Lxcache fmri");
287	asru->nvl_nvflag |= NV_UNIQUE_NAME_TYPE;
288
289	cmd_fmri_init(hdl, &Lxcache->Lxcache_asru, asru,
290	    "%s_asru_%d_%d_%d", pstype_name, index, way, bit);
291
292	nvlist_free(asru);
293
294	cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
295	cmd_Lxcache_write(hdl, Lxcache);
296
297	return (Lxcache);
298}
299
300cmd_Lxcache_t *
301cmd_Lxcache_lookup_by_index_way(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
302    int32_t index, int8_t way)
303{
304	cmd_Lxcache_t *cache;
305
306	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
307	    cache = cmd_list_next(cache)) {
308	if ((cache->Lxcache_index == (uint32_t)index) &&
309	    (cache->Lxcache_way == (uint32_t)way) &&
310	    (cache->Lxcache_type == pstype)) {
311		return (cache);
312		}
313	}
314
315	return (NULL);
316}
317
318static cmd_Lxcache_t *
319Lxcache_wrapv1(fmd_hdl_t *hdl, cmd_Lxcache_pers_t *pers, size_t psz)
320{
321	cmd_Lxcache_t *Lxcache;
322
323	if (psz != sizeof (cmd_Lxcache_pers_t)) {
324		fmd_hdl_abort(hdl, "size of state doesn't match size of "
325		    "version 1 state (%u bytes).\n",
326		    sizeof (cmd_Lxcache_pers_t));
327	}
328
329	Lxcache = fmd_hdl_zalloc(hdl, sizeof (cmd_Lxcache_t), FMD_SLEEP);
330	bcopy(pers, Lxcache, sizeof (cmd_Lxcache_pers_t));
331	fmd_hdl_free(hdl, pers, psz);
332	return (Lxcache);
333}
334
335void *
336cmd_Lxcache_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
337{
338	cmd_Lxcache_t *Lxcache;
339	cmd_Lxcache_t *recovered_Lxcache;
340	cmd_cpu_t	*cpu;
341	size_t		Lxcachesz;
342	char		*serdnm;
343
344	/*
345	 * We need to first extract the cpu name by reading directly
346	 * from fmd buffers in order to begin our search for Lxcache in
347	 * the appropriate cpu list.
348	 * After we identify the cpu list using buf name we look
349	 * in cpu list for our Lxcache states.
350	 */
351	fmd_hdl_debug(hdl, "restoring Lxcache from %s\n", ptr->ptr_name);
352
353	if ((Lxcachesz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
354		fmd_hdl_abort(hdl, "Lxcache referenced by case %s does "
355		    "not exist in saved state\n",
356		    fmd_case_uuid(hdl, cp));
357	} else if (Lxcachesz != sizeof (cmd_Lxcache_pers_t)) {
358		fmd_hdl_abort(hdl, "Lxcache buffer referenced by case %s "
359		    "is %d bytes. Expected size is %d bytes\n",
360		    fmd_case_uuid(hdl, cp), Lxcachesz,
361		    sizeof (cmd_Lxcache_pers_t));
362	}
363
364	if ((Lxcache = cmd_buf_read(hdl, NULL, ptr->ptr_name,
365	    Lxcachesz)) == NULL) {
366		fmd_hdl_abort(hdl, "failed to read Lxcache buf %s",
367		    ptr->ptr_name);
368	}
369	cmd_pretty_print_Lxcache(hdl, Lxcache);
370
371	fmd_hdl_debug(hdl, "found %d in version field\n",
372	    Lxcache->Lxcache_version);
373	cpu = cmd_restore_cpu_only(hdl, cp, Lxcache->Lxcache_cpu_bufname);
374	if (cpu == NULL) {
375		fmd_hdl_debug(hdl,
376		    "\nCould not restore cpu %s\n",
377		    Lxcache->Lxcache_cpu_bufname);
378		return (NULL);
379	}
380	recovered_Lxcache = Lxcache;	/* save the recovered Lxcache */
381
382	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches); Lxcache != NULL;
383	    Lxcache = cmd_list_next(Lxcache)) {
384		if (strcmp(Lxcache->Lxcache_bufname, ptr->ptr_name) == 0)
385			break;
386	}
387
388	if (Lxcache == NULL) {
389
390		switch (recovered_Lxcache->Lxcache_version) {
391			case CMD_LxCACHE_VERSION_1:
392				Lxcache = Lxcache_wrapv1(hdl,
393				    (cmd_Lxcache_pers_t *)recovered_Lxcache,
394				    Lxcachesz);
395				break;
396			default:
397				fmd_hdl_abort(hdl, "unknown version (found %d) "
398				"for Lxcache state referenced by case %s.\n",
399				    recovered_Lxcache->Lxcache_version,
400				    fmd_case_uuid(hdl, cp));
401			break;
402		}
403
404		cmd_fmri_restore(hdl, &Lxcache->Lxcache_asru);
405		/*
406		 * We need to cleanup the information associated with
407		 * the timeout routine because these are not checkpointed
408		 * and cannot be retored.
409		 */
410		Lxcache->Lxcache_timeout_id = -1;
411		Lxcache->Lxcache_retry_count = 0;
412		Lxcache->Lxcache_nvl = NULL;
413		Lxcache->Lxcache_ep = NULL;
414		Lxcache->Lxcache_serdnm = NULL;
415
416		cmd_list_append(&cpu->cpu_Lxcaches, Lxcache);
417	}
418	serdnm = cmd_Lxcache_serdnm_create(hdl, cpu->cpu_cpuid,
419	    Lxcache->Lxcache_type, Lxcache->Lxcache_index,
420	    Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
421	fmd_hdl_debug(hdl,
422	    "cpu_id %d: serdname for the case is %s\n",
423	    cpu->cpu_cpuid, serdnm);
424	fmd_hdl_debug(hdl,
425	    "cpu_id %d: restoring the case for index %d way %d bit %d\n",
426	    cpu->cpu_cpuid, Lxcache->Lxcache_index,
427	    Lxcache->Lxcache_way, Lxcache->Lxcache_bit);
428	cmd_case_restore(hdl, &Lxcache->Lxcache_case, cp, serdnm);
429
430	return (Lxcache);
431}
432
433/*ARGSUSED*/
434void
435cmd_Lxcache_validate(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
436{
437	cmd_Lxcache_t *Lxcache, *next;
438
439	for (Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
440	    Lxcache != NULL; Lxcache = next) {
441		next = cmd_list_next(Lxcache);
442
443		if (fmd_nvl_fmri_unusable(hdl, Lxcache->Lxcache_asru_nvl)) {
444			cmd_Lxcache_destroy(hdl, cpu, Lxcache);
445		}
446	}
447}
448
449void
450cmd_Lxcache_dirty(fmd_hdl_t *hdl, cmd_Lxcache_t *Lxcache)
451{
452	if (fmd_buf_size(hdl, NULL, Lxcache->Lxcache_bufname) !=
453	    sizeof (cmd_Lxcache_pers_t))
454		fmd_buf_destroy(hdl, NULL, Lxcache->Lxcache_bufname);
455
456	/* No need to rewrite the FMRIs in the Lxcache - they don't change */
457	fmd_buf_write(hdl, NULL,
458	    Lxcache->Lxcache_bufname, &Lxcache->Lxcache_pers,
459	    sizeof (cmd_Lxcache_pers_t));
460}
461
462void
463cmd_Lxcache_fini(fmd_hdl_t *hdl, cmd_cpu_t *cpu)
464{
465	cmd_Lxcache_t *Lxcache;
466
467	while ((Lxcache = cmd_list_next(&cpu->cpu_Lxcaches)) != NULL)
468		cmd_Lxcache_free(hdl, cpu, Lxcache, FMD_B_FALSE);
469}
470
471char *
472cmd_Lxcache_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
473			    cmd_ptrsubtype_t pstype,
474			    int32_t index, int8_t way, int16_t bit)
475{
476	const char *fmt = "cpu_%d:%s_%d_%d_%d_serd";
477	const char *serdbase;
478	size_t sz;
479	char	*nm;
480
481	serdbase = cmd_type_to_str(pstype);
482	sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
483	nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
484	(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
485	return (nm);
486}
487
488char *
489cmd_Lxcache_anonymous_serdnm_create(fmd_hdl_t *hdl, uint32_t cpu_id,
490			    cmd_ptrsubtype_t pstype,
491			    int32_t index, int8_t way, int16_t bit)
492{
493	const char *fmt = "cpu_%d:%s_%d_%d_%d_anonymous_serd";
494	const char *serdbase;
495	size_t sz;
496	char	*nm;
497
498	serdbase = cmd_type_to_str(pstype);
499	sz = (snprintf(NULL, 0, fmt, cpu_id, serdbase, index, way, bit) + 1);
500	nm = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
501	(void) snprintf(nm, sz, fmt, cpu_id, serdbase, index, way, bit);
502	return (nm);
503}
504
505/*
506 * Count the number of SERD type 2 ways retired for a given cpu
507 * These are defined to be L3 Cache data retirements
508 */
509
510uint32_t
511cmd_Lx_index_count_type2_ways(cmd_cpu_t *cpu)
512{
513	cmd_Lxcache_t *cache = NULL;
514	uint32_t ret_count = 0;
515
516	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
517	    cache = cmd_list_next(cache)) {
518		if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
519		    (cache->Lxcache_type == CMD_PTR_CPU_L3DATA)) {
520			ret_count++;
521		}
522	}
523	return (ret_count);
524}
525/*
526 * Count the number of SERD type 1 ways retired for a given cpu
527 * These are defined to be L2 Data, tag and L3 Tag retirements
528 */
529
530uint32_t
531cmd_Lx_index_count_type1_ways(cmd_cpu_t *cpu)
532{
533	cmd_Lxcache_t *cache = NULL;
534	uint32_t ret_count = 0;
535
536	for (cache = cmd_list_next(&cpu->cpu_Lxcaches); cache != NULL;
537	    cache = cmd_list_next(cache)) {
538		if ((cache->Lxcache_flags & CMD_LxCACHE_F_RETIRED) &&
539		    ((cache->Lxcache_type == CMD_PTR_CPU_L2DATA) ||
540		    IS_TAG(cache->Lxcache_type))) {
541			ret_count++;
542		}
543	}
544	return (ret_count);
545}
546
547void
548cmd_fault_the_cpu(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
549    const char *fltnm)
550{
551	fmd_case_t	*cp;
552	const char 	*uuid;
553
554	cp = cmd_case_create(hdl, &cpu->cpu_header, pstype,
555	    &uuid);
556	fmd_hdl_debug(hdl,
557	    "\n%s:cpu_id %d Created case %s to retire CPU\n",
558	    fltnm, cpu->cpu_cpuid);
559
560	if ((errno = fmd_nvl_fmri_expand(hdl, cpu->cpu_asru_nvl)) != 0)
561		fmd_hdl_abort(hdl, "failed to build CPU fmri");
562
563	cmd_cpu_create_faultlist(hdl, cp, cpu, fltnm, NULL, HUNDRED_PERCENT);
564	fmd_case_solve(hdl, cp);
565}
566
567void
568cmd_retire_cpu_if_limits_exceeded(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
569    cmd_ptrsubtype_t pstype, const char *fltnm)
570{
571	int cpu_retired_1, cpu_retired_2;
572
573	/* Retrieve the number of retired ways for each category */
574
575	cpu_retired_1 = cmd_Lx_index_count_type1_ways(cpu);
576	cpu_retired_2 = cmd_Lx_index_count_type2_ways(cpu);
577	fmd_hdl_debug(hdl,
578	    "\n%s:CPU %d retired Type 1 way count is: %d\n",
579	    fltnm, cpu->cpu_cpuid, cpu_retired_1);
580	fmd_hdl_debug(hdl, "\n%s:CPU %d retired Type 2 way count is: %d\n",
581	    fltnm, cpu->cpu_cpuid, cpu_retired_2);
582
583	if (((cpu_retired_1 > CMD_CPU_SERD_AGG_1) ||
584	    (cpu_retired_2 > CMD_CPU_SERD_AGG_2)) &&
585	    (cpu->cpu_faulting != FMD_B_TRUE)) {
586		cmd_fault_the_cpu(hdl, cpu, pstype, fltnm);
587	}
588}
589
590void
591cmd_Lxcache_fault(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache,
592	const char *fltnm, nvlist_t *rsrc, uint_t cert)
593{
594	char fltmsg[64];
595	nvlist_t *flt;
596
597	(void) snprintf(fltmsg, sizeof (fltmsg), "fault.cpu.%s.%s-line",
598	    cmd_cpu_type2name(hdl, cpu->cpu_type), fltnm);
599	fmd_hdl_debug(hdl,
600	    "\n%s:cpu_id %d: fltmsg = %s\n",
601	    fltnm, cpu->cpu_cpuid, fltmsg);
602	if (Lxcache->Lxcache_flags & CMD_LxCACHE_F_FAULTING) {
603		return;
604	}
605	Lxcache->Lxcache_flags |= CMD_LxCACHE_F_FAULTING;
606	flt = fmd_nvl_create_fault(hdl, fltmsg, cert,
607	    Lxcache->Lxcache_asru.fmri_nvl, cpu->cpu_fru_nvl, rsrc);
608	if (nvlist_add_boolean_value(flt, FM_SUSPECT_MESSAGE, B_FALSE) != 0)
609		fmd_hdl_abort(hdl, "failed to add no-message member to fault");
610
611	fmd_hdl_debug(hdl,
612	    "\n%s:cpu_id %d: adding suspect list to case %s\n",
613	    fltnm, cpu->cpu_cpuid,
614	    fmd_case_uuid(hdl, Lxcache->Lxcache_case.cc_cp));
615	fmd_case_add_suspect(hdl, Lxcache->Lxcache_case.cc_cp, flt);
616	fmd_case_solve(hdl, Lxcache->Lxcache_case.cc_cp);
617	if (Lxcache->Lxcache_retired_fmri[0] == 0) {
618		if (cmd_fmri_nvl2str(hdl, Lxcache->Lxcache_asru.fmri_nvl,
619		    Lxcache->Lxcache_retired_fmri,
620		    sizeof (Lxcache->Lxcache_retired_fmri)) == -1)
621			fmd_hdl_debug(hdl,
622			    "\n%s:cpu_id %d: Failed to save the"
623			    " retired fmri string\n",
624			    fltnm, cpu->cpu_cpuid);
625		else
626			fmd_hdl_debug(hdl,
627			    "\n%s:cpu_id %d:Saved the retired fmri string %s\n",
628			    fltnm, cpu->cpu_cpuid,
629			    Lxcache->Lxcache_retired_fmri);
630	}
631	Lxcache->Lxcache_flags &= ~(CMD_LxCACHE_F_FAULTING);
632
633}
634
635void
636cmd_Lxcache_close(fmd_hdl_t *hdl, void *arg)
637{
638	cmd_cpu_t *cpu;
639	cmd_Lxcache_t *Lxcache;
640	cmd_case_t *cc;
641
642	Lxcache = (cmd_Lxcache_t *)arg;
643	fmd_hdl_debug(hdl, "cmd_Lxcache_close called  for %s\n",
644	    Lxcache->Lxcache_bufname);
645	cc = &Lxcache->Lxcache_case;
646
647	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
648	    cpu = cmd_list_next(cpu)) {
649		if (strcmp(cpu->cpu_bufname,
650		    Lxcache->Lxcache_cpu_bufname) == 0)
651			break;
652	}
653	if (cpu == NULL)
654		fmd_hdl_abort(hdl, "failed to find the cpu %s for %s\n",
655		    Lxcache->Lxcache_cpu_bufname,
656		    Lxcache->Lxcache_bufname);
657	/*
658	 * We will destroy the case and serd engine.
659	 * The rest will be destroyed when we retire the CPU
660	 * until then we keep the Lxcache strutures alive.
661	 */
662	if (cc->cc_cp != NULL) {
663		cmd_case_fini(hdl, cc->cc_cp, FMD_B_TRUE);
664		cc->cc_cp = NULL;
665	}
666	if (cc->cc_serdnm != NULL) {
667		if (fmd_serd_exists(hdl, cc->cc_serdnm))
668			fmd_serd_destroy(hdl, cc->cc_serdnm);
669		fmd_hdl_strfree(hdl, cc->cc_serdnm);
670		cc->cc_serdnm = NULL;
671	}
672
673}
674
675cmd_Lxcache_t *
676cmd_Lxcache_lookup_by_timeout_id(id_t id)
677{
678	cmd_cpu_t *cpu;
679	cmd_Lxcache_t *cmd_Lxcache;
680
681	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
682	    cpu = cmd_list_next(cpu)) {
683		for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
684		    cmd_Lxcache != NULL;
685		    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
686			if (cmd_Lxcache->Lxcache_timeout_id == id)
687				return (cmd_Lxcache);
688		}
689	}
690	return (NULL);
691}
692
693void
694cmd_Lxcache_gc(fmd_hdl_t *hdl)
695{
696	cmd_cpu_t *cpu;
697
698	for (cpu = cmd_list_next(&cmd.cmd_cpus); cpu != NULL;
699	    cpu = cmd_list_next(cpu))
700		cmd_Lxcache_validate(hdl, cpu);
701}
702
703cmd_evdisp_t
704get_tagdata(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype,
705	    int32_t index, uint64_t	*tag_data)
706{
707	int		fd;
708	cache_info_t	cache_info;
709
710	fd = open(mem_cache_device, O_RDONLY);
711	if (fd == -1) {
712		(void) printf(
713		    "cpu_id = %d could not open %s to read tag info.\n",
714		    cpu->cpu_cpuid, mem_cache_device);
715		return (CMD_EVD_BAD);
716	}
717	switch (pstype) {
718		case CMD_PTR_CPU_L2TAG:
719		case CMD_PTR_CPU_L2DATA:
720			cache_info.cache = L2_CACHE_TAG;
721			break;
722		case CMD_PTR_CPU_L3TAG:
723		case CMD_PTR_CPU_L3DATA:
724			cache_info.cache = L3_CACHE_TAG;
725			break;
726	}
727	cache_info.cpu_id = cpu->cpu_cpuid;
728	cache_info.index = index;
729	cache_info.datap = tag_data;
730	cache_info.way = 0;
731
732	if (test_mode) {
733
734		if (ioctl(fd, MEM_CACHE_READ_ERROR_INJECTED_TAGS, &cache_info)
735		    == -1) {
736			(void) printf("cpu_id = %d ioctl"
737			    " MEM_CACHE_READ_ERROR_INJECTED_TAGS failed"
738			    " errno = %d\n",
739			    cpu->cpu_cpuid, errno);
740			(void) close(fd);
741			return (CMD_EVD_BAD);
742		}
743	} else {
744		if (ioctl(fd, MEM_CACHE_READ_TAGS, &cache_info)
745		    == -1) {
746			(void) printf("cpu_id = %d ioctl"
747			    " MEM_CACHE_READ_TAGS failed"
748			    " errno = %d\n",
749			    cpu->cpu_cpuid, errno);
750			(void) close(fd);
751			return (CMD_EVD_BAD);
752		}
753	}
754	(void) close(fd);
755	return (CMD_EVD_OK);
756}
757
758int
759get_index_retired_ways(cmd_cpu_t *cpu, cmd_ptrsubtype_t pstype, int32_t index)
760{
761	int		i, retired_ways;
762	uint64_t	tag_data[PN_CACHE_NWAYS];
763
764	if (get_tagdata(cpu, pstype, index, tag_data) != 0) {
765		return (-1);
766	}
767	retired_ways = 0;
768	for (i = 0; i < PN_CACHE_NWAYS; i++) {
769		if ((tag_data[i] & CH_ECSTATE_MASK) ==
770		    PN_ECSTATE_NA)
771			retired_ways++;
772	}
773	return (retired_ways);
774}
775
776boolean_t
777cmd_cache_way_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
778{
779	const char		*fltnm;
780	cache_info_t    cache_info;
781	int ret, fd;
782
783	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
784	fd = open(mem_cache_device, O_RDWR);
785	if (fd == -1) {
786		fmd_hdl_debug(hdl,
787		    "fltnm:cpu_id %d open of %s failed\n",
788		    fltnm, cpu->cpu_cpuid, mem_cache_device);
789		return (B_FALSE);
790	}
791	cache_info.cpu_id = cpu->cpu_cpuid;
792	cache_info.way = Lxcache->Lxcache_way;
793	cache_info.bit = Lxcache->Lxcache_bit;
794	cache_info.index = Lxcache->Lxcache_index;
795
796	switch (Lxcache->Lxcache_type) {
797		case CMD_PTR_CPU_L2TAG:
798			cache_info.cache = L2_CACHE_TAG;
799			break;
800		case CMD_PTR_CPU_L2DATA:
801			cache_info.cache = L2_CACHE_DATA;
802			break;
803		case CMD_PTR_CPU_L3TAG:
804			cache_info.cache = L3_CACHE_TAG;
805			break;
806		case CMD_PTR_CPU_L3DATA:
807			cache_info.cache = L3_CACHE_DATA;
808			break;
809	}
810
811	fmd_hdl_debug(hdl,
812	    "\n%s:cpu %d: Retiring index %d, way %d bit %d\n",
813	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
814	    (int16_t)cache_info.bit);
815	ret = ioctl(fd, MEM_CACHE_RETIRE, &cache_info);
816	(void) close(fd);
817	if (ret == -1) {
818		fmd_hdl_debug(hdl,
819		    "fltnm:cpu_id %d MEM_CACHE_RETIRE ioctl failed\n",
820		    fltnm, cpu->cpu_cpuid);
821		return (B_FALSE);
822	}
823
824	return (B_TRUE);
825}
826
827boolean_t
828cmd_cache_way_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu, cmd_Lxcache_t *Lxcache)
829{
830	const char		*fltnm;
831	cache_info_t    cache_info;
832	int ret, fd;
833
834	fltnm = cmd_type_to_str(Lxcache->Lxcache_type);
835	fd = open(mem_cache_device, O_RDWR);
836	if (fd == -1) {
837		fmd_hdl_debug(hdl,
838		    "fltnm:cpu_id %d open of %s failed\n",
839		    fltnm, cpu->cpu_cpuid, mem_cache_device);
840		return (B_FALSE);
841	}
842	cache_info.cpu_id = cpu->cpu_cpuid;
843	cache_info.way = Lxcache->Lxcache_way;
844	cache_info.bit = Lxcache->Lxcache_bit;
845	cache_info.index = Lxcache->Lxcache_index;
846
847	switch (Lxcache->Lxcache_type) {
848		case CMD_PTR_CPU_L2TAG:
849			cache_info.cache = L2_CACHE_TAG;
850			break;
851		case CMD_PTR_CPU_L2DATA:
852			cache_info.cache = L2_CACHE_DATA;
853			break;
854		case CMD_PTR_CPU_L3TAG:
855			cache_info.cache = L3_CACHE_TAG;
856			break;
857		case CMD_PTR_CPU_L3DATA:
858			cache_info.cache = L3_CACHE_DATA;
859			break;
860	}
861
862	fmd_hdl_debug(hdl,
863	    "\n%s:cpu %d: Unretiring index %d, way %d bit %d\n",
864	    fltnm, cpu->cpu_cpuid, cache_info.index, cache_info.way,
865	    (int16_t)cache_info.bit);
866	ret = ioctl(fd, MEM_CACHE_UNRETIRE, &cache_info);
867	(void) close(fd);
868	if (ret == -1) {
869		fmd_hdl_debug(hdl,
870		    "fltnm:cpu_id %d MEM_CACHE_UNRETIRE ioctl failed\n",
871		    fltnm, cpu->cpu_cpuid);
872		return (B_FALSE);
873	}
874
875	return (B_TRUE);
876}
877
878static cmd_Lxcache_t *
879cmd_Lxcache_lookup_by_type_index_way_flags(cmd_cpu_t *cpu,
880    cmd_ptrsubtype_t type, int32_t index, int8_t way, int32_t flags)
881{
882	cmd_Lxcache_t *cmd_Lxcache;
883
884	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
885	    cmd_Lxcache != NULL;
886	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
887		if ((cmd_Lxcache->Lxcache_index == index) &&
888		    (cmd_Lxcache->Lxcache_way == way) &&
889		    (cmd_Lxcache->Lxcache_type == type) &&
890		    (cmd_Lxcache->Lxcache_flags & flags))
891			return (cmd_Lxcache);
892	}
893	return (NULL);
894}
895
896static int8_t
897cmd_Lxcache_get_bit_array_of_available_ways(cmd_cpu_t *cpu,
898    cmd_ptrsubtype_t type, int32_t index)
899{
900	uint8_t bit_array_of_unavailable_ways;
901	uint8_t bit_array_of_available_ways;
902	cmd_ptrsubtype_t match_type;
903	cmd_Lxcache_t *cmd_Lxcache;
904	uint8_t bit_array_of_retired_ways;
905
906
907	/*
908	 * We scan the Lxcache structures for this CPU and collect
909	 * the following 2 information.
910	 * - bit_array_of_retired_ways
911	 * - bit_array_of_unavailable_ways
912	 * If type is Lx_TAG then unavailable_ways will not include ways that
913	 * were retired due to DATA faults, because these ways can still be
914	 * re-retired for TAG faults.
915	 * If 3 ways have been retired then we protect the only remaining
916	 * unretired way by marking it as unavailable.
917	 */
918	bit_array_of_unavailable_ways = 0;
919	bit_array_of_retired_ways = 0;
920	switch (type) {
921		case CMD_PTR_CPU_L2TAG:
922			match_type = CMD_PTR_CPU_L2DATA;
923			break;
924		case CMD_PTR_CPU_L2DATA:
925			match_type = CMD_PTR_CPU_L2TAG;
926			break;
927		case CMD_PTR_CPU_L3TAG:
928			match_type = CMD_PTR_CPU_L3DATA;
929			break;
930		case CMD_PTR_CPU_L3DATA:
931			match_type = CMD_PTR_CPU_L3TAG;
932			break;
933	}
934
935	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
936	    cmd_Lxcache != NULL;
937	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
938		if ((cmd_Lxcache->Lxcache_index == index) &&
939		    ((cmd_Lxcache->Lxcache_type == type) ||
940		    (cmd_Lxcache->Lxcache_type == match_type)) &&
941		    (cmd_Lxcache->Lxcache_flags &
942		    (CMD_LxCACHE_F_RETIRED | CMD_LxCACHE_F_RERETIRED))) {
943			bit_array_of_retired_ways |=
944			    (1 << cmd_Lxcache->Lxcache_way);
945			/*
946			 * If we are calling this while handling TAG errors
947			 * we can reretire the cachelines retired due to DATA
948			 * errors. We will ignore the cachelnes that are
949			 * retired due to DATA faults.
950			 */
951			if ((type == CMD_PTR_CPU_L2TAG) &&
952			    (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L2DATA))
953				continue;
954			if ((type == CMD_PTR_CPU_L3TAG) &&
955			    (cmd_Lxcache->Lxcache_type == CMD_PTR_CPU_L3DATA))
956				continue;
957			bit_array_of_unavailable_ways |=
958			    (1 << cmd_Lxcache->Lxcache_way);
959		}
960	}
961	if (cmd_num_of_bits[bit_array_of_retired_ways & 0xf] == 3) {
962		/*
963		 * special case: 3 ways are already retired.
964		 * The Lone unretired way is set as 1, rest are set as 0.
965		 * We now OR this with bit_array_of_unavailable_ways
966		 * so that this unretired way will not be allocated.
967		 */
968		bit_array_of_retired_ways ^= 0xf;
969		bit_array_of_retired_ways &= 0xf;
970		bit_array_of_unavailable_ways |= bit_array_of_retired_ways;
971	}
972	bit_array_of_available_ways =
973	    ((bit_array_of_unavailable_ways ^ 0xf) & 0xf);
974	return (bit_array_of_available_ways);
975}
976
977
978/*
979 * Look for a way next to the specified way that is
980 * not in a retired state.
981 * We stop when way 3 is reached.
982 */
983int8_t
984cmd_Lxcache_get_next_retirable_way(cmd_cpu_t *cpu,
985    int32_t index, cmd_ptrsubtype_t pstype, int8_t specified_way)
986{
987	uint8_t bit_array_of_ways;
988	int8_t mask;
989
990	if (specified_way == 3)
991		return (-1);
992	bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
993	    cpu,
994	    pstype, index);
995	if (specified_way == 2)
996		mask = 0x8;
997	else if (specified_way == 1)
998		mask = 0xc;
999	else
1000		mask = 0xe;
1001	return (cmd_lowest_way[bit_array_of_ways & mask]);
1002}
1003
1004int8_t
1005cmd_Lxcache_get_lowest_retirable_way(cmd_cpu_t *cpu,
1006    int32_t index, cmd_ptrsubtype_t pstype)
1007{
1008	uint8_t bit_array_of_ways;
1009
1010	bit_array_of_ways = cmd_Lxcache_get_bit_array_of_available_ways(
1011	    cpu,
1012	    pstype, index);
1013	return (cmd_lowest_way[bit_array_of_ways]);
1014}
1015
1016cmd_Lxcache_t *
1017cmd_Lxcache_lookup_by_type_index_way_reason(cmd_cpu_t *cpu,
1018    cmd_ptrsubtype_t pstype, int32_t index, int8_t way, int32_t reason)
1019{
1020	cmd_Lxcache_t *cmd_Lxcache;
1021
1022	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1023	    cmd_Lxcache != NULL;
1024	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1025		if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1026		    (cmd_Lxcache->Lxcache_way == (uint32_t)way) &&
1027		    (cmd_Lxcache->Lxcache_reason & reason) &&
1028		    (cmd_Lxcache->Lxcache_type == pstype)) {
1029			return (cmd_Lxcache);
1030		}
1031	}
1032	return (NULL);
1033}
1034
1035cmd_Lxcache_t *
1036cmd_Lxcache_lookup_by_type_index_bit_reason(cmd_cpu_t *cpu,
1037    cmd_ptrsubtype_t pstype, int32_t index, int16_t bit, int32_t reason)
1038{
1039	cmd_Lxcache_t *cmd_Lxcache;
1040
1041	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1042	    cmd_Lxcache != NULL;
1043	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1044		if ((cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1045		    (cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
1046		    (cmd_Lxcache->Lxcache_reason & reason) &&
1047		    (cmd_Lxcache->Lxcache_type == pstype)) {
1048			return (cmd_Lxcache);
1049		}
1050	}
1051	return (NULL);
1052}
1053
1054void
1055cmd_Lxcache_destroy_anonymous_serd_engines(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1056    cmd_ptrsubtype_t type, int32_t index, int16_t bit)
1057{
1058	cmd_Lxcache_t *cmd_Lxcache;
1059	cmd_case_t *cc;
1060
1061	for (cmd_Lxcache = cmd_list_next(&cpu->cpu_Lxcaches);
1062	    cmd_Lxcache != NULL;
1063	    cmd_Lxcache = cmd_list_next(cmd_Lxcache)) {
1064		if ((cmd_Lxcache->Lxcache_type == type) &&
1065		    (cmd_Lxcache->Lxcache_index == (uint32_t)index) &&
1066		    (cmd_Lxcache->Lxcache_bit == (uint16_t)bit) &&
1067		    (cmd_Lxcache->Lxcache_way == (uint32_t)CMD_ANON_WAY)) {
1068			cc = &cmd_Lxcache->Lxcache_case;
1069			if (cc == NULL)
1070				continue;
1071			if (cc->cc_serdnm != NULL) {
1072				if (fmd_serd_exists(hdl, cc->cc_serdnm)) {
1073					fmd_hdl_debug(hdl,
1074					    "\n%s:cpu_id %d destroying SERD"
1075					    " engine %s\n",
1076					    cmd_type_to_str(type),
1077					    cpu->cpu_cpuid, cc->cc_serdnm);
1078					fmd_serd_destroy(hdl, cc->cc_serdnm);
1079				}
1080				fmd_hdl_strfree(hdl, cc->cc_serdnm);
1081				cc->cc_serdnm = NULL;
1082			}
1083		}
1084	}
1085}
1086
1087ssize_t
1088cmd_fmri_nvl2str(fmd_hdl_t *hdl, nvlist_t *nvl, char *buf, size_t buflen)
1089{
1090	uint8_t type;
1091	uint32_t cpuid, way;
1092	uint32_t	index;
1093	uint16_t	bit;
1094	char *serstr = NULL;
1095	char	missing_list[128];
1096
1097	missing_list[0] = 0;
1098	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID, &cpuid) != 0)
1099		(void) strcat(missing_list, FM_FMRI_CPU_ID);
1100	if (nvlist_lookup_string(nvl, FM_FMRI_CPU_SERIAL_ID, &serstr) != 0)
1101		(void) strcat(missing_list, FM_FMRI_CPU_SERIAL_ID);
1102	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_INDEX, &index) != 0)
1103		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_INDEX);
1104	if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_CACHE_WAY, &way) != 0)
1105		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_WAY);
1106	if (nvlist_lookup_uint16(nvl, FM_FMRI_CPU_CACHE_BIT, &bit) != 0)
1107		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_BIT);
1108	if (nvlist_lookup_uint8(nvl, FM_FMRI_CPU_CACHE_TYPE, &type) != 0)
1109		(void) strcat(missing_list, FM_FMRI_CPU_CACHE_TYPE);
1110
1111	if (strlen(missing_list) != 0) {
1112		fmd_hdl_debug(hdl,
1113		    "\ncmd_fmri_nvl2str: missing %s in fmri\n",
1114		    missing_list);
1115		return (-1);
1116	}
1117
1118	return (snprintf(buf, buflen,
1119	    "cpu:///%s=%u/%s=%s/%s=%u/%s=%u/%s=%d/%s=%d",
1120	    FM_FMRI_CPU_ID, cpuid,
1121	    FM_FMRI_CPU_SERIAL_ID, serstr,
1122	    FM_FMRI_CPU_CACHE_INDEX, index,
1123	    FM_FMRI_CPU_CACHE_WAY, way,
1124	    FM_FMRI_CPU_CACHE_BIT, bit,
1125	    FM_FMRI_CPU_CACHE_TYPE, type));
1126}
1127
1128boolean_t
1129cmd_create_case_for_Lxcache(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1130    cmd_Lxcache_t *cmd_Lxcache)
1131{
1132	const char *fltnm;
1133	const char *uuid;
1134
1135	if (cmd_Lxcache->Lxcache_case.cc_cp != NULL)
1136		return (B_TRUE);
1137	cmd_Lxcache->Lxcache_case.cc_cp = cmd_case_create(hdl,
1138	    &cmd_Lxcache->Lxcache_header, CMD_PTR_LxCACHE_CASE,
1139	    &uuid);
1140	fltnm = cmd_type_to_str(cmd_Lxcache->Lxcache_type);
1141	if (cmd_Lxcache->Lxcache_case.cc_cp == NULL) {
1142		fmd_hdl_debug(hdl,
1143		    "\n%s:cpu_id %d:Failed to create a case for"
1144		    " index %d way %d bit %d\n",
1145		    fltnm, cpu->cpu_cpuid,
1146		    cmd_Lxcache->Lxcache_index,
1147		    cmd_Lxcache->Lxcache_way, cmd_Lxcache->Lxcache_bit);
1148		return (B_FALSE);
1149	}
1150	fmd_hdl_debug(hdl,
1151	    "\n%s:cpu_id %d: New case %s created.\n",
1152	    fltnm, cpu->cpu_cpuid, uuid);
1153	if (cmd_Lxcache->Lxcache_ep)
1154		fmd_case_add_ereport(hdl, cmd_Lxcache->Lxcache_case.cc_cp,
1155		    cmd_Lxcache->Lxcache_ep);
1156	return (B_TRUE);
1157}
1158
1159static int
1160cmd_repair_fmri(fmd_hdl_t *hdl, char *buf)
1161{
1162	int err;
1163
1164	err = fmd_repair_asru(hdl, buf);
1165	if (err) {
1166		fmd_hdl_debug(hdl,
1167		    "Failed to repair %s err = %d\n", buf, err);
1168	}
1169	return (err);
1170}
1171
1172boolean_t
1173cmd_Lxcache_unretire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1174    cmd_Lxcache_t *unretire_this_Lxcache, const char *fltnm)
1175{
1176	cmd_ptrsubtype_t data_type;
1177	cmd_Lxcache_t *previously_retired_Lxcache;
1178	int	found_reretired_cacheline = 0;
1179	int	certainty;
1180
1181	/*
1182	 * If we are unretiring a cacheline retired due to suspected TAG
1183	 * fault, then we must first check if we are using a cacheline
1184	 * that was retired earlier for DATA fault.
1185	 * If so we will not unretire the cacheline.
1186	 * We will change the flags to reflect the current condition.
1187	 * We will return success, though.
1188	 */
1189	if (IS_TAG(unretire_this_Lxcache->Lxcache_type)) {
1190		if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
1191			data_type = CMD_PTR_CPU_L2DATA;
1192		if (unretire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
1193			data_type = CMD_PTR_CPU_L3DATA;
1194		fmd_hdl_debug(hdl,
1195		    "\n%s:cpuid %d checking if there is a %s"
1196		    " cacheline re-retired at this index %d and way %d\n",
1197		    fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
1198		    unretire_this_Lxcache->Lxcache_index,
1199		    unretire_this_Lxcache->Lxcache_way);
1200		previously_retired_Lxcache =
1201		    cmd_Lxcache_lookup_by_type_index_way_flags(
1202		    cpu, data_type, unretire_this_Lxcache->Lxcache_index,
1203		    unretire_this_Lxcache->Lxcache_way,
1204		    CMD_LxCACHE_F_RERETIRED);
1205		if (previously_retired_Lxcache) {
1206			fmd_hdl_debug(hdl,
1207			    "\n%s:cpuid %d Found a %s cacheline re-retired at"
1208			    " this index %d and way %d. Will mark this"
1209			    " RETIRED\n",
1210			    fltnm, cpu->cpu_cpuid, cmd_type_to_str(data_type),
1211			    unretire_this_Lxcache->Lxcache_index,
1212			    unretire_this_Lxcache->Lxcache_way);
1213			/*
1214			 * We call the cmd_Lxcache_fault to inform fmd
1215			 * about the suspect fmri. The cacheline is already
1216			 * retired but the existing suspect fmri is for TAG
1217			 * fault which will be removed in this routine.
1218			 */
1219			if (previously_retired_Lxcache->Lxcache_reason
1220			    == CMD_LXCONVICTED)
1221				certainty = HUNDRED_PERCENT;
1222			else
1223				certainty = SUSPECT_PERCENT;
1224			cmd_Lxcache_fault(hdl, cpu, previously_retired_Lxcache,
1225			    fltnm, cpu->cpu_fru_nvl, certainty);
1226			previously_retired_Lxcache->Lxcache_flags =
1227			    CMD_LxCACHE_F_RETIRED;
1228			/*
1229			 * Update persistent storage
1230			 */
1231			cmd_Lxcache_write(hdl, previously_retired_Lxcache);
1232			found_reretired_cacheline = 1;
1233		}
1234	} else {
1235		/*
1236		 * We have been called to unretire a cacheline retired
1237		 * earlier due to DATA errors.
1238		 * If this cacheline is marked RERETIRED then it means that
1239		 * the cacheline has been retired due to TAG errors and
1240		 * we should not be unretiring the cacheline.
1241		 */
1242		if (unretire_this_Lxcache->Lxcache_flags &
1243		    CMD_LxCACHE_F_RERETIRED) {
1244			fmd_hdl_debug(hdl,
1245			    "\n%s:cpuid %d The cacheline at index %d and"
1246			    " way %d  which we are attempting to unretire"
1247			    " is in RERETIRED state. Therefore we will not"
1248			    " unretire it but will mark it as RETIRED.\n",
1249			    fltnm, cpu->cpu_cpuid,
1250			    unretire_this_Lxcache->Lxcache_index,
1251			    unretire_this_Lxcache->Lxcache_way);
1252			found_reretired_cacheline = 1;
1253		}
1254	}
1255	/*
1256	 * if we did not find a RERETIRED cacheline above
1257	 * unretire the cacheline.
1258	 */
1259	if (!found_reretired_cacheline) {
1260		if (cmd_cache_way_unretire(hdl, cpu, unretire_this_Lxcache)
1261		    == B_FALSE)
1262			return (B_FALSE);
1263	}
1264	unretire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_UNRETIRED;
1265	/*
1266	 * We have exonerated the cacheline. We need to inform the fmd
1267	 * that we have repaired the suspect fmri that we retired earlier.
1268	 * The cpumem agent will not unretire cacheline in response to
1269	 * the list.repair events it receives.
1270	 */
1271	if (unretire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
1272		fmd_hdl_debug(hdl,
1273		    "\n%s:cpuid %d Repairing the retired fmri %s",
1274		    fltnm, cpu->cpu_cpuid,
1275		    unretire_this_Lxcache->Lxcache_retired_fmri);
1276		if (cmd_repair_fmri(hdl,
1277		    unretire_this_Lxcache->Lxcache_retired_fmri) != 0) {
1278			fmd_hdl_debug(hdl,
1279			    "\n%s:cpuid %d Failed to repair retired fmri.",
1280			    fltnm, cpu->cpu_cpuid);
1281			/*
1282			 * We need to retire the cacheline that we just
1283			 * unretired.
1284			 */
1285			if (cmd_cache_way_retire(hdl, cpu,
1286			    unretire_this_Lxcache) == B_FALSE) {
1287				/*
1288				 * A hopeless situation.
1289				 * cannot maintain consistency of cacheline
1290				 * sate between fmd and DE.
1291				 * Aborting the DE.
1292				 */
1293				fmd_hdl_abort(hdl,
1294				    "\n%s:cpuid %d We are unable to repair"
1295				    " the fmri we just unretired and are"
1296				    " unable to restore the DE and fmd to"
1297				    " a sane state.\n",
1298				    fltnm, cpu->cpu_cpuid);
1299			}
1300			return (B_FALSE);
1301		} else {
1302			unretire_this_Lxcache->Lxcache_retired_fmri[0] = 0;
1303		}
1304	}
1305	return (B_TRUE);
1306}
1307
1308boolean_t
1309cmd_Lxcache_retire(fmd_hdl_t *hdl, cmd_cpu_t *cpu,
1310    cmd_Lxcache_t *retire_this_Lxcache, const char *fltnm, uint_t cert)
1311{
1312	cmd_Lxcache_t *previously_retired_Lxcache;
1313	cmd_ptrsubtype_t data_type;
1314	const char	*uuid;
1315	char	suspect_list[128];
1316
1317	fmd_hdl_debug(hdl,
1318	    "\n%s:cpu_id %d: cmd_Lxcache_retire called for index %d"
1319	    " way %d bit %d\n",
1320	    fltnm, cpu->cpu_cpuid, retire_this_Lxcache->Lxcache_index,
1321	    retire_this_Lxcache->Lxcache_way, retire_this_Lxcache->Lxcache_bit);
1322	if (fmd_case_solved(hdl, retire_this_Lxcache->Lxcache_case.cc_cp)) {
1323		/*
1324		 * Case solved implies that the cache line is already
1325		 * retired as SUSPECT_0_TAG and we are here to retire this
1326		 * as SUSPECT_1_TAG.
1327		 * We will first repair the retired cacheline
1328		 * so that it does not get retired during replay for
1329		 *  wrong reason.
1330		 * If we are able to repair the retired cacheline we close the
1331		 * case and open a new case for it.
1332		 */
1333		if (retire_this_Lxcache->Lxcache_reason !=
1334		    CMD_LXSUSPECT_0_TAG) {
1335			fmd_hdl_debug(hdl,
1336			    "\n%s:cpu_id %d: Unexpected condition encountered."
1337			    " Expected the reason for retirement as"
1338			    " SUSPECT_0_TAG however found the reason"
1339			    " to be %s\n",
1340			    fltnm, cpu->cpu_cpuid,
1341			    cmd_reason_to_str(
1342			    retire_this_Lxcache->Lxcache_reason));
1343			return (B_FALSE);
1344		}
1345		fmd_hdl_debug(hdl,
1346		    "\n%s:cpu_id %d: We are re-retiring SUSPECT_0_TAG as"
1347		    " SUSPECT_1_TAG index %d way %d bit %d\n",
1348		    fltnm, cpu->cpu_cpuid,
1349		    retire_this_Lxcache->Lxcache_index,
1350		    retire_this_Lxcache->Lxcache_way,
1351		    retire_this_Lxcache->Lxcache_bit);
1352		fmd_hdl_debug(hdl,
1353		    "\n%s:cpu_id %d: The existing case for this Lxcache has"
1354		    " has been already solved. We will first repair the suspect"
1355		    " cacheline and if we are successful then close this case,"
1356		    " and open a new case.\n",
1357		    fltnm, cpu->cpu_cpuid);
1358		/*
1359		 * repair the retired cacheline.
1360		 */
1361		if (retire_this_Lxcache->Lxcache_retired_fmri[0] != 0) {
1362			fmd_hdl_debug(hdl,
1363			    "\n%s:cpuid %d Repairing the retired suspect"
1364			    " cacheline %s\n",
1365			    fltnm, cpu->cpu_cpuid,
1366			    retire_this_Lxcache->Lxcache_retired_fmri);
1367			if (cmd_repair_fmri(hdl,
1368			    retire_this_Lxcache->Lxcache_retired_fmri) != 0) {
1369				fmd_hdl_debug(hdl,
1370				    "\n%s:cpuid %d Failed to repair the"
1371				    " retired fmri.",
1372				    fltnm, cpu->cpu_cpuid);
1373				return (B_FALSE);
1374			} else {
1375				retire_this_Lxcache->Lxcache_retired_fmri[0] =
1376				    0;
1377			}
1378		}
1379		uuid = fmd_case_uuid(hdl,
1380		    retire_this_Lxcache->Lxcache_case.cc_cp);
1381		fmd_hdl_debug(hdl,
1382		    "\n%s:cpuid %d: Closing the case %s\n",
1383		    fltnm, cpu->cpu_cpuid, uuid);
1384		cmd_case_fini(hdl, retire_this_Lxcache->Lxcache_case.cc_cp,
1385		    FMD_B_TRUE);
1386		retire_this_Lxcache->Lxcache_case.cc_cp = NULL;
1387		if (cmd_create_case_for_Lxcache(hdl, cpu, retire_this_Lxcache)
1388		    == B_FALSE)
1389			return (B_FALSE);
1390	} else {
1391		/*
1392		 * Not a SUSPECT_0_TAG.
1393		 * We should be entering this path if the cacheline is
1394		 * transitioning  from ACTIVE/UNRETIRED to RETIRED state.
1395		 * If the cacheline state is not as expected we print debug
1396		 * message and return failure.
1397		 */
1398		if ((retire_this_Lxcache->Lxcache_flags !=
1399		    CMD_LxCACHE_F_ACTIVE) &&
1400		    (retire_this_Lxcache->Lxcache_flags
1401		    != CMD_LxCACHE_F_UNRETIRED)) {
1402			/*
1403			 * Unexpected condition.
1404			 */
1405			fmd_hdl_debug(hdl,
1406			    "\n%s:cpu_id %d:Unexpected state %s for the"
1407			    " cacheline at index %d way %d encountered.\n",
1408			    fltnm, cpu->cpu_cpuid,
1409			    cmd_flags_to_str(
1410			    retire_this_Lxcache->Lxcache_flags),
1411			    retire_this_Lxcache->Lxcache_index,
1412			    retire_this_Lxcache->Lxcache_way);
1413			return (B_FALSE);
1414		}
1415	}
1416	suspect_list[0] = 0;
1417	(void) cmd_fmri_nvl2str(hdl, retire_this_Lxcache->Lxcache_asru.fmri_nvl,
1418	    suspect_list, sizeof (suspect_list));
1419	fmd_hdl_debug(hdl,
1420	    "\n%s:cpu_id %d:current suspect list is %s\n",
1421	    fltnm, cpu->cpu_cpuid, suspect_list);
1422	cmd_Lxcache_fault(hdl, cpu, retire_this_Lxcache, fltnm,
1423	    cpu->cpu_fru_nvl,
1424	    cert);
1425	retire_this_Lxcache->Lxcache_flags = CMD_LxCACHE_F_RETIRED;
1426	if (IS_TAG(retire_this_Lxcache->Lxcache_type)) {
1427		/*
1428		 * If the cacheline we just retired was retired earlier
1429		 * due to DATA faults we mark the Lxcache
1430		 * corresponding to DATA as RERETIRED.
1431		 */
1432		if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L2TAG)
1433			data_type = CMD_PTR_CPU_L2DATA;
1434		if (retire_this_Lxcache->Lxcache_type == CMD_PTR_CPU_L3TAG)
1435			data_type = CMD_PTR_CPU_L3DATA;
1436		fmd_hdl_debug(hdl,
1437		    "\n%s:cpuid %d checking if there is a %s"
1438		    " cacheline retired at this index %d way %d\n",
1439		    fltnm, cpu->cpu_cpuid,
1440		    cmd_type_to_str(data_type),
1441		    retire_this_Lxcache->Lxcache_index,
1442		    retire_this_Lxcache->Lxcache_way);
1443		previously_retired_Lxcache =
1444		    cmd_Lxcache_lookup_by_type_index_way_flags(cpu,
1445		    data_type, retire_this_Lxcache->Lxcache_index,
1446		    retire_this_Lxcache->Lxcache_way, CMD_LxCACHE_F_RETIRED);
1447		if (previously_retired_Lxcache) {
1448			fmd_hdl_debug(hdl,
1449			    "\n%s:cpu_id %d: Found  index %d way %d"
1450			    " retired earlier. Will mark this Lxcache"
1451			    " as RERETIRED.\n",
1452			    fltnm, cpu->cpu_cpuid,
1453			    retire_this_Lxcache->Lxcache_index,
1454			    retire_this_Lxcache->Lxcache_way);
1455			/*
1456			 * First repair the retired cacheline and if successful
1457			 * close the existing case and create a new case.
1458			 */
1459
1460			/*
1461			 * This cacheline has already been retired for
1462			 * TAG fault.
1463			 * Repair the previously retired DATA fault cacheline so
1464			 * that it does not get retired by fmd during replay.
1465			 */
1466			if (previously_retired_Lxcache->Lxcache_retired_fmri[0]
1467			    != 0) {
1468				fmd_hdl_debug(hdl,
1469				    "\n%s:cpuid %d Repairing the cacheline"
1470				    " retired due to data errors. %s\n",
1471				    fltnm, cpu->cpu_cpuid,
1472				    previously_retired_Lxcache->
1473				    Lxcache_retired_fmri);
1474				if (cmd_repair_fmri(hdl,
1475				    previously_retired_Lxcache->
1476				    Lxcache_retired_fmri)
1477				    != 0) {
1478					fmd_hdl_debug(hdl,
1479					    "\n%s:cpuid %d Failed to repair the"
1480					    " retired fmri.",
1481					    fltnm, cpu->cpu_cpuid);
1482					return (B_FALSE);
1483				} else {
1484					previously_retired_Lxcache->
1485					    Lxcache_retired_fmri[0] = 0;
1486				}
1487			}
1488			cmd_case_fini(hdl,
1489			    previously_retired_Lxcache->Lxcache_case.cc_cp,
1490			    FMD_B_TRUE);
1491			previously_retired_Lxcache->Lxcache_case.cc_cp = NULL;
1492			previously_retired_Lxcache->Lxcache_flags =
1493			    CMD_LxCACHE_F_RERETIRED;
1494			/*
1495			 * Update persistent storage
1496			 */
1497			cmd_Lxcache_write(hdl, previously_retired_Lxcache);
1498			/*
1499			 * Create a new case so that this Lxcache structure
1500			 * gets restored on replay.
1501			 */
1502			if (cmd_create_case_for_Lxcache(hdl, cpu,
1503			    previously_retired_Lxcache) == B_FALSE)
1504				return (B_FALSE);
1505		}
1506	}
1507	cmd_retire_cpu_if_limits_exceeded(hdl, cpu,
1508	    retire_this_Lxcache->Lxcache_type,
1509	    fltnm);
1510	return (B_TRUE);
1511}
1512