1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * PowerNV cpuidle code
4 *
5 * Copyright 2015 IBM Corp.
6 */
7
8#include <linux/types.h>
9#include <linux/mm.h>
10#include <linux/slab.h>
11#include <linux/of.h>
12#include <linux/device.h>
13#include <linux/cpu.h>
14
15#include <asm/firmware.h>
16#include <asm/interrupt.h>
17#include <asm/machdep.h>
18#include <asm/opal.h>
19#include <asm/cputhreads.h>
20#include <asm/cpuidle.h>
21#include <asm/code-patching.h>
22#include <asm/smp.h>
23#include <asm/runlatch.h>
24#include <asm/dbell.h>
25
26#include "powernv.h"
27#include "subcore.h"
28
29/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
30#define MAX_STOP_STATE	0xF
31
32#define P9_STOP_SPR_MSR 2000
33#define P9_STOP_SPR_PSSCR      855
34
35static u32 supported_cpuidle_states;
36struct pnv_idle_states_t *pnv_idle_states;
37int nr_pnv_idle_states;
38
/*
 * The default stop state that will be used by the ppc_md.power_save
 * function on platforms that support the stop instruction.
 */
43static u64 pnv_default_stop_val;
44static u64 pnv_default_stop_mask;
45static bool default_stop_found;
46
/*
 * First stop state levels at which SPR loss and timebase loss can occur.
 */
50static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
51static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
52
53/*
54 * psscr value and mask of the deepest stop idle state.
55 * Used when a cpu is offlined.
56 */
57static u64 pnv_deepest_stop_psscr_val;
58static u64 pnv_deepest_stop_psscr_mask;
59static u64 pnv_deepest_stop_flag;
60static bool deepest_stop_found;
61
62static unsigned long power7_offline_type;
63
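/*
 * Program, via the OPAL stop-api, the SPR values that firmware must
 * restore when a thread wakes from a deep idle state that loses full
 * hypervisor context. Called at boot when such states are supported.
 */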
64static int __init pnv_save_sprs_for_deep_states(void)
65{
66	int cpu;
67	int rc;
68
	/*
	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
	 * all cpus at boot. Read these register values on the current cpu and
	 * use the same values for all cpus.
	 */
74	uint64_t lpcr_val	= mfspr(SPRN_LPCR);
75	uint64_t hid0_val	= mfspr(SPRN_HID0);
76	uint64_t hmeer_val	= mfspr(SPRN_HMEER);
77	uint64_t msr_val = MSR_IDLE;
78	uint64_t psscr_val = pnv_deepest_stop_psscr_val;
79
80	for_each_present_cpu(cpu) {
81		uint64_t pir = get_hard_smp_processor_id(cpu);
82		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
83
84		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
85		if (rc != 0)
86			return rc;
87
88		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
89		if (rc != 0)
90			return rc;
91
92		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
93			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
94			if (rc)
95				return rc;
96
97			rc = opal_slw_set_reg(pir,
98					      P9_STOP_SPR_PSSCR, psscr_val);
99
100			if (rc)
101				return rc;
102		}
103
104		/* HIDs are per core registers */
105		if (cpu_thread_in_core(cpu) == 0) {
106
107			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
108			if (rc != 0)
109				return rc;
110
111			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
112			if (rc != 0)
113				return rc;
114
115			/* Only p8 needs to set extra HID registers */
116			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
117				uint64_t hid1_val = mfspr(SPRN_HID1);
118				uint64_t hid4_val = mfspr(SPRN_HID4);
119				uint64_t hid5_val = mfspr(SPRN_HID5);
120
121				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
122				if (rc != 0)
123					return rc;
124
125				rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
126				if (rc != 0)
127					return rc;
128
129				rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
130				if (rc != 0)
131					return rc;
132			}
133		}
134	}
135
136	return 0;
137}
138
139u32 pnv_get_supported_cpuidle_states(void)
140{
141	return supported_cpuidle_states;
142}
143EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
144
static void pnv_fastsleep_workaround_apply(void *info)
{
148	int cpu = smp_processor_id();
149	int rc;
150	int *err = info;
151
152	if (cpu_first_thread_sibling(cpu) != cpu)
153		return;
154
155	rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
156					OPAL_CONFIG_IDLE_APPLY);
157	if (rc)
158		*err = 1;
159}
160
161static bool power7_fastsleep_workaround_entry = true;
162static bool power7_fastsleep_workaround_exit = true;
163
164/*
165 * Used to store fastsleep workaround state
166 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
167 * 1 - Workaround applied once, never undone.
168 */
169static u8 fastsleep_workaround_applyonce;
170
171static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
172		struct device_attribute *attr, char *buf)
173{
174	return sprintf(buf, "%u\n", fastsleep_workaround_applyonce);
175}
176
177static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
178		struct device_attribute *attr, const char *buf,
179		size_t count)
180{
181	int err;
182	u8 val;
183
184	if (kstrtou8(buf, 0, &val) || val != 1)
185		return -EINVAL;
186
187	if (fastsleep_workaround_applyonce == 1)
188		return count;
189
	/*
	 * fastsleep_workaround_applyonce = 1 implies that the fastsleep
	 * workaround needs to be left in the 'applied' state on all
	 * the cores. Do this by:
	 * 1. Disabling the 'undo' workaround in the fastsleep exit path
	 * 2. Sending IPIs to all the cores which have at least one online thread
	 * 3. Disabling the 'apply' workaround in the fastsleep entry path
	 *
	 * There is no need to send an IPI to cores which have all threads
	 * offlined, as the last thread of the core entering fastsleep or a
	 * deeper state would have applied the workaround.
	 */
202	power7_fastsleep_workaround_exit = false;
203
204	cpus_read_lock();
205	on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
206	cpus_read_unlock();
207	if (err) {
		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
209		goto fail;
210	}
211
212	power7_fastsleep_workaround_entry = false;
213
214	fastsleep_workaround_applyonce = 1;
215
216	return count;
217fail:
218	return -EIO;
219}
220
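/*
 * This attribute lives on the cpu subsystem root, so it is typically
 * exposed as /sys/devices/system/cpu/fastsleep_workaround_applyonce.
 * Writing 1 applies the workaround on all cores and leaves it applied.
 */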
221static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
222			show_fastsleep_workaround_applyonce,
223			store_fastsleep_workaround_applyonce);
224
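/*
 * Core idle state is tracked in the first thread's paca:
 *
 * idle_state holds one bit per thread of the core; a set bit means the
 * thread is running, a clear bit means it has entered a state-losing
 * idle. Further bits are used by the winkle accounting so the wakeup
 * path can tell whether per-core resources were lost.
 *
 * The NR_PNV_CORE_IDLE_LOCK_BIT in idle_lock serialises idle_state
 * updates between sibling threads.
 */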
225static inline void atomic_start_thread_idle(void)
226{
227	int cpu = raw_smp_processor_id();
228	int first = cpu_first_thread_sibling(cpu);
229	int thread_nr = cpu_thread_in_core(cpu);
230	unsigned long *state = &paca_ptrs[first]->idle_state;
231
232	clear_bit(thread_nr, state);
233}
234
235static inline void atomic_stop_thread_idle(void)
236{
237	int cpu = raw_smp_processor_id();
238	int first = cpu_first_thread_sibling(cpu);
239	int thread_nr = cpu_thread_in_core(cpu);
240	unsigned long *state = &paca_ptrs[first]->idle_state;
241
242	set_bit(thread_nr, state);
243}
244
245static inline void atomic_lock_thread_idle(void)
246{
247	int cpu = raw_smp_processor_id();
248	int first = cpu_first_thread_sibling(cpu);
249	unsigned long *lock = &paca_ptrs[first]->idle_lock;
250
251	while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, lock)))
252		barrier();
253}
254
255static inline void atomic_unlock_and_stop_thread_idle(void)
256{
257	int cpu = raw_smp_processor_id();
258	int first = cpu_first_thread_sibling(cpu);
259	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
260	unsigned long *state = &paca_ptrs[first]->idle_state;
261	unsigned long *lock = &paca_ptrs[first]->idle_lock;
262	u64 s = READ_ONCE(*state);
263	u64 new, tmp;
264
265	BUG_ON(!(READ_ONCE(*lock) & PNV_CORE_IDLE_LOCK_BIT));
266	BUG_ON(s & thread);
267
268again:
269	new = s | thread;
270	tmp = cmpxchg(state, s, new);
271	if (unlikely(tmp != s)) {
272		s = tmp;
273		goto again;
274	}
275	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
276}
277
278static inline void atomic_unlock_thread_idle(void)
279{
280	int cpu = raw_smp_processor_id();
281	int first = cpu_first_thread_sibling(cpu);
282	unsigned long *lock = &paca_ptrs[first]->idle_lock;
283
284	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, lock));
285	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, lock);
286}
287
288/* P7 and P8 */
289struct p7_sprs {
290	/* per core */
291	u64 tscr;
292	u64 worc;
293
294	/* per subcore */
295	u64 sdr1;
296	u64 rpr;
297
298	/* per thread */
299	u64 lpcr;
300	u64 hfscr;
301	u64 fscr;
302	u64 purr;
303	u64 spurr;
304	u64 dscr;
305	u64 wort;
306
307	/* per thread SPRs that get lost in shallow states */
308	u64 amr;
309	u64 iamr;
310	u64 uamor;
311	/* amor is restored to constant ~0 */
312};
313
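/*
 * Enter nap, fastsleep or winkle on POWER7/POWER8: save any SPRs that
 * the requested state can lose, execute the idle instruction, then
 * restore per-core, per-subcore and per-thread state depending on how
 * much was lost (as reported in SRR1). Runs in real mode with
 * interrupts hard disabled.
 */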
314static unsigned long power7_idle_insn(unsigned long type)
315{
316	int cpu = raw_smp_processor_id();
317	int first = cpu_first_thread_sibling(cpu);
318	unsigned long *state = &paca_ptrs[first]->idle_state;
319	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
320	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
321	unsigned long srr1;
322	bool full_winkle;
323	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
324	bool sprs_saved = false;
325	int rc;
326
327	if (unlikely(type != PNV_THREAD_NAP)) {
328		atomic_lock_thread_idle();
329
330		BUG_ON(!(*state & thread));
331		*state &= ~thread;
332
333		if (power7_fastsleep_workaround_entry) {
334			if ((*state & core_thread_mask) == 0) {
335				rc = opal_config_cpu_idle_state(
336						OPAL_CONFIG_IDLE_FASTSLEEP,
337						OPAL_CONFIG_IDLE_APPLY);
338				BUG_ON(rc);
339			}
340		}
341
342		if (type == PNV_THREAD_WINKLE) {
343			sprs.tscr	= mfspr(SPRN_TSCR);
344			sprs.worc	= mfspr(SPRN_WORC);
345
346			sprs.sdr1	= mfspr(SPRN_SDR1);
347			sprs.rpr	= mfspr(SPRN_RPR);
348
349			sprs.lpcr	= mfspr(SPRN_LPCR);
350			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
351				sprs.hfscr	= mfspr(SPRN_HFSCR);
352				sprs.fscr	= mfspr(SPRN_FSCR);
353			}
354			sprs.purr	= mfspr(SPRN_PURR);
355			sprs.spurr	= mfspr(SPRN_SPURR);
356			sprs.dscr	= mfspr(SPRN_DSCR);
357			sprs.wort	= mfspr(SPRN_WORT);
358
359			sprs_saved = true;
360
361			/*
362			 * Increment winkle counter and set all winkle bits if
363			 * all threads are winkling. This allows wakeup side to
364			 * distinguish between fast sleep and winkle state
365			 * loss. Fast sleep still has to resync the timebase so
366			 * this may not be a really big win.
367			 */
368			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
369			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
370					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
371					== threads_per_core)
372				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
373			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
374		}
375
376		atomic_unlock_thread_idle();
377	}
378
379	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
380		sprs.amr	= mfspr(SPRN_AMR);
381		sprs.iamr	= mfspr(SPRN_IAMR);
382		sprs.uamor	= mfspr(SPRN_UAMOR);
383	}
384
385	local_paca->thread_idle_state = type;
386	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
387	local_paca->thread_idle_state = PNV_THREAD_RUNNING;
388
389	WARN_ON_ONCE(!srr1);
390	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
391
392	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
393		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
394			/*
395			 * We don't need an isync after the mtsprs here because
396			 * the upcoming mtmsrd is execution synchronizing.
397			 */
398			mtspr(SPRN_AMR,		sprs.amr);
399			mtspr(SPRN_IAMR,	sprs.iamr);
400			mtspr(SPRN_AMOR,	~0);
401			mtspr(SPRN_UAMOR,	sprs.uamor);
402		}
403	}
404
405	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
406		hmi_exception_realmode(NULL);
407
408	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
409		if (unlikely(type != PNV_THREAD_NAP)) {
410			atomic_lock_thread_idle();
411			if (type == PNV_THREAD_WINKLE) {
412				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
413				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
414				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
415			}
416			atomic_unlock_and_stop_thread_idle();
417		}
418		return srr1;
419	}
420
421	/* HV state loss */
422	BUG_ON(type == PNV_THREAD_NAP);
423
424	atomic_lock_thread_idle();
425
426	full_winkle = false;
427	if (type == PNV_THREAD_WINKLE) {
428		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
429		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
430		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
431			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
432			full_winkle = true;
433			BUG_ON(!sprs_saved);
434		}
435	}
436
437	WARN_ON(*state & thread);
438
439	if ((*state & core_thread_mask) != 0)
440		goto core_woken;
441
442	/* Per-core SPRs */
443	if (full_winkle) {
444		mtspr(SPRN_TSCR,	sprs.tscr);
445		mtspr(SPRN_WORC,	sprs.worc);
446	}
447
448	if (power7_fastsleep_workaround_exit) {
449		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
450						OPAL_CONFIG_IDLE_UNDO);
451		BUG_ON(rc);
452	}
453
454	/* TB */
455	if (opal_resync_timebase() != OPAL_SUCCESS)
456		BUG();
457
458core_woken:
459	if (!full_winkle)
460		goto subcore_woken;
461
462	if ((*state & local_paca->subcore_sibling_mask) != 0)
463		goto subcore_woken;
464
465	/* Per-subcore SPRs */
466	mtspr(SPRN_SDR1,	sprs.sdr1);
467	mtspr(SPRN_RPR,		sprs.rpr);
468
469subcore_woken:
470	/*
471	 * isync after restoring shared SPRs and before unlocking. Unlock
472	 * only contains hwsync which does not necessarily do the right
473	 * thing for SPRs.
474	 */
475	isync();
476	atomic_unlock_and_stop_thread_idle();
477
478	/* Fast sleep does not lose SPRs */
479	if (!full_winkle)
480		return srr1;
481
482	/* Per-thread SPRs */
483	mtspr(SPRN_LPCR,	sprs.lpcr);
484	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
485		mtspr(SPRN_HFSCR,	sprs.hfscr);
486		mtspr(SPRN_FSCR,	sprs.fscr);
487	}
488	mtspr(SPRN_PURR,	sprs.purr);
489	mtspr(SPRN_SPURR,	sprs.spurr);
490	mtspr(SPRN_DSCR,	sprs.dscr);
491	mtspr(SPRN_WORT,	sprs.wort);
492
493	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);
494
495#ifdef CONFIG_PPC_64S_HASH_MMU
496	/*
497	 * The SLB has to be restored here, but it sometimes still
498	 * contains entries, so the __ variant must be used to prevent
499	 * multi hits.
500	 */
501	__slb_restore_bolted_realmode();
502#endif
503
504	return srr1;
505}
506
507extern unsigned long idle_kvm_start_guest(unsigned long srr1);
508
509#ifdef CONFIG_HOTPLUG_CPU
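/*
 * Offline idle entry for POWER7/POWER8: like power7_idle_type(), but
 * coordinates with KVM secondary-thread handling, since an offline
 * hardware thread may be claimed to run a guest.
 */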
510static unsigned long power7_offline(void)
511{
512	unsigned long srr1;
513
514	mtmsr(MSR_IDLE);
515
516#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
517	/* Tell KVM we're entering idle. */
518	/******************************************************/
519	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
520	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
521	/* MUST occur in real mode, i.e. with the MMU off,    */
522	/* and the MMU must stay off until we clear this flag */
523	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
524	/* pnv_powersave_wakeup in this file.                 */
525	/* The reason is that another thread can switch the   */
526	/* MMU to a guest context whenever this flag is set   */
527	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
528	/* that would potentially cause this thread to start  */
529	/* executing instructions from guest memory in        */
530	/* hypervisor mode, leading to a host crash or data   */
531	/* corruption, or worse.                              */
532	/******************************************************/
533	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
534#endif
535
536	__ppc64_runlatch_off();
537	srr1 = power7_idle_insn(power7_offline_type);
538	__ppc64_runlatch_on();
539
540#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
541	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
542	/* Order setting hwthread_state vs. testing hwthread_req */
543	smp_mb();
544	if (local_paca->kvm_hstate.hwthread_req)
545		srr1 = idle_kvm_start_guest(srr1);
546#endif
547
548	mtmsr(MSR_KERNEL);
549
550	return srr1;
551}
552#endif
553
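/*
 * Idle entry used by the cpuidle driver for nap/fastsleep/winkle: set
 * up the MSR, runlatch and irq soft-mask state around
 * power7_idle_insn(), then replay any interrupt indicated by the
 * wakeup SRR1 value.
 */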
554void power7_idle_type(unsigned long type)
555{
556	unsigned long srr1;
557
558	if (!prep_irq_for_idle_irqsoff())
559		return;
560
561	mtmsr(MSR_IDLE);
562	__ppc64_runlatch_off();
563	srr1 = power7_idle_insn(type);
564	__ppc64_runlatch_on();
565	mtmsr(MSR_KERNEL);
566
567	fini_irq_for_idle_irqsoff();
568	irq_set_pending_from_srr1(srr1);
569}
570
571static void power7_idle(void)
572{
573	if (!powersave_nap)
574		return;
575
576	power7_idle_type(PNV_THREAD_NAP);
577}
578
579struct p9_sprs {
580	/* per core */
581	u64 ptcr;
582	u64 rpr;
583	u64 tscr;
584	u64 ldbar;
585
586	/* per thread */
587	u64 lpcr;
588	u64 hfscr;
589	u64 fscr;
590	u64 pid;
591	u64 purr;
592	u64 spurr;
593	u64 dscr;
594	u64 ciabr;
595
596	u64 mmcra;
597	u32 mmcr0;
598	u32 mmcr1;
599	u64 mmcr2;
600
601	/* per thread SPRs that get lost in shallow states */
602	u64 amr;
603	u64 iamr;
604	u64 amor;
605	u64 uamor;
606};
607
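/*
 * Enter a stop state on POWER9. For EC=ESL=0 states the wakeup is
 * synchronous and no state is lost. Otherwise, SPRs that the requested
 * level may lose are saved and restored, and PSSCR.PLS is checked on
 * wakeup to decide how much per-core state (including the timebase)
 * must be re-established.
 */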
608static unsigned long power9_idle_stop(unsigned long psscr)
609{
610	int cpu = raw_smp_processor_id();
611	int first = cpu_first_thread_sibling(cpu);
612	unsigned long *state = &paca_ptrs[first]->idle_state;
613	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
614	unsigned long srr1;
615	unsigned long pls;
616	unsigned long mmcr0 = 0;
617	unsigned long mmcra = 0;
618	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
619	bool sprs_saved = false;
620
621	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
622		/* EC=ESL=0 case */
623
624		/*
625		 * Wake synchronously. SRESET via xscom may still cause
626		 * a 0x100 powersave wakeup with SRR1 reason!
627		 */
628		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
629		if (likely(!srr1))
630			return 0;
631
632		/*
633		 * Registers not saved, can't recover!
634		 * This would be a hardware bug
635		 */
636		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
637
638		goto out;
639	}
640
641	/* EC=ESL=1 case */
642#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
643	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
644		local_paca->requested_psscr = psscr;
645		/* order setting requested_psscr vs testing dont_stop */
646		smp_mb();
647		if (atomic_read(&local_paca->dont_stop)) {
648			local_paca->requested_psscr = 0;
649			return 0;
650		}
651	}
652#endif
653
654	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
655		 /*
656		  * POWER9 DD2 can incorrectly set PMAO when waking up
657		  * after a state-loss idle. Saving and restoring MMCR0
658		  * over idle is a workaround.
659		  */
660		mmcr0		= mfspr(SPRN_MMCR0);
661	}
662
663	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
664		sprs.lpcr	= mfspr(SPRN_LPCR);
665		sprs.hfscr	= mfspr(SPRN_HFSCR);
666		sprs.fscr	= mfspr(SPRN_FSCR);
667		sprs.pid	= mfspr(SPRN_PID);
668		sprs.purr	= mfspr(SPRN_PURR);
669		sprs.spurr	= mfspr(SPRN_SPURR);
670		sprs.dscr	= mfspr(SPRN_DSCR);
671		sprs.ciabr	= mfspr(SPRN_CIABR);
672
673		sprs.mmcra	= mfspr(SPRN_MMCRA);
674		sprs.mmcr0	= mfspr(SPRN_MMCR0);
675		sprs.mmcr1	= mfspr(SPRN_MMCR1);
676		sprs.mmcr2	= mfspr(SPRN_MMCR2);
677
678		sprs.ptcr	= mfspr(SPRN_PTCR);
679		sprs.rpr	= mfspr(SPRN_RPR);
680		sprs.tscr	= mfspr(SPRN_TSCR);
681		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
682			sprs.ldbar = mfspr(SPRN_LDBAR);
683
684		sprs_saved = true;
685
686		atomic_start_thread_idle();
687	}
688
689	sprs.amr	= mfspr(SPRN_AMR);
690	sprs.iamr	= mfspr(SPRN_IAMR);
691	sprs.uamor	= mfspr(SPRN_UAMOR);
692
693	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */
694
695#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
696	local_paca->requested_psscr = 0;
697#endif
698
699	psscr = mfspr(SPRN_PSSCR);
700
701	WARN_ON_ONCE(!srr1);
702	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
703
704	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
705		/*
706		 * We don't need an isync after the mtsprs here because the
707		 * upcoming mtmsrd is execution synchronizing.
708		 */
709		mtspr(SPRN_AMR,		sprs.amr);
710		mtspr(SPRN_IAMR,	sprs.iamr);
711		mtspr(SPRN_AMOR,	~0);
712		mtspr(SPRN_UAMOR,	sprs.uamor);
713
714		/*
715		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
716		 * might have been corrupted and needs flushing. We also need
717		 * to reload MMCR0 (see mmcr0 comment above).
718		 */
719		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
720			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
721			mtspr(SPRN_MMCR0, mmcr0);
722		}
723
724		/*
725		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
726		 * to ensure the PMU starts running.
727		 */
728		mmcra = mfspr(SPRN_MMCRA);
729		mmcra |= PPC_BIT(60);
730		mtspr(SPRN_MMCRA, mmcra);
731		mmcra &= ~PPC_BIT(60);
732		mtspr(SPRN_MMCRA, mmcra);
733	}
734
735	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
736		hmi_exception_realmode(NULL);
737
738	/*
739	 * On POWER9, SRR1 bits do not match exactly as expected.
740	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
741	 * just always test PSSCR for SPR/TB state loss.
742	 */
743	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
744	if (likely(pls < deep_spr_loss_state)) {
745		if (sprs_saved)
746			atomic_stop_thread_idle();
747		goto out;
748	}
749
750	/* HV state loss */
751	BUG_ON(!sprs_saved);
752
753	atomic_lock_thread_idle();
754
755	if ((*state & core_thread_mask) != 0)
756		goto core_woken;
757
758	/* Per-core SPRs */
759	mtspr(SPRN_PTCR,	sprs.ptcr);
760	mtspr(SPRN_RPR,		sprs.rpr);
761	mtspr(SPRN_TSCR,	sprs.tscr);
762
763	if (pls >= pnv_first_tb_loss_level) {
764		/* TB loss */
765		if (opal_resync_timebase() != OPAL_SUCCESS)
766			BUG();
767	}
768
769	/*
770	 * isync after restoring shared SPRs and before unlocking. Unlock
771	 * only contains hwsync which does not necessarily do the right
772	 * thing for SPRs.
773	 */
774	isync();
775
776core_woken:
777	atomic_unlock_and_stop_thread_idle();
778
779	/* Per-thread SPRs */
780	mtspr(SPRN_LPCR,	sprs.lpcr);
781	mtspr(SPRN_HFSCR,	sprs.hfscr);
782	mtspr(SPRN_FSCR,	sprs.fscr);
783	mtspr(SPRN_PID,		sprs.pid);
784	mtspr(SPRN_PURR,	sprs.purr);
785	mtspr(SPRN_SPURR,	sprs.spurr);
786	mtspr(SPRN_DSCR,	sprs.dscr);
787	mtspr(SPRN_CIABR,	sprs.ciabr);
788
789	mtspr(SPRN_MMCRA,	sprs.mmcra);
790	mtspr(SPRN_MMCR0,	sprs.mmcr0);
791	mtspr(SPRN_MMCR1,	sprs.mmcr1);
792	mtspr(SPRN_MMCR2,	sprs.mmcr2);
793	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
794		mtspr(SPRN_LDBAR, sprs.ldbar);
795
796	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);
797
798	if (!radix_enabled())
799		__slb_restore_bolted_realmode();
800
801out:
802	mtmsr(MSR_KERNEL);
803
804	return srr1;
805}
806
807#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking all other threads
 * not to stop, and by sending a message to any that are currently in a
 * stop state.
 * Must be called with preemption disabled.
 */
817void pnv_power9_force_smt4_catch(void)
818{
819	int cpu, cpu0, thr;
820	int awake_threads = 1;		/* this thread is awake */
821	int poke_threads = 0;
822	int need_awake = threads_per_core;
823
824	cpu = smp_processor_id();
825	cpu0 = cpu & ~(threads_per_core - 1);
826	for (thr = 0; thr < threads_per_core; ++thr) {
827		if (cpu != cpu0 + thr)
828			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
829	}
830	/* order setting dont_stop vs testing requested_psscr */
831	smp_mb();
832	for (thr = 0; thr < threads_per_core; ++thr) {
833		if (!paca_ptrs[cpu0+thr]->requested_psscr)
834			++awake_threads;
835		else
836			poke_threads |= (1 << thr);
837	}
838
839	/* If at least 3 threads are awake, the core is in SMT4 already */
840	if (awake_threads < need_awake) {
841		/* We have to wake some threads; we'll use msgsnd */
842		for (thr = 0; thr < threads_per_core; ++thr) {
843			if (poke_threads & (1 << thr)) {
844				ppc_msgsnd_sync();
845				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
846					   paca_ptrs[cpu0+thr]->hw_cpu_id);
847			}
848		}
849		/* now spin until at least 3 threads are awake */
850		do {
851			for (thr = 0; thr < threads_per_core; ++thr) {
852				if ((poke_threads & (1 << thr)) &&
853				    !paca_ptrs[cpu0+thr]->requested_psscr) {
854					++awake_threads;
855					poke_threads &= ~(1 << thr);
856				}
857			}
858		} while (awake_threads < need_awake);
859	}
860}
861EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
862
863void pnv_power9_force_smt4_release(void)
864{
865	int cpu, cpu0, thr;
866
867	cpu = smp_processor_id();
868	cpu0 = cpu & ~(threads_per_core - 1);
869
870	/* clear all the dont_stop flags */
871	for (thr = 0; thr < threads_per_core; ++thr) {
872		if (cpu != cpu0 + thr)
873			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
874	}
875}
876EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
877#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
878
879struct p10_sprs {
880	/*
881	 * SPRs that get lost in shallow states:
882	 *
883	 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
884	 * isa300 idle routines restore CR, LR.
885	 * CTR is volatile
886	 * idle thread doesn't use FP or VEC
887	 * kernel doesn't use TAR
888	 * HSPRG1 is only live in HV interrupt entry
889	 * SPRG2 is only live in KVM guests, KVM handles it.
890	 */
891};
892
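/*
 * Enter a stop state on POWER10. This mirrors the POWER9 path, but the
 * save/restore of SPRs for deep state loss is not implemented yet (see
 * the XXX markers below); such states are filtered out at init time.
 */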
893static unsigned long power10_idle_stop(unsigned long psscr)
894{
895	int cpu = raw_smp_processor_id();
896	int first = cpu_first_thread_sibling(cpu);
897	unsigned long *state = &paca_ptrs[first]->idle_state;
898	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
899	unsigned long srr1;
900	unsigned long pls;
901//	struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
902	bool sprs_saved = false;
903
904	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
905		/* EC=ESL=0 case */
906
907		/*
908		 * Wake synchronously. SRESET via xscom may still cause
909		 * a 0x100 powersave wakeup with SRR1 reason!
910		 */
911		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
912		if (likely(!srr1))
913			return 0;
914
915		/*
916		 * Registers not saved, can't recover!
917		 * This would be a hardware bug
918		 */
919		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
920
921		goto out;
922	}
923
924	/* EC=ESL=1 case */
925	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
926		/* XXX: save SPRs for deep state loss here. */
927
928		sprs_saved = true;
929
930		atomic_start_thread_idle();
931	}
932
933	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */
934
935	psscr = mfspr(SPRN_PSSCR);
936
937	WARN_ON_ONCE(!srr1);
938	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
939
940	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
941		hmi_exception_realmode(NULL);
942
943	/*
944	 * On POWER10, SRR1 bits do not match exactly as expected.
945	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
946	 * just always test PSSCR for SPR/TB state loss.
947	 */
948	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
949	if (likely(pls < deep_spr_loss_state)) {
950		if (sprs_saved)
951			atomic_stop_thread_idle();
952		goto out;
953	}
954
955	/* HV state loss */
956	BUG_ON(!sprs_saved);
957
958	atomic_lock_thread_idle();
959
960	if ((*state & core_thread_mask) != 0)
961		goto core_woken;
962
963	/* XXX: restore per-core SPRs here */
964
965	if (pls >= pnv_first_tb_loss_level) {
966		/* TB loss */
967		if (opal_resync_timebase() != OPAL_SUCCESS)
968			BUG();
969	}
970
971	/*
972	 * isync after restoring shared SPRs and before unlocking. Unlock
973	 * only contains hwsync which does not necessarily do the right
974	 * thing for SPRs.
975	 */
976	isync();
977
978core_woken:
979	atomic_unlock_and_stop_thread_idle();
980
981	/* XXX: restore per-thread SPRs here */
982
983	if (!radix_enabled())
984		__slb_restore_bolted_realmode();
985
986out:
987	mtmsr(MSR_KERNEL);
988
989	return srr1;
990}
991
992#ifdef CONFIG_HOTPLUG_CPU
993static unsigned long arch300_offline_stop(unsigned long psscr)
994{
995	unsigned long srr1;
996
997	if (cpu_has_feature(CPU_FTR_ARCH_31))
998		srr1 = power10_idle_stop(psscr);
999	else
1000		srr1 = power9_idle_stop(psscr);
1001
1002	return srr1;
1003}
1004#endif
1005
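/*
 * Idle entry used by the cpuidle driver on ISA 3.0 CPUs: build the
 * PSSCR from the requested value/mask, enter stop, then replay any
 * interrupt indicated by the wakeup SRR1 value.
 */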
1006void arch300_idle_type(unsigned long stop_psscr_val,
1007				      unsigned long stop_psscr_mask)
1008{
1009	unsigned long psscr;
1010	unsigned long srr1;
1011
1012	if (!prep_irq_for_idle_irqsoff())
1013		return;
1014
1015	psscr = mfspr(SPRN_PSSCR);
1016	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
1017
1018	__ppc64_runlatch_off();
1019	if (cpu_has_feature(CPU_FTR_ARCH_31))
1020		srr1 = power10_idle_stop(psscr);
1021	else
1022		srr1 = power9_idle_stop(psscr);
1023	__ppc64_runlatch_on();
1024
1025	fini_irq_for_idle_irqsoff();
1026
1027	irq_set_pending_from_srr1(srr1);
1028}
1029
1030/*
1031 * Used for ppc_md.power_save which needs a function with no parameters
1032 */
1033static void arch300_idle(void)
1034{
1035	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
1036}
1037
1038#ifdef CONFIG_HOTPLUG_CPU
1039
1040void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
1041{
1042	u64 pir = get_hard_smp_processor_id(cpu);
1043
1044	mtspr(SPRN_LPCR, lpcr_val);
1045
1046	/*
1047	 * Program the LPCR via stop-api only if the deepest stop state
1048	 * can lose hypervisor context.
1049	 */
1050	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
1051		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
1052}
1053
/*
 * pnv_cpu_offline: Put the CPU into the deepest available platform
 * idle state when it is offlined. Called with interrupts hard disabled
 * and no lazy irq pending.
 */
1059unsigned long pnv_cpu_offline(unsigned int cpu)
1060{
1061	unsigned long srr1;
1062
1063	__ppc64_runlatch_off();
1064
1065	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
1066		unsigned long psscr;
1067
1068		psscr = mfspr(SPRN_PSSCR);
1069		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
1070						pnv_deepest_stop_psscr_val;
1071		srr1 = arch300_offline_stop(psscr);
1072	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
1073		srr1 = power7_offline();
1074	} else {
1075		/* This is the fallback method. We emulate snooze */
1076		while (!generic_check_cpu_restart(cpu)) {
1077			HMT_low();
1078			HMT_very_low();
1079		}
1080		srr1 = 0;
1081		HMT_medium();
1082	}
1083
1084	__ppc64_runlatch_on();
1085
1086	return srr1;
1087}
1088#endif
1089
1090/*
1091 * Power ISA 3.0 idle initialization.
1092 *
1093 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
1094 * Register (PSSCR) to control idle behavior.
1095 *
1096 * PSSCR layout:
1097 * ----------------------------------------------------------
1098 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
1099 * ----------------------------------------------------------
1100 * 0      4     41   42    43   44     48    54   56    60
1101 *
1102 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS). This field indicates the
 *	lowest power-saving state the thread entered since the stop
 *	instruction was last executed.
1106 *
 *	Bit 41 - Status Disable (SD)
1108 *	0 - Shows PLS entries
1109 *	1 - PLS entries are all 0
1110 *
1111 *	Bit 42 - Enable State Loss
1112 *	0 - No state is lost irrespective of other fields
1113 *	1 - Allows state loss
1114 *
1115 *	Bit 43 - Exit Criterion
1116 *	0 - Exit from power-save mode on any interrupt
1117 *	1 - Exit from power-save mode controlled by LPCR's PECE bits
1118 *
1119 *	Bits 44:47 - Power-Saving Level Limit
1120 *	This limits the power-saving level that can be entered into.
1121 *
 *	Bits 60:63 - Requested Level
 *	Used to specify which power-saving level must be entered on executing
 *	the stop instruction.
1125 */
1126
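/*
 * For example, with older firmware that only supplies the Requested
 * Level (psscr_mask == 0xf), a state with psscr_val == 0x3 ends up
 * requesting stop level 3 with the remaining fields taken from
 * PSSCR_HV_DEFAULT_VAL, as handled below.
 */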
1127int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
1128{
1129	int err = 0;
1130
1131	/*
1132	 * psscr_mask == 0xf indicates an older firmware.
1133	 * Set remaining fields of psscr to the default values.
1134	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
1135	 */
1136	if (*psscr_mask == 0xf) {
1137		*psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL;
1138		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
1139		return err;
1140	}
1141
1142	/*
1143	 * New firmware is expected to set the psscr_val bits correctly.
1144	 * Validate that the following invariants are correctly maintained by
1145	 * the new firmware.
1146	 * - ESL bit value matches the EC bit value.
1147	 * - ESL bit is set for all the deep stop states.
1148	 */
1149	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) {
1150		err = ERR_EC_ESL_MISMATCH;
1151	} else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1152		GET_PSSCR_ESL(*psscr_val) == 0) {
1153		err = ERR_DEEP_STATE_ESL_MISMATCH;
1154	}
1155
1156	return err;
1157}
1158
/*
 * pnv_arch300_idle_init: Initializes the default idle state, first
 *                        deep idle state and deepest idle state on
 *                        ISA 3.0 CPUs, based on the idle states
 *                        discovered from the device tree.
 */
1169static void __init pnv_arch300_idle_init(void)
1170{
1171	u64 max_residency_ns = 0;
1172	int i;
1173
1174	/* stop is not really architected, we only have p9,p10 drivers */
1175	if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
1176		return;
1177
1178	/*
1179	 * pnv_deepest_stop_{val,mask} should be set to values corresponding to
1180	 * the deepest stop state.
1181	 *
1182	 * pnv_default_stop_{val,mask} should be set to values corresponding to
1183	 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
1184	 */
1185	pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
1186	deep_spr_loss_state = MAX_STOP_STATE + 1;
1187	for (i = 0; i < nr_pnv_idle_states; i++) {
1188		int err;
1189		struct pnv_idle_states_t *state = &pnv_idle_states[i];
1190		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
1191
1192		/* No deep loss driver implemented for POWER10 yet */
1193		if (pvr_version_is(PVR_POWER10) &&
1194				state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT))
1195			continue;
1196
1197		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1198		     (pnv_first_tb_loss_level > psscr_rl))
1199			pnv_first_tb_loss_level = psscr_rl;
1200
1201		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
1202		     (deep_spr_loss_state > psscr_rl))
1203			deep_spr_loss_state = psscr_rl;
1204
1205		/*
1206		 * The idle code does not deal with TB loss occurring
1207		 * in a shallower state than SPR loss, so force it to
1208		 * behave like SPRs are lost if TB is lost. POWER9 would
1209		 * never encounter this, but a POWER8 core would if it
1210		 * implemented the stop instruction. So this is for forward
1211		 * compatibility.
1212		 */
1213		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
1214		     (deep_spr_loss_state > psscr_rl))
1215			deep_spr_loss_state = psscr_rl;
1216
1217		err = validate_psscr_val_mask(&state->psscr_val,
1218					      &state->psscr_mask,
1219					      state->flags);
1220		if (err) {
1221			report_invalid_psscr_val(state->psscr_val, err);
1222			continue;
1223		}
1224
1225		state->valid = true;
1226
1227		if (max_residency_ns < state->residency_ns) {
1228			max_residency_ns = state->residency_ns;
1229			pnv_deepest_stop_psscr_val = state->psscr_val;
1230			pnv_deepest_stop_psscr_mask = state->psscr_mask;
1231			pnv_deepest_stop_flag = state->flags;
1232			deepest_stop_found = true;
1233		}
1234
1235		if (!default_stop_found &&
1236		    (state->flags & OPAL_PM_STOP_INST_FAST)) {
1237			pnv_default_stop_val = state->psscr_val;
1238			pnv_default_stop_mask = state->psscr_mask;
1239			default_stop_found = true;
1240			WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
1241		}
1242	}
1243
1244	if (unlikely(!default_stop_found)) {
1245		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
1246	} else {
1247		ppc_md.power_save = arch300_idle;
1248		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
1249			pnv_default_stop_val, pnv_default_stop_mask);
1250	}
1251
1252	if (unlikely(!deepest_stop_found)) {
		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
1254	} else {
1255		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
1256			pnv_deepest_stop_psscr_val,
1257			pnv_deepest_stop_psscr_mask);
1258	}
1259
1260	pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
1261		deep_spr_loss_state);
1262
1263	pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
1264		pnv_first_tb_loss_level);
1265}
1266
1267static void __init pnv_disable_deep_states(void)
1268{
1269	/*
1270	 * The stop-api is unable to restore hypervisor
1271	 * resources on wakeup from platform idle states which
1272	 * lose full context. So disable such states.
1273	 */
1274	supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
1275	pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
1276	pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
1277
1278	if (cpu_has_feature(CPU_FTR_ARCH_300) &&
1279	    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
1280		/*
1281		 * Use the default stop state for CPU-Hotplug
1282		 * if available.
1283		 */
1284		if (default_stop_found) {
1285			pnv_deepest_stop_psscr_val = pnv_default_stop_val;
1286			pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
1287			pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
1288				pnv_deepest_stop_psscr_val);
1289		} else { /* Fallback to snooze loop for CPU-Hotplug */
1290			deepest_stop_found = false;
1291			pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
1292		}
1293	}
1294}
1295
1296/*
1297 * Probe device tree for supported idle states
1298 */
1299static void __init pnv_probe_idle_states(void)
1300{
1301	int i;
1302
1303	if (nr_pnv_idle_states < 0) {
1304		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
1305		return;
1306	}
1307
1308	if (cpu_has_feature(CPU_FTR_ARCH_300))
1309		pnv_arch300_idle_init();
1310
1311	for (i = 0; i < nr_pnv_idle_states; i++)
1312		supported_cpuidle_states |= pnv_idle_states[i].flags;
1313}
1314
1315/*
1316 * This function parses device-tree and populates all the information
1317 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
1318 * which is the number of cpuidle states discovered through device-tree.
1319 */
1320
1321static int __init pnv_parse_cpuidle_dt(void)
1322{
1323	struct device_node *np;
1324	int nr_idle_states, i;
1325	int rc = 0;
1326	u32 *temp_u32;
1327	u64 *temp_u64;
1328	const char **temp_string;
1329
1330	np = of_find_node_by_path("/ibm,opal/power-mgt");
1331	if (!np) {
1332		pr_warn("opal: PowerMgmt Node not found\n");
1333		return -ENODEV;
1334	}
1335	nr_idle_states = of_property_count_u32_elems(np,
1336						"ibm,cpu-idle-state-flags");
1337
1338	pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
1339				  GFP_KERNEL);
1340	temp_u32 = kcalloc(nr_idle_states, sizeof(u32),  GFP_KERNEL);
1341	temp_u64 = kcalloc(nr_idle_states, sizeof(u64),  GFP_KERNEL);
1342	temp_string = kcalloc(nr_idle_states, sizeof(char *),  GFP_KERNEL);
1343
1344	if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
1345		pr_err("Could not allocate memory for dt parsing\n");
1346		rc = -ENOMEM;
1347		goto out;
1348	}
1349
1350	/* Read flags */
1351	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
1352				       temp_u32, nr_idle_states)) {
1353		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
1354		rc = -EINVAL;
1355		goto out;
1356	}
1357	for (i = 0; i < nr_idle_states; i++)
1358		pnv_idle_states[i].flags = temp_u32[i];
1359
1360	/* Read latencies */
1361	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
1362				       temp_u32, nr_idle_states)) {
1363		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
1364		rc = -EINVAL;
1365		goto out;
1366	}
1367	for (i = 0; i < nr_idle_states; i++)
1368		pnv_idle_states[i].latency_ns = temp_u32[i];
1369
1370	/* Read residencies */
1371	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
1372				       temp_u32, nr_idle_states)) {
1373		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
1374		rc = -EINVAL;
1375		goto out;
1376	}
1377	for (i = 0; i < nr_idle_states; i++)
1378		pnv_idle_states[i].residency_ns = temp_u32[i];
1379
1380	/* For power9 and later */
1381	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1382		/* Read pm_crtl_val */
1383		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
1384					       temp_u64, nr_idle_states)) {
1385			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
1386			rc = -EINVAL;
1387			goto out;
1388		}
1389		for (i = 0; i < nr_idle_states; i++)
1390			pnv_idle_states[i].psscr_val = temp_u64[i];
1391
1392		/* Read pm_crtl_mask */
1393		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
1394					       temp_u64, nr_idle_states)) {
1395			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
1396			rc = -EINVAL;
1397			goto out;
1398		}
1399		for (i = 0; i < nr_idle_states; i++)
1400			pnv_idle_states[i].psscr_mask = temp_u64[i];
1401	}
1402
	/*
	 * The power8 specific properties ibm,cpu-idle-state-pmicr-mask and
	 * ibm,cpu-idle-state-pmicr-val were never used and there is no plan
	 * to use them in the near future. Hence, these properties are not
	 * parsed here.
	 */
1408
1409	if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
1410					  temp_string, nr_idle_states) < 0) {
1411		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
1412		rc = -EINVAL;
1413		goto out;
1414	}
1415	for (i = 0; i < nr_idle_states; i++)
1416		strscpy(pnv_idle_states[i].name, temp_string[i],
1417			PNV_IDLE_NAME_LEN);
1418	nr_pnv_idle_states = nr_idle_states;
1419	rc = 0;
1420out:
1421	kfree(temp_u32);
1422	kfree(temp_u64);
1423	kfree(temp_string);
1424	of_node_put(np);
1425	return rc;
1426}
1427
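/*
 * Late init: parse the idle states from the device tree, select the
 * default and deepest stop states, wire up ppc_md.power_save and the
 * CPU offline path, and program SPR restore values for any states that
 * lose full hypervisor context.
 */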
1428static int __init pnv_init_idle_states(void)
1429{
1430	int cpu;
1431	int rc = 0;
1432
1433	/* Set up PACA fields */
1434	for_each_present_cpu(cpu) {
1435		struct paca_struct *p = paca_ptrs[cpu];
1436
1437		p->idle_state = 0;
1438		if (cpu == cpu_first_thread_sibling(cpu))
1439			p->idle_state = (1 << threads_per_core) - 1;
1440
1441		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1442			/* P7/P8 nap */
1443			p->thread_idle_state = PNV_THREAD_RUNNING;
1444		} else if (pvr_version_is(PVR_POWER9)) {
1445			/* P9 stop workarounds */
1446#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
1447			p->requested_psscr = 0;
1448			atomic_set(&p->dont_stop, 0);
1449#endif
1450		}
1451	}
1452
	/* In case we error out, nr_pnv_idle_states will be zero */
1454	nr_pnv_idle_states = 0;
1455	supported_cpuidle_states = 0;
1456
1457	if (cpuidle_disable != IDLE_NO_OVERRIDE)
1458		goto out;
1459	rc = pnv_parse_cpuidle_dt();
1460	if (rc)
1461		return rc;
1462	pnv_probe_idle_states();
1463
1464	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1465		if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
1466			power7_fastsleep_workaround_entry = false;
1467			power7_fastsleep_workaround_exit = false;
1468		} else {
1469			struct device *dev_root;
			/*
			 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
			 * a workaround is needed to use fastsleep. Provide a
			 * sysfs control to choose how this workaround is to be
			 * applied.
			 */
1476			dev_root = bus_get_dev_root(&cpu_subsys);
1477			if (dev_root) {
1478				device_create_file(dev_root,
1479						   &dev_attr_fastsleep_workaround_applyonce);
1480				put_device(dev_root);
1481			}
1482		}
1483
1484		update_subcore_sibling_mask();
1485
1486		if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
1487			ppc_md.power_save = power7_idle;
1488			power7_offline_type = PNV_THREAD_NAP;
1489		}
1490
1491		if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
1492			   (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
1493			power7_offline_type = PNV_THREAD_WINKLE;
1494		else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
1495			   (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
1496			power7_offline_type = PNV_THREAD_SLEEP;
1497	}
1498
1499	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
1500		if (pnv_save_sprs_for_deep_states())
1501			pnv_disable_deep_states();
1502	}
1503
1504out:
1505	return 0;
1506}
1507machine_subsys_initcall(powernv, pnv_init_idle_states);
1508